diff --git a/rust/src/language_representation.rs b/rust/src/language_representation.rs new file mode 100644 index 000000000..32e175e0a --- /dev/null +++ b/rust/src/language_representation.rs @@ -0,0 +1,933 @@ +use core::ffi; +use std::mem::MaybeUninit; +use std::ptr; + +use binaryninjacore_sys::*; + +use crate::architecture::{Architecture, CoreArchitecture}; +use crate::basic_block::{BasicBlock, BlockContext}; +use crate::binary_view::BinaryView; +use crate::disassembly::{ + DisassemblySettings, DisassemblyTextLine, InstructionTextToken, InstructionTextTokenType, +}; +use crate::function::{Function, HighlightColor}; +use crate::high_level_il::{HighLevelILFunction, HighLevelInstructionIndex}; +use crate::rc::{Array, CoreArrayProvider, CoreArrayProviderInner, Ref, RefCountable}; +use crate::string::{BnStrCompatible, BnString}; +use crate::type_parser::CoreTypeParser; +use crate::type_printer::CoreTypePrinter; +use crate::variable::Variable; + +pub type InstructionTextTokenContext = BNInstructionTextTokenContext; +pub type ScopeType = BNScopeType; +pub type BraceRequirement = BNBraceRequirement; +pub type TokenEmitterExpr = BNTokenEmitterExpr; +pub type SymbolDisplayType = BNSymbolDisplayType; +pub type OperatorPrecedence = BNOperatorPrecedence; +pub type SymbolDisplayResult = BNSymbolDisplayResult; +pub type LineFormatterSettings = BNLineFormatterSettings; + +macro_rules! impl_simple_functions { + ( + $type_name:ident, + $( + $function_name:ident -> $function_name_ffi:ident( + $( + $arg_name:ident: + $arg_type:ty + $( + : $arg_type_ffi:ty = + $( $rust2ffi:expr )? + )? + ),* $(,)? + ) $(-> $ret_type:ty $(| $ret_name:ident = $ret2rust:expr)?)? + ),* $(,)? 
+ ) => { + impl $type_name { + $( + pub fn $function_name(&self, $($arg_name: $arg_type),*) $(-> $ret_type)* { + $( + $($( + let $arg_name: $arg_type_ffi = $rust2ffi; + )*)* + )* + let result = unsafe { $function_name_ffi(self.as_raw(), $($arg_name),*) }; + $($( + let $ret_name = result; + let result = $ret2rust; + )*)* + result + } + )* + } + }; +} + +pub trait CustomLanguageRepresentationFunction: Send + Sync + 'static { + fn init_token_emitter(&self, tokens: &HighLevelILTokenEmitter); + fn expr_text( + &self, + il: &HighLevelILFunction, + expr_index: HighLevelInstructionIndex, + tokens: &HighLevelILTokenEmitter, + settings: &DisassemblySettings, + as_full_ast: bool, + precedence: OperatorPrecedence, + statement: bool, + ); + fn begin_lines( + &self, + il: &HighLevelILFunction, + expr_index: HighLevelInstructionIndex, + tokens: &HighLevelILTokenEmitter, + ); + fn end_lines( + &self, + il: &HighLevelILFunction, + expr_index: HighLevelInstructionIndex, + tokens: &HighLevelILTokenEmitter, + ); + fn comment_start_string(&self) -> &str; + fn comment_end_string(&self) -> &str; + fn annotation_start_string(&self) -> &str; + fn annotation_end_string(&self) -> &str; +} + +pub struct CoreLanguageRepresentationFunction { + handle: ptr::NonNull, +} + +impl CoreLanguageRepresentationFunction { + pub(crate) unsafe fn from_raw(handle: ptr::NonNull) -> Self { + Self { handle } + } + + pub(crate) unsafe fn ref_from_raw( + handle: ptr::NonNull, + ) -> Ref { + unsafe { Ref::new(Self { handle }) } + } + + pub(crate) unsafe fn into_raw(self) -> *mut BNLanguageRepresentationFunction { + // NOTE don't drop self, leak in the ptr form + let Self { handle } = self; + handle.as_ptr() + } + + pub(crate) fn as_raw(&self) -> *mut BNLanguageRepresentationFunction { + self.handle.as_ptr() + } +} + +unsafe impl RefCountable for CoreLanguageRepresentationFunction { + unsafe fn inc_ref(handle: &Self) -> Ref { + Self::ref_from_raw( + ptr::NonNull::new(BNNewLanguageRepresentationFunctionReference( + 
handle.as_raw(), + )) + .unwrap(), + ) + } + + unsafe fn dec_ref(handle: &Self) { + BNFreeLanguageRepresentationFunction(handle.as_raw()) + } +} + +impl ToOwned for CoreLanguageRepresentationFunction { + type Owned = Ref; + + fn to_owned(&self) -> Self::Owned { + unsafe { ::inc_ref(self) } + } +} + +impl_simple_functions! { + CoreLanguageRepresentationFunction, + get_type -> BNGetLanguageRepresentationType( + ) -> CoreLanguageRepresentationFunctionType | result + = unsafe { CoreLanguageRepresentationFunctionType::from_raw(ptr::NonNull::new(result).unwrap()) }, + arch -> BNGetLanguageRepresentationArchitecture() -> CoreArchitecture | result + = unsafe { CoreArchitecture::from_raw(result) }, + owner_function -> BNGetLanguageRepresentationOwnerFunction() -> Ref | result + = unsafe { Function::ref_from_raw(result) }, + il_function -> BNGetLanguageRepresentationILFunction() -> Ref | result + // TODO full_ast here is unclear + = unsafe { HighLevelILFunction::ref_from_raw(result, false) }, + comment_start_string -> BNGetLanguageRepresentationFunctionCommentStartString( + ) -> BnString | result = unsafe{ BnString::from_raw(result) }, + comment_end_string -> BNGetLanguageRepresentationFunctionCommentEndString( + ) -> BnString | result = unsafe{ BnString::from_raw(result) }, + annotation_start_string-> BNGetLanguageRepresentationFunctionAnnotationStartString( + ) -> BnString | result = unsafe{ BnString::from_raw(result) }, + annotation_end_string-> BNGetLanguageRepresentationFunctionAnnotationEndString( + ) -> BnString | result = unsafe{ BnString::from_raw(result) }, +} + +impl CoreLanguageRepresentationFunction { + pub fn expr_text( + &self, + il: &HighLevelILFunction, + expr_index: HighLevelInstructionIndex, + settings: &DisassemblySettings, + as_full_ast: bool, + precedence: OperatorPrecedence, + statement: bool, + ) -> Array { + let mut count = 0; + let result = unsafe { + BNGetLanguageRepresentationFunctionExprText( + self.as_raw(), + il.handle, + expr_index.0, + 
settings.handle, + as_full_ast, + precedence, + statement, + &mut count, + ) + }; + unsafe { Array::new(result, count, ()) } + } + + pub fn linear_lines( + &self, + il: &HighLevelILFunction, + expr_index: HighLevelInstructionIndex, + settings: &DisassemblySettings, + as_full_ast: bool, + ) -> Array { + let mut count = 0; + let result = unsafe { + BNGetLanguageRepresentationFunctionLinearLines( + self.as_raw(), + il.handle, + expr_index.0, + settings.handle, + as_full_ast, + &mut count, + ) + }; + unsafe { Array::new(result, count, ()) } + } + + pub fn block_lines( + &self, + block: &BasicBlock, + settings: &DisassemblySettings, + ) -> Array { + let mut count = 0; + let result = unsafe { + BNGetLanguageRepresentationFunctionBlockLines( + self.as_raw(), + block.handle, + settings.handle, + &mut count, + ) + }; + unsafe { Array::new(result, count, ()) } + } + + pub fn highlight(&self, block: &BasicBlock) -> HighlightColor { + let result = + unsafe { BNGetLanguageRepresentationFunctionHighlight(self.as_raw(), block.handle) }; + result.into() + } +} + +pub fn create_language_representation_function< + C: CustomLanguageRepresentationFunction, + A: Architecture, +>( + context: C, + type_: &CoreLanguageRepresentationFunctionType, + arch: &A, + func: &Function, + high_level_il: &HighLevelILFunction, +) -> CoreLanguageRepresentationFunction { + let core_arch: &CoreArchitecture = arch.as_ref(); + let context: &mut C = Box::leak(Box::new(context)); + let mut callbacks = BNCustomLanguageRepresentationFunction { + context: context as *mut C as *mut ffi::c_void, + freeObject: Some(function_free_ffi::), + externalRefTaken: Some(function_external_ref_taken::), + externalRefReleased: Some(function_external_ref_released::), + initTokenEmitter: Some(function_init_token_emitter::), + getExprText: Some(function_get_expr_text::), + beginLines: Some(function_begin_lines::), + endLines: Some(function_end_lines::), + getCommentStartString: Some(function_get_comment_start_string::), + 
getCommentEndString: Some(function_get_comment_end_string::), + getAnnotationStartString: Some(function_get_annotation_start_string::), + getAnnotationEndString: Some(function_get_annotation_end_string::), + }; + let handle = unsafe { + BNCreateCustomLanguageRepresentationFunction( + type_.as_raw(), + core_arch.handle, + func.handle, + high_level_il.handle, + &mut callbacks, + ) + }; + unsafe { CoreLanguageRepresentationFunction::from_raw(ptr::NonNull::new(handle).unwrap()) } +} + +unsafe extern "C" fn function_free_ffi( + ctxt: *mut ffi::c_void, +) { + let ctxt = ctxt as *mut C; + drop(Box::from_raw(ctxt)) +} + +unsafe extern "C" fn function_external_ref_taken( + _ctxt: *mut ffi::c_void, +) { + // TODO Make an Arc? conflict with free? +} + +unsafe extern "C" fn function_external_ref_released( + _ctxt: *mut ffi::c_void, +) { + // TODO Make an Arc? conflict with free? +} + +unsafe extern "C" fn function_init_token_emitter( + ctxt: *mut ffi::c_void, + tokens: *mut BNHighLevelILTokenEmitter, +) { + let ctxt = ctxt as *mut C; + let tokens = HighLevelILTokenEmitter::from_raw(ptr::NonNull::new(tokens).unwrap()); + (*ctxt).init_token_emitter(&tokens) +} + +unsafe extern "C" fn function_get_expr_text( + ctxt: *mut ffi::c_void, + il: *mut BNHighLevelILFunction, + expr_index: usize, + tokens: *mut BNHighLevelILTokenEmitter, + settings: *mut BNDisassemblySettings, + as_full_ast: bool, + precedence: BNOperatorPrecedence, + statement: bool, +) { + let ctxt = ctxt as *mut C; + let il = HighLevelILFunction { + full_ast: as_full_ast, + handle: il, + }; + let tokens = HighLevelILTokenEmitter::from_raw(ptr::NonNull::new(tokens).unwrap()); + let settings = DisassemblySettings { handle: settings }; + (*ctxt).expr_text( + &il, + expr_index.into(), + &tokens, + &settings, + as_full_ast, + precedence, + statement, + ); +} + +unsafe extern "C" fn function_begin_lines( + ctxt: *mut ffi::c_void, + il: *mut BNHighLevelILFunction, + expr_index: usize, + tokens: *mut BNHighLevelILTokenEmitter, 
+) { + let ctxt = ctxt as *mut C; + let il = HighLevelILFunction { + full_ast: false, + handle: il, + }; + let tokens = HighLevelILTokenEmitter::from_raw(ptr::NonNull::new(tokens).unwrap()); + (*ctxt).begin_lines(&il, expr_index.into(), &tokens) +} + +unsafe extern "C" fn function_end_lines( + ctxt: *mut ffi::c_void, + il: *mut BNHighLevelILFunction, + expr_index: usize, + tokens: *mut BNHighLevelILTokenEmitter, +) { + let ctxt = ctxt as *mut C; + let il = HighLevelILFunction { + full_ast: false, + handle: il, + }; + let tokens = HighLevelILTokenEmitter::from_raw(ptr::NonNull::new(tokens).unwrap()); + (*ctxt).end_lines(&il, expr_index.into(), &tokens) +} + +unsafe extern "C" fn function_get_comment_start_string( + ctxt: *mut ffi::c_void, +) -> *mut ffi::c_char { + let ctxt = ctxt as *mut C; + let result = (*ctxt).comment_start_string(); + BnString::into_raw(BnString::new(result)) +} + +unsafe extern "C" fn function_get_comment_end_string( + ctxt: *mut ffi::c_void, +) -> *mut ffi::c_char { + let ctxt = ctxt as *mut C; + let result = (*ctxt).comment_end_string(); + BnString::into_raw(BnString::new(result)) +} + +unsafe extern "C" fn function_get_annotation_start_string< + C: CustomLanguageRepresentationFunction, +>( + ctxt: *mut ffi::c_void, +) -> *mut ffi::c_char { + let ctxt = ctxt as *mut C; + let result = (*ctxt).annotation_start_string(); + BnString::into_raw(BnString::new(result)) +} + +unsafe extern "C" fn function_get_annotation_end_string( + ctxt: *mut ffi::c_void, +) -> *mut ffi::c_char { + let ctxt = ctxt as *mut C; + let result = (*ctxt).annotation_end_string(); + BnString::into_raw(BnString::new(result)) +} + +pub trait CustomLanguageRepresentationFunctionType { + fn create( + &self, + arch: &CoreArchitecture, + owner: &Function, + high_level_il: &HighLevelILFunction, + ) -> CoreLanguageRepresentationFunction; + fn is_valid(&self, view: &BinaryView) -> bool; + fn type_printer(&self) -> &CoreTypePrinter; + fn type_parser(&self) -> &CoreTypeParser; + fn 
line_formatter(&self) -> &CoreLineFormatter; + fn function_type_tokens( + &self, + func: &Function, + settings: &DisassemblySettings, + ) -> Vec; +} + +// NOTE static, it never gets freed, so we can clone/copy it +#[repr(transparent)] +#[derive(Clone, Copy)] +pub struct CoreLanguageRepresentationFunctionType { + handle: ptr::NonNull, +} + +impl CoreLanguageRepresentationFunctionType { + pub(crate) unsafe fn from_raw( + handle: ptr::NonNull, + ) -> Self { + Self { handle } + } + + pub(crate) fn as_raw(&self) -> *mut BNLanguageRepresentationFunctionType { + self.handle.as_ptr() + } + + pub fn get_by_name(name: B) -> Option { + let name = name.into_bytes_with_nul(); + let result = unsafe { + BNGetLanguageRepresentationFunctionTypeByName( + name.as_ref().as_ptr() as *const ffi::c_char + ) + }; + ptr::NonNull::new(result).map(|handle| unsafe { Self::from_raw(handle) }) + } + + pub fn get_all() -> Array { + let mut count = 0; + let result = unsafe { BNGetLanguageRepresentationFunctionTypeList(&mut count) }; + unsafe { Array::new(result, count, ()) } + } + + pub fn tokens( + &self, + func: &Function, + settings: &DisassemblySettings, + ) -> Array { + let mut count = 0; + let result = unsafe { + BNGetLanguageRepresentationFunctionTypeFunctionTypeTokens( + self.as_raw(), + func.handle, + settings.handle, + &mut count, + ) + }; + unsafe { Array::new(result, count, ()) } + } +} + +impl_simple_functions! 
{ + CoreLanguageRepresentationFunctionType, + name -> BNGetLanguageRepresentationFunctionTypeName( + ) -> BnString | result = unsafe { BnString::from_raw(result) }, + create -> BNCreateLanguageRepresentationFunction( + arch: &CoreArchitecture: *mut BNArchitecture = arch.handle, + func: &Function: *mut BNFunction = func.handle, + high_level_il: &HighLevelILFunction: *mut BNHighLevelILFunction = high_level_il.handle, + ) -> CoreLanguageRepresentationFunction | result + = unsafe{ CoreLanguageRepresentationFunction::from_raw(ptr::NonNull::new(result).unwrap()) }, + is_valid -> BNIsLanguageRepresentationFunctionTypeValid( + view: &BinaryView: *mut BNBinaryView = view.handle, + ) -> bool, + printer -> BNGetLanguageRepresentationFunctionTypePrinter( + ) -> CoreTypePrinter | result + = unsafe { CoreTypePrinter::from_raw(ptr::NonNull::new(result).unwrap()) }, + parser -> BNGetLanguageRepresentationFunctionTypeParser( + ) -> CoreTypeParser | result + = unsafe { CoreTypeParser::from_raw(ptr::NonNull::new(result).unwrap()) }, + line_formatter -> BNGetLanguageRepresentationFunctionTypeLineFormatter( + ) -> CoreLineFormatter | result + = unsafe { CoreLineFormatter::from_raw(ptr::NonNull::new(result).unwrap()) }, +} + +impl CoreArrayProvider for CoreLanguageRepresentationFunctionType { + type Raw = *mut BNLanguageRepresentationFunctionType; + type Context = (); + type Wrapped<'a> = &'a CoreLanguageRepresentationFunctionType; +} + +unsafe impl CoreArrayProviderInner for CoreLanguageRepresentationFunctionType { + unsafe fn free(raw: *mut Self::Raw, _count: usize, _context: &Self::Context) { + BNFreeLanguageRepresentationFunctionTypeList(raw) + } + + unsafe fn wrap_raw<'a>(raw: &'a Self::Raw, _context: &'a Self::Context) -> Self::Wrapped<'a> { + // SAFETY: CoreLanguageRepresentationFunctionType and BNCoreLanguageRepresentationFunctionType + // transparent + core::mem::transmute::< + &*mut BNLanguageRepresentationFunctionType, + &CoreLanguageRepresentationFunctionType, + >(raw) + } 
+} + +pub fn register_language_representation_function_type< + C: CustomLanguageRepresentationFunctionType, + F: FnOnce(CoreLanguageRepresentationFunctionType) -> C, + B: BnStrCompatible, +>( + creator: F, + name: B, +) -> CoreLanguageRepresentationFunctionType { + let custom = Box::leak(Box::new(MaybeUninit::uninit())); + let mut callbacks = BNCustomLanguageRepresentationFunctionType { + context: custom as *mut MaybeUninit as *mut ffi::c_void, + create: Some(function_type_create_ffi::), + isValid: Some(function_type_is_valid_ffi::), + getTypePrinter: Some(function_type_get_type_printer_ffi::), + getTypeParser: Some(function_type_get_type_parser_ffi::), + getLineFormatter: Some(function_type_get_line_formatter_ffi::), + getFunctionTypeTokens: Some(function_type_get_function_type_tokens::), + freeLines: Some(function_type_free_lines_ffi), + }; + let name = name.into_bytes_with_nul(); + let core = unsafe { + BNRegisterLanguageRepresentationFunctionType( + name.as_ref().as_ptr() as *const ffi::c_char, + &mut callbacks, + ) + }; + let core = unsafe { + CoreLanguageRepresentationFunctionType::from_raw(ptr::NonNull::new(core).unwrap()) + }; + custom.write(creator(core)); + core +} + +unsafe extern "C" fn function_type_create_ffi( + ctxt: *mut ffi::c_void, + arch: *mut BNArchitecture, + owner: *mut BNFunction, + high_level_il: *mut BNHighLevelILFunction, +) -> *mut BNLanguageRepresentationFunction { + let ctxt = ctxt as *mut C; + let arch = CoreArchitecture::from_raw(arch); + let owner = Function::from_raw(owner); + let high_level_il = HighLevelILFunction { + full_ast: false, + handle: high_level_il, + }; + let result = (*ctxt).create(&arch, &owner, &high_level_il); + result.into_raw() +} + +unsafe extern "C" fn function_type_is_valid_ffi( + ctxt: *mut ffi::c_void, + view: *mut BNBinaryView, +) -> bool { + let ctxt = ctxt as *mut C; + let view = BinaryView::from_raw(view); + (*ctxt).is_valid(&view) +} + +unsafe extern "C" fn function_type_get_type_printer_ffi< + C: 
CustomLanguageRepresentationFunctionType, +>( + ctxt: *mut ffi::c_void, +) -> *mut BNTypePrinter { + let ctxt = ctxt as *mut C; + let result = (*ctxt).type_printer(); + result.as_raw() +} + +unsafe extern "C" fn function_type_get_type_parser_ffi< + C: CustomLanguageRepresentationFunctionType, +>( + ctxt: *mut ffi::c_void, +) -> *mut BNTypeParser { + let ctxt = ctxt as *mut C; + let result = (*ctxt).type_parser(); + result.as_raw() +} + +unsafe extern "C" fn function_type_get_line_formatter_ffi< + C: CustomLanguageRepresentationFunctionType, +>( + ctxt: *mut ffi::c_void, +) -> *mut BNLineFormatter { + let ctxt = ctxt as *mut C; + let result = (*ctxt).line_formatter(); + result.as_raw() +} + +unsafe extern "C" fn function_type_get_function_type_tokens< + C: CustomLanguageRepresentationFunctionType, +>( + ctxt: *mut ffi::c_void, + func: *mut BNFunction, + settings: *mut BNDisassemblySettings, + count: *mut usize, +) -> *mut BNDisassemblyTextLine { + let ctxt = ctxt as *mut C; + let func = Function::from_raw(func); + let settings = DisassemblySettings { handle: settings }; + let result = (*ctxt).function_type_tokens(&func, &settings); + *count = result.len(); + let result: Box<[BNDisassemblyTextLine]> = result + .into_iter() + .map(DisassemblyTextLine::into_raw) + .collect(); + // NOTE freed by function_type_free_lines_ffi + Box::leak(result).as_mut_ptr() +} + +unsafe extern "C" fn function_type_free_lines_ffi( + _ctxt: *mut ffi::c_void, + lines: *mut BNDisassemblyTextLine, + count: usize, +) { + let lines: Box<[BNDisassemblyTextLine]> = + Box::from_raw(core::slice::from_raw_parts_mut(lines, count)); + drop(lines); +} + +pub trait CustomLineFormatter { + fn format_lines( + &self, + lines: &[DisassemblyTextLine], + settings: &LineFormatterSettings, + ) -> Vec; +} + +pub struct CoreLineFormatter { + handle: ptr::NonNull, +} + +impl CoreLineFormatter { + pub(crate) unsafe fn from_raw(handle: ptr::NonNull) -> Self { + Self { handle } + } + + pub(crate) unsafe fn 
as_raw(&self) -> *mut BNLineFormatter { + self.handle.as_ptr() + } +} + +pub fn register_line_formatter( + name: B, + custom: C, +) -> CoreLineFormatter { + let custom = Box::leak(Box::new(custom)); + let mut callbacks = BNCustomLineFormatter { + context: custom as *mut C as *mut ffi::c_void, + formatLines: Some(line_formatter_format_lines_ffi::), + freeLines: Some(line_formatter_free_lines_ffi), + }; + let name = name.into_bytes_with_nul(); + let handle = unsafe { + BNRegisterLineFormatter(name.as_ref().as_ptr() as *const ffi::c_char, &mut callbacks) + }; + unsafe { CoreLineFormatter::from_raw(ptr::NonNull::new(handle).unwrap()) } +} + +unsafe extern "C" fn line_formatter_format_lines_ffi( + ctxt: *mut ffi::c_void, + in_lines: *mut BNDisassemblyTextLine, + in_count: usize, + settings: *const BNLineFormatterSettings, + out_count: *mut usize, +) -> *mut BNDisassemblyTextLine { + // NOTE dropped by line_formatter_free_lines_ffi + let ctxt = ctxt as *mut C; + let lines = core::slice::from_raw_parts(in_lines, in_count); + let lines: Vec<_> = lines.iter().map(DisassemblyTextLine::from_raw).collect(); + let result = (*ctxt).format_lines(&lines, &*settings); + *out_count = result.len(); + let result: Box<[BNDisassemblyTextLine]> = result + .into_iter() + .map(DisassemblyTextLine::into_raw) + .collect(); + Box::leak(result).as_mut_ptr() +} + +unsafe extern "C" fn line_formatter_free_lines_ffi( + _ctxt: *mut ffi::c_void, + lines: *mut BNDisassemblyTextLine, + count: usize, +) { + let lines: Box<[BNDisassemblyTextLine]> = + Box::from_raw(core::slice::from_raw_parts_mut(lines, count)); + drop(lines); +} + +/// High level token emitter +#[derive(PartialEq, Eq, Hash)] +pub struct HighLevelILTokenEmitter { + handle: ptr::NonNull, +} + +unsafe impl Send for HighLevelILTokenEmitter {} +unsafe impl Sync for HighLevelILTokenEmitter {} + +unsafe impl RefCountable for HighLevelILTokenEmitter { + unsafe fn inc_ref(handle: &Self) -> Ref { + let handle = 
BNNewHighLevelILTokenEmitterReference(handle.handle.as_ptr()); + let handle = ptr::NonNull::new(handle).unwrap(); + Ref::new(HighLevelILTokenEmitter { handle }) + } + + unsafe fn dec_ref(handle: &Self) { + BNFreeHighLevelILTokenEmitter(handle.handle.as_ptr()) + } +} + +impl ToOwned for HighLevelILTokenEmitter { + type Owned = Ref; + + fn to_owned(&self) -> Self::Owned { + unsafe { ::inc_ref(self) } + } +} + +impl HighLevelILTokenEmitter { + pub(crate) unsafe fn from_raw(handle: ptr::NonNull) -> Self { + Self { handle } + } + + pub(crate) fn as_raw(&self) -> *mut BNHighLevelILTokenEmitter { + self.handle.as_ptr() + } +} + +impl_simple_functions! { + HighLevelILTokenEmitter, + prepend_collapse_blank_indicator -> BNHighLevelILTokenPrependCollapseBlankIndicator(), + prepend_collapse_indicator -> BNHighLevelILTokenPrependCollapseIndicator( + context: InstructionTextTokenContext: BNInstructionTextTokenContext = context, + hash: u64, + ), + has_collapsable_regions -> BNHighLevelILTokenEmitterHasCollapsableRegions() -> bool, + set_has_collapsable_regions -> BNHighLevelILTokenEmitterSetHasCollapsableRegions( + state: bool, + ), + append -> BNHighLevelILTokenEmitterAppend ( + token: InstructionTextToken: *mut BNInstructionTextToken = &mut InstructionTextToken::into_raw(token), + ), + init_line -> BNHighLevelILTokenEmitterInitLine(), + new_line -> BNHighLevelILTokenEmitterNewLine(), + increase_indent -> BNHighLevelILTokenEmitterIncreaseIndent(), + decrease_indent -> BNHighLevelILTokenEmitterDecreaseIndent(), + scope_separator -> BNHighLevelILTokenEmitterScopeSeparator(), + begin_scope -> BNHighLevelILTokenEmitterBeginScope( + type_: ScopeType, + ), + end_scope -> BNHighLevelILTokenEmitterEndScope( + type_: ScopeType, + ), + scope_continuation -> BNHighLevelILTokenEmitterScopeContinuation( + force_same_line: bool, + ), + finalize_scope -> BNHighLevelILTokenEmitterFinalizeScope(), + no_indent_for_this_line -> BNHighLevelILTokenEmitterNoIndentForThisLine(), + 
begin_force_zero_confidence -> BNHighLevelILTokenEmitterBeginForceZeroConfidence(), + end_force_zero_confidence -> BNHighLevelILTokenEmitterEndForceZeroConfidence(), + set_current_expr -> BNHighLevelILTokenEmitterSetCurrentExpr( + expr: TokenEmitterExpr, + ) -> TokenEmitterExpr, + restore_current_expr -> BNHighLevelILTokenEmitterRestoreCurrentExpr( + expr: TokenEmitterExpr, + ), + finalize -> BNHighLevelILTokenEmitterFinalize(), + append_open_paren -> BNHighLevelILTokenEmitterAppendOpenParen(), + append_close_paren -> BNHighLevelILTokenEmitterAppendCloseParen(), + append_open_bracket -> BNHighLevelILTokenEmitterAppendOpenBracket(), + append_close_bracket -> BNHighLevelILTokenEmitterAppendCloseBracket(), + append_open_brace -> BNHighLevelILTokenEmitterAppendOpenBrace(), + append_close_brace -> BNHighLevelILTokenEmitterAppendCloseBrace(), + append_semicolon -> BNHighLevelILTokenEmitterAppendSemicolon(), + set_brace_requirement -> BNHighLevelILTokenEmitterSetBraceRequirement( + required: BraceRequirement, + ), + set_braces_around_switch_cases -> BNHighLevelILTokenEmitterSetBracesAroundSwitchCases( + braces: bool, + ), + set_default_braces_on_same_line -> BNHighLevelILTokenEmitterSetDefaultBracesOnSameLine( + same_line: bool, + ), + set_simple_scope_allowed -> BNHighLevelILTokenEmitterSetSimpleScopeAllowed( + allowed: bool, + ), + brace_requirement -> BNHighLevelILTokenEmitterGetBraceRequirement() -> BraceRequirement, + has_braces_around_switch_cases -> BNHighLevelILTokenEmitterHasBracesAroundSwitchCases() -> bool, + default_braces_on_same_line -> BNHighLevelILTokenEmitterGetDefaultBracesOnSameLine() -> bool, + is_simple_scope_allowed -> BNHighLevelILTokenEmitterIsSimpleScopeAllowed() -> bool, +} + +impl HighLevelILTokenEmitter { + pub fn current_tokens(&self) -> Array { + let mut count = 0; + let array = unsafe { BNHighLevelILTokenEmitterGetCurrentTokens(self.as_raw(), &mut count) }; + unsafe { Array::new(array, count, ()) } + } + pub fn lines(&self) -> Array { + let 
mut count = 0; + let array = unsafe { BNHighLevelILTokenEmitterGetLines(self.as_raw(), &mut count) }; + unsafe { Array::new(array, count, ()) } + } + + pub fn append_size_token(&self, size: usize, type_: InstructionTextTokenType) { + unsafe { BNAddHighLevelILSizeToken(size, type_, self.as_raw()) } + } + + pub fn append_float_size_token(&self, size: usize, type_: InstructionTextTokenType) { + unsafe { BNAddHighLevelILFloatSizeToken(size, type_, self.as_raw()) } + } + + pub fn append_var_text_token( + &self, + func: &HighLevelILFunction, + var: Variable, + expr_index: usize, + size: usize, + ) { + unsafe { + BNAddHighLevelILVarTextToken( + func.handle, + &BNVariable::from(var), + self.as_raw(), + expr_index, + size, + ) + } + } + + pub fn append_integer_text_token( + &self, + func: &HighLevelILFunction, + expr_index: usize, + val: i64, + size: usize, + ) { + unsafe { + BNAddHighLevelILIntegerTextToken(func.handle, expr_index, val, size, self.as_raw()) + } + } + + pub fn append_array_index_token( + &self, + func: &HighLevelILFunction, + expr_index: usize, + val: i64, + size: usize, + address: u64, + ) { + unsafe { + BNAddHighLevelILArrayIndexToken( + func.handle, + expr_index, + val, + size, + self.as_raw(), + address, + ) + } + } + + pub fn append_pointer_text_token( + &self, + func: &HighLevelILFunction, + expr_index: usize, + val: i64, + settings: &DisassemblySettings, + symbol_display: SymbolDisplayType, + precedence: OperatorPrecedence, + allow_short_string: bool, + ) -> SymbolDisplayResult { + unsafe { + BNAddHighLevelILPointerTextToken( + func.handle, + expr_index, + val, + self.as_raw(), + settings.handle, + symbol_display, + precedence, + allow_short_string, + ) + } + } + + pub fn append_constant_text_token( + &self, + func: &HighLevelILFunction, + expr_index: usize, + val: i64, + size: usize, + settings: &DisassemblySettings, + precedence: OperatorPrecedence, + ) { + unsafe { + BNAddHighLevelILConstantTextToken( + func.handle, + expr_index, + val, + size, + 
self.as_raw(), + settings.handle, + precedence, + ) + } + } +} + +pub fn get_function_language_representation( + func: &Function, + lang_name: B, +) -> Option { + let lang_name = lang_name.into_bytes_with_nul(); + let repr = unsafe { + BNGetFunctionLanguageRepresentationIfAvailable( + func.handle, + lang_name.as_ref().as_ptr() as *const ffi::c_char, + ) + }; + ptr::NonNull::new(repr) + .map(|handle| unsafe { CoreLanguageRepresentationFunction::from_raw(handle) }) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index a3df0cd35..21ed2ed13 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -57,6 +57,7 @@ pub mod function_recognizer; pub mod headless; pub mod high_level_il; pub mod interaction; +pub mod language_representation; pub mod linear_view; pub mod logger; pub mod low_level_il; diff --git a/rust/src/type_parser.rs b/rust/src/type_parser.rs index 656f0c9d4..b4e2985eb 100644 --- a/rust/src/type_parser.rs +++ b/rust/src/type_parser.rs @@ -49,6 +49,10 @@ impl CoreTypeParser { Self { handle } } + pub(crate) unsafe fn as_raw(&self) -> *mut BNTypeParser { + self.handle.as_ptr() + } + pub fn parsers() -> Array { let mut count = 0; let result = unsafe { BNGetTypeParserList(&mut count) }; diff --git a/rust/src/type_printer.rs b/rust/src/type_printer.rs index 12c716458..32f22d43a 100644 --- a/rust/src/type_printer.rs +++ b/rust/src/type_printer.rs @@ -54,6 +54,10 @@ impl CoreTypePrinter { Self { handle } } + pub(crate) unsafe fn as_raw(&self) -> *mut BNTypePrinter { + self.handle.as_ptr() + } + pub fn printers() -> Array { let mut count = 0; let result = unsafe { BNGetTypePrinterList(&mut count) }; diff --git a/rust/tests/language_representation.rs b/rust/tests/language_representation.rs new file mode 100644 index 000000000..bdce64e58 --- /dev/null +++ b/rust/tests/language_representation.rs @@ -0,0 +1,374 @@ +use std::path::PathBuf; + +use binaryninja::architecture::CoreArchitecture; +use binaryninja::binary_view::{BinaryView, BinaryViewExt}; +use 
binaryninja::disassembly::{ + DisassemblySettings, DisassemblyTextLine, InstructionTextToken, InstructionTextTokenKind, +}; +use binaryninja::function::Function; +use binaryninja::headless::Session; +use binaryninja::high_level_il::{HighLevelILFunction, HighLevelInstructionIndex}; +use binaryninja::language_representation::{ + create_language_representation_function, register_language_representation_function_type, + register_line_formatter, CoreLanguageRepresentationFunction, + CoreLanguageRepresentationFunctionType, CoreLineFormatter, + CustomLanguageRepresentationFunction, CustomLanguageRepresentationFunctionType, + CustomLineFormatter, HighLevelILTokenEmitter, LineFormatterSettings, OperatorPrecedence, +}; +use binaryninja::platform::Platform; +use binaryninja::rc::Ref; +use binaryninja::type_container::TypeContainer; +use binaryninja::type_parser::{ + register_type_parser, CoreTypeParser, TypeParser, TypeParserError, TypeParserOption, +}; +use binaryninja::type_printer::{ + register_type_printer, CoreTypePrinter, TokenEscapingType, TypeDefinitionLine, TypePrinter, +}; +use binaryninja::types::{QualifiedName, QualifiedNameAndType, Type}; + +struct MyLangRepr {} +struct MyLangReprType { + core: CoreLanguageRepresentationFunctionType, + printer: CoreTypePrinter, + parser: CoreTypeParser, + line_formatter: CoreLineFormatter, +} +struct MyTypePrinter {} +struct MyTypeParser {} +struct MyLineFormatter {} + +impl CustomLanguageRepresentationFunction for MyLangRepr { + fn init_token_emitter(&self, _tokens: &HighLevelILTokenEmitter) {} + + fn expr_text( + &self, + il: &HighLevelILFunction, + expr_index: HighLevelInstructionIndex, + tokens: &HighLevelILTokenEmitter, + _settings: &DisassemblySettings, + _as_full_ast: bool, + _precedence: OperatorPrecedence, + _statement: bool, + ) { + let instr = il.instruction_from_expr_index(expr_index).unwrap(); + let instr = instr.lift(); + use binaryninja::high_level_il::HighLevelILLiftedInstructionKind::*; + match &instr.kind { + 
Block(block) => {
// Block: emit a header of the form block <body length> plus newline, then call expr_text on every instruction of the body with the same arguments.
+ tokens.append(InstructionTextToken::new( + format!("block {}\n", block.body.len()), + InstructionTextTokenKind::Text, + )); + for block_inst in &block.body { + self.expr_text( + il, + block_inst.expr_index, + tokens, + _settings, + _as_full_ast, + _precedence, + _statement, + ); + } + }
// Unimpl, Unreachable and Undef are not rendered: reaching one of them panics.
+ Unimpl | Unreachable | Undef => panic!(),
// Any other kind is emitted as: other instr <instr.address in lowercase hex> plus newline.
+ _kind => { + tokens.append(InstructionTextToken::new( + format!("other instr {:x}\n", instr.address), + InstructionTextTokenKind::Text, + )); + } + } + } +
// begin_lines and end_lines have empty bodies; neither touches the token emitter.
+ fn begin_lines( + &self, + _il: &HighLevelILFunction, + _expr_index: HighLevelInstructionIndex, + _tokens: &HighLevelILTokenEmitter, + ) { + } + + fn end_lines( + &self, + _il: &HighLevelILFunction, + _expr_index: HighLevelInstructionIndex, + _tokens: &HighLevelILTokenEmitter, + ) { + } +
// Fixed delimiters: comments use the C block-comment opener and closer (each with one inner space), annotations use curly braces.
+ fn comment_start_string(&self) -> &str { + "/* " + } + + fn comment_end_string(&self) -> &str { + " */" + } + + fn annotation_start_string(&self) -> &str { + "{" + } + + fn annotation_end_string(&self) -> &str { + "}" + } +} +
// Representation type registered by test_custom_language_representation. Its accessors return the printer, parser and line formatter stored in its fields.
+impl CustomLanguageRepresentationFunctionType for MyLangReprType {
// create: passes a new MyLangRepr, self.core and the three arguments to create_language_representation_function and returns the result.
+ fn create( + &self, + arch: &CoreArchitecture, + func: &Function, + high_level_il: &HighLevelILFunction, + ) -> CoreLanguageRepresentationFunction { + create_language_representation_function( + MyLangRepr {}, + &self.core, + arch, + func, + high_level_il, + ) + } +
// is_valid: returns true for every view.
+ fn is_valid(&self, _view: &BinaryView) -> bool { + true + } +
// Plain field accessors.
+ fn type_printer(&self) -> &CoreTypePrinter { + &self.printer + } + + fn type_parser(&self) -> &CoreTypeParser { + &self.parser + } + + fn line_formatter(&self) -> &CoreLineFormatter { + &self.line_formatter + } +
// function_type_tokens: placeholder, todo!() panics if this is ever called.
+ fn function_type_tokens( + &self, + _func: &Function, + _settings: &DisassemblySettings, + ) -> Vec { + todo!() + } +} +
// Stub type printer: every argument is ignored. The token methods return fixed tokens; the string and line methods return None.
+impl TypePrinter for MyTypePrinter {
// get_type_tokens: always a single Text token reading SomeType.
+ fn get_type_tokens>( + &self, + _type_: Ref, + _platform: Option>, + _name: T, + _base_confidence: u8, + _escaping: TokenEscapingType, + ) -> Option> { + Some(vec![InstructionTextToken::new( 
+ "SomeType", + InstructionTextTokenKind::Text, + )]) + } +
// The before-name and after-name token variants each return a single Text token holding an empty string.
+ fn get_type_tokens_before_name( + &self, + _type_: Ref, + _platform: Option>, + _base_confidence: u8, + _parent_type: Option>, + _escaping: TokenEscapingType, + ) -> Option> { + Some(vec![InstructionTextToken::new( + "", + InstructionTextTokenKind::Text, + )]) + } + + fn get_type_tokens_after_name( + &self, + _type_: Ref, + _platform: Option>, + _base_confidence: u8, + _parent_type: Option>, + _escaping: TokenEscapingType, + ) -> Option> { + Some(vec![InstructionTextToken::new( + "", + InstructionTextTokenKind::Text, + )]) + } +
// Every remaining method of this impl returns None.
+ fn get_type_string>( + &self, + _type_: Ref, + _platform: Option>, + _name: T, + _escaping: TokenEscapingType, + ) -> Option { + None + } + + fn get_type_string_before_name( + &self, + _type_: Ref, + _platform: Option>, + _escaping: TokenEscapingType, + ) -> Option { + None + } + + fn get_type_string_after_name( + &self, + _type_: Ref, + _platform: Option>, + _escaping: TokenEscapingType, + ) -> Option { + None + } + + fn get_type_lines>( + &self, + _type_: Ref, + _types: &TypeContainer, + _name: T, + _padding_cols: isize, + _collapsed: bool, + _escaping: TokenEscapingType, + ) -> Option> { + None + } + + fn print_all_types( + &self, + _names: Vec, + _types: Vec>, + _data: Ref, + _padding_cols: isize, + _escaping: TokenEscapingType, + ) -> Option { + None + } +} +
// Stub type parser: no argument is used by any method.
+impl TypeParser for MyTypeParser {
// get_option_text: always None.
+ fn get_option_text(&self, _option: TypeParserOption, _value: &str) -> Option { + None + } +
// preprocess_source: placeholder, todo!() panics if this is ever called.
+ fn preprocess_source( + &self, + _source: &str, + _file_name: &str, + _platform: &binaryninja::platform::Platform, + _existing_types: &TypeContainer, + _options: &[String], + _include_dirs: &[String], + ) -> Result> { + todo!() + } + + fn parse_types_from_source( + &self, + _source: &str, + _file_name: &str, + _platform: &binaryninja::platform::Platform, + _existing_types: &TypeContainer, + _options: &[String], + _include_dirs: &[String], + _auto_type_source: &str, + ) -> Result< + 
binaryninja::type_parser::TypeParserResult, + Vec, + > { + todo!() + } +
// parse_type_string: placeholder, todo!() panics if this is ever called.
+ fn parse_type_string( + &self, + _source: &str, + _platform: &binaryninja::platform::Platform, + _existing_types: &TypeContainer, + ) -> Result> { + todo!() + } +} +
// Line formatter that performs no formatting: format_lines ignores the settings and returns a copy of the input lines.
+impl CustomLineFormatter for MyLineFormatter { + fn format_lines( + &self, + lines: &[DisassemblyTextLine], + _settings: &LineFormatterSettings, + ) -> Vec { + lines.to_vec() + } +} +
// End-to-end test: registers the stub implementations under LANG_REPR_NAME, renders one function of atox.obj through the custom representation, and compares the concatenated line text with the expected output.
+#[test] +fn test_custom_language_representation() { + const LANG_REPR_NAME: &str = "test_lang_repr"; + let _session = Session::new().expect("Failed to initialize session"); + let out_dir = env!("OUT_DIR").parse::().unwrap();
// Register the stub type printer, type parser and line formatter, then a representation type whose constructor closure captures them.
+ let (_, printer) = register_type_printer("my_type_printer", MyTypePrinter {}); + let (_, parser) = register_type_parser("my_type_parser", MyTypeParser {}); + let line_formatter = register_line_formatter("my_line_formatter", MyLineFormatter {}); + let my_repr = register_language_representation_function_type( + |core| MyLangReprType { + core, + printer, + parser, + line_formatter, + }, + LANG_REPR_NAME, + );
// Load atox.obj from OUT_DIR and pick the function at 0x36760 on the default platform.
+ let view = binaryninja::load(out_dir.join("atox.obj")).expect("Failed to create view"); + let func = view + .function_at(&view.default_platform().unwrap(), 0x36760) + .unwrap();
// Create the representation straight from the registered type and pass the HLIL root instruction index to linear_lines. Despite the underscore prefix, this _repr is used.
+ let _repr = my_repr.create( + &view.default_arch().unwrap().as_ref(), + &func, + &func.high_level_il(false).unwrap(), + ); + let il = func.high_level_il(false).unwrap(); + + let settings = DisassemblySettings::new(); + let root_idx = il.root_instruction_index(); + let result = _repr.linear_lines(&il, root_idx, &settings, false); + let output: String = result.iter().map(|dis| dis.to_string()).collect();
// Shadow _repr with a lookup by name; the value is not used afterwards, so only the unwrap matters and the test fails if the lookup does not succeed.
+ let _repr = binaryninja::language_representation::get_function_language_representation( + &func, + LANG_REPR_NAME, + ) + .unwrap();
// Expected text: a block 26 header followed by 26 other instr entries. NOTE(review): format! around output looks redundant since output is already a String.
+ assert_eq!( + format!("{output}"), + "block 26 +other instr 36775 +other instr 3679e +other instr 3679e +other instr 367ba +other instr 367e6 +other instr 3682f +other instr 3682f +other instr 36834 +other instr 
3683e +other instr 3684e +other instr 36867 +other instr 36881 +other instr 36881 +other instr 36881 +other instr 36896 +other instr 368a0 +other instr 368bb +other instr 368d2 +other instr 3694a +other instr 36960 +other instr 369e1 +other instr 369ec +other instr 36a2e +other instr 36ab5 +other instr 36abd +other instr 36ac2 +" + ); +}