diff options
Diffstat (limited to 'third_party/rust/goblin/src/mach/imports.rs')
-rw-r--r-- | third_party/rust/goblin/src/mach/imports.rs | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/mach/imports.rs b/third_party/rust/goblin/src/mach/imports.rs new file mode 100644 index 0000000000..3c31329a96 --- /dev/null +++ b/third_party/rust/goblin/src/mach/imports.rs @@ -0,0 +1,308 @@ +//! Dynamically linked symbolic imports + +// table of tuples: +// <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend> +// symbol flags are undocumented + +use alloc::vec::Vec; +use core::fmt::{self, Debug}; +use core::ops::Range; +use scroll::{Pread, Sleb128, Uleb128}; + +use crate::container; +use crate::error; +use crate::mach::bind_opcodes; +use crate::mach::load_command; +use crate::mach::segment; + +#[derive(Debug)] +/// Import binding information generated by running the Finite State Automaton programmed via `bind_opcodes` +struct BindInformation<'a> { + seg_index: u8, + seg_offset: u64, + bind_type: u8, + symbol_library_ordinal: u8, + symbol_name: &'a str, + symbol_flags: u8, + addend: i64, + special_dylib: u8, // seeing self = 0 assuming this means the symbol is imported from itself, because its... libSystem.B.dylib? + is_lazy: bool, +} + +impl<'a> BindInformation<'a> { + pub fn new(is_lazy: bool) -> Self { + let mut bind_info = BindInformation::default(); + if is_lazy { + bind_info.is_lazy = true; + bind_info.bind_type = bind_opcodes::BIND_TYPE_POINTER; + } + bind_info + } + pub fn is_weak(&self) -> bool { + self.symbol_flags & bind_opcodes::BIND_SYMBOL_FLAGS_WEAK_IMPORT != 0 + } +} + +impl<'a> Default for BindInformation<'a> { + fn default() -> Self { + BindInformation { + seg_index: 0, + seg_offset: 0x0, + bind_type: 0x0, + special_dylib: 1, + symbol_library_ordinal: 0, + symbol_name: "", + symbol_flags: 0, + addend: 0, + is_lazy: false, + } + } +} + +#[derive(Debug)] +/// An dynamically linked symbolic import +pub struct Import<'a> { + /// The symbol name dyld uses to resolve this import + pub name: &'a str, + /// The library this symbol belongs to (thanks to two-level namespaces) + pub dylib: &'a str, + /// Whether the symbol is lazily resolved or not + pub is_lazy: bool, + /// The offset in the binary this import is found + pub offset: u64, + /// The size of this import + pub size: usize, + /// The virtual memory address at which this import is found + pub address: u64, + /// The addend of this import + pub addend: i64, + /// Whether this import is weak + pub is_weak: bool, + /// The offset in the stream of bind opcodes that caused this import + pub start_of_sequence_offset: u64, +} + +impl<'a> Import<'a> { + /// Create a new import from the import binding information in `bi` + fn new( + bi: &BindInformation<'a>, + libs: &[&'a str], + segments: &[segment::Segment], + start_of_sequence_offset: usize, + ) -> Import<'a> { + let (offset, address) = { + let segment = &segments[bi.seg_index as usize]; + ( + segment.fileoff + bi.seg_offset, + segment.vmaddr + bi.seg_offset, + ) + }; + let size = if bi.is_lazy { 8 } else { 0 }; + Import { + name: bi.symbol_name, + dylib: libs[bi.symbol_library_ordinal as usize], + is_lazy: bi.is_lazy, + offset, + size, + address, + addend: bi.addend, + is_weak: bi.is_weak(), + start_of_sequence_offset: start_of_sequence_offset as u64, + } + } +} + +/// An interpreter for mach BIND opcodes. +/// Runs on prebound (non lazy) symbols (usually dylib extern consts and extern variables), +/// and lazy symbols (usually dylib functions) +pub struct BindInterpreter<'a> { + data: &'a [u8], + location: Range<usize>, + lazy_location: Range<usize>, +} + +impl<'a> Debug for BindInterpreter<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("BindInterpreter") + .field("data", &"<... redacted ...>") + .field( + "location", + &format_args!("{:#x}..{:#x}", self.location.start, self.location.end), + ) + .field( + "lazy_location", + &format_args!( + "{:#x}..{:#x}", + self.lazy_location.start, self.lazy_location.end + ), + ) + .finish() + } +} + +impl<'a> BindInterpreter<'a> { + /// Construct a new import binding interpreter from `bytes` and the load `command` + pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self { + let get_pos = |off: u32, size: u32| -> Range<usize> { + let start = off as usize; + start..start.saturating_add(size as usize) + }; + let location = get_pos(command.bind_off, command.bind_size); + let lazy_location = get_pos(command.lazy_bind_off, command.lazy_bind_size); + BindInterpreter { + data: bytes, + location, + lazy_location, + } + } + /// Return the imports in this binary + pub fn imports( + &self, + libs: &[&'a str], + segments: &[segment::Segment], + ctx: container::Ctx, + ) -> error::Result<Vec<Import<'a>>> { + let mut imports = Vec::new(); + self.run(false, libs, segments, ctx, &mut imports)?; + self.run(true, libs, segments, ctx, &mut imports)?; + Ok(imports) + } + fn run( + &self, + is_lazy: bool, + libs: &[&'a str], + segments: &[segment::Segment], + ctx: container::Ctx, + imports: &mut Vec<Import<'a>>, + ) -> error::Result<()> { + use crate::mach::bind_opcodes::*; + let location = if is_lazy { + &self.lazy_location + } else { + &self.location + }; + let mut bind_info = BindInformation::new(is_lazy); + let mut offset = location.start; + let mut start_of_sequence: usize = 0; + while offset < location.end { + let opcode = self.data.gread::<i8>(&mut offset)? as bind_opcodes::Opcode; + // let mut input = String::new(); + // ::std::io::stdin().read_line(&mut input).unwrap(); + // println!("opcode: {} ({:#x}) offset: {:#x}\n {:?}", opcode_to_str(opcode & BIND_OPCODE_MASK), opcode, offset - location.start - 1, &bind_info); + match opcode & BIND_OPCODE_MASK { + // we do nothing, don't update our records, and add a new, fresh record + BIND_OPCODE_DONE => { + bind_info = BindInformation::new(is_lazy); + start_of_sequence = offset - location.start; + } + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { + let symbol_library_ordinal = opcode & BIND_IMMEDIATE_MASK; + bind_info.symbol_library_ordinal = symbol_library_ordinal; + } + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + let symbol_library_ordinal = Uleb128::read(&self.data, &mut offset)?; + bind_info.symbol_library_ordinal = symbol_library_ordinal as u8; + } + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { + // dyld puts the immediate into the symbol_library_ordinal field... + let special_dylib = opcode & BIND_IMMEDIATE_MASK; + // Printf.printf "special_dylib: 0x%x\n" special_dylib + bind_info.special_dylib = special_dylib; + } + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + let symbol_flags = opcode & BIND_IMMEDIATE_MASK; + let symbol_name = self.data.pread::<&str>(offset)?; + offset += symbol_name.len() + 1; // second time this \0 caused debug woes + bind_info.symbol_name = symbol_name; + bind_info.symbol_flags = symbol_flags; + } + BIND_OPCODE_SET_TYPE_IMM => { + let bind_type = opcode & BIND_IMMEDIATE_MASK; + bind_info.bind_type = bind_type; + } + BIND_OPCODE_SET_ADDEND_SLEB => { + let addend = Sleb128::read(&self.data, &mut offset)?; + bind_info.addend = addend; + } + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + let seg_index = opcode & BIND_IMMEDIATE_MASK; + // dyld sets the address to the segActualLoadAddress(segIndex) + uleb128 + // address = segActualLoadAddress(segmentIndex) + read_uleb128(p, end); + let seg_offset = Uleb128::read(&self.data, &mut offset)?; + bind_info.seg_index = seg_index; + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_ADD_ADDR_ULEB => { + let addr = Uleb128::read(&self.data, &mut offset)?; + let seg_offset = bind_info.seg_offset.wrapping_add(addr); + bind_info.seg_offset = seg_offset; + } + // record the record by placing its value into our list + BIND_OPCODE_DO_BIND => { + // from dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += sizeof(intptr_t); + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let seg_offset = bind_info.seg_offset.wrapping_add(ctx.size() as u64); + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { + // dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += read_uleb128(p, end) + sizeof(intptr_t); + // we bind the old record, then increment bind info address for the next guy, plus the ptr offset *) + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let addr = Uleb128::read(&self.data, &mut offset)?; + let seg_offset = bind_info + .seg_offset + .wrapping_add(addr) + .wrapping_add(ctx.size() as u64); + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { + // dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += immediate*sizeof(intptr_t) + sizeof(intptr_t); + // break; + // similarly, we bind the old record, then perform address manipulation for the next record + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let scale = opcode & BIND_IMMEDIATE_MASK; + let size = ctx.size() as u64; + let seg_offset = bind_info + .seg_offset + .wrapping_add(u64::from(scale) * size) + .wrapping_add(size); + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { + // dyld: + // count = read_uleb128(p, end); + // skip = read_uleb128(p, end); + // for (uint32_t i=0; i < count; ++i) { + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += skip + sizeof(intptr_t); + // } + // break; + let count = Uleb128::read(&self.data, &mut offset)?; + let skip = Uleb128::read(&self.data, &mut offset)?; + let skip_plus_size = skip + ctx.size() as u64; + for _i in 0..count { + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let seg_offset = bind_info.seg_offset.wrapping_add(skip_plus_size); + bind_info.seg_offset = seg_offset; + } + } + _ => {} + } + } + Ok(()) + } +} |