diff options
Diffstat (limited to 'third_party/rust/object/src/read/macho.rs')
-rw-r--r-- | third_party/rust/object/src/read/macho.rs | 615 |
1 files changed, 615 insertions, 0 deletions
diff --git a/third_party/rust/object/src/read/macho.rs b/third_party/rust/object/src/read/macho.rs new file mode 100644 index 0000000000..ce54938a10 --- /dev/null +++ b/third_party/rust/object/src/read/macho.rs @@ -0,0 +1,615 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::vec::Vec; +use goblin::container; +use goblin::mach; +use goblin::mach::load_command::CommandVariant; +use std::{fmt, iter, ops, slice}; +use target_lexicon::{Aarch64Architecture, Architecture, ArmArchitecture}; +use uuid::Uuid; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// A Mach-O object file. +#[derive(Debug)] +pub struct MachOFile<'data> { + macho: mach::MachO<'data>, + data: &'data [u8], + ctx: container::Ctx, + sections: Vec<MachOSectionInternal<'data>>, +} + +impl<'data> MachOFile<'data> { + /// Get the Mach-O headers of the file. + // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn macho(&self) -> &mach::MachO<'data> { + &self.macho + } + + /// Parse the raw Mach-O file data. + pub fn parse(data: &'data [u8]) -> Result<Self, &'static str> { + let (_magic, ctx) = + mach::parse_magic_and_ctx(data, 0).map_err(|_| "Could not parse Mach-O magic")?; + let ctx = ctx.ok_or("Invalid Mach-O magic")?; + let macho = mach::MachO::parse(data, 0).map_err(|_| "Could not parse Mach-O header")?; + // Build a list of sections to make some operations more efficient. + let mut sections = Vec::new(); + 'segments: for segment in &macho.segments { + for section_result in segment { + if let Ok((section, data)) = section_result { + sections.push(MachOSectionInternal::parse(section, data)); + } else { + break 'segments; + } + } + } + Ok(MachOFile { + macho, + data, + ctx, + sections, + }) + } + + /// Return the section at the given index. + #[inline] + fn section_internal(&self, index: SectionIndex) -> Option<&MachOSectionInternal<'data>> { + index + .0 + .checked_sub(1) + .and_then(|index| self.sections.get(index)) + } +} + +impl<'data, 'file> Object<'data, 'file> for MachOFile<'data> +where + 'data: 'file, +{ + type Segment = MachOSegment<'data, 'file>; + type SegmentIterator = MachOSegmentIterator<'data, 'file>; + type Section = MachOSection<'data, 'file>; + type SectionIterator = MachOSectionIterator<'data, 'file>; + type SymbolIterator = MachOSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.macho.header.cputype { + mach::cputype::CPU_TYPE_ARM => Architecture::Arm(ArmArchitecture::Arm), + mach::cputype::CPU_TYPE_ARM64 => Architecture::Aarch64(Aarch64Architecture::Aarch64), + mach::cputype::CPU_TYPE_X86 => Architecture::I386, + mach::cputype::CPU_TYPE_X86_64 => Architecture::X86_64, + mach::cputype::CPU_TYPE_MIPS => Architecture::Mips, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.macho.little_endian + } + + #[inline] + fn is_64(&self) -> bool { + self.macho.is_64 + } + + fn segments(&'file self) -> MachOSegmentIterator<'data, 'file> { + MachOSegmentIterator { + segments: self.macho.segments.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option<MachOSection<'data, 'file>> { + // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg + // ".debug_info" to "__debug_info". + let system_section = section_name.starts_with('.'); + let cmp_section_name = |section: &MachOSection| { + section + .name() + .map(|name| { + section_name == name + || (system_section + && name.starts_with("__") + && §ion_name[1..] == &name[2..]) + }) + .unwrap_or(false) + }; + + self.sections().find(cmp_section_name) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<MachOSection<'data, 'file>> { + self.section_internal(index) + .map(|_| MachOSection { file: self, index }) + } + + fn sections(&'file self) -> MachOSectionIterator<'data, 'file> { + MachOSectionIterator { + file: self, + iter: 0..self.sections.len(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option<Symbol<'data>> { + self.macho + .symbols + .as_ref() + .and_then(|symbols| symbols.get(index.0).ok()) + .and_then(|(name, nlist)| parse_symbol(self, name, &nlist)) + } + + fn symbols(&'file self) -> MachOSymbolIterator<'data, 'file> { + let symbols = match self.macho.symbols { + Some(ref symbols) => symbols.into_iter(), + None => mach::symbols::SymbolIterator::default(), + } + .enumerate(); + + MachOSymbolIterator { + file: self, + symbols, + } + } + + fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data, 'file> { + // The LC_DYSYMTAB command contains indices into the same symbol + // table as the LC_SYMTAB command, so return all of them. + self.symbols() + } + + fn symbol_map(&self) -> SymbolMap<'data> { + let mut symbols: Vec<_> = self.symbols().map(|(_, s)| s).collect(); + + // Add symbols for the end of each section. + for section in self.sections() { + symbols.push(Symbol { + name: None, + address: section.address() + section.size(), + size: 0, + kind: SymbolKind::Section, + section_index: None, + undefined: false, + weak: false, + scope: SymbolScope::Compilation, + }); + } + + // Calculate symbol sizes by sorting and finding the next symbol. + symbols.sort_by(|a, b| { + a.address.cmp(&b.address).then_with(|| { + // Place the end of section symbols last. + (a.kind == SymbolKind::Section).cmp(&(b.kind == SymbolKind::Section)) + }) + }); + + for i in 0..symbols.len() { + let (before, after) = symbols.split_at_mut(i + 1); + let symbol = &mut before[i]; + if symbol.kind != SymbolKind::Section { + if let Some(next) = after + .iter() + .skip_while(|x| x.kind != SymbolKind::Section && x.address == symbol.address) + .next() + { + symbol.size = next.address - symbol.address; + } + } + } + + symbols.retain(SymbolMap::filter); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + self.section_data_by_name(".debug_info").is_some() + } + + fn mach_uuid(&self) -> Option<Uuid> { + // Return the UUID from the `LC_UUID` load command, if one is present. + self.macho + .load_commands + .iter() + .filter_map(|lc| { + match lc.command { + CommandVariant::Uuid(ref cmd) => { + //TODO: Uuid should have a `from_array` method that can't fail. + Uuid::from_slice(&cmd.uuid).ok() + } + _ => None, + } + }) + .nth(0) + } + + fn entry(&self) -> u64 { + self.macho.entry + } +} + +/// An iterator over the segments of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + segments: slice::Iter<'file, mach::segment::Segment<'data>>, +} + +impl<'data, 'file> Iterator for MachOSegmentIterator<'data, 'file> { + type Item = MachOSegment<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.segments.next().map(|segment| MachOSegment { segment }) + } +} + +/// A segment of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegment<'data, 'file> +where + 'data: 'file, +{ + segment: &'file mach::segment::Segment<'data>, +} + +impl<'data, 'file> ObjectSegment<'data> for MachOSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + self.segment.vmaddr + } + + #[inline] + fn size(&self) -> u64 { + self.segment.vmsize + } + + #[inline] + fn align(&self) -> u64 { + // Page size. + 0x1000 + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + (self.segment.fileoff, self.segment.filesize) + } + + #[inline] + fn data(&self) -> &'data [u8] { + self.segment.data + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + self.segment.name().ok() + } +} + +/// An iterator over the sections of a `MachOFile`. +pub struct MachOSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + iter: ops::Range<usize>, +} + +impl<'data, 'file> fmt::Debug for MachOSectionIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("MachOSectionIterator").finish() + } +} + +impl<'data, 'file> Iterator for MachOSectionIterator<'data, 'file> { + type Item = MachOSection<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|index| MachOSection { + file: self.file, + index: SectionIndex(index + 1), + }) + } +} + +/// A section of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + index: SectionIndex, +} + +impl<'data, 'file> MachOSection<'data, 'file> { + #[inline] + fn internal(&self) -> &'file MachOSectionInternal<'data> { + // We ensure the index is always valid. + &self.file.section_internal(self.index).unwrap() + } +} + +impl<'data, 'file> ObjectSection<'data> for MachOSection<'data, 'file> { + type RelocationIterator = MachORelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + self.internal().section.addr + } + + #[inline] + fn size(&self) -> u64 { + self.internal().section.size + } + + #[inline] + fn align(&self) -> u64 { + 1 << self.internal().section.align + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + let internal = &self.internal().section; + Some((internal.offset as u64, internal.size)) + } + + #[inline] + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.internal().data) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.internal().data, self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + // TODO: does MachO support compression? + self.data() + } + + #[inline] + fn name(&self) -> Option<&str> { + self.internal().section.name().ok() + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + self.internal().section.segname().ok() + } + + fn kind(&self) -> SectionKind { + self.internal().kind + } + + fn relocations(&self) -> MachORelocationIterator<'data, 'file> { + MachORelocationIterator { + file: self.file, + relocations: self + .internal() + .section + .iter_relocations(self.file.data, self.file.ctx), + } + } +} + +#[derive(Debug)] +struct MachOSectionInternal<'data> { + section: mach::segment::Section, + data: mach::segment::SectionData<'data>, + kind: SectionKind, +} + +impl<'data> MachOSectionInternal<'data> { + fn parse(section: mach::segment::Section, data: mach::segment::SectionData<'data>) -> Self { + let kind = if let (Ok(segname), Ok(name)) = (section.segname(), section.name()) { + match (segname, name) { + ("__TEXT", "__text") => SectionKind::Text, + ("__TEXT", "__const") => SectionKind::ReadOnlyData, + ("__TEXT", "__cstring") => SectionKind::ReadOnlyString, + ("__TEXT", "__eh_frame") => SectionKind::ReadOnlyData, + ("__TEXT", "__gcc_except_tab") => SectionKind::ReadOnlyData, + ("__DATA", "__data") => SectionKind::Data, + ("__DATA", "__const") => SectionKind::ReadOnlyData, + ("__DATA", "__bss") => SectionKind::UninitializedData, + ("__DATA", "__thread_data") => SectionKind::Tls, + ("__DATA", "__thread_bss") => SectionKind::UninitializedTls, + ("__DATA", "__thread_vars") => SectionKind::TlsVariables, + ("__DWARF", _) => SectionKind::Debug, + _ => SectionKind::Unknown, + } + } else { + SectionKind::Unknown + }; + MachOSectionInternal { + section, + data, + kind, + } + } +} + +/// An iterator over the symbols of a `MachOFile`. +pub struct MachOSymbolIterator<'data, 'file> { + file: &'file MachOFile<'data>, + symbols: iter::Enumerate<mach::symbols::SymbolIterator<'data>>, +} + +impl<'data, 'file> fmt::Debug for MachOSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachOSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for MachOSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option<Self::Item> { + while let Some((index, Ok((name, nlist)))) = self.symbols.next() { + if let Some(symbol) = parse_symbol(self.file, name, &nlist) { + return Some((SymbolIndex(index), symbol)); + } + } + None + } +} + +fn parse_symbol<'data>( + file: &MachOFile<'data>, + name: &'data str, + nlist: &mach::symbols::Nlist, +) -> Option<Symbol<'data>> { + if nlist.n_type & mach::symbols::N_STAB != 0 { + return None; + } + let n_type = nlist.n_type & mach::symbols::NLIST_TYPE_MASK; + let section_index = if n_type == mach::symbols::N_SECT { + if nlist.n_sect == 0 { + None + } else { + Some(SectionIndex(nlist.n_sect)) + } + } else { + // TODO: better handling for other n_type values + None + }; + let kind = section_index + .and_then(|index| file.section_internal(index)) + .map(|section| match section.kind { + SectionKind::Text => SymbolKind::Text, + SectionKind::Data + | SectionKind::ReadOnlyData + | SectionKind::ReadOnlyString + | SectionKind::UninitializedData => SymbolKind::Data, + SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => { + SymbolKind::Tls + } + _ => SymbolKind::Unknown, + }) + .unwrap_or(SymbolKind::Unknown); + let undefined = nlist.is_undefined(); + let weak = nlist.is_weak(); + let scope = if undefined { + SymbolScope::Unknown + } else if nlist.n_type & mach::symbols::N_EXT == 0 { + SymbolScope::Compilation + } else if nlist.n_type & mach::symbols::N_PEXT != 0 { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + }; + Some(Symbol { + name: Some(name), + address: nlist.n_value, + // Only calculated for symbol maps + size: 0, + kind, + section_index, + undefined, + weak, + scope, + }) +} + +/// An iterator over the relocations in an `MachOSection`. +pub struct MachORelocationIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + relocations: mach::segment::RelocationIterator<'data>, +} + +impl<'data, 'file> Iterator for MachORelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + self.relocations.next()?.ok().map(|reloc| { + let mut encoding = RelocationEncoding::Generic; + let kind = match self.file.macho.header.cputype { + mach::cputype::CPU_TYPE_ARM => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::ARM_RELOC_VANILLA, 0) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + mach::cputype::CPU_TYPE_ARM64 => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::ARM64_RELOC_UNSIGNED, 0) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + mach::cputype::CPU_TYPE_X86 => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::GENERIC_RELOC_VANILLA, 0) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + mach::cputype::CPU_TYPE_X86_64 => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::X86_64_RELOC_UNSIGNED, 0) => RelocationKind::Absolute, + (mach::relocation::X86_64_RELOC_SIGNED, 1) => { + encoding = RelocationEncoding::X86RipRelative; + RelocationKind::Relative + } + (mach::relocation::X86_64_RELOC_BRANCH, 1) => { + encoding = RelocationEncoding::X86Branch; + RelocationKind::Relative + } + (mach::relocation::X86_64_RELOC_GOT, 1) => RelocationKind::GotRelative, + (mach::relocation::X86_64_RELOC_GOT_LOAD, 1) => { + encoding = RelocationEncoding::X86RipRelativeMovq; + RelocationKind::GotRelative + } + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }; + let size = 8 << reloc.r_length(); + let target = if reloc.is_extern() { + RelocationTarget::Symbol(SymbolIndex(reloc.r_symbolnum())) + } else { + RelocationTarget::Section(SectionIndex(reloc.r_symbolnum())) + }; + let addend = if reloc.r_pcrel() != 0 { -4 } else { 0 }; + ( + reloc.r_address as u64, + Relocation { + kind, + encoding, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file> fmt::Debug for MachORelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachORelocationIterator").finish() + } +} |