//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions use alloc::vec::Vec; use core::fmt; use log::debug; use scroll::ctx::SizeWith; use scroll::{Pread, BE}; use crate::{archive, container}; use crate::{error, take_hint_bytes}; pub mod bind_opcodes; pub mod constants; pub mod exports; pub mod fat; pub mod header; pub mod imports; pub mod load_command; pub mod relocation; pub mod segment; pub mod symbols; pub use self::constants::cputype; /// Returns a big endian magical number pub fn peek(bytes: &[u8], offset: usize) -> error::Result { Ok(bytes.pread_with::(offset, scroll::BE)?) } /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. pub fn parse_magic_and_ctx( bytes: &[u8], offset: usize, ) -> error::Result<(u32, Option)> { use crate::container::Container; use crate::mach::header::*; let magic = bytes.pread_with::(offset, BE)?; let ctx = match magic { MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64; let le = scroll::Endian::from(is_lsb); let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { Container::Big } else { Container::Little }; Some(container::Ctx::new(container, le)) } _ => None, }; Ok((magic, ctx)) } /// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser pub struct MachO<'a> { /// The mach-o header pub header: header::Header, /// The load commands tell the kernel and dynamic linker how to use/interpret this binary pub load_commands: Vec, /// The load command "segments" - typically the pieces of the binary that are loaded into memory pub segments: segment::Segments<'a>, /// The "Nlist" style symbols in this binary - strippable pub symbols: Option>, /// The dylibs this library depends on pub libs: Vec<&'a str>, /// The runtime search paths for dylibs this library depends on pub rpaths: Vec<&'a str>, /// The entry point (as a virtual memory address), 0 if none pub entry: u64, /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint pub old_style_entry: bool, /// The name of the dylib, if any pub name: Option<&'a str>, /// Are we a little-endian binary? pub little_endian: bool, /// Are we a 64-bit binary pub is_64: bool, data: &'a [u8], ctx: container::Ctx, export_trie: Option>, bind_interpreter: Option>, } impl<'a> fmt::Debug for MachO<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("MachO") .field("header", &self.header) .field("load_commands", &self.load_commands) .field("segments", &self.segments) .field("entry", &self.entry) .field("old_style_entry", &self.old_style_entry) .field("libs", &self.libs) .field("name", &self.name) .field("little_endian", &self.little_endian) .field("is_64", &self.is_64) .field("symbols()", &self.symbols().collect::>()) .field("exports()", &self.exports()) .field("imports()", &self.imports()) .finish() } } impl<'a> MachO<'a> { /// Is this a relocatable object file? pub fn is_object_file(&self) -> bool { self.header.filetype == header::MH_OBJECT } /// Return an iterator over all the symbols in this binary pub fn symbols(&self) -> symbols::SymbolIterator<'a> { if let Some(ref symbols) = self.symbols { symbols.into_iter() } else { symbols::SymbolIterator::default() } } /// Return a vector of the relocations in this binary pub fn relocations( &self, ) -> error::Result> { debug!("Iterating relocations"); let mut relocs = Vec::new(); for (_i, segment) in (&self.segments).into_iter().enumerate() { for (j, section) in segment.into_iter().enumerate() { let (section, _data) = section?; if section.nreloc > 0 { relocs.push((j, section.iter_relocations(self.data, self.ctx), section)); } } } Ok(relocs) } /// Return the exported symbols in this binary (if any) pub fn exports(&self) -> error::Result> { if let Some(ref trie) = self.export_trie { trie.exports(self.libs.as_slice()) } else { Ok(vec![]) } } /// Return the imported symbols in this binary that dyld knows about (if any) pub fn imports(&self) -> error::Result> { if let Some(ref interpreter) = self.bind_interpreter { interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) } else { Ok(vec![]) } } /// Parses the Mach-o binary from `bytes` at `offset` pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result> { let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return Err(error::Error::BadMagic(u64::from(magic))); }; debug!("Ctx: {:?}", ctx); let offset = &mut offset; let header: header::Header = bytes.pread_with(*offset, ctx)?; debug!("Mach-o header: {:?}", header); let little_endian = ctx.le.is_little(); let is_64 = ctx.container.is_big(); *offset += header::Header::size_with(&ctx.container); let ncmds = header.ncmds; let sizeofcmds = header.sizeofcmds as usize; // a load cmd is at least 2 * 4 bytes, (type, sizeof) if ncmds > sizeofcmds / 8 || sizeofcmds > bytes.len() { return Err(error::Error::BufferTooShort(ncmds, "load commands")); } let mut cmds: Vec = Vec::with_capacity(ncmds); let mut symbols = None; let mut libs = vec!["self"]; let mut rpaths = vec![]; let mut export_trie = None; let mut bind_interpreter = None; let mut unixthread_entry_address = None; let mut main_entry_offset = None; let mut name = None; let mut segments = segment::Segments::new(ctx); for i in 0..ncmds { let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; debug!("{} - {:?}", i, cmd); match cmd.command { load_command::CommandVariant::Segment32(command) => { // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) } load_command::CommandVariant::Segment64(command) => { segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?) } load_command::CommandVariant::Symtab(command) => { symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); } load_command::CommandVariant::LoadDylib(command) | load_command::CommandVariant::LoadUpwardDylib(command) | load_command::CommandVariant::ReexportDylib(command) | load_command::CommandVariant::LoadWeakDylib(command) | load_command::CommandVariant::LazyLoadDylib(command) => { let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; libs.push(lib); } load_command::CommandVariant::Rpath(command) => { let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?; rpaths.push(rpath); } load_command::CommandVariant::DyldInfo(command) | load_command::CommandVariant::DyldInfoOnly(command) => { export_trie = Some(exports::ExportTrie::new(bytes, &command)); bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command)); } load_command::CommandVariant::DyldExportsTrie(command) => { export_trie = Some(exports::ExportTrie::new_from_linkedit_data_command( bytes, &command, )); } load_command::CommandVariant::Unixthread(command) => { // dyld cares only about the first LC_UNIXTHREAD if unixthread_entry_address.is_none() { unixthread_entry_address = Some(command.instruction_pointer(header.cputype)?); } } load_command::CommandVariant::Main(command) => { // dyld cares only about the first LC_MAIN if main_entry_offset.is_none() { main_entry_offset = Some(command.entryoff); } } load_command::CommandVariant::IdDylib(command) => { let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; libs[0] = id; name = Some(id); } _ => (), } cmds.push(cmd) } // dyld prefers LC_MAIN over LC_UNIXTHREAD // choose the same way here let (entry, old_style_entry) = if let Some(offset) = main_entry_offset { // map the entrypoint offset to a virtual memory address let base_address = segments .iter() .filter(|s| &s.segname[0..7] == b"__TEXT\0") .map(|s| s.vmaddr - s.fileoff) .next() .ok_or_else(|| { error::Error::Malformed(format!( "image specifies LC_MAIN offset {} but has no __TEXT segment", offset )) })?; (base_address + offset, false) } else if let Some(address) = unixthread_entry_address { (address, true) } else { (0, false) }; Ok(MachO { header, load_commands: cmds, segments, symbols, libs, rpaths, export_trie, bind_interpreter, entry, old_style_entry, name, ctx, is_64, little_endian, data: bytes, }) } } /// A Mach-o multi architecture (Fat) binary container pub struct MultiArch<'a> { data: &'a [u8], start: usize, pub narches: usize, } /// Iterator over the fat architecture headers in a `MultiArch` container pub struct FatArchIterator<'a> { index: usize, data: &'a [u8], narches: usize, start: usize, } /// A single architecture froma multi architecture binary container /// ([MultiArch]). #[derive(Debug)] #[allow(clippy::large_enum_variant)] pub enum SingleArch<'a> { MachO(MachO<'a>), Archive(archive::Archive<'a>), } impl<'a> Iterator for FatArchIterator<'a> { type Item = error::Result; fn next(&mut self) -> Option { if self.index >= self.narches { None } else { let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start; let arch = self .data .pread_with::(offset, scroll::BE) .map_err(core::convert::Into::into); self.index += 1; Some(arch) } } } /// Iterator over every entry contained in this `MultiArch` container pub struct SingleArchIterator<'a> { index: usize, data: &'a [u8], narches: usize, start: usize, } pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result { if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC { Ok(crate::Hint::Archive) } else { let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?; match magic { header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => { if let Some(ctx) = maybe_ctx { Ok(crate::Hint::Mach(crate::HintData { is_lsb: ctx.le.is_little(), is_64: Some(ctx.container.is_big()), })) } else { Err(error::Error::Malformed(format!( "Correct mach magic {:#x} does not have a matching parsing context!", magic ))) } } fat::FAT_MAGIC => { // should probably verify this is always Big Endian... let narchitectures = bytes.pread_with::(4, BE)? as usize; Ok(crate::Hint::MachFat(narchitectures)) } _ => Ok(crate::Hint::Unknown(bytes.pread::(0)?)), } } } fn extract_multi_entry(bytes: &[u8]) -> error::Result { if let Some(hint_bytes) = take_hint_bytes(bytes) { match peek_bytes(hint_bytes)? { crate::Hint::Mach(_) => { let binary = MachO::parse(bytes, 0)?; Ok(SingleArch::MachO(binary)) } crate::Hint::Archive => { let archive = archive::Archive::parse(bytes)?; Ok(SingleArch::Archive(archive)) } _ => Err(error::Error::Malformed(format!( "multi-arch entry must be a Mach-O binary or an archive" ))), } } else { Err(error::Error::Malformed(format!("Object is too small"))) } } impl<'a> Iterator for SingleArchIterator<'a> { type Item = error::Result>; fn next(&mut self) -> Option { if self.index >= self.narches { None } else { let index = self.index; let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; self.index += 1; match self.data.pread_with::(offset, scroll::BE) { Ok(arch) => { let bytes = arch.slice(self.data); Some(extract_multi_entry(bytes)) } Err(e) => Some(Err(e.into())), } } } } impl<'a, 'b> IntoIterator for &'b MultiArch<'a> { type Item = error::Result>; type IntoIter = SingleArchIterator<'a>; fn into_iter(self) -> Self::IntoIter { SingleArchIterator { index: 0, data: self.data, narches: self.narches, start: self.start, } } } impl<'a> MultiArch<'a> { /// Lazily construct `Self` pub fn new(bytes: &'a [u8]) -> error::Result { let header = fat::FatHeader::parse(bytes)?; Ok(MultiArch { data: bytes, start: fat::SIZEOF_FAT_HEADER, narches: header.nfat_arch as usize, }) } /// Iterate every fat arch header pub fn iter_arches(&self) -> FatArchIterator { FatArchIterator { index: 0, data: self.data, narches: self.narches, start: self.start, } } /// Return all the architectures in this binary pub fn arches(&self) -> error::Result> { if self.narches > self.data.len() / fat::SIZEOF_FAT_ARCH { return Err(error::Error::BufferTooShort(self.narches, "arches")); } let mut arches = Vec::with_capacity(self.narches); for arch in self.iter_arches() { arches.push(arch?); } Ok(arches) } /// Try to get the Mach-o binary at `index` pub fn get(&self, index: usize) -> error::Result> { if index >= self.narches { return Err(error::Error::Malformed(format!( "Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches ))); } let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; let arch = self.data.pread_with::(offset, scroll::BE)?; let bytes = arch.slice(self.data); extract_multi_entry(bytes) } pub fn find) -> bool>( &'a self, f: F, ) -> Option>> { for (i, arch) in self.iter_arches().enumerate() { if f(arch) { return Some(self.get(i)); } } None } /// Try and find the `cputype` in `Self`, if there is one pub fn find_cputype(&self, cputype: u32) -> error::Result> { for arch in self.iter_arches() { let arch = arch?; if arch.cputype == cputype { return Ok(Some(arch)); } } Ok(None) } } impl<'a> fmt::Debug for MultiArch<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("MultiArch") .field("arches", &self.arches().unwrap_or_default()) .field("data", &self.data.len()) .finish() } } #[derive(Debug)] #[allow(clippy::large_enum_variant)] /// Either a collection of multiple architectures, or a single mach-o binary pub enum Mach<'a> { /// A "fat" multi-architecture binary container Fat(MultiArch<'a>), /// A regular Mach-o binary Binary(MachO<'a>), } impl<'a> Mach<'a> { /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary pub fn parse(bytes: &'a [u8]) -> error::Result { let size = bytes.len(); if size < 4 { let error = error::Error::Malformed("size is smaller than a magical number".into()); return Err(error); } let magic = peek(&bytes, 0)?; match magic { fat::FAT_MAGIC => { let multi = MultiArch::new(bytes)?; Ok(Mach::Fat(multi)) } // we might be a regular binary _ => { let binary = MachO::parse(bytes, 0)?; Ok(Mach::Binary(binary)) } } } } #[cfg(test)] mod test { use super::{Mach, SingleArch}; #[test] fn parse_multi_arch_of_macho_binaries() { // Create via: // clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c // clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c // lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64 // strip hello_world_fat_binaries let bytes = include_bytes!(concat!( env!("CARGO_MANIFEST_DIR"), "/assets/hello_world_fat_binaries" )); let mach = Mach::parse(bytes).expect("failed to parse input file"); match mach { Mach::Fat(fat) => { assert!(fat.into_iter().count() > 0); for entry in fat.into_iter() { let entry = entry.expect("failed to read entry"); match entry { SingleArch::MachO(macho) => { assert!(macho.symbols().count() > 0); } _ => panic!("expected MultiArchEntry::MachO, got {:?}", entry), } } } Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"), } } #[test] fn parse_multi_arch_of_archives() { // Created with: // clang -c -o /tmp/hello_world.o hello_world.c // ar -r /tmp/hello_world.a /tmp/hello_world.o // lipo -create -output hello_world_fat_archives /tmp/hello_world.a // strip hello_world_fat_archives let bytes = include_bytes!(concat!( env!("CARGO_MANIFEST_DIR"), "/assets/hello_world_fat_archives" )); let mach = Mach::parse(bytes).expect("failed to parse input file"); match mach { Mach::Fat(fat) => { assert!(fat.into_iter().count() > 0); for entry in fat.into_iter() { let entry = entry.expect("failed to read entry"); match entry { SingleArch::Archive(archive) => { assert!(!archive.members().is_empty()) } _ => panic!("expected MultiArchEntry::Archive, got {:?}", entry), } } } Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"), } } }