From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- third_party/rust/goblin/src/mach/mod.rs | 586 ++++++++++++++++++++++++++++++++ 1 file changed, 586 insertions(+) create mode 100644 third_party/rust/goblin/src/mach/mod.rs (limited to 'third_party/rust/goblin/src/mach/mod.rs') diff --git a/third_party/rust/goblin/src/mach/mod.rs b/third_party/rust/goblin/src/mach/mod.rs new file mode 100644 index 0000000000..1636000080 --- /dev/null +++ b/third_party/rust/goblin/src/mach/mod.rs @@ -0,0 +1,586 @@ +//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions +use alloc::vec::Vec; +use core::fmt; + +use log::debug; + +use scroll::ctx::SizeWith; +use scroll::{Pread, BE}; + +use crate::{archive, container}; +use crate::{error, take_hint_bytes}; + +pub mod bind_opcodes; +pub mod constants; +pub mod exports; +pub mod fat; +pub mod header; +pub mod imports; +pub mod load_command; +pub mod relocation; +pub mod segment; +pub mod symbols; + +pub use self::constants::cputype; + +/// Returns a big endian magical number +pub fn peek(bytes: &[u8], offset: usize) -> error::Result { + Ok(bytes.pread_with::(offset, scroll::BE)?) +} + +/// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. +pub fn parse_magic_and_ctx( + bytes: &[u8], + offset: usize, +) -> error::Result<(u32, Option)> { + use crate::container::Container; + use crate::mach::header::*; + let magic = bytes.pread_with::(offset, BE)?; + let ctx = match magic { + MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { + let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64; + let le = scroll::Endian::from(is_lsb); + let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { + Container::Big + } else { + Container::Little + }; + Some(container::Ctx::new(container, le)) + } + _ => None, + }; + Ok((magic, ctx)) +} + +/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser +pub struct MachO<'a> { + /// The mach-o header + pub header: header::Header, + /// The load commands tell the kernel and dynamic linker how to use/interpret this binary + pub load_commands: Vec, + /// The load command "segments" - typically the pieces of the binary that are loaded into memory + pub segments: segment::Segments<'a>, + /// The "Nlist" style symbols in this binary - strippable + pub symbols: Option>, + /// The dylibs this library depends on + pub libs: Vec<&'a str>, + /// The runtime search paths for dylibs this library depends on + pub rpaths: Vec<&'a str>, + /// The entry point (as a virtual memory address), 0 if none + pub entry: u64, + /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint + pub old_style_entry: bool, + /// The name of the dylib, if any + pub name: Option<&'a str>, + /// Are we a little-endian binary? + pub little_endian: bool, + /// Are we a 64-bit binary + pub is_64: bool, + data: &'a [u8], + ctx: container::Ctx, + export_trie: Option>, + bind_interpreter: Option>, +} + +impl<'a> fmt::Debug for MachO<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MachO") + .field("header", &self.header) + .field("load_commands", &self.load_commands) + .field("segments", &self.segments) + .field("entry", &self.entry) + .field("old_style_entry", &self.old_style_entry) + .field("libs", &self.libs) + .field("name", &self.name) + .field("little_endian", &self.little_endian) + .field("is_64", &self.is_64) + .field("symbols()", &self.symbols().collect::>()) + .field("exports()", &self.exports()) + .field("imports()", &self.imports()) + .finish() + } +} + +impl<'a> MachO<'a> { + /// Is this a relocatable object file? + pub fn is_object_file(&self) -> bool { + self.header.filetype == header::MH_OBJECT + } + /// Return an iterator over all the symbols in this binary + pub fn symbols(&self) -> symbols::SymbolIterator<'a> { + if let Some(ref symbols) = self.symbols { + symbols.into_iter() + } else { + symbols::SymbolIterator::default() + } + } + /// Return a vector of the relocations in this binary + pub fn relocations( + &self, + ) -> error::Result> { + debug!("Iterating relocations"); + let mut relocs = Vec::new(); + for (_i, segment) in (&self.segments).into_iter().enumerate() { + for (j, section) in segment.into_iter().enumerate() { + let (section, _data) = section?; + if section.nreloc > 0 { + relocs.push((j, section.iter_relocations(self.data, self.ctx), section)); + } + } + } + Ok(relocs) + } + /// Return the exported symbols in this binary (if any) + pub fn exports(&self) -> error::Result> { + if let Some(ref trie) = self.export_trie { + trie.exports(self.libs.as_slice()) + } else { + Ok(vec![]) + } + } + /// Return the imported symbols in this binary that dyld knows about (if any) + pub fn imports(&self) -> error::Result> { + if let Some(ref interpreter) = self.bind_interpreter { + interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) + } else { + Ok(vec![]) + } + } + /// Parses the Mach-o binary from `bytes` at `offset` + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result> { + let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; + let ctx = if let Some(ctx) = maybe_ctx { + ctx + } else { + return Err(error::Error::BadMagic(u64::from(magic))); + }; + debug!("Ctx: {:?}", ctx); + let offset = &mut offset; + let header: header::Header = bytes.pread_with(*offset, ctx)?; + debug!("Mach-o header: {:?}", header); + let little_endian = ctx.le.is_little(); + let is_64 = ctx.container.is_big(); + *offset += header::Header::size_with(&ctx.container); + let ncmds = header.ncmds; + + let sizeofcmds = header.sizeofcmds as usize; + // a load cmd is at least 2 * 4 bytes, (type, sizeof) + if ncmds > sizeofcmds / 8 || sizeofcmds > bytes.len() { + return Err(error::Error::BufferTooShort(ncmds, "load commands")); + } + + let mut cmds: Vec = Vec::with_capacity(ncmds); + let mut symbols = None; + let mut libs = vec!["self"]; + let mut rpaths = vec![]; + let mut export_trie = None; + let mut bind_interpreter = None; + let mut unixthread_entry_address = None; + let mut main_entry_offset = None; + let mut name = None; + let mut segments = segment::Segments::new(ctx); + for i in 0..ncmds { + let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; + debug!("{} - {:?}", i, cmd); + match cmd.command { + load_command::CommandVariant::Segment32(command) => { + // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? + segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) + } + load_command::CommandVariant::Segment64(command) => { + segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?) + } + load_command::CommandVariant::Symtab(command) => { + symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); + } + load_command::CommandVariant::LoadDylib(command) + | load_command::CommandVariant::LoadUpwardDylib(command) + | load_command::CommandVariant::ReexportDylib(command) + | load_command::CommandVariant::LoadWeakDylib(command) + | load_command::CommandVariant::LazyLoadDylib(command) => { + let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs.push(lib); + } + load_command::CommandVariant::Rpath(command) => { + let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?; + rpaths.push(rpath); + } + load_command::CommandVariant::DyldInfo(command) + | load_command::CommandVariant::DyldInfoOnly(command) => { + export_trie = Some(exports::ExportTrie::new(bytes, &command)); + bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command)); + } + load_command::CommandVariant::DyldExportsTrie(command) => { + export_trie = Some(exports::ExportTrie::new_from_linkedit_data_command( + bytes, &command, + )); + } + load_command::CommandVariant::Unixthread(command) => { + // dyld cares only about the first LC_UNIXTHREAD + if unixthread_entry_address.is_none() { + unixthread_entry_address = + Some(command.instruction_pointer(header.cputype)?); + } + } + load_command::CommandVariant::Main(command) => { + // dyld cares only about the first LC_MAIN + if main_entry_offset.is_none() { + main_entry_offset = Some(command.entryoff); + } + } + load_command::CommandVariant::IdDylib(command) => { + let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs[0] = id; + name = Some(id); + } + _ => (), + } + cmds.push(cmd) + } + + // dyld prefers LC_MAIN over LC_UNIXTHREAD + // choose the same way here + let (entry, old_style_entry) = if let Some(offset) = main_entry_offset { + // map the entrypoint offset to a virtual memory address + let base_address = segments + .iter() + .filter(|s| &s.segname[0..7] == b"__TEXT\0") + .map(|s| s.vmaddr - s.fileoff) + .next() + .ok_or_else(|| { + error::Error::Malformed(format!( + "image specifies LC_MAIN offset {} but has no __TEXT segment", + offset + )) + })?; + + (base_address + offset, false) + } else if let Some(address) = unixthread_entry_address { + (address, true) + } else { + (0, false) + }; + + Ok(MachO { + header, + load_commands: cmds, + segments, + symbols, + libs, + rpaths, + export_trie, + bind_interpreter, + entry, + old_style_entry, + name, + ctx, + is_64, + little_endian, + data: bytes, + }) + } +} + +/// A Mach-o multi architecture (Fat) binary container +pub struct MultiArch<'a> { + data: &'a [u8], + start: usize, + pub narches: usize, +} + +/// Iterator over the fat architecture headers in a `MultiArch` container +pub struct FatArchIterator<'a> { + index: usize, + data: &'a [u8], + narches: usize, + start: usize, +} + +/// A single architecture froma multi architecture binary container +/// ([MultiArch]). +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum SingleArch<'a> { + MachO(MachO<'a>), + Archive(archive::Archive<'a>), +} + +impl<'a> Iterator for FatArchIterator<'a> { + type Item = error::Result; + fn next(&mut self) -> Option { + if self.index >= self.narches { + None + } else { + let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start; + let arch = self + .data + .pread_with::(offset, scroll::BE) + .map_err(core::convert::Into::into); + self.index += 1; + Some(arch) + } + } +} + +/// Iterator over every entry contained in this `MultiArch` container +pub struct SingleArchIterator<'a> { + index: usize, + data: &'a [u8], + narches: usize, + start: usize, +} + +pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result { + if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC { + Ok(crate::Hint::Archive) + } else { + let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?; + match magic { + header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => { + if let Some(ctx) = maybe_ctx { + Ok(crate::Hint::Mach(crate::HintData { + is_lsb: ctx.le.is_little(), + is_64: Some(ctx.container.is_big()), + })) + } else { + Err(error::Error::Malformed(format!( + "Correct mach magic {:#x} does not have a matching parsing context!", + magic + ))) + } + } + fat::FAT_MAGIC => { + // should probably verify this is always Big Endian... + let narchitectures = bytes.pread_with::(4, BE)? as usize; + Ok(crate::Hint::MachFat(narchitectures)) + } + _ => Ok(crate::Hint::Unknown(bytes.pread::(0)?)), + } + } +} + +fn extract_multi_entry(bytes: &[u8]) -> error::Result { + if let Some(hint_bytes) = take_hint_bytes(bytes) { + match peek_bytes(hint_bytes)? { + crate::Hint::Mach(_) => { + let binary = MachO::parse(bytes, 0)?; + Ok(SingleArch::MachO(binary)) + } + crate::Hint::Archive => { + let archive = archive::Archive::parse(bytes)?; + Ok(SingleArch::Archive(archive)) + } + _ => Err(error::Error::Malformed(format!( + "multi-arch entry must be a Mach-O binary or an archive" + ))), + } + } else { + Err(error::Error::Malformed(format!("Object is too small"))) + } +} + +impl<'a> Iterator for SingleArchIterator<'a> { + type Item = error::Result>; + fn next(&mut self) -> Option { + if self.index >= self.narches { + None + } else { + let index = self.index; + let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; + self.index += 1; + match self.data.pread_with::(offset, scroll::BE) { + Ok(arch) => { + let bytes = arch.slice(self.data); + Some(extract_multi_entry(bytes)) + } + Err(e) => Some(Err(e.into())), + } + } + } +} + +impl<'a, 'b> IntoIterator for &'b MultiArch<'a> { + type Item = error::Result>; + type IntoIter = SingleArchIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + SingleArchIterator { + index: 0, + data: self.data, + narches: self.narches, + start: self.start, + } + } +} + +impl<'a> MultiArch<'a> { + /// Lazily construct `Self` + pub fn new(bytes: &'a [u8]) -> error::Result { + let header = fat::FatHeader::parse(bytes)?; + Ok(MultiArch { + data: bytes, + start: fat::SIZEOF_FAT_HEADER, + narches: header.nfat_arch as usize, + }) + } + /// Iterate every fat arch header + pub fn iter_arches(&self) -> FatArchIterator { + FatArchIterator { + index: 0, + data: self.data, + narches: self.narches, + start: self.start, + } + } + /// Return all the architectures in this binary + pub fn arches(&self) -> error::Result> { + if self.narches > self.data.len() / fat::SIZEOF_FAT_ARCH { + return Err(error::Error::BufferTooShort(self.narches, "arches")); + } + + let mut arches = Vec::with_capacity(self.narches); + for arch in self.iter_arches() { + arches.push(arch?); + } + Ok(arches) + } + /// Try to get the Mach-o binary at `index` + pub fn get(&self, index: usize) -> error::Result> { + if index >= self.narches { + return Err(error::Error::Malformed(format!( + "Requested the {}-th binary, but there are only {} architectures in this container", + index, self.narches + ))); + } + let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; + let arch = self.data.pread_with::(offset, scroll::BE)?; + let bytes = arch.slice(self.data); + extract_multi_entry(bytes) + } + + pub fn find) -> bool>( + &'a self, + f: F, + ) -> Option>> { + for (i, arch) in self.iter_arches().enumerate() { + if f(arch) { + return Some(self.get(i)); + } + } + None + } + /// Try and find the `cputype` in `Self`, if there is one + pub fn find_cputype(&self, cputype: u32) -> error::Result> { + for arch in self.iter_arches() { + let arch = arch?; + if arch.cputype == cputype { + return Ok(Some(arch)); + } + } + Ok(None) + } +} + +impl<'a> fmt::Debug for MultiArch<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MultiArch") + .field("arches", &self.arches().unwrap_or_default()) + .field("data", &self.data.len()) + .finish() + } +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +/// Either a collection of multiple architectures, or a single mach-o binary +pub enum Mach<'a> { + /// A "fat" multi-architecture binary container + Fat(MultiArch<'a>), + /// A regular Mach-o binary + Binary(MachO<'a>), +} + +impl<'a> Mach<'a> { + /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary + pub fn parse(bytes: &'a [u8]) -> error::Result { + let size = bytes.len(); + if size < 4 { + let error = error::Error::Malformed("size is smaller than a magical number".into()); + return Err(error); + } + let magic = peek(&bytes, 0)?; + match magic { + fat::FAT_MAGIC => { + let multi = MultiArch::new(bytes)?; + Ok(Mach::Fat(multi)) + } + // we might be a regular binary + _ => { + let binary = MachO::parse(bytes, 0)?; + Ok(Mach::Binary(binary)) + } + } + } +} + +#[cfg(test)] +mod test { + use super::{Mach, SingleArch}; + + #[test] + fn parse_multi_arch_of_macho_binaries() { + // Create via: + // clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c + // clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c + // lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64 + // strip hello_world_fat_binaries + let bytes = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/hello_world_fat_binaries" + )); + let mach = Mach::parse(bytes).expect("failed to parse input file"); + match mach { + Mach::Fat(fat) => { + assert!(fat.into_iter().count() > 0); + for entry in fat.into_iter() { + let entry = entry.expect("failed to read entry"); + match entry { + SingleArch::MachO(macho) => { + assert!(macho.symbols().count() > 0); + } + _ => panic!("expected MultiArchEntry::MachO, got {:?}", entry), + } + } + } + Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"), + } + } + + #[test] + fn parse_multi_arch_of_archives() { + // Created with: + // clang -c -o /tmp/hello_world.o hello_world.c + // ar -r /tmp/hello_world.a /tmp/hello_world.o + // lipo -create -output hello_world_fat_archives /tmp/hello_world.a + // strip hello_world_fat_archives + let bytes = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/hello_world_fat_archives" + )); + let mach = Mach::parse(bytes).expect("failed to parse input file"); + match mach { + Mach::Fat(fat) => { + assert!(fat.into_iter().count() > 0); + for entry in fat.into_iter() { + let entry = entry.expect("failed to read entry"); + match entry { + SingleArch::Archive(archive) => { + assert!(!archive.members().is_empty()) + } + _ => panic!("expected MultiArchEntry::Archive, got {:?}", entry), + } + } + } + Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"), + } + } +} -- cgit v1.2.3