From 2aa4a82499d4becd2284cdb482213d541b8804dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 16:29:10 +0200 Subject: Adding upstream version 86.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/goblin/src/mach/mod.rs | 413 ++++++++++++++++++++++++++++++++ 1 file changed, 413 insertions(+) create mode 100644 third_party/rust/goblin/src/mach/mod.rs (limited to 'third_party/rust/goblin/src/mach/mod.rs') diff --git a/third_party/rust/goblin/src/mach/mod.rs b/third_party/rust/goblin/src/mach/mod.rs new file mode 100644 index 0000000000..26a84afc54 --- /dev/null +++ b/third_party/rust/goblin/src/mach/mod.rs @@ -0,0 +1,413 @@ +//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions +use core::fmt; +use crate::alloc::vec::Vec; + +use log::debug; + +use scroll::{Pread, BE}; +use scroll::ctx::SizeWith; + +use crate::error; +use crate::container; + +pub mod header; +pub mod constants; +pub mod fat; +pub mod load_command; +pub mod symbols; +pub mod exports; +pub mod imports; +pub mod bind_opcodes; +pub mod relocation; +pub mod segment; + +pub use self::constants::cputype as cputype; + +/// Returns a big endian magical number +pub fn peek(bytes: &[u8], offset: usize) -> error::Result { + Ok(bytes.pread_with::(offset, scroll::BE)?) +} + +/// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. +pub fn parse_magic_and_ctx(bytes: &[u8], offset: usize) -> error::Result<(u32, Option)> { + use crate::mach::header::*; + use crate::container::Container; + let magic = bytes.pread_with::(offset, BE)?; + let ctx = match magic { + MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { + let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64; + let le = scroll::Endian::from(is_lsb); + let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { Container::Big } else { Container::Little }; + Some(container::Ctx::new(container, le)) + }, + _ => None, + }; + Ok((magic, ctx)) +} + +/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser +pub struct MachO<'a> { + /// The mach-o header + pub header: header::Header, + /// The load commands tell the kernel and dynamic linker how to use/interpret this binary + pub load_commands: Vec, + /// The load command "segments" - typically the pieces of the binary that are loaded into memory + pub segments: segment::Segments<'a>, + /// The "Nlist" style symbols in this binary - strippable + pub symbols: Option>, + /// The dylibs this library depends on + pub libs: Vec<&'a str>, + /// The entry point (as a virtual memory address), 0 if none + pub entry: u64, + /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint + pub old_style_entry: bool, + /// The name of the dylib, if any + pub name: Option<&'a str>, + /// Are we a little-endian binary? + pub little_endian: bool, + /// Are we a 64-bit binary + pub is_64: bool, + data: &'a [u8], + ctx: container::Ctx, + export_trie: Option>, + bind_interpreter: Option>, +} + +impl<'a> fmt::Debug for MachO<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MachO") + .field("header", &self.header) + .field("load_commands", &self.load_commands) + .field("segments", &self.segments) + .field("entry", &self.entry) + .field("old_style_entry", &self.old_style_entry) + .field("libs", &self.libs) + .field("name", &self.name) + .field("little_endian", &self.little_endian) + .field("is_64", &self.is_64) + .field("symbols()", &self.symbols().collect::>()) + .field("exports()", &self.exports()) + .field("imports()", &self.imports()) + .finish() + } +} + +impl<'a> MachO<'a> { + /// Is this a relocatable object file? + pub fn is_object_file(&self) -> bool { + self.header.filetype == header::MH_OBJECT + } + /// Return an iterator over all the symbols in this binary + pub fn symbols(&self) -> symbols::SymbolIterator<'a> { + if let Some(ref symbols) = self.symbols { + symbols.into_iter() + } else { + symbols::SymbolIterator::default() + } + } + /// Return a vector of the relocations in this binary + pub fn relocations(&self) -> error::Result> { + debug!("Iterating relocations"); + let mut relocs = Vec::new(); + for (_i, segment) in (&self.segments).into_iter().enumerate() { + for (j, section) in segment.into_iter().enumerate() { + let (section, _data) = section?; + if section.nreloc > 0 { + relocs.push((j, section.iter_relocations(self.data, self.ctx), section)); + } + } + } + Ok(relocs) + } + /// Return the exported symbols in this binary (if any) + pub fn exports(&self) -> error::Result> { + if let Some(ref trie) = self.export_trie { + trie.exports(self.libs.as_slice()) + } else { + Ok(vec![]) + } + } + /// Return the imported symbols in this binary that dyld knows about (if any) + pub fn imports(&self) -> error::Result> { + if let Some(ref interpreter) = self.bind_interpreter { + interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) + } else { + Ok(vec![]) + } + } + /// Parses the Mach-o binary from `bytes` at `offset` + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result> { + let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; + let ctx = if let Some(ctx) = maybe_ctx { ctx } else { return Err(error::Error::BadMagic(u64::from(magic))) }; + debug!("Ctx: {:?}", ctx); + let offset = &mut offset; + let header: header::Header = bytes.pread_with(*offset, ctx)?; + debug!("Mach-o header: {:?}", header); + let little_endian = ctx.le.is_little(); + let is_64 = ctx.container.is_big(); + *offset += header::Header::size_with(&ctx.container); + let ncmds = header.ncmds; + let mut cmds: Vec = Vec::with_capacity(ncmds); + let mut symbols = None; + let mut libs = vec!["self"]; + let mut export_trie = None; + let mut bind_interpreter = None; + let mut unixthread_entry_address = None; + let mut main_entry_offset = None; + let mut name = None; + let mut segments = segment::Segments::new(ctx); + for i in 0..ncmds { + let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; + debug!("{} - {:?}", i, cmd); + match cmd.command { + load_command::CommandVariant::Segment32(command) => { + // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? + segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) + }, + load_command::CommandVariant::Segment64(command) => { + segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?) + }, + load_command::CommandVariant::Symtab(command) => { + symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); + }, + load_command::CommandVariant::LoadDylib (command) + | load_command::CommandVariant::LoadUpwardDylib(command) + | load_command::CommandVariant::ReexportDylib (command) + | load_command::CommandVariant::LoadWeakDylib (command) + | load_command::CommandVariant::LazyLoadDylib (command) => { + let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs.push(lib); + }, + load_command::CommandVariant::DyldInfo (command) + | load_command::CommandVariant::DyldInfoOnly(command) => { + export_trie = Some(exports::ExportTrie::new(bytes, &command)); + bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command)); + }, + load_command::CommandVariant::Unixthread(command) => { + // dyld cares only about the first LC_UNIXTHREAD + if unixthread_entry_address.is_none() { + unixthread_entry_address = Some(command.instruction_pointer(header.cputype)?); + } + }, + load_command::CommandVariant::Main(command) => { + // dyld cares only about the first LC_MAIN + if main_entry_offset.is_none() { + main_entry_offset = Some(command.entryoff); + } + }, + load_command::CommandVariant::IdDylib(command) => { + let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs[0] = id; + name = Some(id); + }, + _ => () + } + cmds.push(cmd) + } + + // dyld prefers LC_MAIN over LC_UNIXTHREAD + // choose the same way here + let (entry, old_style_entry) = if let Some(offset) = main_entry_offset { + // map the entrypoint offset to a virtual memory address + let base_address = segments.iter() + .filter(|s| &s.segname[0..7] == b"__TEXT\0") + .map(|s| s.vmaddr - s.fileoff) + .next() + .ok_or_else(|| + error::Error::Malformed(format!("image specifies LC_MAIN offset {} but has no __TEXT segment", offset)) + )?; + + (base_address + offset, false) + } else if let Some(address) = unixthread_entry_address { + (address, true) + } else { + (0, false) + }; + + Ok(MachO { + header, + load_commands: cmds, + segments, + symbols, + libs, + export_trie, + bind_interpreter, + entry, + old_style_entry, + name, + ctx, + is_64, + little_endian, + data: bytes, + }) + } +} + +/// A Mach-o multi architecture (Fat) binary container +pub struct MultiArch<'a> { + data: &'a [u8], + start: usize, + pub narches: usize, +} + +/// Iterator over the fat architecture headers in a `MultiArch` container +pub struct FatArchIterator<'a> { + index: usize, + data: &'a[u8], + narches: usize, + start: usize, +} + +impl<'a> Iterator for FatArchIterator<'a> { + type Item = error::Result; + fn next(&mut self) -> Option { + if self.index >= self.narches { + None + } else { + let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start; + let arch = self.data.pread_with::(offset, scroll::BE).map_err(core::convert::Into::into); + self.index += 1; + Some(arch) + } + } +} + +/// Iterator over every `MachO` binary contained in this `MultiArch` container +pub struct MachOIterator<'a> { + index: usize, + data: &'a[u8], + narches: usize, + start: usize, +} + +impl<'a> Iterator for MachOIterator<'a> { + type Item = error::Result>; + fn next(&mut self) -> Option { + if self.index >= self.narches { + None + } else { + let index = self.index; + let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; + self.index += 1; + match self.data.pread_with::(offset, scroll::BE) { + Ok(arch) => { + let bytes = arch.slice(self.data); + let binary = MachO::parse(bytes, 0); + Some(binary) + }, + Err(e) => Some(Err(e.into())) + } + } + } +} + +impl<'a, 'b> IntoIterator for &'b MultiArch<'a> { + type Item = error::Result>; + type IntoIter = MachOIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + MachOIterator { + index: 0, + data: self.data, + narches: self.narches, + start: self.start, + } + } +} + +impl<'a> MultiArch<'a> { + /// Lazily construct `Self` + pub fn new(bytes: &'a [u8]) -> error::Result { + let header = fat::FatHeader::parse(bytes)?; + Ok(MultiArch { + data: bytes, + start: fat::SIZEOF_FAT_HEADER, + narches: header.nfat_arch as usize + }) + } + /// Iterate every fat arch header + pub fn iter_arches(&self) -> FatArchIterator { + FatArchIterator { + index: 0, + data: self.data, + narches: self.narches, + start: self.start, + } + } + /// Return all the architectures in this binary + pub fn arches(&self) -> error::Result> { + let mut arches = Vec::with_capacity(self.narches); + for arch in self.iter_arches() { + arches.push(arch?); + } + Ok(arches) + } + /// Try to get the Mach-o binary at `index` + pub fn get(&self, index: usize) -> error::Result> { + if index >= self.narches { + return Err(error::Error::Malformed(format!("Requested the {}-th binary, but there are only {} architectures in this container", index, self.narches))) + } + let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; + let arch = self.data.pread_with::(offset, scroll::BE)?; + let bytes = arch.slice(self.data); + Ok(MachO::parse(bytes, 0)?) + } + + pub fn find) -> bool>(&'a self, f: F) -> Option>> { + for (i, arch) in self.iter_arches().enumerate() { + if f(arch) { + return Some(self.get(i)); + } + } + None + } + /// Try and find the `cputype` in `Self`, if there is one + pub fn find_cputype(&self, cputype: u32) -> error::Result> { + for arch in self.iter_arches() { + let arch = arch?; + if arch.cputype == cputype { return Ok(Some(arch)) } + } + Ok(None) + } +} + +impl<'a> fmt::Debug for MultiArch<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MultiArch") + .field("arches", &self.arches().unwrap()) + .field("data", &self.data.len()) + .finish() + } +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +/// Either a collection of multiple architectures, or a single mach-o binary +pub enum Mach<'a> { + /// A "fat" multi-architecture binary container + Fat(MultiArch<'a>), + /// A regular Mach-o binary + Binary(MachO<'a>) +} + +impl<'a> Mach<'a> { + /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary + pub fn parse(bytes: &'a [u8]) -> error::Result { + let size = bytes.len(); + if size < 4 { + let error = error::Error::Malformed("size is smaller than a magical number".into()); + return Err(error); + } + let magic = peek(&bytes, 0)?; + match magic { + fat::FAT_MAGIC => { + let multi = MultiArch::new(bytes)?; + Ok(Mach::Fat(multi)) + }, + // we might be a regular binary + _ => { + let binary = MachO::parse(bytes, 0)?; + Ok(Mach::Binary(binary)) + } + } + } +} -- cgit v1.2.3