summaryrefslogtreecommitdiffstats
path: root/third_party/rust/goblin/src/mach/mod.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/goblin/src/mach/mod.rs
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/goblin/src/mach/mod.rs')
-rw-r--r--third_party/rust/goblin/src/mach/mod.rs586
1 files changed, 586 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/mach/mod.rs b/third_party/rust/goblin/src/mach/mod.rs
new file mode 100644
index 0000000000..1636000080
--- /dev/null
+++ b/third_party/rust/goblin/src/mach/mod.rs
@@ -0,0 +1,586 @@
+//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
+use alloc::vec::Vec;
+use core::fmt;
+
+use log::debug;
+
+use scroll::ctx::SizeWith;
+use scroll::{Pread, BE};
+
+use crate::{archive, container};
+use crate::{error, take_hint_bytes};
+
+pub mod bind_opcodes;
+pub mod constants;
+pub mod exports;
+pub mod fat;
+pub mod header;
+pub mod imports;
+pub mod load_command;
+pub mod relocation;
+pub mod segment;
+pub mod symbols;
+
+pub use self::constants::cputype;
+
+/// Reads the 32-bit magic number found at `offset` in `bytes`, decoding it as big endian.
+///
+/// # Errors
+///
+/// Propagates the read error when a `u32` cannot be read at `offset`.
+pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
+    let magic: u32 = bytes.pread_with(offset, BE)?;
+    Ok(magic)
+}
+
+/// Reads the big-endian magic number at `offset` and derives the parsing context it implies.
+///
+/// The returned context is `Some` only for the four Mach-O magics: the `_64` magics select
+/// `Container::Big`, and the byte-swapped (`CIGAM`) magics are flagged as LSB when the
+/// endianness is built. Any other magic is returned together with `None`.
+///
+/// # Errors
+///
+/// Propagates the read error when a `u32` cannot be read at `offset`.
+pub fn parse_magic_and_ctx(
+    bytes: &[u8],
+    offset: usize,
+) -> error::Result<(u32, Option<container::Ctx>)> {
+    use crate::container::Container;
+    use crate::mach::header::*;
+    let magic = bytes.pread_with::<u32>(offset, BE)?;
+    // One table lookup yields both properties encoded in the magic number.
+    let (width, is_lsb) = match magic {
+        MH_MAGIC => (Container::Little, false),
+        MH_CIGAM => (Container::Little, true),
+        MH_MAGIC_64 => (Container::Big, false),
+        MH_CIGAM_64 => (Container::Big, true),
+        _ => return Ok((magic, None)),
+    };
+    let endianness = scroll::Endian::from(is_lsb);
+    Ok((magic, Some(container::Ctx::new(width, endianness))))
+}
+
+/// A parsed 32/64-bit Mach-o binary of either endianness, borrowing from the input bytes.
+pub struct MachO<'a> {
+    /// The Mach-o header read from the start of the binary
+    pub header: header::Header,
+    /// Every load command, in the order it was parsed
+    pub load_commands: Vec<load_command::LoadCommand>,
+    /// The segments described by the 32- and 64-bit segment load commands
+    pub segments: segment::Segments<'a>,
+    /// The "Nlist" style symbol table; `None` when no symtab load command was seen
+    pub symbols: Option<symbols::Symbols<'a>>,
+    /// The dylibs this binary loads. Index 0 is the binary's own dylib id when it has one,
+    /// and the placeholder `"self"` otherwise
+    pub libs: Vec<&'a str>,
+    /// The runtime search paths collected from the rpath load commands
+    pub rpaths: Vec<&'a str>,
+    /// The entry point as a virtual memory address; 0 when the binary declares none
+    pub entry: u64,
+    /// `true` when `entry` came from an `LC_UNIXTHREAD` command rather than `LC_MAIN`
+    pub old_style_entry: bool,
+    /// The dylib id of this binary, if it has one
+    pub name: Option<&'a str>,
+    /// Whether the parsing context is little-endian
+    pub little_endian: bool,
+    /// Whether the parsing context uses the 64-bit container
+    pub is_64: bool,
+    /// The parsing context (container width and endianness) derived from the magic number
+    ctx: container::Ctx,
+    /// The bytes handed to `parse`, kept so relocations can be read on demand
+    data: &'a [u8],
+    /// The export trie, when a dyld info or exports trie command was present
+    export_trie: Option<exports::ExportTrie<'a>>,
+    /// The bind opcode interpreter, when a dyld info command was present
+    bind_interpreter: Option<imports::BindInterpreter<'a>>,
+}
+
+/// Manual `Debug` implementation: prints the public fields plus the results of the lazy
+/// `symbols()`, `exports()` and `imports()` accessors instead of the raw private state.
+impl<'a> fmt::Debug for MachO<'a> {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        fmt.debug_struct("MachO")
+            .field("header", &self.header)
+            .field("load_commands", &self.load_commands)
+            .field("segments", &self.segments)
+            .field("entry", &self.entry)
+            .field("old_style_entry", &self.old_style_entry)
+            .field("libs", &self.libs)
+            // `rpaths` is a public field like `libs`, so it belongs in the output too.
+            .field("rpaths", &self.rpaths)
+            .field("name", &self.name)
+            .field("little_endian", &self.little_endian)
+            .field("is_64", &self.is_64)
+            .field("symbols()", &self.symbols().collect::<Vec<_>>())
+            .field("exports()", &self.exports())
+            .field("imports()", &self.imports())
+            .finish()
+    }
+}
+
+impl<'a> MachO<'a> {
+    /// Returns `true` when the header's file type is `MH_OBJECT` (a relocatable object file).
+    pub fn is_object_file(&self) -> bool {
+        self.header.filetype == header::MH_OBJECT
+    }
+
+    /// Returns an iterator over all the symbols in this binary.
+    ///
+    /// When no symbol table was parsed, a default-constructed iterator is returned.
+    pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
+        match self.symbols {
+            Some(ref symbols) => symbols.into_iter(),
+            None => symbols::SymbolIterator::default(),
+        }
+    }
+
+    /// Collects the relocations of every section that has at least one.
+    ///
+    /// Each tuple holds the index of the section within its segment, an iterator over the
+    /// section's relocations, and the section itself.
+    ///
+    /// # Errors
+    ///
+    /// Fails as soon as a section cannot be read from its segment.
+    pub fn relocations(
+        &self,
+    ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
+        debug!("Iterating relocations");
+        let mut relocs = Vec::new();
+        for segment in &self.segments {
+            for (j, section) in segment.into_iter().enumerate() {
+                let (section, _data) = section?;
+                if section.nreloc > 0 {
+                    relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
+                }
+            }
+        }
+        Ok(relocs)
+    }
+
+    /// Returns the exported symbols of this binary; empty when it has no export trie.
+    pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
+        match self.export_trie {
+            Some(ref trie) => trie.exports(self.libs.as_slice()),
+            None => Ok(Vec::new()),
+        }
+    }
+
+    /// Returns the imported symbols that dyld knows about; empty when the binary has no
+    /// bind information.
+    pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
+        match self.bind_interpreter {
+            Some(ref interpreter) => {
+                interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
+            }
+            None => Ok(Vec::new()),
+        }
+    }
+
+    /// Parses the Mach-o binary found in `bytes` at `offset`.
+    ///
+    /// The header is read first, then exactly `header.ncmds` load commands. While walking
+    /// them, the segments, symbol table, dylib names, rpaths, export/bind information and
+    /// entry point commands are collected.
+    ///
+    /// # Errors
+    ///
+    /// * `BadMagic` when the magic at `offset` is not one of the Mach-o magics.
+    /// * `BufferTooShort` when the header declares more load commands than `sizeofcmds`
+    ///   could hold, or `sizeofcmds` exceeds the length of `bytes`.
+    /// * `Malformed` when an `LC_MAIN` command is present but there is no `__TEXT` segment.
+    /// * Any error produced while reading the header, a load command or its payload.
+    pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> {
+        let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
+        let ctx = match maybe_ctx {
+            Some(ctx) => ctx,
+            None => return Err(error::Error::BadMagic(u64::from(magic))),
+        };
+        debug!("Ctx: {:?}", ctx);
+        let offset = &mut offset;
+        let header: header::Header = bytes.pread_with(*offset, ctx)?;
+        debug!("Mach-o header: {:?}", header);
+        let little_endian = ctx.le.is_little();
+        let is_64 = ctx.container.is_big();
+        *offset += header::Header::size_with(&ctx.container);
+        let ncmds = header.ncmds;
+
+        let sizeofcmds = header.sizeofcmds as usize;
+        // a load cmd is at least 2 * 4 bytes, (type, sizeof); checking this up front also
+        // bounds the allocation below by the size of the input
+        if ncmds > sizeofcmds / 8 || sizeofcmds > bytes.len() {
+            return Err(error::Error::BufferTooShort(ncmds, "load commands"));
+        }
+
+        let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
+        let mut symbols = None;
+        // slot 0 is a placeholder that an LC_ID_DYLIB command replaces with the real name
+        let mut libs = vec!["self"];
+        let mut rpaths = vec![];
+        let mut export_trie = None;
+        let mut bind_interpreter = None;
+        let mut unixthread_entry_address = None;
+        let mut main_entry_offset = None;
+        let mut name = None;
+        let mut segments = segment::Segments::new(ctx);
+        for i in 0..ncmds {
+            let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
+            debug!("{} - {:?}", i, cmd);
+            match cmd.command {
+                load_command::CommandVariant::Segment32(command) => {
+                    // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue?
+                    segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?)
+                }
+                load_command::CommandVariant::Segment64(command) => {
+                    segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
+                }
+                load_command::CommandVariant::Symtab(command) => {
+                    symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?);
+                }
+                load_command::CommandVariant::LoadDylib(command)
+                | load_command::CommandVariant::LoadUpwardDylib(command)
+                | load_command::CommandVariant::ReexportDylib(command)
+                | load_command::CommandVariant::LoadWeakDylib(command)
+                | load_command::CommandVariant::LazyLoadDylib(command) => {
+                    // the name is stored relative to the start of its load command
+                    let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
+                    libs.push(lib);
+                }
+                load_command::CommandVariant::Rpath(command) => {
+                    let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?;
+                    rpaths.push(rpath);
+                }
+                load_command::CommandVariant::DyldInfo(command)
+                | load_command::CommandVariant::DyldInfoOnly(command) => {
+                    export_trie = Some(exports::ExportTrie::new(bytes, &command));
+                    bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
+                }
+                load_command::CommandVariant::DyldExportsTrie(command) => {
+                    export_trie = Some(exports::ExportTrie::new_from_linkedit_data_command(
+                        bytes, &command,
+                    ));
+                }
+                load_command::CommandVariant::Unixthread(command) => {
+                    // dyld cares only about the first LC_UNIXTHREAD
+                    if unixthread_entry_address.is_none() {
+                        unixthread_entry_address =
+                            Some(command.instruction_pointer(header.cputype)?);
+                    }
+                }
+                load_command::CommandVariant::Main(command) => {
+                    // dyld cares only about the first LC_MAIN
+                    if main_entry_offset.is_none() {
+                        main_entry_offset = Some(command.entryoff);
+                    }
+                }
+                load_command::CommandVariant::IdDylib(command) => {
+                    let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
+                    libs[0] = id;
+                    name = Some(id);
+                }
+                _ => (),
+            }
+            cmds.push(cmd)
+        }
+
+        // dyld prefers LC_MAIN over LC_UNIXTHREAD
+        // choose the same way here
+        let (entry, old_style_entry) = if let Some(entryoff) = main_entry_offset {
+            // map the entrypoint offset to a virtual memory address, using the first
+            // __TEXT segment as the reference point
+            //
+            // `vmaddr`, `fileoff` and `entryoff` all come straight from the (untrusted)
+            // file. Explicit wrapping arithmetic yields the same address as a release
+            // build of plain `-`/`+`, but cannot panic in overflow-checked builds.
+            let base_address = segments
+                .iter()
+                .find(|s| &s.segname[0..7] == b"__TEXT\0")
+                .map(|s| s.vmaddr.wrapping_sub(s.fileoff))
+                .ok_or_else(|| {
+                    error::Error::Malformed(format!(
+                        "image specifies LC_MAIN offset {} but has no __TEXT segment",
+                        entryoff
+                    ))
+                })?;
+
+            (base_address.wrapping_add(entryoff), false)
+        } else if let Some(address) = unixthread_entry_address {
+            (address, true)
+        } else {
+            (0, false)
+        };
+
+        Ok(MachO {
+            header,
+            load_commands: cmds,
+            segments,
+            symbols,
+            libs,
+            rpaths,
+            export_trie,
+            bind_interpreter,
+            entry,
+            old_style_entry,
+            name,
+            ctx,
+            is_64,
+            little_endian,
+            data: bytes,
+        })
+    }
+}
+
+/// A Mach-o multi architecture (Fat) binary container.
+///
+/// Only the fat header is read on construction; the per-architecture headers and the
+/// binaries they point at are read on demand.
+pub struct MultiArch<'a> {
+    /// The number of architectures declared by the fat header
+    pub narches: usize,
+    /// The bytes of the whole container
+    data: &'a [u8],
+    /// Byte offset in `data` at which the fat arch headers begin
+    start: usize,
+}
+
+/// Iterator yielding the fat architecture headers of a `MultiArch` container, one
+/// `error::Result<fat::FatArch>` per declared architecture.
+pub struct FatArchIterator<'a> {
+    /// The bytes of the whole container
+    data: &'a [u8],
+    /// Byte offset in `data` of the first fat arch header
+    start: usize,
+    /// Total number of headers to yield
+    narches: usize,
+    /// Index of the header the next call to `next` will read
+    index: usize,
+}
+
+/// A single architecture from a multi architecture binary container
+/// ([MultiArch]).
+#[derive(Debug)]
+// The variants differ considerably in size; they are deliberately stored inline.
+#[allow(clippy::large_enum_variant)]
+pub enum SingleArch<'a> {
+    /// The entry is a Mach-o binary
+    MachO(MachO<'a>),
+    /// The entry is an archive
+    Archive(archive::Archive<'a>),
+}
+
+impl<'a> Iterator for FatArchIterator<'a> {
+    type Item = error::Result<fat::FatArch>;
+
+    /// Reads the next big-endian fat arch header, or returns `None` once `narches` headers
+    /// have been produced. A failed read is yielded as `Err` and does not end the iteration.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index >= self.narches {
+            return None;
+        }
+        // Headers are stored back to back, starting at `start`.
+        let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
+        let header = self.data.pread_with::<fat::FatArch>(offset, BE);
+        self.index += 1;
+        Some(header.map_err(error::Error::from))
+    }
+}
+
+/// Iterator yielding every entry of a `MultiArch` container, parsed as a `SingleArch`.
+pub struct SingleArchIterator<'a> {
+    /// The bytes of the whole container
+    data: &'a [u8],
+    /// Byte offset in `data` of the first fat arch header
+    start: usize,
+    /// Total number of entries to yield
+    narches: usize,
+    /// Index of the entry the next call to `next` will parse
+    index: usize,
+}
+
+/// Classifies the first 16 bytes of a file.
+///
+/// Returns `Hint::Archive` when they start with the archive magic, `Hint::Mach` (with
+/// endianness and bitness) for a Mach-o magic, `Hint::MachFat` (with the architecture count
+/// read big-endian from byte 4) for the fat magic, and `Hint::Unknown` carrying the first
+/// eight bytes as a `u64` otherwise.
+///
+/// # Errors
+///
+/// Propagates read errors, and reports `Malformed` if a Mach-o magic comes back without a
+/// parsing context.
+pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<crate::Hint> {
+    if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
+        return Ok(crate::Hint::Archive);
+    }
+
+    let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?;
+    match magic {
+        fat::FAT_MAGIC => {
+            // should probably verify this is always Big Endian...
+            let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
+            Ok(crate::Hint::MachFat(narchitectures))
+        }
+        header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
+            maybe_ctx
+                .map(|ctx| {
+                    crate::Hint::Mach(crate::HintData {
+                        is_lsb: ctx.le.is_little(),
+                        is_64: Some(ctx.container.is_big()),
+                    })
+                })
+                .ok_or_else(|| {
+                    error::Error::Malformed(format!(
+                        "Correct mach magic {:#x} does not have a matching parsing context!",
+                        magic
+                    ))
+                })
+        }
+        _ => {
+            let unknown = bytes.pread::<u64>(0)?;
+            Ok(crate::Hint::Unknown(unknown))
+        }
+    }
+}
+
+/// Parses one entry of a fat container, given the bytes of that entry.
+///
+/// The leading hint bytes decide how the entry is parsed: a Mach-o magic yields
+/// `SingleArch::MachO`, an archive magic yields `SingleArch::Archive`.
+///
+/// # Errors
+///
+/// Returns `Malformed` when `take_hint_bytes` yields nothing for `bytes` or when the entry
+/// is neither a Mach-o binary nor an archive, and propagates any error from the hint
+/// detection or from the selected parser.
+fn extract_multi_entry(bytes: &[u8]) -> error::Result<SingleArch> {
+    let hint_bytes = match take_hint_bytes(bytes) {
+        Some(hint_bytes) => hint_bytes,
+        // constant messages need no `format!`; a plain conversion is enough
+        None => return Err(error::Error::Malformed("Object is too small".into())),
+    };
+    match peek_bytes(hint_bytes)? {
+        crate::Hint::Mach(_) => {
+            let binary = MachO::parse(bytes, 0)?;
+            Ok(SingleArch::MachO(binary))
+        }
+        crate::Hint::Archive => {
+            let archive = archive::Archive::parse(bytes)?;
+            Ok(SingleArch::Archive(archive))
+        }
+        _ => Err(error::Error::Malformed(
+            "multi-arch entry must be a Mach-O binary or an archive".into(),
+        )),
+    }
+}
+
+impl<'a> Iterator for SingleArchIterator<'a> {
+    type Item = error::Result<SingleArch<'a>>;
+
+    /// Reads the next fat arch header and parses the slice of the container it describes.
+    ///
+    /// Returns `None` once `narches` entries have been produced. A header or entry that
+    /// fails to parse is yielded as `Err`, and iteration continues with the next index.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index >= self.narches {
+            return None;
+        }
+        let data = self.data;
+        let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
+        self.index += 1;
+        let entry = data
+            .pread_with::<fat::FatArch>(offset, BE)
+            .map_err(error::Error::from)
+            .and_then(|arch| extract_multi_entry(arch.slice(data)));
+        Some(entry)
+    }
+}
+
+/// Iterating a borrowed `MultiArch` parses each contained entry in turn.
+impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
+    type Item = error::Result<SingleArch<'a>>;
+    type IntoIter = SingleArchIterator<'a>;
+
+    /// Creates an iterator positioned at the first entry of the container.
+    fn into_iter(self) -> Self::IntoIter {
+        let (data, start, narches) = (self.data, self.start, self.narches);
+        SingleArchIterator {
+            index: 0,
+            data,
+            narches,
+            start,
+        }
+    }
+}
+
+impl<'a> MultiArch<'a> {
+    /// Lazily construct `Self`: only the fat header is parsed here.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the fat header cannot be parsed from `bytes`.
+    pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
+        let header = fat::FatHeader::parse(bytes)?;
+        Ok(MultiArch {
+            data: bytes,
+            start: fat::SIZEOF_FAT_HEADER,
+            narches: header.nfat_arch as usize,
+        })
+    }
+
+    /// Iterate every fat arch header
+    pub fn iter_arches(&self) -> FatArchIterator {
+        FatArchIterator {
+            index: 0,
+            data: self.data,
+            narches: self.narches,
+            start: self.start,
+        }
+    }
+
+    /// Return all the architectures in this binary
+    ///
+    /// # Errors
+    ///
+    /// Returns `BufferTooShort` when the container is too small to hold the declared number
+    /// of architecture headers, or the first error hit while reading a header.
+    pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
+        // Reject impossible counts before allocating room for them.
+        if self.narches > self.data.len() / fat::SIZEOF_FAT_ARCH {
+            return Err(error::Error::BufferTooShort(self.narches, "arches"));
+        }
+
+        let mut arches = Vec::with_capacity(self.narches);
+        for arch in self.iter_arches() {
+            arches.push(arch?);
+        }
+        Ok(arches)
+    }
+
+    /// Try to get the Mach-o binary at `index`
+    ///
+    /// # Errors
+    ///
+    /// Returns `Malformed` when `index` is not below `narches`, and propagates any error
+    /// from reading the fat arch header or parsing the entry it describes.
+    pub fn get(&self, index: usize) -> error::Result<SingleArch<'a>> {
+        if index >= self.narches {
+            return Err(error::Error::Malformed(format!(
+                "Requested the {}-th binary, but there are only {} architectures in this container",
+                index, self.narches
+            )));
+        }
+        let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
+        let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
+        let bytes = arch.slice(self.data);
+        extract_multi_entry(bytes)
+    }
+
+    /// Parses and returns the first entry whose fat arch header satisfies `f`.
+    ///
+    /// `f` is called with the result of reading each header in order, so it also sees
+    /// headers that failed to parse. Returns `None` when no header satisfies `f`.
+    ///
+    /// The borrow of `self` is independent of the data lifetime `'a`: the returned entry
+    /// borrows only from the underlying bytes, never from this container.
+    pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
+        &self,
+        f: F,
+    ) -> Option<error::Result<SingleArch<'a>>> {
+        for (i, arch) in self.iter_arches().enumerate() {
+            if f(arch) {
+                return Some(self.get(i));
+            }
+        }
+        None
+    }
+
+    /// Try and find the `cputype` in `Self`, if there is one
+    ///
+    /// # Errors
+    ///
+    /// Fails when a header read before a match is found cannot be parsed.
+    pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
+        for arch in self.iter_arches() {
+            let arch = arch?;
+            if arch.cputype == cputype {
+                return Ok(Some(arch));
+            }
+        }
+        Ok(None)
+    }
+}
+
+/// Prints the architecture headers and the length of the container's data rather than the
+/// data itself.
+impl<'a> fmt::Debug for MultiArch<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Headers that cannot be read are shown as an empty list instead of failing.
+        let arches = self.arches().unwrap_or_default();
+        let data_len = self.data.len();
+        let mut out = f.debug_struct("MultiArch");
+        out.field("arches", &arches);
+        out.field("data", &data_len);
+        out.finish()
+    }
+}
+
+/// Either a collection of multiple architectures, or a single mach-o binary
+#[derive(Debug)]
+// The variants differ considerably in size; they are deliberately stored inline.
+#[allow(clippy::large_enum_variant)]
+pub enum Mach<'a> {
+    /// A "fat" container holding several architectures
+    Fat(MultiArch<'a>),
+    /// A plain, single-architecture Mach-o binary
+    Binary(MachO<'a>),
+}
+
+impl<'a> Mach<'a> {
+    /// Parses `bytes` as a fat container when they start with the fat magic, and as a
+    /// regular Mach-o binary otherwise.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Malformed` when `bytes` is shorter than the four-byte magic, and propagates
+    /// any error from the selected parser.
+    pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
+        if bytes.len() < 4 {
+            return Err(error::Error::Malformed(
+                "size is smaller than a magical number".into(),
+            ));
+        }
+        if peek(bytes, 0)? == fat::FAT_MAGIC {
+            MultiArch::new(bytes).map(Mach::Fat)
+        } else {
+            // we might be a regular binary
+            MachO::parse(bytes, 0).map(Mach::Binary)
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::{Mach, SingleArch};
+
+    /// A fat container of Mach-o binaries must yield only `SingleArch::MachO` entries,
+    /// each with at least one symbol.
+    #[test]
+    fn parse_multi_arch_of_macho_binaries() {
+        // Create via:
+        // clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c
+        // clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c
+        // lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64
+        // strip hello_world_fat_binaries
+        let bytes = include_bytes!(concat!(
+            env!("CARGO_MANIFEST_DIR"),
+            "/assets/hello_world_fat_binaries"
+        ));
+        let mach = Mach::parse(bytes).expect("failed to parse input file");
+        match mach {
+            Mach::Fat(fat) => {
+                assert!(fat.into_iter().count() > 0);
+                for entry in &fat {
+                    let entry = entry.expect("failed to read entry");
+                    match entry {
+                        SingleArch::MachO(macho) => {
+                            assert!(macho.symbols().count() > 0);
+                        }
+                        // name the enum as it is actually called so a failure is searchable
+                        _ => panic!("expected SingleArch::MachO, got {:?}", entry),
+                    }
+                }
+            }
+            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
+        }
+    }
+
+    /// A fat container of archives must yield only non-empty `SingleArch::Archive` entries.
+    #[test]
+    fn parse_multi_arch_of_archives() {
+        // Created with:
+        // clang -c -o /tmp/hello_world.o hello_world.c
+        // ar -r /tmp/hello_world.a /tmp/hello_world.o
+        // lipo -create -output hello_world_fat_archives /tmp/hello_world.a
+        // strip hello_world_fat_archives
+        let bytes = include_bytes!(concat!(
+            env!("CARGO_MANIFEST_DIR"),
+            "/assets/hello_world_fat_archives"
+        ));
+        let mach = Mach::parse(bytes).expect("failed to parse input file");
+        match mach {
+            Mach::Fat(fat) => {
+                assert!(fat.into_iter().count() > 0);
+                for entry in &fat {
+                    let entry = entry.expect("failed to read entry");
+                    match entry {
+                        SingleArch::Archive(archive) => {
+                            assert!(!archive.members().is_empty())
+                        }
+                        // name the enum as it is actually called so a failure is searchable
+                        _ => panic!("expected SingleArch::Archive, got {:?}", entry),
+                    }
+                }
+            }
+            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
+        }
+    }
+}