From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- .../rust/object/src/read/macho/dyld_cache.rs | 343 ++++++++++ third_party/rust/object/src/read/macho/fat.rs | 122 ++++ third_party/rust/object/src/read/macho/file.rs | 731 +++++++++++++++++++++ .../rust/object/src/read/macho/load_command.rs | 353 ++++++++++ third_party/rust/object/src/read/macho/mod.rs | 30 + .../rust/object/src/read/macho/relocation.rs | 126 ++++ third_party/rust/object/src/read/macho/section.rs | 384 +++++++++++ third_party/rust/object/src/read/macho/segment.rs | 303 +++++++++ third_party/rust/object/src/read/macho/symbol.rs | 488 ++++++++++++++ 9 files changed, 2880 insertions(+) create mode 100644 third_party/rust/object/src/read/macho/dyld_cache.rs create mode 100644 third_party/rust/object/src/read/macho/fat.rs create mode 100644 third_party/rust/object/src/read/macho/file.rs create mode 100644 third_party/rust/object/src/read/macho/load_command.rs create mode 100644 third_party/rust/object/src/read/macho/mod.rs create mode 100644 third_party/rust/object/src/read/macho/relocation.rs create mode 100644 third_party/rust/object/src/read/macho/section.rs create mode 100644 third_party/rust/object/src/read/macho/segment.rs create mode 100644 third_party/rust/object/src/read/macho/symbol.rs (limited to 'third_party/rust/object/src/read/macho') diff --git a/third_party/rust/object/src/read/macho/dyld_cache.rs b/third_party/rust/object/src/read/macho/dyld_cache.rs new file mode 100644 index 0000000000..0839ded7d7 --- /dev/null +++ b/third_party/rust/object/src/read/macho/dyld_cache.rs @@ -0,0 +1,343 @@ +use alloc::vec::Vec; +use core::slice; + +use crate::read::{Error, File, ReadError, ReadRef, Result}; +use crate::{macho, Architecture, Endian, Endianness}; + +/// A parsed representation of the dyld shared cache. +#[derive(Debug)] +pub struct DyldCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + endian: E, + data: R, + subcaches: Vec>, + mappings: &'data [macho::DyldCacheMappingInfo], + images: &'data [macho::DyldCacheImageInfo], + arch: Architecture, +} + +/// Information about a subcache. +#[derive(Debug)] +pub struct DyldSubCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + data: R, + mappings: &'data [macho::DyldCacheMappingInfo], +} + +// This is the offset of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; + +impl<'data, E, R> DyldCache<'data, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// Parse the raw dyld shared cache data. + /// For shared caches from macOS 12 / iOS 15 and above, the subcache files need to be + /// supplied as well, in the correct order, with the .symbols subcache last (if present). + /// For example, data would be the data for dyld_shared_cache_x86_64, + /// and subcache_data would be the data for [dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...] + pub fn parse(data: R, subcache_data: &[R]) -> Result { + let header = macho::DyldCacheHeader::parse(data)?; + let (arch, endian) = header.parse_magic()?; + let mappings = header.mappings(endian, data)?; + + let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); + let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); + + if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + return Err(Error("Incorrect number of SubCaches")); + } + + // Split out the .symbols subcache data from the other subcaches. + let (symbols_subcache_data_and_uuid, subcache_data) = + if let Some(symbols_uuid) = symbols_subcache_uuid { + let (sym_data, rest_data) = subcache_data.split_last().unwrap(); + (Some((*sym_data, symbols_uuid)), rest_data) + } else { + (None, subcache_data) + }; + + // Read the regular SubCaches (.1, .2, ...), if present. + let mut subcaches = Vec::new(); + for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if sc_header.uuid != info.uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); + } + + // Read the .symbols SubCache, if present. + // Other than the UUID verification, the symbols SubCache is currently unused. + let _symbols_subcache = match symbols_subcache_data_and_uuid { + Some((data, uuid)) => { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if sc_header.uuid != uuid { + return Err(Error("Unexpected .symbols SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + Some(DyldSubCache { data, mappings }) + } + None => None, + }; + + let images = header.images(endian, data)?; + Ok(DyldCache { + endian, + data, + subcaches, + mappings, + images, + arch, + }) + } + + /// Get the architecture type of the file. + pub fn architecture(&self) -> Architecture { + self.arch + } + + /// Get the endianness of the file. + #[inline] + pub fn endianness(&self) -> Endianness { + if self.is_little_endian() { + Endianness::Little + } else { + Endianness::Big + } + } + + /// Return true if the file is little endian, false if it is big endian. + pub fn is_little_endian(&self) -> bool { + self.endian.is_little_endian() + } + + /// Iterate over the images in this cache. + pub fn images<'cache>(&'cache self) -> DyldCacheImageIterator<'data, 'cache, E, R> { + DyldCacheImageIterator { + cache: self, + iter: self.images.iter(), + } + } + + /// Find the address in a mapping and return the cache or subcache data it was found in, + /// together with the translated file offset. + pub fn data_and_offset_for_address(&self, address: u64) -> Option<(R, u64)> { + if let Some(file_offset) = address_to_file_offset(address, self.endian, self.mappings) { + return Some((self.data, file_offset)); + } + for subcache in &self.subcaches { + if let Some(file_offset) = + address_to_file_offset(address, self.endian, subcache.mappings) + { + return Some((subcache.data, file_offset)); + } + } + None + } +} + +/// An iterator over all the images (dylibs) in the dyld shared cache. +#[derive(Debug)] +pub struct DyldCacheImageIterator<'data, 'cache, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + cache: &'cache DyldCache<'data, E, R>, + iter: slice::Iter<'data, macho::DyldCacheImageInfo>, +} + +impl<'data, 'cache, E, R> Iterator for DyldCacheImageIterator<'data, 'cache, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + type Item = DyldCacheImage<'data, 'cache, E, R>; + + fn next(&mut self) -> Option> { + let image_info = self.iter.next()?; + Some(DyldCacheImage { + cache: self.cache, + image_info, + }) + } +} + +/// One image (dylib) from inside the dyld shared cache. +#[derive(Debug)] +pub struct DyldCacheImage<'data, 'cache, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + pub(crate) cache: &'cache DyldCache<'data, E, R>, + image_info: &'data macho::DyldCacheImageInfo, +} + +impl<'data, 'cache, E, R> DyldCacheImage<'data, 'cache, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// The file system path of this image. + pub fn path(&self) -> Result<&'data str> { + let path = self.image_info.path(self.cache.endian, self.cache.data)?; + // The path should always be ascii, so from_utf8 should alway succeed. + let path = core::str::from_utf8(path).map_err(|_| Error("Path string not valid utf-8"))?; + Ok(path) + } + + /// The subcache data which contains the Mach-O header for this image, + /// together with the file offset at which this image starts. + pub fn image_data_and_offset(&self) -> Result<(R, u64)> { + let address = self.image_info.address.get(self.cache.endian); + self.cache + .data_and_offset_for_address(address) + .ok_or(Error("Address not found in any mapping")) + } + + /// Parse this image into an Object. + pub fn parse_object(&self) -> Result> { + File::parse_dyld_cache_image(self) + } +} + +impl macho::DyldCacheHeader { + /// Read the dyld cache header. + pub fn parse<'data, R: ReadRef<'data>>(data: R) -> Result<&'data Self> { + data.read_at::>(0) + .read_error("Invalid dyld cache header size or alignment") + } + + /// Returns (arch, endian) based on the magic string. + pub fn parse_magic(&self) -> Result<(Architecture, E)> { + let (arch, is_big_endian) = match &self.magic { + b"dyld_v1 i386\0" => (Architecture::I386, false), + b"dyld_v1 x86_64\0" => (Architecture::X86_64, false), + b"dyld_v1 x86_64h\0" => (Architecture::X86_64, false), + b"dyld_v1 ppc\0" => (Architecture::PowerPc, true), + b"dyld_v1 armv6\0" => (Architecture::Arm, false), + b"dyld_v1 armv7\0" => (Architecture::Arm, false), + b"dyld_v1 armv7f\0" => (Architecture::Arm, false), + b"dyld_v1 armv7s\0" => (Architecture::Arm, false), + b"dyld_v1 armv7k\0" => (Architecture::Arm, false), + b"dyld_v1 arm64\0" => (Architecture::Aarch64, false), + b"dyld_v1 arm64e\0" => (Architecture::Aarch64, false), + _ => return Err(Error("Unrecognized dyld cache magic")), + }; + let endian = + E::from_big_endian(is_big_endian).read_error("Unsupported dyld cache endian")?; + Ok((arch, endian)) + } + + /// Return the mapping information table. + pub fn mappings<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result<&'data [macho::DyldCacheMappingInfo]> { + data.read_slice_at::>( + self.mapping_offset.get(endian).into(), + self.mapping_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache mapping size or alignment") + } + + /// Return the information about subcaches, if present. + pub fn subcaches<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result]>> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let subcaches = data + .read_slice_at::>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(subcaches)) + } else { + Ok(None) + } + } + + /// Return the UUID for the .symbols subcache, if present. + pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let uuid = self.symbols_subcache_uuid; + if uuid != [0; 16] { + return Some(uuid); + } + } + None + } + + /// Return the image information table. + pub fn images<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result<&'data [macho::DyldCacheImageInfo]> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + data.read_slice_at::>( + self.images_across_all_subcaches_offset.get(endian).into(), + self.images_across_all_subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } else { + data.read_slice_at::>( + self.images_offset.get(endian).into(), + self.images_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } + } +} + +impl macho::DyldCacheImageInfo { + /// The file system path of this image. + pub fn path<'data, R: ReadRef<'data>>(&self, endian: E, data: R) -> Result<&'data [u8]> { + let r_start = self.path_file_offset.get(endian).into(); + let r_end = data.len().read_error("Couldn't get data len()")?; + data.read_bytes_at_until(r_start..r_end, 0) + .read_error("Couldn't read dyld cache image path") + } + + /// Find the file offset of the image by looking up its address in the mappings. + pub fn file_offset( + &self, + endian: E, + mappings: &[macho::DyldCacheMappingInfo], + ) -> Result { + let address = self.address.get(endian); + address_to_file_offset(address, endian, mappings) + .read_error("Invalid dyld cache image address") + } +} + +/// Find the file offset of the image by looking up its address in the mappings. +pub fn address_to_file_offset( + address: u64, + endian: E, + mappings: &[macho::DyldCacheMappingInfo], +) -> Option { + for mapping in mappings { + let mapping_address = mapping.address.get(endian); + if address >= mapping_address + && address < mapping_address.wrapping_add(mapping.size.get(endian)) + { + return Some(address - mapping_address + mapping.file_offset.get(endian)); + } + } + None +} diff --git a/third_party/rust/object/src/read/macho/fat.rs b/third_party/rust/object/src/read/macho/fat.rs new file mode 100644 index 0000000000..d4301b7e11 --- /dev/null +++ b/third_party/rust/object/src/read/macho/fat.rs @@ -0,0 +1,122 @@ +use crate::read::{Architecture, Error, ReadError, ReadRef, Result}; +use crate::{macho, BigEndian, Pod}; + +pub use macho::{FatArch32, FatArch64, FatHeader}; + +impl FatHeader { + /// Attempt to parse a fat header. + /// + /// Does not validate the magic value. + pub fn parse<'data, R: ReadRef<'data>>(file: R) -> Result<&'data FatHeader> { + file.read_at::(0) + .read_error("Invalid fat header size or alignment") + } + + /// Attempt to parse a fat header and 32-bit fat arches. + pub fn parse_arch32<'data, R: ReadRef<'data>>(file: R) -> Result<&'data [FatArch32]> { + let mut offset = 0; + let header = file + .read::(&mut offset) + .read_error("Invalid fat header size or alignment")?; + if header.magic.get(BigEndian) != macho::FAT_MAGIC { + return Err(Error("Invalid 32-bit fat magic")); + } + file.read_slice::(&mut offset, header.nfat_arch.get(BigEndian) as usize) + .read_error("Invalid nfat_arch") + } + + /// Attempt to parse a fat header and 64-bit fat arches. + pub fn parse_arch64<'data, R: ReadRef<'data>>(file: R) -> Result<&'data [FatArch64]> { + let mut offset = 0; + let header = file + .read::(&mut offset) + .read_error("Invalid fat header size or alignment")?; + if header.magic.get(BigEndian) != macho::FAT_MAGIC_64 { + return Err(Error("Invalid 64-bit fat magic")); + } + file.read_slice::(&mut offset, header.nfat_arch.get(BigEndian) as usize) + .read_error("Invalid nfat_arch") + } +} + +/// A trait for generic access to `FatArch32` and `FatArch64`. +#[allow(missing_docs)] +pub trait FatArch: Pod { + type Word: Into; + + fn cputype(&self) -> u32; + fn cpusubtype(&self) -> u32; + fn offset(&self) -> Self::Word; + fn size(&self) -> Self::Word; + fn align(&self) -> u32; + + fn architecture(&self) -> Architecture { + match self.cputype() { + macho::CPU_TYPE_ARM => Architecture::Arm, + macho::CPU_TYPE_ARM64 => Architecture::Aarch64, + macho::CPU_TYPE_X86 => Architecture::I386, + macho::CPU_TYPE_X86_64 => Architecture::X86_64, + macho::CPU_TYPE_MIPS => Architecture::Mips, + macho::CPU_TYPE_POWERPC => Architecture::PowerPc, + macho::CPU_TYPE_POWERPC64 => Architecture::PowerPc64, + _ => Architecture::Unknown, + } + } + + fn file_range(&self) -> (u64, u64) { + (self.offset().into(), self.size().into()) + } + + fn data<'data, R: ReadRef<'data>>(&self, file: R) -> Result<&'data [u8]> { + file.read_bytes_at(self.offset().into(), self.size().into()) + .read_error("Invalid fat arch offset or size") + } +} + +impl FatArch for FatArch32 { + type Word = u32; + + fn cputype(&self) -> u32 { + self.cputype.get(BigEndian) + } + + fn cpusubtype(&self) -> u32 { + self.cpusubtype.get(BigEndian) + } + + fn offset(&self) -> Self::Word { + self.offset.get(BigEndian) + } + + fn size(&self) -> Self::Word { + self.size.get(BigEndian) + } + + fn align(&self) -> u32 { + self.align.get(BigEndian) + } +} + +impl FatArch for FatArch64 { + type Word = u64; + + fn cputype(&self) -> u32 { + self.cputype.get(BigEndian) + } + + fn cpusubtype(&self) -> u32 { + self.cpusubtype.get(BigEndian) + } + + fn offset(&self) -> Self::Word { + self.offset.get(BigEndian) + } + + fn size(&self) -> Self::Word { + self.size.get(BigEndian) + } + + fn align(&self) -> u32 { + self.align.get(BigEndian) + } +} diff --git a/third_party/rust/object/src/read/macho/file.rs b/third_party/rust/object/src/read/macho/file.rs new file mode 100644 index 0000000000..ab8c05757f --- /dev/null +++ b/third_party/rust/object/src/read/macho/file.rs @@ -0,0 +1,731 @@ +use alloc::vec::Vec; +use core::fmt::Debug; +use core::{mem, str}; + +use crate::read::{ + self, Architecture, ComdatKind, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, + Object, ObjectComdat, ObjectKind, ObjectMap, ObjectSection, ReadError, ReadRef, Result, + SectionIndex, SymbolIndex, +}; +use crate::{endian, macho, BigEndian, ByteString, Endian, Endianness, Pod}; + +use super::{ + DyldCacheImage, LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, + MachOSegment, MachOSegmentInternal, MachOSegmentIterator, MachOSymbol, MachOSymbolIterator, + MachOSymbolTable, Nlist, Section, Segment, SymbolTable, +}; + +/// A 32-bit Mach-O object file. +pub type MachOFile32<'data, Endian = Endianness, R = &'data [u8]> = + MachOFile<'data, macho::MachHeader32, R>; +/// A 64-bit Mach-O object file. +pub type MachOFile64<'data, Endian = Endianness, R = &'data [u8]> = + MachOFile<'data, macho::MachHeader64, R>; + +/// A partially parsed Mach-O file. +/// +/// Most of the functionality of this type is provided by the `Object` trait implementation. +#[derive(Debug)] +pub struct MachOFile<'data, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) endian: Mach::Endian, + pub(super) data: R, + pub(super) header_offset: u64, + pub(super) header: &'data Mach, + pub(super) segments: Vec>, + pub(super) sections: Vec>, + pub(super) symbols: SymbolTable<'data, Mach, R>, +} + +impl<'data, Mach, R> MachOFile<'data, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + /// Parse the raw Mach-O file data. + pub fn parse(data: R) -> Result { + let header = Mach::parse(data, 0)?; + let endian = header.endian()?; + + // Build a list of segments and sections to make some operations more efficient. + let mut segments = Vec::new(); + let mut sections = Vec::new(); + let mut symbols = SymbolTable::default(); + if let Ok(mut commands) = header.load_commands(endian, data, 0) { + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + for section in segment.sections(endian, section_data)? { + let index = SectionIndex(sections.len() + 1); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); + } + } else if let Some(symtab) = command.symtab()? { + symbols = symtab.symbols(endian, data)?; + } + } + } + + Ok(MachOFile { + endian, + data, + header_offset: 0, + header, + segments, + sections, + symbols, + }) + } + + /// Parse the Mach-O file for the given image from the dyld shared cache. + /// This will read different sections from different subcaches, if necessary. + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &DyldCacheImage<'data, 'cache, E, R>, + ) -> Result { + let (data, header_offset) = image.image_data_and_offset()?; + let header = Mach::parse(data, header_offset)?; + let endian = header.endian()?; + + // Build a list of sections to make some operations more efficient. + // Also build a list of segments, because we need to remember which ReadRef + // to read each section's data from. Only the DyldCache knows this information, + // and we won't have access to it once we've exited this function. + let mut segments = Vec::new(); + let mut sections = Vec::new(); + let mut linkedit_data: Option = None; + let mut symtab = None; + if let Ok(mut commands) = header.load_commands(endian, data, header_offset) { + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + // Each segment can be stored in a different subcache. Get the segment's + // address and look it up in the cache mappings, to find the correct cache data. + let addr = segment.vmaddr(endian).into(); + let (data, _offset) = image + .cache + .data_and_offset_for_address(addr) + .read_error("Could not find segment data in dyld shared cache")?; + if segment.name() == macho::SEG_LINKEDIT.as_bytes() { + linkedit_data = Some(data); + } + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + + for section in segment.sections(endian, section_data)? { + let index = SectionIndex(sections.len() + 1); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); + } + } else if let Some(st) = command.symtab()? { + symtab = Some(st); + } + } + } + + // The symbols are found in the __LINKEDIT segment, so make sure to read them from the + // correct subcache. + let symbols = match (symtab, linkedit_data) { + (Some(symtab), Some(linkedit_data)) => symtab.symbols(endian, linkedit_data)?, + _ => SymbolTable::default(), + }; + + Ok(MachOFile { + endian, + data, + header_offset, + header, + segments, + sections, + symbols, + }) + } + + /// Return the section at the given index. + #[inline] + pub(super) fn section_internal( + &self, + index: SectionIndex, + ) -> Result<&MachOSectionInternal<'data, Mach>> { + index + .0 + .checked_sub(1) + .and_then(|index| self.sections.get(index)) + .read_error("Invalid Mach-O section index") + } + + pub(super) fn segment_internal( + &self, + index: usize, + ) -> Result<&MachOSegmentInternal<'data, Mach, R>> { + self.segments + .get(index) + .read_error("Invalid Mach-O segment index") + } +} + +impl<'data, Mach, R> read::private::Sealed for MachOFile<'data, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> Object<'data, 'file> for MachOFile<'data, Mach, R> +where + 'data: 'file, + Mach: MachHeader, + R: 'file + ReadRef<'data>, +{ + type Segment = MachOSegment<'data, 'file, Mach, R>; + type SegmentIterator = MachOSegmentIterator<'data, 'file, Mach, R>; + type Section = MachOSection<'data, 'file, Mach, R>; + type SectionIterator = MachOSectionIterator<'data, 'file, Mach, R>; + type Comdat = MachOComdat<'data, 'file, Mach, R>; + type ComdatIterator = MachOComdatIterator<'data, 'file, Mach, R>; + type Symbol = MachOSymbol<'data, 'file, Mach, R>; + type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>; + type SymbolTable = MachOSymbolTable<'data, 'file, Mach, R>; + type DynamicRelocationIterator = NoDynamicRelocationIterator; + + fn architecture(&self) -> Architecture { + match self.header.cputype(self.endian) { + macho::CPU_TYPE_ARM => Architecture::Arm, + macho::CPU_TYPE_ARM64 => Architecture::Aarch64, + macho::CPU_TYPE_X86 => Architecture::I386, + macho::CPU_TYPE_X86_64 => Architecture::X86_64, + macho::CPU_TYPE_MIPS => Architecture::Mips, + macho::CPU_TYPE_POWERPC => Architecture::PowerPc, + macho::CPU_TYPE_POWERPC64 => Architecture::PowerPc64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.header.is_little_endian() + } + + #[inline] + fn is_64(&self) -> bool { + self.header.is_type_64() + } + + fn kind(&self) -> ObjectKind { + match self.header.filetype(self.endian) { + macho::MH_OBJECT => ObjectKind::Relocatable, + macho::MH_EXECUTE => ObjectKind::Executable, + macho::MH_CORE => ObjectKind::Core, + macho::MH_DYLIB => ObjectKind::Dynamic, + _ => ObjectKind::Unknown, + } + } + + fn segments(&'file self) -> MachOSegmentIterator<'data, 'file, Mach, R> { + MachOSegmentIterator { + file: self, + iter: self.segments.iter(), + } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option> { + // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg + // ".debug_info" to "__debug_info", and limit to 16 bytes total. + let system_name = if section_name.starts_with(b".") { + if section_name.len() > 15 { + Some(§ion_name[1..15]) + } else { + Some(§ion_name[1..]) + } + } else { + None + }; + let cmp_section_name = |section: &MachOSection<'data, 'file, Mach, R>| { + section + .name_bytes() + .map(|name| { + section_name == name + || system_name + .filter(|system_name| { + name.starts_with(b"__") && name[2..] == **system_name + }) + .is_some() + }) + .unwrap_or(false) + }; + + self.sections().find(cmp_section_name) + } + + fn section_by_index( + &'file self, + index: SectionIndex, + ) -> Result> { + let internal = *self.section_internal(index)?; + Ok(MachOSection { + file: self, + internal, + }) + } + + fn sections(&'file self) -> MachOSectionIterator<'data, 'file, Mach, R> { + MachOSectionIterator { + file: self, + iter: self.sections.iter(), + } + } + + fn comdats(&'file self) -> MachOComdatIterator<'data, 'file, Mach, R> { + MachOComdatIterator { file: self } + } + + fn symbol_by_index( + &'file self, + index: SymbolIndex, + ) -> Result> { + let nlist = self.symbols.symbol(index.0)?; + MachOSymbol::new(self, index, nlist).read_error("Unsupported Mach-O symbol index") + } + + fn symbols(&'file self) -> MachOSymbolIterator<'data, 'file, Mach, R> { + MachOSymbolIterator { + file: self, + index: 0, + } + } + + #[inline] + fn symbol_table(&'file self) -> Option> { + Some(MachOSymbolTable { file: self }) + } + + fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data, 'file, Mach, R> { + MachOSymbolIterator { + file: self, + index: self.symbols.len(), + } + } + + #[inline] + fn dynamic_symbol_table(&'file self) -> Option> { + None + } + + fn object_map(&'file self) -> ObjectMap<'data> { + self.symbols.object_map(self.endian) + } + + fn imports(&self) -> Result>> { + let mut dysymtab = None; + let mut libraries = Vec::new(); + let twolevel = self.header.flags(self.endian) & macho::MH_TWOLEVEL != 0; + if twolevel { + libraries.push(&[][..]); + } + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; + while let Some(command) = commands.next()? { + if let Some(command) = command.dysymtab()? { + dysymtab = Some(command); + } + if twolevel { + if let Some(dylib) = command.dylib()? { + libraries.push(command.string(self.endian, dylib.dylib.name)?); + } + } + } + + let mut imports = Vec::new(); + if let Some(dysymtab) = dysymtab { + let index = dysymtab.iundefsym.get(self.endian) as usize; + let number = dysymtab.nundefsym.get(self.endian) as usize; + for i in index..(index.wrapping_add(number)) { + let symbol = self.symbols.symbol(i)?; + let name = symbol.name(self.endian, self.symbols.strings())?; + let library = if twolevel { + libraries + .get(symbol.library_ordinal(self.endian) as usize) + .copied() + .read_error("Invalid Mach-O symbol library ordinal")? + } else { + &[] + }; + imports.push(Import { + name: ByteString(name), + library: ByteString(library), + }); + } + } + Ok(imports) + } + + fn exports(&self) -> Result>> { + let mut dysymtab = None; + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; + while let Some(command) = commands.next()? { + if let Some(command) = command.dysymtab()? { + dysymtab = Some(command); + break; + } + } + + let mut exports = Vec::new(); + if let Some(dysymtab) = dysymtab { + let index = dysymtab.iextdefsym.get(self.endian) as usize; + let number = dysymtab.nextdefsym.get(self.endian) as usize; + for i in index..(index.wrapping_add(number)) { + let symbol = self.symbols.symbol(i)?; + let name = symbol.name(self.endian, self.symbols.strings())?; + let address = symbol.n_value(self.endian).into(); + exports.push(Export { + name: ByteString(name), + address, + }); + } + } + Ok(exports) + } + + #[inline] + fn dynamic_relocations(&'file self) -> Option { + None + } + + fn has_debug_symbols(&self) -> bool { + self.section_by_name(".debug_info").is_some() + } + + fn mach_uuid(&self) -> Result> { + self.header.uuid(self.endian, self.data, self.header_offset) + } + + fn relative_address_base(&self) -> u64 { + 0 + } + + fn entry(&self) -> u64 { + if let Ok(mut commands) = + self.header + .load_commands(self.endian, self.data, self.header_offset) + { + while let Ok(Some(command)) = commands.next() { + if let Ok(Some(command)) = command.entry_point() { + return command.entryoff.get(self.endian); + } + } + } + 0 + } + + fn flags(&self) -> FileFlags { + FileFlags::MachO { + flags: self.header.flags(self.endian), + } + } +} + +/// An iterator over the COMDAT section groups of a `MachOFile64`. +pub type MachOComdatIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatIterator<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the COMDAT section groups of a `MachOFile64`. +pub type MachOComdatIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatIterator<'data, 'file, macho::MachHeader64, R>; + +/// An iterator over the COMDAT section groups of a `MachOFile`. +#[derive(Debug)] +pub struct MachOComdatIterator<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> Iterator for MachOComdatIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOComdat<'data, 'file, Mach, R>; + + #[inline] + fn next(&mut self) -> Option { + None + } +} + +/// A COMDAT section group of a `MachOFile32`. +pub type MachOComdat32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdat<'data, 'file, macho::MachHeader32, R>; + +/// A COMDAT section group of a `MachOFile64`. +pub type MachOComdat64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdat<'data, 'file, macho::MachHeader64, R>; + +/// A COMDAT section group of a `MachOFile`. +#[derive(Debug)] +pub struct MachOComdat<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOComdat<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectComdat<'data> for MachOComdat<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type SectionIterator = MachOComdatSectionIterator<'data, 'file, Mach, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + unreachable!(); + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + unreachable!(); + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + unreachable!(); + } + + #[inline] + fn name(&self) -> Result<&str> { + unreachable!(); + } + + #[inline] + fn sections(&self) -> Self::SectionIterator { + unreachable!(); + } +} + +/// An iterator over the sections in a COMDAT section group of a `MachOFile32`. +pub type MachOComdatSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatSectionIterator<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the sections in a COMDAT section group of a `MachOFile64`. +pub type MachOComdatSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatSectionIterator<'data, 'file, macho::MachHeader64, R>; + +/// An iterator over the sections in a COMDAT section group of a `MachOFile`. +#[derive(Debug)] +pub struct MachOComdatSectionIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> Iterator for MachOComdatSectionIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = SectionIndex; + + fn next(&mut self) -> Option { + None + } +} + +/// A trait for generic access to `MachHeader32` and `MachHeader64`. +#[allow(missing_docs)] +pub trait MachHeader: Debug + Pod { + type Word: Into; + type Endian: endian::Endian; + type Segment: Segment; + type Section: Section; + type Nlist: Nlist; + + /// Return true if this type is a 64-bit header. + /// + /// This is a property of the type, not a value in the header data. + fn is_type_64(&self) -> bool; + + /// Return true if the `magic` field signifies big-endian. + fn is_big_endian(&self) -> bool; + + /// Return true if the `magic` field signifies little-endian. + fn is_little_endian(&self) -> bool; + + fn magic(&self) -> u32; + fn cputype(&self, endian: Self::Endian) -> u32; + fn cpusubtype(&self, endian: Self::Endian) -> u32; + fn filetype(&self, endian: Self::Endian) -> u32; + fn ncmds(&self, endian: Self::Endian) -> u32; + fn sizeofcmds(&self, endian: Self::Endian) -> u32; + fn flags(&self, endian: Self::Endian) -> u32; + + // Provided methods. + + /// Read the file header. + /// + /// Also checks that the magic field in the file header is a supported format. + fn parse<'data, R: ReadRef<'data>>(data: R, offset: u64) -> read::Result<&'data Self> { + let header = data + .read_at::(offset) + .read_error("Invalid Mach-O header size or alignment")?; + if !header.is_supported() { + return Err(Error("Unsupported Mach-O header")); + } + Ok(header) + } + + fn is_supported(&self) -> bool { + self.is_little_endian() || self.is_big_endian() + } + + fn endian(&self) -> Result { + Self::Endian::from_big_endian(self.is_big_endian()).read_error("Unsupported Mach-O endian") + } + + fn load_commands<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + header_offset: u64, + ) -> Result> { + let data = data + .read_bytes_at( + header_offset + mem::size_of::() as u64, + self.sizeofcmds(endian).into(), + ) + .read_error("Invalid Mach-O load command table size")?; + Ok(LoadCommandIterator::new(endian, data, self.ncmds(endian))) + } + + /// Return the UUID from the `LC_UUID` load command, if one is present. + fn uuid<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + header_offset: u64, + ) -> Result> { + let mut commands = self.load_commands(endian, data, header_offset)?; + while let Some(command) = commands.next()? { + if let Ok(Some(uuid)) = command.uuid() { + return Ok(Some(uuid.uuid)); + } + } + Ok(None) + } +} + +impl MachHeader for macho::MachHeader32 { + type Word = u32; + type Endian = Endian; + type Segment = macho::SegmentCommand32; + type Section = macho::Section32; + type Nlist = macho::Nlist32; + + fn is_type_64(&self) -> bool { + false + } + + fn is_big_endian(&self) -> bool { + self.magic() == macho::MH_MAGIC + } + + fn is_little_endian(&self) -> bool { + self.magic() == macho::MH_CIGAM + } + + fn magic(&self) -> u32 { + self.magic.get(BigEndian) + } + + fn cputype(&self, endian: Self::Endian) -> u32 { + self.cputype.get(endian) + } + + fn cpusubtype(&self, endian: Self::Endian) -> u32 { + self.cpusubtype.get(endian) + } + + fn filetype(&self, endian: Self::Endian) -> u32 { + self.filetype.get(endian) + } + + fn ncmds(&self, endian: Self::Endian) -> u32 { + self.ncmds.get(endian) + } + + fn sizeofcmds(&self, endian: Self::Endian) -> u32 { + self.sizeofcmds.get(endian) + } + + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} + +impl MachHeader for macho::MachHeader64 { + type Word = u64; + type Endian = Endian; + type Segment = macho::SegmentCommand64; + type Section = macho::Section64; + type Nlist = macho::Nlist64; + + fn is_type_64(&self) -> bool { + true + } + + fn is_big_endian(&self) -> bool { + self.magic() == macho::MH_MAGIC_64 + } + + fn is_little_endian(&self) -> bool { + self.magic() == macho::MH_CIGAM_64 + } + + fn magic(&self) -> u32 { + self.magic.get(BigEndian) + } + + fn cputype(&self, endian: Self::Endian) -> u32 { + self.cputype.get(endian) + } + + fn cpusubtype(&self, endian: Self::Endian) -> u32 { + self.cpusubtype.get(endian) + } + + fn filetype(&self, endian: Self::Endian) -> u32 { + self.filetype.get(endian) + } + + fn ncmds(&self, endian: Self::Endian) -> u32 { + self.ncmds.get(endian) + } + + fn sizeofcmds(&self, endian: Self::Endian) -> u32 { + self.sizeofcmds.get(endian) + } + + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} diff --git a/third_party/rust/object/src/read/macho/load_command.rs b/third_party/rust/object/src/read/macho/load_command.rs new file mode 100644 index 0000000000..10daf4ed10 --- /dev/null +++ b/third_party/rust/object/src/read/macho/load_command.rs @@ -0,0 +1,353 @@ +use core::marker::PhantomData; + +use crate::endian::Endian; +use crate::macho; +use crate::pod::Pod; +use crate::read::macho::{MachHeader, SymbolTable}; +use crate::read::{Bytes, ReadError, ReadRef, Result, StringTable}; + +/// An iterator over the load commands of a `MachHeader`. +#[derive(Debug, Default, Clone, Copy)] +pub struct LoadCommandIterator<'data, E: Endian> { + endian: E, + data: Bytes<'data>, + ncmds: u32, +} + +impl<'data, E: Endian> LoadCommandIterator<'data, E> { + pub(super) fn new(endian: E, data: &'data [u8], ncmds: u32) -> Self { + LoadCommandIterator { + endian, + data: Bytes(data), + ncmds, + } + } + + /// Return the next load command. + pub fn next(&mut self) -> Result>> { + if self.ncmds == 0 { + return Ok(None); + } + let header = self + .data + .read_at::>(0) + .read_error("Invalid Mach-O load command header")?; + let cmd = header.cmd.get(self.endian); + let cmdsize = header.cmdsize.get(self.endian) as usize; + let data = self + .data + .read_bytes(cmdsize) + .read_error("Invalid Mach-O load command size")?; + self.ncmds -= 1; + Ok(Some(LoadCommandData { + cmd, + data, + marker: Default::default(), + })) + } +} + +/// The data for a `LoadCommand`. +#[derive(Debug, Clone, Copy)] +pub struct LoadCommandData<'data, E: Endian> { + cmd: u32, + // Includes the header. + data: Bytes<'data>, + marker: PhantomData, +} + +impl<'data, E: Endian> LoadCommandData<'data, E> { + /// Return the `cmd` field of the `LoadCommand`. + /// + /// This is one of the `LC_` constants. + pub fn cmd(&self) -> u32 { + self.cmd + } + + /// Return the `cmdsize` field of the `LoadCommand`. + pub fn cmdsize(&self) -> u32 { + self.data.len() as u32 + } + + /// Parse the data as the given type. + #[inline] + pub fn data(&self) -> Result<&'data T> { + self.data + .read_at(0) + .read_error("Invalid Mach-O command size") + } + + /// Raw bytes of this LoadCommand structure. + pub fn raw_data(&self) -> &'data [u8] { + self.data.0 + } + + /// Parse a load command string value. + /// + /// Strings used by load commands are specified by offsets that are + /// relative to the load command header. + pub fn string(&self, endian: E, s: macho::LcStr) -> Result<&'data [u8]> { + self.data + .read_string_at(s.offset.get(endian) as usize) + .read_error("Invalid load command string offset") + } + + /// Parse the command data according to the `cmd` field. + pub fn variant(&self) -> Result> { + Ok(match self.cmd { + macho::LC_SEGMENT => { + let mut data = self.data; + let segment = data.read().read_error("Invalid Mach-O command size")?; + LoadCommandVariant::Segment32(segment, data.0) + } + macho::LC_SYMTAB => LoadCommandVariant::Symtab(self.data()?), + macho::LC_THREAD | macho::LC_UNIXTHREAD => { + let mut data = self.data; + let thread = data.read().read_error("Invalid Mach-O command size")?; + LoadCommandVariant::Thread(thread, data.0) + } + macho::LC_DYSYMTAB => LoadCommandVariant::Dysymtab(self.data()?), + macho::LC_LOAD_DYLIB + | macho::LC_LOAD_WEAK_DYLIB + | macho::LC_REEXPORT_DYLIB + | macho::LC_LAZY_LOAD_DYLIB + | macho::LC_LOAD_UPWARD_DYLIB => LoadCommandVariant::Dylib(self.data()?), + macho::LC_ID_DYLIB => LoadCommandVariant::IdDylib(self.data()?), + macho::LC_LOAD_DYLINKER => LoadCommandVariant::LoadDylinker(self.data()?), + macho::LC_ID_DYLINKER => LoadCommandVariant::IdDylinker(self.data()?), + macho::LC_PREBOUND_DYLIB => LoadCommandVariant::PreboundDylib(self.data()?), + macho::LC_ROUTINES => LoadCommandVariant::Routines32(self.data()?), + macho::LC_SUB_FRAMEWORK => LoadCommandVariant::SubFramework(self.data()?), + macho::LC_SUB_UMBRELLA => LoadCommandVariant::SubUmbrella(self.data()?), + macho::LC_SUB_CLIENT => LoadCommandVariant::SubClient(self.data()?), + macho::LC_SUB_LIBRARY => LoadCommandVariant::SubLibrary(self.data()?), + macho::LC_TWOLEVEL_HINTS => LoadCommandVariant::TwolevelHints(self.data()?), + macho::LC_PREBIND_CKSUM => LoadCommandVariant::PrebindCksum(self.data()?), + macho::LC_SEGMENT_64 => { + let mut data = self.data; + let segment = data.read().read_error("Invalid Mach-O command size")?; + LoadCommandVariant::Segment64(segment, data.0) + } + macho::LC_ROUTINES_64 => LoadCommandVariant::Routines64(self.data()?), + macho::LC_UUID => LoadCommandVariant::Uuid(self.data()?), + macho::LC_RPATH => LoadCommandVariant::Rpath(self.data()?), + macho::LC_CODE_SIGNATURE + | macho::LC_SEGMENT_SPLIT_INFO + | macho::LC_FUNCTION_STARTS + | macho::LC_DATA_IN_CODE + | macho::LC_DYLIB_CODE_SIGN_DRS + | macho::LC_LINKER_OPTIMIZATION_HINT + | macho::LC_DYLD_EXPORTS_TRIE + | macho::LC_DYLD_CHAINED_FIXUPS => LoadCommandVariant::LinkeditData(self.data()?), + macho::LC_ENCRYPTION_INFO => LoadCommandVariant::EncryptionInfo32(self.data()?), + macho::LC_DYLD_INFO | macho::LC_DYLD_INFO_ONLY => { + LoadCommandVariant::DyldInfo(self.data()?) + } + macho::LC_VERSION_MIN_MACOSX + | macho::LC_VERSION_MIN_IPHONEOS + | macho::LC_VERSION_MIN_TVOS + | macho::LC_VERSION_MIN_WATCHOS => LoadCommandVariant::VersionMin(self.data()?), + macho::LC_DYLD_ENVIRONMENT => LoadCommandVariant::DyldEnvironment(self.data()?), + macho::LC_MAIN => LoadCommandVariant::EntryPoint(self.data()?), + macho::LC_SOURCE_VERSION => LoadCommandVariant::SourceVersion(self.data()?), + macho::LC_ENCRYPTION_INFO_64 => LoadCommandVariant::EncryptionInfo64(self.data()?), + macho::LC_LINKER_OPTION => LoadCommandVariant::LinkerOption(self.data()?), + macho::LC_NOTE => LoadCommandVariant::Note(self.data()?), + macho::LC_BUILD_VERSION => LoadCommandVariant::BuildVersion(self.data()?), + macho::LC_FILESET_ENTRY => LoadCommandVariant::FilesetEntry(self.data()?), + _ => LoadCommandVariant::Other, + }) + } + + /// Try to parse this command as a `SegmentCommand32`. + /// + /// Returns the segment command and the data containing the sections. + pub fn segment_32(self) -> Result, &'data [u8])>> { + if self.cmd == macho::LC_SEGMENT { + let mut data = self.data; + let segment = data.read().read_error("Invalid Mach-O command size")?; + Ok(Some((segment, data.0))) + } else { + Ok(None) + } + } + + /// Try to parse this command as a `SymtabCommand`. + /// + /// Returns the segment command and the data containing the sections. + pub fn symtab(self) -> Result>> { + if self.cmd == macho::LC_SYMTAB { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `DysymtabCommand`. + pub fn dysymtab(self) -> Result>> { + if self.cmd == macho::LC_DYSYMTAB { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `DylibCommand`. + pub fn dylib(self) -> Result>> { + if self.cmd == macho::LC_LOAD_DYLIB + || self.cmd == macho::LC_LOAD_WEAK_DYLIB + || self.cmd == macho::LC_REEXPORT_DYLIB + || self.cmd == macho::LC_LAZY_LOAD_DYLIB + || self.cmd == macho::LC_LOAD_UPWARD_DYLIB + { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `UuidCommand`. + pub fn uuid(self) -> Result>> { + if self.cmd == macho::LC_UUID { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `SegmentCommand64`. + pub fn segment_64(self) -> Result, &'data [u8])>> { + if self.cmd == macho::LC_SEGMENT_64 { + let mut data = self.data; + let command = data.read().read_error("Invalid Mach-O command size")?; + Ok(Some((command, data.0))) + } else { + Ok(None) + } + } + + /// Try to parse this command as a `DyldInfoCommand`. + pub fn dyld_info(self) -> Result>> { + if self.cmd == macho::LC_DYLD_INFO || self.cmd == macho::LC_DYLD_INFO_ONLY { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as an `EntryPointCommand`. + pub fn entry_point(self) -> Result>> { + if self.cmd == macho::LC_MAIN { + Some(self.data()).transpose() + } else { + Ok(None) + } + } +} + +/// A `LoadCommand` that has been interpreted according to its `cmd` field. +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum LoadCommandVariant<'data, E: Endian> { + /// `LC_SEGMENT` + Segment32(&'data macho::SegmentCommand32, &'data [u8]), + /// `LC_SYMTAB` + Symtab(&'data macho::SymtabCommand), + // obsolete: `LC_SYMSEG` + //Symseg(&'data macho::SymsegCommand), + /// `LC_THREAD` or `LC_UNIXTHREAD` + Thread(&'data macho::ThreadCommand, &'data [u8]), + // obsolete: `LC_IDFVMLIB` or `LC_LOADFVMLIB` + //Fvmlib(&'data macho::FvmlibCommand), + // obsolete: `LC_IDENT` + //Ident(&'data macho::IdentCommand), + // internal: `LC_FVMFILE` + //Fvmfile(&'data macho::FvmfileCommand), + // internal: `LC_PREPAGE` + /// `LC_DYSYMTAB` + Dysymtab(&'data macho::DysymtabCommand), + /// `LC_LOAD_DYLIB`, `LC_LOAD_WEAK_DYLIB`, `LC_REEXPORT_DYLIB`, + /// `LC_LAZY_LOAD_DYLIB`, or `LC_LOAD_UPWARD_DYLIB` + Dylib(&'data macho::DylibCommand), + /// `LC_ID_DYLIB` + IdDylib(&'data macho::DylibCommand), + /// `LC_LOAD_DYLINKER` + LoadDylinker(&'data macho::DylinkerCommand), + /// `LC_ID_DYLINKER` + IdDylinker(&'data macho::DylinkerCommand), + /// `LC_PREBOUND_DYLIB` + PreboundDylib(&'data macho::PreboundDylibCommand), + /// `LC_ROUTINES` + Routines32(&'data macho::RoutinesCommand32), + /// `LC_SUB_FRAMEWORK` + SubFramework(&'data macho::SubFrameworkCommand), + /// `LC_SUB_UMBRELLA` + SubUmbrella(&'data macho::SubUmbrellaCommand), + /// `LC_SUB_CLIENT` + SubClient(&'data macho::SubClientCommand), + /// `LC_SUB_LIBRARY` + SubLibrary(&'data macho::SubLibraryCommand), + /// `LC_TWOLEVEL_HINTS` + TwolevelHints(&'data macho::TwolevelHintsCommand), + /// `LC_PREBIND_CKSUM` + PrebindCksum(&'data macho::PrebindCksumCommand), + /// `LC_SEGMENT_64` + Segment64(&'data macho::SegmentCommand64, &'data [u8]), + /// `LC_ROUTINES_64` + Routines64(&'data macho::RoutinesCommand64), + /// `LC_UUID` + Uuid(&'data macho::UuidCommand), + /// `LC_RPATH` + Rpath(&'data macho::RpathCommand), + /// `LC_CODE_SIGNATURE`, `LC_SEGMENT_SPLIT_INFO`, `LC_FUNCTION_STARTS`, + /// `LC_DATA_IN_CODE`, `LC_DYLIB_CODE_SIGN_DRS`, `LC_LINKER_OPTIMIZATION_HINT`, + /// `LC_DYLD_EXPORTS_TRIE`, or `LC_DYLD_CHAINED_FIXUPS`. + LinkeditData(&'data macho::LinkeditDataCommand), + /// `LC_ENCRYPTION_INFO` + EncryptionInfo32(&'data macho::EncryptionInfoCommand32), + /// `LC_DYLD_INFO` or `LC_DYLD_INFO_ONLY` + DyldInfo(&'data macho::DyldInfoCommand), + /// `LC_VERSION_MIN_MACOSX`, `LC_VERSION_MIN_IPHONEOS`, `LC_VERSION_MIN_WATCHOS`, + /// or `LC_VERSION_MIN_TVOS` + VersionMin(&'data macho::VersionMinCommand), + /// `LC_DYLD_ENVIRONMENT` + DyldEnvironment(&'data macho::DylinkerCommand), + /// `LC_MAIN` + EntryPoint(&'data macho::EntryPointCommand), + /// `LC_SOURCE_VERSION` + SourceVersion(&'data macho::SourceVersionCommand), + /// `LC_ENCRYPTION_INFO_64` + EncryptionInfo64(&'data macho::EncryptionInfoCommand64), + /// `LC_LINKER_OPTION` + LinkerOption(&'data macho::LinkerOptionCommand), + /// `LC_NOTE` + Note(&'data macho::NoteCommand), + /// `LC_BUILD_VERSION` + BuildVersion(&'data macho::BuildVersionCommand), + /// `LC_FILESET_ENTRY` + FilesetEntry(&'data macho::FilesetEntryCommand), + /// An unrecognized or obsolete load command. + Other, +} + +impl macho::SymtabCommand { + /// Return the symbol table that this command references. + pub fn symbols<'data, Mach: MachHeader, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result> { + let symbols = data + .read_slice_at( + self.symoff.get(endian).into(), + self.nsyms.get(endian) as usize, + ) + .read_error("Invalid Mach-O symbol table offset or size")?; + let str_start: u64 = self.stroff.get(endian).into(); + let str_end = str_start + .checked_add(self.strsize.get(endian).into()) + .read_error("Invalid Mach-O string table length")?; + let strings = StringTable::new(data, str_start, str_end); + Ok(SymbolTable::new(symbols, strings)) + } +} diff --git a/third_party/rust/object/src/read/macho/mod.rs b/third_party/rust/object/src/read/macho/mod.rs new file mode 100644 index 0000000000..f07ed581b6 --- /dev/null +++ b/third_party/rust/object/src/read/macho/mod.rs @@ -0,0 +1,30 @@ +//! Support for reading Mach-O files. +//! +//! Defines traits to abstract over the difference between 32-bit and 64-bit +//! Mach-O files, and implements read functionality in terms of these traits. +//! +//! Also provides `MachOFile` and related types which implement the `Object` trait. + +mod dyld_cache; +pub use dyld_cache::*; + +mod fat; +pub use fat::*; + +mod file; +pub use file::*; + +mod load_command; +pub use load_command::*; + +mod segment; +pub use segment::*; + +mod section; +pub use section::*; + +mod symbol; +pub use symbol::*; + +mod relocation; +pub use relocation::*; diff --git a/third_party/rust/object/src/read/macho/relocation.rs b/third_party/rust/object/src/read/macho/relocation.rs new file mode 100644 index 0000000000..5dd7df8966 --- /dev/null +++ b/third_party/rust/object/src/read/macho/relocation.rs @@ -0,0 +1,126 @@ +use core::{fmt, slice}; + +use crate::endian::Endianness; +use crate::macho; +use crate::read::{ + ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, SectionIndex, + SymbolIndex, +}; + +use super::{MachHeader, MachOFile}; + +/// An iterator over the relocations in a `MachOSection32`. +pub type MachORelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachORelocationIterator<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the relocations in a `MachOSection64`. +pub type MachORelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachORelocationIterator<'data, 'file, macho::MachHeader64, R>; + +/// An iterator over the relocations in a `MachOSection`. +pub struct MachORelocationIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) relocations: slice::Iter<'data, macho::Relocation>, +} + +impl<'data, 'file, Mach, R> Iterator for MachORelocationIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + loop { + let reloc = self.relocations.next()?; + let endian = self.file.endian; + let cputype = self.file.header.cputype(endian); + if reloc.r_scattered(endian, cputype) { + // FIXME: handle scattered relocations + // We need to add `RelocationTarget::Address` for this. + continue; + } + let reloc = reloc.info(self.file.endian); + let mut encoding = RelocationEncoding::Generic; + let kind = match cputype { + macho::CPU_TYPE_ARM => match (reloc.r_type, reloc.r_pcrel) { + (macho::ARM_RELOC_VANILLA, false) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + macho::CPU_TYPE_ARM64 => match (reloc.r_type, reloc.r_pcrel) { + (macho::ARM64_RELOC_UNSIGNED, false) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + macho::CPU_TYPE_X86 => match (reloc.r_type, reloc.r_pcrel) { + (macho::GENERIC_RELOC_VANILLA, false) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + macho::CPU_TYPE_X86_64 => match (reloc.r_type, reloc.r_pcrel) { + (macho::X86_64_RELOC_UNSIGNED, false) => RelocationKind::Absolute, + (macho::X86_64_RELOC_SIGNED, true) => { + encoding = RelocationEncoding::X86RipRelative; + RelocationKind::Relative + } + (macho::X86_64_RELOC_BRANCH, true) => { + encoding = RelocationEncoding::X86Branch; + RelocationKind::Relative + } + (macho::X86_64_RELOC_GOT, true) => RelocationKind::GotRelative, + (macho::X86_64_RELOC_GOT_LOAD, true) => { + encoding = RelocationEncoding::X86RipRelativeMovq; + RelocationKind::GotRelative + } + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }; + let size = 8 << reloc.r_length; + let target = if reloc.r_extern { + RelocationTarget::Symbol(SymbolIndex(reloc.r_symbolnum as usize)) + } else { + RelocationTarget::Section(SectionIndex(reloc.r_symbolnum as usize)) + }; + let addend = if reloc.r_pcrel { -4 } else { 0 }; + return Some(( + reloc.r_address as u64, + Relocation { + kind, + encoding, + size, + target, + addend, + implicit_addend: true, + }, + )); + } + } +} + +impl<'data, 'file, Mach, R> fmt::Debug for MachORelocationIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachORelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/macho/section.rs b/third_party/rust/object/src/read/macho/section.rs new file mode 100644 index 0000000000..9e71aa8fd4 --- /dev/null +++ b/third_party/rust/object/src/read/macho/section.rs @@ -0,0 +1,384 @@ +use core::fmt::Debug; +use core::{fmt, result, slice, str}; + +use crate::endian::{self, Endianness}; +use crate::macho; +use crate::pod::Pod; +use crate::read::{ + self, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, Result, + SectionFlags, SectionIndex, SectionKind, +}; + +use super::{MachHeader, MachOFile, MachORelocationIterator}; + +/// An iterator over the sections of a `MachOFile32`. +pub type MachOSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSectionIterator<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the sections of a `MachOFile64`. +pub type MachOSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSectionIterator<'data, 'file, macho::MachHeader64, R>; + +/// An iterator over the sections of a `MachOFile`. +pub struct MachOSectionIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) iter: slice::Iter<'file, MachOSectionInternal<'data, Mach>>, +} + +impl<'data, 'file, Mach, R> fmt::Debug for MachOSectionIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("MachOSectionIterator").finish() + } +} + +impl<'data, 'file, Mach, R> Iterator for MachOSectionIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOSection<'data, 'file, Mach, R>; + + fn next(&mut self) -> Option { + self.iter.next().map(|&internal| MachOSection { + file: self.file, + internal, + }) + } +} + +/// A section of a `MachOFile32`. +pub type MachOSection32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSection<'data, 'file, macho::MachHeader32, R>; +/// A section of a `MachOFile64`. +pub type MachOSection64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSection<'data, 'file, macho::MachHeader64, R>; + +/// A section of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSection<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) internal: MachOSectionInternal<'data, Mach>, +} + +impl<'data, 'file, Mach, R> MachOSection<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn bytes(&self) -> Result<&'data [u8]> { + let segment_index = self.internal.segment_index; + let segment = self.file.segment_internal(segment_index)?; + self.internal + .section + .data(self.file.endian, segment.data) + .read_error("Invalid Mach-O section size or offset") + } +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSection<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSection<'data> for MachOSection<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type RelocationIterator = MachORelocationIterator<'data, 'file, Mach, R>; + + #[inline] + fn index(&self) -> SectionIndex { + self.internal.index + } + + #[inline] + fn address(&self) -> u64 { + self.internal.section.addr(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.internal.section.size(self.file.endian).into() + } + + #[inline] + fn align(&self) -> u64 { + 1 << self.internal.section.align(self.file.endian) + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + self.internal.section.file_range(self.file.endian) + } + + #[inline] + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn compressed_file_range(&self) -> Result { + Ok(CompressedFileRange::none(self.file_range())) + } + + #[inline] + fn compressed_data(&self) -> Result> { + self.data().map(CompressedData::none) + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + Ok(self.internal.section.name()) + } + + #[inline] + fn name(&self) -> Result<&str> { + str::from_utf8(self.internal.section.name()) + .ok() + .read_error("Non UTF-8 Mach-O section name") + } + + #[inline] + fn segment_name_bytes(&self) -> Result> { + Ok(Some(self.internal.section.segment_name())) + } + + #[inline] + fn segment_name(&self) -> Result> { + Ok(Some( + str::from_utf8(self.internal.section.segment_name()) + .ok() + .read_error("Non UTF-8 Mach-O segment name")?, + )) + } + + fn kind(&self) -> SectionKind { + self.internal.kind + } + + fn relocations(&self) -> MachORelocationIterator<'data, 'file, Mach, R> { + MachORelocationIterator { + file: self.file, + relocations: self + .internal + .section + .relocations(self.file.endian, self.file.data) + .unwrap_or(&[]) + .iter(), + } + } + + fn flags(&self) -> SectionFlags { + SectionFlags::MachO { + flags: self.internal.section.flags(self.file.endian), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub(super) struct MachOSectionInternal<'data, Mach: MachHeader> { + pub index: SectionIndex, + pub segment_index: usize, + pub kind: SectionKind, + pub section: &'data Mach::Section, +} + +impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { + pub(super) fn parse( + index: SectionIndex, + segment_index: usize, + section: &'data Mach::Section, + ) -> Self { + // TODO: we don't validate flags, should we? + let kind = match (section.segment_name(), section.name()) { + (b"__TEXT", b"__text") => SectionKind::Text, + (b"__TEXT", b"__const") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__cstring") => SectionKind::ReadOnlyString, + (b"__TEXT", b"__literal4") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__literal8") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__literal16") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__eh_frame") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__gcc_except_tab") => SectionKind::ReadOnlyData, + (b"__DATA", b"__data") => SectionKind::Data, + (b"__DATA", b"__const") => SectionKind::ReadOnlyData, + (b"__DATA", b"__bss") => SectionKind::UninitializedData, + (b"__DATA", b"__common") => SectionKind::Common, + (b"__DATA", b"__thread_data") => SectionKind::Tls, + (b"__DATA", b"__thread_bss") => SectionKind::UninitializedTls, + (b"__DATA", b"__thread_vars") => SectionKind::TlsVariables, + (b"__DWARF", _) => SectionKind::Debug, + _ => SectionKind::Unknown, + }; + MachOSectionInternal { + index, + segment_index, + kind, + section, + } + } +} + +/// A trait for generic access to `Section32` and `Section64`. +#[allow(missing_docs)] +pub trait Section: Debug + Pod { + type Word: Into; + type Endian: endian::Endian; + + fn sectname(&self) -> &[u8; 16]; + fn segname(&self) -> &[u8; 16]; + fn addr(&self, endian: Self::Endian) -> Self::Word; + fn size(&self, endian: Self::Endian) -> Self::Word; + fn offset(&self, endian: Self::Endian) -> u32; + fn align(&self, endian: Self::Endian) -> u32; + fn reloff(&self, endian: Self::Endian) -> u32; + fn nreloc(&self, endian: Self::Endian) -> u32; + fn flags(&self, endian: Self::Endian) -> u32; + + /// Return the `sectname` bytes up until the null terminator. + fn name(&self) -> &[u8] { + let sectname = &self.sectname()[..]; + match memchr::memchr(b'\0', sectname) { + Some(end) => §name[..end], + None => sectname, + } + } + + /// Return the `segname` bytes up until the null terminator. + fn segment_name(&self) -> &[u8] { + let segname = &self.segname()[..]; + match memchr::memchr(b'\0', segname) { + Some(end) => &segname[..end], + None => segname, + } + } + + /// Return the offset and size of the section in the file. + /// + /// Returns `None` for sections that have no data in the file. + fn file_range(&self, endian: Self::Endian) -> Option<(u64, u64)> { + match self.flags(endian) & macho::SECTION_TYPE { + macho::S_ZEROFILL | macho::S_GB_ZEROFILL | macho::S_THREAD_LOCAL_ZEROFILL => None, + _ => Some((self.offset(endian).into(), self.size(endian).into())), + } + } + + /// Return the section data. + /// + /// Returns `Ok(&[])` if the section has no data. + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> result::Result<&'data [u8], ()> { + if let Some((offset, size)) = self.file_range(endian) { + data.read_bytes_at(offset, size) + } else { + Ok(&[]) + } + } + + /// Return the relocation array. + /// + /// Returns `Err` for invalid values. + fn relocations<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> Result<&'data [macho::Relocation]> { + data.read_slice_at(self.reloff(endian).into(), self.nreloc(endian) as usize) + .read_error("Invalid Mach-O relocations offset or number") + } +} + +impl Section for macho::Section32 { + type Word = u32; + type Endian = Endian; + + fn sectname(&self) -> &[u8; 16] { + &self.sectname + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn addr(&self, endian: Self::Endian) -> Self::Word { + self.addr.get(endian) + } + fn size(&self, endian: Self::Endian) -> Self::Word { + self.size.get(endian) + } + fn offset(&self, endian: Self::Endian) -> u32 { + self.offset.get(endian) + } + fn align(&self, endian: Self::Endian) -> u32 { + self.align.get(endian) + } + fn reloff(&self, endian: Self::Endian) -> u32 { + self.reloff.get(endian) + } + fn nreloc(&self, endian: Self::Endian) -> u32 { + self.nreloc.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} + +impl Section for macho::Section64 { + type Word = u64; + type Endian = Endian; + + fn sectname(&self) -> &[u8; 16] { + &self.sectname + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn addr(&self, endian: Self::Endian) -> Self::Word { + self.addr.get(endian) + } + fn size(&self, endian: Self::Endian) -> Self::Word { + self.size.get(endian) + } + fn offset(&self, endian: Self::Endian) -> u32 { + self.offset.get(endian) + } + fn align(&self, endian: Self::Endian) -> u32 { + self.align.get(endian) + } + fn reloff(&self, endian: Self::Endian) -> u32 { + self.reloff.get(endian) + } + fn nreloc(&self, endian: Self::Endian) -> u32 { + self.nreloc.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} diff --git a/third_party/rust/object/src/read/macho/segment.rs b/third_party/rust/object/src/read/macho/segment.rs new file mode 100644 index 0000000000..c7eaa6fff7 --- /dev/null +++ b/third_party/rust/object/src/read/macho/segment.rs @@ -0,0 +1,303 @@ +use core::fmt::Debug; +use core::{result, slice, str}; + +use crate::endian::{self, Endianness}; +use crate::macho; +use crate::pod::Pod; +use crate::read::{self, ObjectSegment, ReadError, ReadRef, Result, SegmentFlags}; + +use super::{LoadCommandData, MachHeader, MachOFile, Section}; + +/// An iterator over the segments of a `MachOFile32`. +pub type MachOSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegmentIterator<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the segments of a `MachOFile64`. +pub type MachOSegmentIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegmentIterator<'data, 'file, macho::MachHeader64, R>; + +/// An iterator over the segments of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegmentIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) iter: slice::Iter<'file, MachOSegmentInternal<'data, Mach, R>>, +} + +impl<'data, 'file, Mach, R> Iterator for MachOSegmentIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOSegment<'data, 'file, Mach, R>; + + fn next(&mut self) -> Option { + self.iter.next().map(|internal| MachOSegment { + file: self.file, + internal, + }) + } +} + +/// A segment of a `MachOFile32`. +pub type MachOSegment32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegment<'data, 'file, macho::MachHeader32, R>; +/// A segment of a `MachOFile64`. +pub type MachOSegment64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegment<'data, 'file, macho::MachHeader64, R>; + +/// A segment of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegment<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + file: &'file MachOFile<'data, Mach, R>, + internal: &'file MachOSegmentInternal<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> MachOSegment<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn bytes(&self) -> Result<&'data [u8]> { + self.internal + .segment + .data(self.file.endian, self.file.data) + .read_error("Invalid Mach-O segment size or offset") + } +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSegment<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSegment<'data> for MachOSegment<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[inline] + fn address(&self) -> u64 { + self.internal.segment.vmaddr(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.internal.segment.vmsize(self.file.endian).into() + } + + #[inline] + fn align(&self) -> u64 { + // Page size. + 0x1000 + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + self.internal.segment.file_range(self.file.endian) + } + + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn name_bytes(&self) -> Result> { + Ok(Some(self.internal.segment.name())) + } + + #[inline] + fn name(&self) -> Result> { + Ok(Some( + str::from_utf8(self.internal.segment.name()) + .ok() + .read_error("Non UTF-8 Mach-O segment name")?, + )) + } + + #[inline] + fn flags(&self) -> SegmentFlags { + let flags = self.internal.segment.flags(self.file.endian); + let maxprot = self.internal.segment.maxprot(self.file.endian); + let initprot = self.internal.segment.initprot(self.file.endian); + SegmentFlags::MachO { + flags, + maxprot, + initprot, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub(super) struct MachOSegmentInternal<'data, Mach: MachHeader, R: ReadRef<'data>> { + pub data: R, + pub segment: &'data Mach::Segment, +} + +/// A trait for generic access to `SegmentCommand32` and `SegmentCommand64`. +#[allow(missing_docs)] +pub trait Segment: Debug + Pod { + type Word: Into; + type Endian: endian::Endian; + type Section: Section; + + fn from_command(command: LoadCommandData) -> Result>; + + fn cmd(&self, endian: Self::Endian) -> u32; + fn cmdsize(&self, endian: Self::Endian) -> u32; + fn segname(&self) -> &[u8; 16]; + fn vmaddr(&self, endian: Self::Endian) -> Self::Word; + fn vmsize(&self, endian: Self::Endian) -> Self::Word; + fn fileoff(&self, endian: Self::Endian) -> Self::Word; + fn filesize(&self, endian: Self::Endian) -> Self::Word; + fn maxprot(&self, endian: Self::Endian) -> u32; + fn initprot(&self, endian: Self::Endian) -> u32; + fn nsects(&self, endian: Self::Endian) -> u32; + fn flags(&self, endian: Self::Endian) -> u32; + + /// Return the `segname` bytes up until the null terminator. + fn name(&self) -> &[u8] { + let segname = &self.segname()[..]; + match memchr::memchr(b'\0', segname) { + Some(end) => &segname[..end], + None => segname, + } + } + + /// Return the offset and size of the segment in the file. + fn file_range(&self, endian: Self::Endian) -> (u64, u64) { + (self.fileoff(endian).into(), self.filesize(endian).into()) + } + + /// Get the segment data from the file data. + /// + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> result::Result<&'data [u8], ()> { + let (offset, size) = self.file_range(endian); + data.read_bytes_at(offset, size) + } + + /// Get the array of sections from the data following the segment command. + /// + /// Returns `Err` for invalid values. + fn sections<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + section_data: R, + ) -> Result<&'data [Self::Section]> { + section_data + .read_slice_at(0, self.nsects(endian) as usize) + .read_error("Invalid Mach-O number of sections") + } +} + +impl Segment for macho::SegmentCommand32 { + type Word = u32; + type Endian = Endian; + type Section = macho::Section32; + + fn from_command(command: LoadCommandData) -> Result> { + command.segment_32() + } + + fn cmd(&self, endian: Self::Endian) -> u32 { + self.cmd.get(endian) + } + fn cmdsize(&self, endian: Self::Endian) -> u32 { + self.cmdsize.get(endian) + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn vmaddr(&self, endian: Self::Endian) -> Self::Word { + self.vmaddr.get(endian) + } + fn vmsize(&self, endian: Self::Endian) -> Self::Word { + self.vmsize.get(endian) + } + fn fileoff(&self, endian: Self::Endian) -> Self::Word { + self.fileoff.get(endian) + } + fn filesize(&self, endian: Self::Endian) -> Self::Word { + self.filesize.get(endian) + } + fn maxprot(&self, endian: Self::Endian) -> u32 { + self.maxprot.get(endian) + } + fn initprot(&self, endian: Self::Endian) -> u32 { + self.initprot.get(endian) + } + fn nsects(&self, endian: Self::Endian) -> u32 { + self.nsects.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} + +impl Segment for macho::SegmentCommand64 { + type Word = u64; + type Endian = Endian; + type Section = macho::Section64; + + fn from_command(command: LoadCommandData) -> Result> { + command.segment_64() + } + + fn cmd(&self, endian: Self::Endian) -> u32 { + self.cmd.get(endian) + } + fn cmdsize(&self, endian: Self::Endian) -> u32 { + self.cmdsize.get(endian) + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn vmaddr(&self, endian: Self::Endian) -> Self::Word { + self.vmaddr.get(endian) + } + fn vmsize(&self, endian: Self::Endian) -> Self::Word { + self.vmsize.get(endian) + } + fn fileoff(&self, endian: Self::Endian) -> Self::Word { + self.fileoff.get(endian) + } + fn filesize(&self, endian: Self::Endian) -> Self::Word { + self.filesize.get(endian) + } + fn maxprot(&self, endian: Self::Endian) -> u32 { + self.maxprot.get(endian) + } + fn initprot(&self, endian: Self::Endian) -> u32 { + self.initprot.get(endian) + } + fn nsects(&self, endian: Self::Endian) -> u32 { + self.nsects.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} diff --git a/third_party/rust/object/src/read/macho/symbol.rs b/third_party/rust/object/src/read/macho/symbol.rs new file mode 100644 index 0000000000..e102c5d0b6 --- /dev/null +++ b/third_party/rust/object/src/read/macho/symbol.rs @@ -0,0 +1,488 @@ +use alloc::vec::Vec; +use core::fmt::Debug; +use core::{fmt, slice, str}; + +use crate::endian::{self, Endianness}; +use crate::macho; +use crate::pod::Pod; +use crate::read::util::StringTable; +use crate::read::{ + self, ObjectMap, ObjectMapEntry, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Result, + SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapEntry, + SymbolScope, SymbolSection, +}; + +use super::{MachHeader, MachOFile}; + +/// A table of symbol entries in a Mach-O file. +/// +/// Also includes the string table used for the symbol names. +#[derive(Debug, Clone, Copy)] +pub struct SymbolTable<'data, Mach: MachHeader, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + symbols: &'data [Mach::Nlist], + strings: StringTable<'data, R>, +} + +impl<'data, Mach: MachHeader, R: ReadRef<'data>> Default for SymbolTable<'data, Mach, R> { + fn default() -> Self { + SymbolTable { + symbols: &[], + strings: Default::default(), + } + } +} + +impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> { + #[inline] + pub(super) fn new(symbols: &'data [Mach::Nlist], strings: StringTable<'data, R>) -> Self { + SymbolTable { symbols, strings } + } + + /// Return the string table used for the symbol names. + #[inline] + pub fn strings(&self) -> StringTable<'data, R> { + self.strings + } + + /// Iterate over the symbols. + #[inline] + pub fn iter(&self) -> slice::Iter<'data, Mach::Nlist> { + self.symbols.iter() + } + + /// Return true if the symbol table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + + /// The number of symbols. + #[inline] + pub fn len(&self) -> usize { + self.symbols.len() + } + + /// Return the symbol at the given index. + pub fn symbol(&self, index: usize) -> Result<&'data Mach::Nlist> { + self.symbols + .get(index) + .read_error("Invalid Mach-O symbol index") + } + + /// Construct a map from addresses to a user-defined map entry. + pub fn map Option>( + &self, + f: F, + ) -> SymbolMap { + let mut symbols = Vec::new(); + for nlist in self.symbols { + if !nlist.is_definition() { + continue; + } + if let Some(entry) = f(nlist) { + symbols.push(entry); + } + } + SymbolMap::new(symbols) + } + + /// Construct a map from addresses to symbol names and object file names. + pub fn object_map(&self, endian: Mach::Endian) -> ObjectMap<'data> { + let mut symbols = Vec::new(); + let mut objects = Vec::new(); + let mut object = None; + let mut current_function = None; + // Each module starts with one or two N_SO symbols (path, or directory + filename) + // and one N_OSO symbol. The module is terminated by an empty N_SO symbol. + for nlist in self.symbols { + let n_type = nlist.n_type(); + if n_type & macho::N_STAB == 0 { + continue; + } + // TODO: includes variables too (N_GSYM, N_STSYM). These may need to get their + // address from regular symbols though. + match n_type { + macho::N_SO => { + object = None; + } + macho::N_OSO => { + object = None; + if let Ok(name) = nlist.name(endian, self.strings) { + if !name.is_empty() { + object = Some(objects.len()); + objects.push(name); + } + } + } + macho::N_FUN => { + if let Ok(name) = nlist.name(endian, self.strings) { + if !name.is_empty() { + current_function = Some((name, nlist.n_value(endian).into())) + } else if let Some((name, address)) = current_function.take() { + if let Some(object) = object { + symbols.push(ObjectMapEntry { + address, + size: nlist.n_value(endian).into(), + name, + object, + }); + } + } + } + } + _ => {} + } + } + ObjectMap { + symbols: SymbolMap::new(symbols), + objects, + } + } +} + +/// An iterator over the symbols of a `MachOFile32`. +pub type MachOSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolTable<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the symbols of a `MachOFile64`. +pub type MachOSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolTable<'data, 'file, macho::MachHeader64, R>; + +/// A symbol table of a `MachOFile`. +#[derive(Debug, Clone, Copy)] +pub struct MachOSymbolTable<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbolTable<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSymbolTable<'data> for MachOSymbolTable<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Symbol = MachOSymbol<'data, 'file, Mach, R>; + type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>; + + fn symbols(&self) -> Self::SymbolIterator { + MachOSymbolIterator { + file: self.file, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result { + let nlist = self.file.symbols.symbol(index.0)?; + MachOSymbol::new(self.file, index, nlist).read_error("Unsupported Mach-O symbol index") + } +} + +/// An iterator over the symbols of a `MachOFile32`. +pub type MachOSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolIterator<'data, 'file, macho::MachHeader32, R>; +/// An iterator over the symbols of a `MachOFile64`. +pub type MachOSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolIterator<'data, 'file, macho::MachHeader64, R>; + +/// An iterator over the symbols of a `MachOFile`. +pub struct MachOSymbolIterator<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) index: usize, +} + +impl<'data, 'file, Mach, R> fmt::Debug for MachOSymbolIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachOSymbolIterator").finish() + } +} + +impl<'data, 'file, Mach, R> Iterator for MachOSymbolIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOSymbol<'data, 'file, Mach, R>; + + fn next(&mut self) -> Option { + loop { + let index = self.index; + let nlist = self.file.symbols.symbols.get(index)?; + self.index += 1; + if let Some(symbol) = MachOSymbol::new(self.file, SymbolIndex(index), nlist) { + return Some(symbol); + } + } + } +} + +/// A symbol of a `MachOFile32`. +pub type MachOSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbol<'data, 'file, macho::MachHeader32, R>; +/// A symbol of a `MachOFile64`. +pub type MachOSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbol<'data, 'file, macho::MachHeader64, R>; + +/// A symbol of a `MachOFile`. +#[derive(Debug, Clone, Copy)] +pub struct MachOSymbol<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + file: &'file MachOFile<'data, Mach, R>, + index: SymbolIndex, + nlist: &'data Mach::Nlist, +} + +impl<'data, 'file, Mach, R> MachOSymbol<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) fn new( + file: &'file MachOFile<'data, Mach, R>, + index: SymbolIndex, + nlist: &'data Mach::Nlist, + ) -> Option { + if nlist.n_type() & macho::N_STAB != 0 { + return None; + } + Some(MachOSymbol { file, index, nlist }) + } +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbol<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSymbol<'data> for MachOSymbol<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[inline] + fn index(&self) -> SymbolIndex { + self.index + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + self.nlist.name(self.file.endian, self.file.symbols.strings) + } + + fn name(&self) -> Result<&'data str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 Mach-O symbol name") + } + + #[inline] + fn address(&self) -> u64 { + self.nlist.n_value(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + 0 + } + + fn kind(&self) -> SymbolKind { + self.section() + .index() + .and_then(|index| self.file.section_internal(index).ok()) + .map(|section| match section.kind { + SectionKind::Text => SymbolKind::Text, + SectionKind::Data + | SectionKind::ReadOnlyData + | SectionKind::ReadOnlyString + | SectionKind::UninitializedData + | SectionKind::Common => SymbolKind::Data, + SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => { + SymbolKind::Tls + } + _ => SymbolKind::Unknown, + }) + .unwrap_or(SymbolKind::Unknown) + } + + fn section(&self) -> SymbolSection { + match self.nlist.n_type() & macho::N_TYPE { + macho::N_UNDF => SymbolSection::Undefined, + macho::N_ABS => SymbolSection::Absolute, + macho::N_SECT => { + let n_sect = self.nlist.n_sect(); + if n_sect != 0 { + SymbolSection::Section(SectionIndex(n_sect as usize)) + } else { + SymbolSection::Unknown + } + } + _ => SymbolSection::Unknown, + } + } + + #[inline] + fn is_undefined(&self) -> bool { + self.nlist.n_type() & macho::N_TYPE == macho::N_UNDF + } + + #[inline] + fn is_definition(&self) -> bool { + self.nlist.is_definition() + } + + #[inline] + fn is_common(&self) -> bool { + // Mach-O common symbols are based on section, not symbol + false + } + + #[inline] + fn is_weak(&self) -> bool { + self.nlist.n_desc(self.file.endian) & (macho::N_WEAK_REF | macho::N_WEAK_DEF) != 0 + } + + fn scope(&self) -> SymbolScope { + let n_type = self.nlist.n_type(); + if n_type & macho::N_TYPE == macho::N_UNDF { + SymbolScope::Unknown + } else if n_type & macho::N_EXT == 0 { + SymbolScope::Compilation + } else if n_type & macho::N_PEXT != 0 { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + } + } + + #[inline] + fn is_global(&self) -> bool { + self.scope() != SymbolScope::Compilation + } + + #[inline] + fn is_local(&self) -> bool { + self.scope() == SymbolScope::Compilation + } + + #[inline] + fn flags(&self) -> SymbolFlags { + let n_desc = self.nlist.n_desc(self.file.endian); + SymbolFlags::MachO { n_desc } + } +} + +/// A trait for generic access to `Nlist32` and `Nlist64`. +#[allow(missing_docs)] +pub trait Nlist: Debug + Pod { + type Word: Into; + type Endian: endian::Endian; + + fn n_strx(&self, endian: Self::Endian) -> u32; + fn n_type(&self) -> u8; + fn n_sect(&self) -> u8; + fn n_desc(&self, endian: Self::Endian) -> u16; + fn n_value(&self, endian: Self::Endian) -> Self::Word; + + fn name<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + strings + .get(self.n_strx(endian)) + .read_error("Invalid Mach-O symbol name offset") + } + + /// Return true if this is a STAB symbol. + /// + /// This determines the meaning of the `n_type` field. + fn is_stab(&self) -> bool { + self.n_type() & macho::N_STAB != 0 + } + + /// Return true if this is an undefined symbol. + fn is_undefined(&self) -> bool { + let n_type = self.n_type(); + n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_UNDF + } + + /// Return true if the symbol is a definition of a function or data object. + fn is_definition(&self) -> bool { + let n_type = self.n_type(); + n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE != macho::N_UNDF + } + + /// Return the library ordinal. + /// + /// This is either a 1-based index into the dylib load commands, + /// or a special ordinal. + #[inline] + fn library_ordinal(&self, endian: Self::Endian) -> u8 { + (self.n_desc(endian) >> 8) as u8 + } +} + +impl Nlist for macho::Nlist32 { + type Word = u32; + type Endian = Endian; + + fn n_strx(&self, endian: Self::Endian) -> u32 { + self.n_strx.get(endian) + } + fn n_type(&self) -> u8 { + self.n_type + } + fn n_sect(&self) -> u8 { + self.n_sect + } + fn n_desc(&self, endian: Self::Endian) -> u16 { + self.n_desc.get(endian) + } + fn n_value(&self, endian: Self::Endian) -> Self::Word { + self.n_value.get(endian) + } +} + +impl Nlist for macho::Nlist64 { + type Word = u64; + type Endian = Endian; + + fn n_strx(&self, endian: Self::Endian) -> u32 { + self.n_strx.get(endian) + } + fn n_type(&self) -> u8 { + self.n_type + } + fn n_sect(&self) -> u8 { + self.n_sect + } + fn n_desc(&self, endian: Self::Endian) -> u16 { + self.n_desc.get(endian) + } + fn n_value(&self, endian: Self::Endian) -> Self::Word { + self.n_value.get(endian) + } +} -- cgit v1.2.3