diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/object/src/read | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/object/src/read')
51 files changed, 18818 insertions, 0 deletions
diff --git a/third_party/rust/object/src/read/any.rs b/third_party/rust/object/src/read/any.rs new file mode 100644 index 0000000000..c390b21b60 --- /dev/null +++ b/third_party/rust/object/src/read/any.rs @@ -0,0 +1,1341 @@ +use alloc::fmt; +use alloc::vec::Vec; +use core::marker::PhantomData; + +#[cfg(feature = "coff")] +use crate::read::coff; +#[cfg(feature = "elf")] +use crate::read::elf; +#[cfg(feature = "macho")] +use crate::read::macho; +#[cfg(feature = "pe")] +use crate::read::pe; +#[cfg(feature = "wasm")] +use crate::read::wasm; +#[cfg(feature = "xcoff")] +use crate::read::xcoff; +use crate::read::{ + self, Architecture, BinaryFormat, CodeView, ComdatKind, CompressedData, CompressedFileRange, + Error, Export, FileFlags, FileKind, Import, Object, ObjectComdat, ObjectKind, ObjectMap, + ObjectSection, ObjectSegment, ObjectSymbol, ObjectSymbolTable, ReadRef, Relocation, Result, + SectionFlags, SectionIndex, SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, SymbolKind, + SymbolMap, SymbolMapName, SymbolScope, SymbolSection, +}; +#[allow(unused_imports)] +use crate::{AddressSize, Endian, Endianness}; + +/// Evaluate an expression on the contents of a file format enum. +/// +/// This is a hack to avoid virtual calls. +macro_rules! with_inner { + ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { + match $inner { + #[cfg(feature = "coff")] + $enum::Coff(ref $var) => $body, + #[cfg(feature = "elf")] + $enum::Elf32(ref $var) => $body, + #[cfg(feature = "elf")] + $enum::Elf64(ref $var) => $body, + #[cfg(feature = "macho")] + $enum::MachO32(ref $var) => $body, + #[cfg(feature = "macho")] + $enum::MachO64(ref $var) => $body, + #[cfg(feature = "pe")] + $enum::Pe32(ref $var) => $body, + #[cfg(feature = "pe")] + $enum::Pe64(ref $var) => $body, + #[cfg(feature = "wasm")] + $enum::Wasm(ref $var) => $body, + #[cfg(feature = "xcoff")] + $enum::Xcoff32(ref $var) => $body, + #[cfg(feature = "xcoff")] + $enum::Xcoff64(ref $var) => $body, + } + }; +} + +macro_rules! with_inner_mut { + ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { + match $inner { + #[cfg(feature = "coff")] + $enum::Coff(ref mut $var) => $body, + #[cfg(feature = "elf")] + $enum::Elf32(ref mut $var) => $body, + #[cfg(feature = "elf")] + $enum::Elf64(ref mut $var) => $body, + #[cfg(feature = "macho")] + $enum::MachO32(ref mut $var) => $body, + #[cfg(feature = "macho")] + $enum::MachO64(ref mut $var) => $body, + #[cfg(feature = "pe")] + $enum::Pe32(ref mut $var) => $body, + #[cfg(feature = "pe")] + $enum::Pe64(ref mut $var) => $body, + #[cfg(feature = "wasm")] + $enum::Wasm(ref mut $var) => $body, + #[cfg(feature = "xcoff")] + $enum::Xcoff32(ref mut $var) => $body, + #[cfg(feature = "xcoff")] + $enum::Xcoff64(ref mut $var) => $body, + } + }; +} + +/// Like `with_inner!`, but wraps the result in another enum. +macro_rules! map_inner { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + #[cfg(feature = "coff")] + $from::Coff(ref $var) => $to::Coff($body), + #[cfg(feature = "elf")] + $from::Elf32(ref $var) => $to::Elf32($body), + #[cfg(feature = "elf")] + $from::Elf64(ref $var) => $to::Elf64($body), + #[cfg(feature = "macho")] + $from::MachO32(ref $var) => $to::MachO32($body), + #[cfg(feature = "macho")] + $from::MachO64(ref $var) => $to::MachO64($body), + #[cfg(feature = "pe")] + $from::Pe32(ref $var) => $to::Pe32($body), + #[cfg(feature = "pe")] + $from::Pe64(ref $var) => $to::Pe64($body), + #[cfg(feature = "wasm")] + $from::Wasm(ref $var) => $to::Wasm($body), + #[cfg(feature = "xcoff")] + $from::Xcoff32(ref $var) => $to::Xcoff32($body), + #[cfg(feature = "xcoff")] + $from::Xcoff64(ref $var) => $to::Xcoff64($body), + } + }; +} + +/// Like `map_inner!`, but the result is a Result or Option. +macro_rules! map_inner_option { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + #[cfg(feature = "coff")] + $from::Coff(ref $var) => $body.map($to::Coff), + #[cfg(feature = "elf")] + $from::Elf32(ref $var) => $body.map($to::Elf32), + #[cfg(feature = "elf")] + $from::Elf64(ref $var) => $body.map($to::Elf64), + #[cfg(feature = "macho")] + $from::MachO32(ref $var) => $body.map($to::MachO32), + #[cfg(feature = "macho")] + $from::MachO64(ref $var) => $body.map($to::MachO64), + #[cfg(feature = "pe")] + $from::Pe32(ref $var) => $body.map($to::Pe32), + #[cfg(feature = "pe")] + $from::Pe64(ref $var) => $body.map($to::Pe64), + #[cfg(feature = "wasm")] + $from::Wasm(ref $var) => $body.map($to::Wasm), + #[cfg(feature = "xcoff")] + $from::Xcoff32(ref $var) => $body.map($to::Xcoff32), + #[cfg(feature = "xcoff")] + $from::Xcoff64(ref $var) => $body.map($to::Xcoff64), + } + }; +} + +macro_rules! map_inner_option_mut { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + #[cfg(feature = "coff")] + $from::Coff(ref mut $var) => $body.map($to::Coff), + #[cfg(feature = "elf")] + $from::Elf32(ref mut $var) => $body.map($to::Elf32), + #[cfg(feature = "elf")] + $from::Elf64(ref mut $var) => $body.map($to::Elf64), + #[cfg(feature = "macho")] + $from::MachO32(ref mut $var) => $body.map($to::MachO32), + #[cfg(feature = "macho")] + $from::MachO64(ref mut $var) => $body.map($to::MachO64), + #[cfg(feature = "pe")] + $from::Pe32(ref mut $var) => $body.map($to::Pe32), + #[cfg(feature = "pe")] + $from::Pe64(ref mut $var) => $body.map($to::Pe64), + #[cfg(feature = "wasm")] + $from::Wasm(ref mut $var) => $body.map($to::Wasm), + #[cfg(feature = "xcoff")] + $from::Xcoff32(ref mut $var) => $body.map($to::Xcoff32), + #[cfg(feature = "xcoff")] + $from::Xcoff64(ref mut $var) => $body.map($to::Xcoff64), + } + }; +} + +/// Call `next` for a file format iterator. +macro_rules! next_inner { + ($inner:expr, $from:ident, $to:ident) => { + match $inner { + #[cfg(feature = "coff")] + $from::Coff(ref mut iter) => iter.next().map($to::Coff), + #[cfg(feature = "elf")] + $from::Elf32(ref mut iter) => iter.next().map($to::Elf32), + #[cfg(feature = "elf")] + $from::Elf64(ref mut iter) => iter.next().map($to::Elf64), + #[cfg(feature = "macho")] + $from::MachO32(ref mut iter) => iter.next().map($to::MachO32), + #[cfg(feature = "macho")] + $from::MachO64(ref mut iter) => iter.next().map($to::MachO64), + #[cfg(feature = "pe")] + $from::Pe32(ref mut iter) => iter.next().map($to::Pe32), + #[cfg(feature = "pe")] + $from::Pe64(ref mut iter) => iter.next().map($to::Pe64), + #[cfg(feature = "wasm")] + $from::Wasm(ref mut iter) => iter.next().map($to::Wasm), + #[cfg(feature = "xcoff")] + $from::Xcoff32(ref mut iter) => iter.next().map($to::Xcoff32), + #[cfg(feature = "xcoff")] + $from::Xcoff64(ref mut iter) => iter.next().map($to::Xcoff64), + } + }; +} + +/// An object file. +/// +/// Most functionality is provided by the `Object` trait implementation. +#[derive(Debug)] +pub struct File<'data, R: ReadRef<'data> = &'data [u8]> { + inner: FileInternal<'data, R>, +} + +#[derive(Debug)] +enum FileInternal<'data, R: ReadRef<'data>> { + #[cfg(feature = "coff")] + Coff(coff::CoffFile<'data, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfFile32<'data, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfFile64<'data, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOFile32<'data, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOFile64<'data, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeFile32<'data, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeFile64<'data, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmFile<'data, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffFile32<'data, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffFile64<'data, R>), +} + +impl<'data, R: ReadRef<'data>> File<'data, R> { + /// Parse the raw file data. + pub fn parse(data: R) -> Result<Self> { + let inner = match FileKind::parse(data)? { + #[cfg(feature = "elf")] + FileKind::Elf32 => FileInternal::Elf32(elf::ElfFile32::parse(data)?), + #[cfg(feature = "elf")] + FileKind::Elf64 => FileInternal::Elf64(elf::ElfFile64::parse(data)?), + #[cfg(feature = "macho")] + FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse(data)?), + #[cfg(feature = "macho")] + FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse(data)?), + #[cfg(feature = "wasm")] + FileKind::Wasm => FileInternal::Wasm(wasm::WasmFile::parse(data)?), + #[cfg(feature = "pe")] + FileKind::Pe32 => FileInternal::Pe32(pe::PeFile32::parse(data)?), + #[cfg(feature = "pe")] + FileKind::Pe64 => FileInternal::Pe64(pe::PeFile64::parse(data)?), + #[cfg(feature = "coff")] + FileKind::Coff => FileInternal::Coff(coff::CoffFile::parse(data)?), + #[cfg(feature = "xcoff")] + FileKind::Xcoff32 => FileInternal::Xcoff32(xcoff::XcoffFile32::parse(data)?), + #[cfg(feature = "xcoff")] + FileKind::Xcoff64 => FileInternal::Xcoff64(xcoff::XcoffFile64::parse(data)?), + #[allow(unreachable_patterns)] + _ => return Err(Error("Unsupported file format")), + }; + Ok(File { inner }) + } + + /// Parse a Mach-O image from the dyld shared cache. + #[cfg(feature = "macho")] + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &macho::DyldCacheImage<'data, 'cache, E, R>, + ) -> Result<Self> { + let inner = match image.cache.architecture().address_size() { + Some(AddressSize::U64) => { + FileInternal::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?) + } + Some(AddressSize::U32) => { + FileInternal::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?) + } + _ => return Err(Error("Unsupported file format")), + }; + Ok(File { inner }) + } + + /// Return the file format. + pub fn format(&self) -> BinaryFormat { + match self.inner { + #[cfg(feature = "coff")] + FileInternal::Coff(_) => BinaryFormat::Coff, + #[cfg(feature = "elf")] + FileInternal::Elf32(_) | FileInternal::Elf64(_) => BinaryFormat::Elf, + #[cfg(feature = "macho")] + FileInternal::MachO32(_) | FileInternal::MachO64(_) => BinaryFormat::MachO, + #[cfg(feature = "pe")] + FileInternal::Pe32(_) | FileInternal::Pe64(_) => BinaryFormat::Pe, + #[cfg(feature = "wasm")] + FileInternal::Wasm(_) => BinaryFormat::Wasm, + #[cfg(feature = "xcoff")] + FileInternal::Xcoff32(_) | FileInternal::Xcoff64(_) => BinaryFormat::Xcoff, + } + } +} + +impl<'data, R: ReadRef<'data>> read::private::Sealed for File<'data, R> {} + +impl<'data, 'file, R> Object<'data, 'file> for File<'data, R> +where + 'data: 'file, + R: 'file + ReadRef<'data>, +{ + type Segment = Segment<'data, 'file, R>; + type SegmentIterator = SegmentIterator<'data, 'file, R>; + type Section = Section<'data, 'file, R>; + type SectionIterator = SectionIterator<'data, 'file, R>; + type Comdat = Comdat<'data, 'file, R>; + type ComdatIterator = ComdatIterator<'data, 'file, R>; + type Symbol = Symbol<'data, 'file, R>; + type SymbolIterator = SymbolIterator<'data, 'file, R>; + type SymbolTable = SymbolTable<'data, 'file, R>; + type DynamicRelocationIterator = DynamicRelocationIterator<'data, 'file, R>; + + fn architecture(&self) -> Architecture { + with_inner!(self.inner, FileInternal, |x| x.architecture()) + } + + fn is_little_endian(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.is_little_endian()) + } + + fn is_64(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.is_64()) + } + + fn kind(&self) -> ObjectKind { + with_inner!(self.inner, FileInternal, |x| x.kind()) + } + + fn segments(&'file self) -> SegmentIterator<'data, 'file, R> { + SegmentIterator { + inner: map_inner!(self.inner, FileInternal, SegmentIteratorInternal, |x| x + .segments()), + } + } + + fn section_by_name_bytes(&'file self, section_name: &[u8]) -> Option<Section<'data, 'file, R>> { + map_inner_option!(self.inner, FileInternal, SectionInternal, |x| x + .section_by_name_bytes(section_name)) + .map(|inner| Section { inner }) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Result<Section<'data, 'file, R>> { + map_inner_option!(self.inner, FileInternal, SectionInternal, |x| x + .section_by_index(index)) + .map(|inner| Section { inner }) + } + + fn sections(&'file self) -> SectionIterator<'data, 'file, R> { + SectionIterator { + inner: map_inner!(self.inner, FileInternal, SectionIteratorInternal, |x| x + .sections()), + } + } + + fn comdats(&'file self) -> ComdatIterator<'data, 'file, R> { + ComdatIterator { + inner: map_inner!(self.inner, FileInternal, ComdatIteratorInternal, |x| x + .comdats()), + } + } + + fn symbol_by_index(&'file self, index: SymbolIndex) -> Result<Symbol<'data, 'file, R>> { + map_inner_option!(self.inner, FileInternal, SymbolInternal, |x| x + .symbol_by_index(index) + .map(|x| (x, PhantomData))) + .map(|inner| Symbol { inner }) + } + + fn symbols(&'file self) -> SymbolIterator<'data, 'file, R> { + SymbolIterator { + inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| ( + x.symbols(), + PhantomData + )), + } + } + + fn symbol_table(&'file self) -> Option<SymbolTable<'data, 'file, R>> { + map_inner_option!(self.inner, FileInternal, SymbolTableInternal, |x| x + .symbol_table() + .map(|x| (x, PhantomData))) + .map(|inner| SymbolTable { inner }) + } + + fn dynamic_symbols(&'file self) -> SymbolIterator<'data, 'file, R> { + SymbolIterator { + inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| ( + x.dynamic_symbols(), + PhantomData + )), + } + } + + fn dynamic_symbol_table(&'file self) -> Option<SymbolTable<'data, 'file, R>> { + map_inner_option!(self.inner, FileInternal, SymbolTableInternal, |x| x + .dynamic_symbol_table() + .map(|x| (x, PhantomData))) + .map(|inner| SymbolTable { inner }) + } + + #[cfg(feature = "elf")] + fn dynamic_relocations(&'file self) -> Option<DynamicRelocationIterator<'data, 'file, R>> { + let inner = match self.inner { + FileInternal::Elf32(ref elf) => { + DynamicRelocationIteratorInternal::Elf32(elf.dynamic_relocations()?) + } + FileInternal::Elf64(ref elf) => { + DynamicRelocationIteratorInternal::Elf64(elf.dynamic_relocations()?) + } + #[allow(unreachable_patterns)] + _ => return None, + }; + Some(DynamicRelocationIterator { inner }) + } + + #[cfg(not(feature = "elf"))] + fn dynamic_relocations(&'file self) -> Option<DynamicRelocationIterator<'data, 'file, R>> { + None + } + + fn symbol_map(&self) -> SymbolMap<SymbolMapName<'data>> { + with_inner!(self.inner, FileInternal, |x| x.symbol_map()) + } + + fn object_map(&self) -> ObjectMap<'data> { + with_inner!(self.inner, FileInternal, |x| x.object_map()) + } + + fn imports(&self) -> Result<Vec<Import<'data>>> { + with_inner!(self.inner, FileInternal, |x| x.imports()) + } + + fn exports(&self) -> Result<Vec<Export<'data>>> { + with_inner!(self.inner, FileInternal, |x| x.exports()) + } + + fn has_debug_symbols(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.has_debug_symbols()) + } + + #[inline] + fn mach_uuid(&self) -> Result<Option<[u8; 16]>> { + with_inner!(self.inner, FileInternal, |x| x.mach_uuid()) + } + + #[inline] + fn build_id(&self) -> Result<Option<&'data [u8]>> { + with_inner!(self.inner, FileInternal, |x| x.build_id()) + } + + #[inline] + fn gnu_debuglink(&self) -> Result<Option<(&'data [u8], u32)>> { + with_inner!(self.inner, FileInternal, |x| x.gnu_debuglink()) + } + + #[inline] + fn gnu_debugaltlink(&self) -> Result<Option<(&'data [u8], &'data [u8])>> { + with_inner!(self.inner, FileInternal, |x| x.gnu_debugaltlink()) + } + + #[inline] + fn pdb_info(&self) -> Result<Option<CodeView>> { + with_inner!(self.inner, FileInternal, |x| x.pdb_info()) + } + + fn relative_address_base(&self) -> u64 { + with_inner!(self.inner, FileInternal, |x| x.relative_address_base()) + } + + fn entry(&self) -> u64 { + with_inner!(self.inner, FileInternal, |x| x.entry()) + } + + fn flags(&self) -> FileFlags { + with_inner!(self.inner, FileInternal, |x| x.flags()) + } +} + +/// An iterator over the segments of a `File`. +#[derive(Debug)] +pub struct SegmentIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: SegmentIteratorInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum SegmentIteratorInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffSegmentIterator<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfSegmentIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfSegmentIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOSegmentIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOSegmentIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeSegmentIterator32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeSegmentIterator64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSegmentIterator<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffSegmentIterator32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffSegmentIterator64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for SegmentIterator<'data, 'file, R> { + type Item = Segment<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + next_inner!(self.inner, SegmentIteratorInternal, SegmentInternal) + .map(|inner| Segment { inner }) + } +} + +/// A segment of a `File`. +pub struct Segment<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: SegmentInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum SegmentInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffSegment<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfSegment32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfSegment64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOSegment32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOSegment64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeSegment32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeSegment64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSegment<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffSegment32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffSegment64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Segment<'data, 'file, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + let mut s = f.debug_struct("Segment"); + match self.name() { + Ok(Some(ref name)) => { + s.field("name", name); + } + Ok(None) => {} + Err(_) => { + s.field("name", &"<invalid>"); + } + } + s.field("address", &self.address()) + .field("size", &self.size()) + .finish() + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for Segment<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSegment<'data> for Segment<'data, 'file, R> { + fn address(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.size()) + } + + fn align(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.align()) + } + + fn file_range(&self) -> (u64, u64) { + with_inner!(self.inner, SegmentInternal, |x| x.file_range()) + } + + fn data(&self) -> Result<&'data [u8]> { + with_inner!(self.inner, SegmentInternal, |x| x.data()) + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + with_inner!(self.inner, SegmentInternal, |x| x.data_range(address, size)) + } + + fn name_bytes(&self) -> Result<Option<&[u8]>> { + with_inner!(self.inner, SegmentInternal, |x| x.name_bytes()) + } + + fn name(&self) -> Result<Option<&str>> { + with_inner!(self.inner, SegmentInternal, |x| x.name()) + } + + fn flags(&self) -> SegmentFlags { + with_inner!(self.inner, SegmentInternal, |x| x.flags()) + } +} + +/// An iterator of the sections of a `File`. +#[derive(Debug)] +pub struct SectionIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: SectionIteratorInternal<'data, 'file, R>, +} + +// we wrap our enums in a struct so that they are kept private. +#[derive(Debug)] +enum SectionIteratorInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffSectionIterator<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfSectionIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfSectionIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOSectionIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOSectionIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeSectionIterator32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeSectionIterator64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSectionIterator<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffSectionIterator32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffSectionIterator64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for SectionIterator<'data, 'file, R> { + type Item = Section<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + next_inner!(self.inner, SectionIteratorInternal, SectionInternal) + .map(|inner| Section { inner }) + } +} + +/// A Section of a File +pub struct Section<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: SectionInternal<'data, 'file, R>, +} + +enum SectionInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffSection<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfSection32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfSection64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOSection32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOSection64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeSection32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeSection64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSection<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffSection32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffSection64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Section<'data, 'file, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + let mut s = f.debug_struct("Section"); + match self.segment_name() { + Ok(Some(ref name)) => { + s.field("segment", name); + } + Ok(None) => {} + Err(_) => { + s.field("segment", &"<invalid>"); + } + } + s.field("name", &self.name().unwrap_or("<invalid>")) + .field("address", &self.address()) + .field("size", &self.size()) + .field("align", &self.align()) + .field("kind", &self.kind()) + .field("flags", &self.flags()) + .finish() + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for Section<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for Section<'data, 'file, R> { + type RelocationIterator = SectionRelocationIterator<'data, 'file, R>; + + fn index(&self) -> SectionIndex { + with_inner!(self.inner, SectionInternal, |x| x.index()) + } + + fn address(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.size()) + } + + fn align(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.align()) + } + + fn file_range(&self) -> Option<(u64, u64)> { + with_inner!(self.inner, SectionInternal, |x| x.file_range()) + } + + fn data(&self) -> Result<&'data [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.data()) + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + with_inner!(self.inner, SectionInternal, |x| x.data_range(address, size)) + } + + fn compressed_file_range(&self) -> Result<CompressedFileRange> { + with_inner!(self.inner, SectionInternal, |x| x.compressed_file_range()) + } + + fn compressed_data(&self) -> Result<CompressedData<'data>> { + with_inner!(self.inner, SectionInternal, |x| x.compressed_data()) + } + + fn name_bytes(&self) -> Result<&[u8]> { + with_inner!(self.inner, SectionInternal, |x| x.name_bytes()) + } + + fn name(&self) -> Result<&str> { + with_inner!(self.inner, SectionInternal, |x| x.name()) + } + + fn segment_name_bytes(&self) -> Result<Option<&[u8]>> { + with_inner!(self.inner, SectionInternal, |x| x.segment_name_bytes()) + } + + fn segment_name(&self) -> Result<Option<&str>> { + with_inner!(self.inner, SectionInternal, |x| x.segment_name()) + } + + fn kind(&self) -> SectionKind { + with_inner!(self.inner, SectionInternal, |x| x.kind()) + } + + fn relocations(&self) -> SectionRelocationIterator<'data, 'file, R> { + SectionRelocationIterator { + inner: map_inner!( + self.inner, + SectionInternal, + SectionRelocationIteratorInternal, + |x| x.relocations() + ), + } + } + + fn flags(&self) -> SectionFlags { + with_inner!(self.inner, SectionInternal, |x| x.flags()) + } +} + +/// An iterator of the COMDAT section groups of a `File`. +#[derive(Debug)] +pub struct ComdatIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: ComdatIteratorInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum ComdatIteratorInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffComdatIterator<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfComdatIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfComdatIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOComdatIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOComdatIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeComdatIterator32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeComdatIterator64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmComdatIterator<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffComdatIterator32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffComdatIterator64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for ComdatIterator<'data, 'file, R> { + type Item = Comdat<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + next_inner!(self.inner, ComdatIteratorInternal, ComdatInternal) + .map(|inner| Comdat { inner }) + } +} + +/// A COMDAT section group of a `File`. +pub struct Comdat<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: ComdatInternal<'data, 'file, R>, +} + +enum ComdatInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffComdat<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfComdat32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfComdat64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOComdat32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOComdat64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeComdat32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeComdat64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmComdat<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffComdat32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffComdat64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Comdat<'data, 'file, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut s = f.debug_struct("Comdat"); + s.field("symbol", &self.symbol()) + .field("name", &self.name().unwrap_or("<invalid>")) + .field("kind", &self.kind()) + .finish() + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for Comdat<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for Comdat<'data, 'file, R> { + type SectionIterator = ComdatSectionIterator<'data, 'file, R>; + + fn kind(&self) -> ComdatKind { + with_inner!(self.inner, ComdatInternal, |x| x.kind()) + } + + fn symbol(&self) -> SymbolIndex { + with_inner!(self.inner, ComdatInternal, |x| x.symbol()) + } + + fn name_bytes(&self) -> Result<&[u8]> { + with_inner!(self.inner, ComdatInternal, |x| x.name_bytes()) + } + + fn name(&self) -> Result<&str> { + with_inner!(self.inner, ComdatInternal, |x| x.name()) + } + + fn sections(&self) -> ComdatSectionIterator<'data, 'file, R> { + ComdatSectionIterator { + inner: map_inner!( + self.inner, + ComdatInternal, + ComdatSectionIteratorInternal, + |x| x.sections() + ), + } + } +} + +/// An iterator over COMDAT section entries. +#[derive(Debug)] +pub struct ComdatSectionIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: ComdatSectionIteratorInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum ComdatSectionIteratorInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffComdatSectionIterator<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfComdatSectionIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfComdatSectionIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachOComdatSectionIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachOComdatSectionIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeComdatSectionIterator32<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeComdatSectionIterator64<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmComdatSectionIterator<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffComdatSectionIterator32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffComdatSectionIterator64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for ComdatSectionIterator<'data, 'file, R> { + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + with_inner_mut!(self.inner, ComdatSectionIteratorInternal, |x| x.next()) + } +} + +/// A symbol table. +#[derive(Debug)] +pub struct SymbolTable<'data, 'file, R = &'data [u8]> +where + 'data: 'file, + R: ReadRef<'data>, +{ + inner: SymbolTableInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum SymbolTableInternal<'data, 'file, R> +where + 'data: 'file, + R: ReadRef<'data>, +{ + #[cfg(feature = "coff")] + Coff((coff::CoffSymbolTable<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "elf")] + Elf32( + ( + elf::ElfSymbolTable32<'data, 'file, Endianness, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "elf")] + Elf64( + ( + elf::ElfSymbolTable64<'data, 'file, Endianness, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "macho")] + MachO32( + ( + macho::MachOSymbolTable32<'data, 'file, Endianness, R>, + PhantomData<()>, + ), + ), + #[cfg(feature = "macho")] + MachO64( + ( + macho::MachOSymbolTable64<'data, 'file, Endianness, R>, + PhantomData<()>, + ), + ), + #[cfg(feature = "pe")] + Pe32((coff::CoffSymbolTable<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "pe")] + Pe64((coff::CoffSymbolTable<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "wasm")] + Wasm((wasm::WasmSymbolTable<'data, 'file>, PhantomData<R>)), + #[cfg(feature = "xcoff")] + Xcoff32((xcoff::XcoffSymbolTable32<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "xcoff")] + Xcoff64((xcoff::XcoffSymbolTable64<'data, 'file, R>, PhantomData<R>)), +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for SymbolTable<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSymbolTable<'data> for SymbolTable<'data, 'file, R> { + type Symbol = Symbol<'data, 'file, R>; + type SymbolIterator = SymbolIterator<'data, 'file, R>; + + fn symbols(&self) -> Self::SymbolIterator { + SymbolIterator { + inner: map_inner!( + self.inner, + SymbolTableInternal, + SymbolIteratorInternal, + |x| (x.0.symbols(), PhantomData) + ), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> { + map_inner_option!(self.inner, SymbolTableInternal, SymbolInternal, |x| x + .0 + .symbol_by_index(index) + .map(|x| (x, PhantomData))) + .map(|inner| Symbol { inner }) + } +} + +/// An iterator over symbol table entries. +#[derive(Debug)] +pub struct SymbolIterator<'data, 'file, R = &'data [u8]> +where + 'data: 'file, + R: ReadRef<'data>, +{ + inner: SymbolIteratorInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum SymbolIteratorInternal<'data, 'file, R> +where + 'data: 'file, + R: ReadRef<'data>, +{ + #[cfg(feature = "coff")] + Coff((coff::CoffSymbolIterator<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "elf")] + Elf32( + ( + elf::ElfSymbolIterator32<'data, 'file, Endianness, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "elf")] + Elf64( + ( + elf::ElfSymbolIterator64<'data, 'file, Endianness, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "macho")] + MachO32( + ( + macho::MachOSymbolIterator32<'data, 'file, Endianness, R>, + PhantomData<()>, + ), + ), + #[cfg(feature = "macho")] + MachO64( + ( + macho::MachOSymbolIterator64<'data, 'file, Endianness, R>, + PhantomData<()>, + ), + ), + #[cfg(feature = "pe")] + Pe32((coff::CoffSymbolIterator<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "pe")] + Pe64((coff::CoffSymbolIterator<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "wasm")] + Wasm((wasm::WasmSymbolIterator<'data, 'file>, PhantomData<R>)), + #[cfg(feature = "xcoff")] + Xcoff32( + ( + xcoff::XcoffSymbolIterator32<'data, 'file, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "xcoff")] + Xcoff64( + ( + xcoff::XcoffSymbolIterator64<'data, 'file, R>, + PhantomData<R>, + ), + ), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for SymbolIterator<'data, 'file, R> { + type Item = Symbol<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + map_inner_option_mut!(self.inner, SymbolIteratorInternal, SymbolInternal, |iter| { + iter.0.next().map(|x| (x, PhantomData)) + }) + .map(|inner| Symbol { inner }) + } +} + +/// A symbol table entry. +pub struct Symbol<'data, 'file, R = &'data [u8]> +where + 'data: 'file, + R: ReadRef<'data>, +{ + inner: SymbolInternal<'data, 'file, R>, +} + +enum SymbolInternal<'data, 'file, R> +where + 'data: 'file, + R: ReadRef<'data>, +{ + #[cfg(feature = "coff")] + Coff((coff::CoffSymbol<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "elf")] + Elf32( + ( + elf::ElfSymbol32<'data, 'file, Endianness, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "elf")] + Elf64( + ( + elf::ElfSymbol64<'data, 'file, Endianness, R>, + PhantomData<R>, + ), + ), + #[cfg(feature = "macho")] + MachO32( + ( + macho::MachOSymbol32<'data, 'file, Endianness, R>, + PhantomData<()>, + ), + ), + #[cfg(feature = "macho")] + MachO64( + ( + macho::MachOSymbol64<'data, 'file, Endianness, R>, + PhantomData<()>, + ), + ), + #[cfg(feature = "pe")] + Pe32((coff::CoffSymbol<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "pe")] + Pe64((coff::CoffSymbol<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "wasm")] + Wasm((wasm::WasmSymbol<'data, 'file>, PhantomData<R>)), + #[cfg(feature = "xcoff")] + Xcoff32((xcoff::XcoffSymbol32<'data, 'file, R>, PhantomData<R>)), + #[cfg(feature = "xcoff")] + Xcoff64((xcoff::XcoffSymbol64<'data, 'file, R>, PhantomData<R>)), +} + +impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Symbol<'data, 'file, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Symbol") + .field("name", &self.name().unwrap_or("<invalid>")) + .field("address", &self.address()) + .field("size", &self.size()) + .field("kind", &self.kind()) + .field("section", &self.section()) + .field("scope", &self.scope()) + .field("weak", &self.is_weak()) + .field("flags", &self.flags()) + .finish() + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for Symbol<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSymbol<'data> for Symbol<'data, 'file, R> { + fn index(&self) -> SymbolIndex { + with_inner!(self.inner, SymbolInternal, |x| x.0.index()) + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + with_inner!(self.inner, SymbolInternal, |x| x.0.name_bytes()) + } + + fn name(&self) -> Result<&'data str> { + with_inner!(self.inner, SymbolInternal, |x| x.0.name()) + } + + fn address(&self) -> u64 { + with_inner!(self.inner, SymbolInternal, |x| x.0.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SymbolInternal, |x| x.0.size()) + } + + fn kind(&self) -> SymbolKind { + with_inner!(self.inner, SymbolInternal, |x| x.0.kind()) + } + + fn section(&self) -> SymbolSection { + with_inner!(self.inner, SymbolInternal, |x| x.0.section()) + } + + fn is_undefined(&self) -> bool { + with_inner!(self.inner, SymbolInternal, |x| x.0.is_undefined()) + } + + fn is_definition(&self) -> bool { + with_inner!(self.inner, SymbolInternal, |x| x.0.is_definition()) + } + + fn is_common(&self) -> bool { + with_inner!(self.inner, SymbolInternal, |x| x.0.is_common()) + } + + fn is_weak(&self) -> bool { + with_inner!(self.inner, SymbolInternal, |x| x.0.is_weak()) + } + + fn scope(&self) -> SymbolScope { + with_inner!(self.inner, SymbolInternal, |x| x.0.scope()) + } + + fn is_global(&self) -> bool { + with_inner!(self.inner, SymbolInternal, |x| x.0.is_global()) + } + + fn is_local(&self) -> bool { + with_inner!(self.inner, SymbolInternal, |x| x.0.is_local()) + } + + fn flags(&self) -> SymbolFlags<SectionIndex> { + with_inner!(self.inner, SymbolInternal, |x| x.0.flags()) + } +} + +/// An iterator over dynamic relocation entries. +#[derive(Debug)] +pub struct DynamicRelocationIterator<'data, 'file, R = &'data [u8]> +where + 'data: 'file, + R: ReadRef<'data>, +{ + inner: DynamicRelocationIteratorInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum DynamicRelocationIteratorInternal<'data, 'file, R> +where + 'data: 'file, + R: ReadRef<'data>, +{ + #[cfg(feature = "elf")] + Elf32(elf::ElfDynamicRelocationIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfDynamicRelocationIterator64<'data, 'file, Endianness, R>), + // We need to always use the lifetime parameters. + #[allow(unused)] + None(PhantomData<(&'data (), &'file (), R)>), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for DynamicRelocationIterator<'data, 'file, R> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + match self.inner { + #[cfg(feature = "elf")] + DynamicRelocationIteratorInternal::Elf32(ref mut elf) => elf.next(), + #[cfg(feature = "elf")] + DynamicRelocationIteratorInternal::Elf64(ref mut elf) => elf.next(), + DynamicRelocationIteratorInternal::None(_) => None, + } + } +} + +/// An iterator over section relocation entries. +#[derive(Debug)] +pub struct SectionRelocationIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> +where + 'data: 'file, +{ + inner: SectionRelocationIteratorInternal<'data, 'file, R>, +} + +#[derive(Debug)] +enum SectionRelocationIteratorInternal<'data, 'file, R: ReadRef<'data>> +where + 'data: 'file, +{ + #[cfg(feature = "coff")] + Coff(coff::CoffRelocationIterator<'data, 'file, R>), + #[cfg(feature = "elf")] + Elf32(elf::ElfSectionRelocationIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "elf")] + Elf64(elf::ElfSectionRelocationIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO32(macho::MachORelocationIterator32<'data, 'file, Endianness, R>), + #[cfg(feature = "macho")] + MachO64(macho::MachORelocationIterator64<'data, 'file, Endianness, R>), + #[cfg(feature = "pe")] + Pe32(pe::PeRelocationIterator<'data, 'file, R>), + #[cfg(feature = "pe")] + Pe64(pe::PeRelocationIterator<'data, 'file, R>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmRelocationIterator<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff32(xcoff::XcoffRelocationIterator32<'data, 'file, R>), + #[cfg(feature = "xcoff")] + Xcoff64(xcoff::XcoffRelocationIterator64<'data, 'file, R>), +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for SectionRelocationIterator<'data, 'file, R> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + with_inner_mut!(self.inner, SectionRelocationIteratorInternal, |x| x.next()) + } +} diff --git a/third_party/rust/object/src/read/archive.rs b/third_party/rust/object/src/read/archive.rs new file mode 100644 index 0000000000..f5aaa9b190 --- /dev/null +++ b/third_party/rust/object/src/read/archive.rs @@ -0,0 +1,739 @@ +//! Support for archive files. + +use core::convert::TryInto; + +use crate::archive; +use crate::read::{self, Bytes, Error, ReadError, ReadRef}; + +/// The kind of archive format. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum ArchiveKind { + /// There are no special files that indicate the archive format. + Unknown, + /// The GNU (or System V) archive format. + Gnu, + /// The GNU (or System V) archive format with 64-bit symbol table. + Gnu64, + /// The BSD archive format. + Bsd, + /// The BSD archive format with 64-bit symbol table. + /// + /// This is used for Darwin. + Bsd64, + /// The Windows COFF archive format. + Coff, + /// The AIX big archive format. + AixBig, +} + +/// The list of members in the archive. +#[derive(Debug, Clone, Copy)] +enum Members<'data> { + Common { + offset: u64, + end_offset: u64, + }, + AixBig { + index: &'data [archive::AixMemberOffset], + }, +} + +/// A partially parsed archive file. +#[derive(Debug, Clone, Copy)] +pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> { + data: R, + kind: ArchiveKind, + members: Members<'data>, + symbols: (u64, u64), + names: &'data [u8], +} + +impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { + /// Parse the archive header and special members. + pub fn parse(data: R) -> read::Result<Self> { + let len = data.len().read_error("Unknown archive length")?; + let mut tail = 0; + let magic = data + .read_bytes(&mut tail, archive::MAGIC.len() as u64) + .read_error("Invalid archive size")?; + + if magic == archive::AIX_BIG_MAGIC { + return Self::parse_aixbig(data); + } else if magic != archive::MAGIC { + return Err(Error("Unsupported archive identifier")); + } + + let mut members_offset = tail; + let members_end_offset = len; + + let mut file = ArchiveFile { + data, + kind: ArchiveKind::Unknown, + members: Members::Common { + offset: 0, + end_offset: 0, + }, + symbols: (0, 0), + names: &[], + }; + + // The first few members may be special, so parse them. + // GNU has: + // - "/" or "/SYM64/": symbol table (optional) + // - "//": names table (optional) + // COFF has: + // - "/": first linker member + // - "/": second linker member + // - "//": names table + // BSD has: + // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional) + // BSD 64-bit has: + // - "__.SYMDEF_64" or "__.SYMDEF_64 SORTED": symbol table (optional) + // BSD may use the extended name for the symbol table. This is handled + // by `ArchiveMember::parse`. + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"/" { + // GNU symbol table (unless we later determine this is COFF). + file.kind = ArchiveKind::Gnu; + file.symbols = member.file_range(); + members_offset = tail; + + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"/" { + // COFF linker member. + file.kind = ArchiveKind::Coff; + file.symbols = member.file_range(); + members_offset = tail; + + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"//" { + // COFF names table. + file.names = member.data(data)?; + members_offset = tail; + } + } + } else if member.name == b"//" { + // GNU names table. + file.names = member.data(data)?; + members_offset = tail; + } + } + } else if member.name == b"/SYM64/" { + // GNU 64-bit symbol table. + file.kind = ArchiveKind::Gnu64; + file.symbols = member.file_range(); + members_offset = tail; + + if tail < len { + let member = ArchiveMember::parse(data, &mut tail, &[])?; + if member.name == b"//" { + // GNU names table. + file.names = member.data(data)?; + members_offset = tail; + } + } + } else if member.name == b"//" { + // GNU names table. + file.kind = ArchiveKind::Gnu; + file.names = member.data(data)?; + members_offset = tail; + } else if member.name == b"__.SYMDEF" || member.name == b"__.SYMDEF SORTED" { + // BSD symbol table. + file.kind = ArchiveKind::Bsd; + file.symbols = member.file_range(); + members_offset = tail; + } else if member.name == b"__.SYMDEF_64" || member.name == b"__.SYMDEF_64 SORTED" { + // BSD 64-bit symbol table. + file.kind = ArchiveKind::Bsd64; + file.symbols = member.file_range(); + members_offset = tail; + } else { + // TODO: This could still be a BSD file. We leave this as unknown for now. + } + } + file.members = Members::Common { + offset: members_offset, + end_offset: members_end_offset, + }; + Ok(file) + } + + fn parse_aixbig(data: R) -> read::Result<Self> { + let mut tail = 0; + + let file_header = data + .read::<archive::AixFileHeader>(&mut tail) + .read_error("Invalid AIX big archive file header")?; + // Caller already validated this. + debug_assert_eq!(file_header.magic, archive::AIX_BIG_MAGIC); + + let mut file = ArchiveFile { + data, + kind: ArchiveKind::AixBig, + members: Members::AixBig { index: &[] }, + symbols: (0, 0), + names: &[], + }; + + // Read the span of symbol table. + let symtbl64 = parse_u64_digits(&file_header.gst64off, 10) + .read_error("Invalid offset to 64-bit symbol table in AIX big archive")?; + if symtbl64 > 0 { + // The symbol table is also a file with header. + let member = ArchiveMember::parse_aixbig(data, symtbl64)?; + file.symbols = member.file_range(); + } else { + let symtbl = parse_u64_digits(&file_header.gstoff, 10) + .read_error("Invalid offset to symbol table in AIX big archive")?; + if symtbl > 0 { + // The symbol table is also a file with header. + let member = ArchiveMember::parse_aixbig(data, symtbl)?; + file.symbols = member.file_range(); + } + } + + // Big archive member index table lists file entries with offsets and names. + // To avoid potential infinite loop (members are double-linked list), the + // iterator goes through the index instead of real members. + let member_table_offset = parse_u64_digits(&file_header.memoff, 10) + .read_error("Invalid offset for member table of AIX big archive")?; + if member_table_offset == 0 { + // The offset would be zero if archive contains no file. + return Ok(file); + } + + // The member index table is also a file with header. + let member = ArchiveMember::parse_aixbig(data, member_table_offset)?; + let mut member_data = Bytes(member.data(data)?); + + // Structure of member index table: + // Number of entries (20 bytes) + // Offsets of each entry (20*N bytes) + // Names string table (the rest of bytes to fill size defined in header) + let members_count_bytes = member_data + .read_slice::<u8>(20) + .read_error("Missing member count in AIX big archive")?; + let members_count = parse_u64_digits(members_count_bytes, 10) + .and_then(|size| size.try_into().ok()) + .read_error("Invalid member count in AIX big archive")?; + let index = member_data + .read_slice::<archive::AixMemberOffset>(members_count) + .read_error("Member count overflow in AIX big archive")?; + file.members = Members::AixBig { index }; + + Ok(file) + } + + /// Return the archive format. + #[inline] + pub fn kind(&self) -> ArchiveKind { + self.kind + } + + /// Iterate over the members of the archive. + /// + /// This does not return special members. + #[inline] + pub fn members(&self) -> ArchiveMemberIterator<'data, R> { + ArchiveMemberIterator { + data: self.data, + members: self.members, + names: self.names, + } + } +} + +/// An iterator over the members of an archive. +#[derive(Debug)] +pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> { + data: R, + members: Members<'data>, + names: &'data [u8], +} + +impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { + type Item = read::Result<ArchiveMember<'data>>; + + fn next(&mut self) -> Option<Self::Item> { + match &mut self.members { + Members::Common { + ref mut offset, + ref mut end_offset, + } => { + if *offset >= *end_offset { + return None; + } + let member = ArchiveMember::parse(self.data, offset, self.names); + if member.is_err() { + *offset = *end_offset; + } + Some(member) + } + Members::AixBig { ref mut index } => match **index { + [] => None, + [ref first, ref rest @ ..] => { + *index = rest; + let member = ArchiveMember::parse_aixbig_index(self.data, first); + if member.is_err() { + *index = &[]; + } + Some(member) + } + }, + } + } +} + +/// An archive member header. +#[derive(Debug, Clone, Copy)] +enum MemberHeader<'data> { + /// Common header used by many formats. + Common(&'data archive::Header), + /// AIX big archive header + AixBig(&'data archive::AixHeader), +} + +/// A partially parsed archive member. +#[derive(Debug)] +pub struct ArchiveMember<'data> { + header: MemberHeader<'data>, + name: &'data [u8], + offset: u64, + size: u64, +} + +impl<'data> ArchiveMember<'data> { + /// Parse the member header, name, and file data in an archive with the common format. + /// + /// This reads the extended name (if any) and adjusts the file size. + fn parse<R: ReadRef<'data>>( + data: R, + offset: &mut u64, + names: &'data [u8], + ) -> read::Result<Self> { + let header = data + .read::<archive::Header>(offset) + .read_error("Invalid archive member header")?; + if header.terminator != archive::TERMINATOR { + return Err(Error("Invalid archive terminator")); + } + + let mut file_offset = *offset; + let mut file_size = + parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?; + *offset = offset + .checked_add(file_size) + .read_error("Archive member size is too large")?; + // Entries are padded to an even number of bytes. + if (file_size & 1) != 0 { + *offset = offset.saturating_add(1); + } + + let name = if header.name[0] == b'/' && (header.name[1] as char).is_ascii_digit() { + // Read file name from the names table. + parse_sysv_extended_name(&header.name[1..], names) + .read_error("Invalid archive extended name offset")? + } else if &header.name[..3] == b"#1/" && (header.name[3] as char).is_ascii_digit() { + // Read file name from the start of the file data. + parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size) + .read_error("Invalid archive extended name length")? + } else if header.name[0] == b'/' { + let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len()); + &header.name[..name_len] + } else { + let name_len = memchr::memchr(b'/', &header.name) + .or_else(|| memchr::memchr(b' ', &header.name)) + .unwrap_or(header.name.len()); + &header.name[..name_len] + }; + + Ok(ArchiveMember { + header: MemberHeader::Common(header), + name, + offset: file_offset, + size: file_size, + }) + } + + /// Parse a member index entry in an AIX big archive, + /// and then parse the member header, name, and file data. + fn parse_aixbig_index<R: ReadRef<'data>>( + data: R, + index: &archive::AixMemberOffset, + ) -> read::Result<Self> { + let offset = parse_u64_digits(&index.0, 10) + .read_error("Invalid AIX big archive file member offset")?; + Self::parse_aixbig(data, offset) + } + + /// Parse the member header, name, and file data in an AIX big archive. + fn parse_aixbig<R: ReadRef<'data>>(data: R, mut offset: u64) -> read::Result<Self> { + // The format was described at + // https://www.ibm.com/docs/en/aix/7.3?topic=formats-ar-file-format-big + let header = data + .read::<archive::AixHeader>(&mut offset) + .read_error("Invalid AIX big archive member header")?; + let name_length = parse_u64_digits(&header.namlen, 10) + .read_error("Invalid AIX big archive member name length")?; + let name = data + .read_bytes(&mut offset, name_length) + .read_error("Invalid AIX big archive member name")?; + + // The actual data for a file member begins at the first even-byte boundary beyond the + // member header and continues for the number of bytes specified by the ar_size field. The + // ar command inserts null bytes for padding where necessary. + if offset & 1 != 0 { + offset = offset.saturating_add(1); + } + // Because of the even-byte boundary, we have to read and check terminator after header. + let terminator = data + .read_bytes(&mut offset, 2) + .read_error("Invalid AIX big archive terminator")?; + if terminator != archive::TERMINATOR { + return Err(Error("Invalid AIX big archive terminator")); + } + + let size = parse_u64_digits(&header.size, 10) + .read_error("Invalid archive member size in AIX big archive")?; + Ok(ArchiveMember { + header: MemberHeader::AixBig(header), + name, + offset, + size, + }) + } + + /// Return the raw header that is common to many archive formats. + /// + /// Returns `None` if this archive does not use the common header format. + #[inline] + pub fn header(&self) -> Option<&'data archive::Header> { + match self.header { + MemberHeader::Common(header) => Some(header), + _ => None, + } + } + + /// Return the raw header for AIX big archives. + /// + /// Returns `None` if this is not an AIX big archive. + #[inline] + pub fn aix_header(&self) -> Option<&'data archive::AixHeader> { + match self.header { + MemberHeader::AixBig(header) => Some(header), + _ => None, + } + } + + /// Return the parsed file name. + /// + /// This may be an extended file name. + #[inline] + pub fn name(&self) -> &'data [u8] { + self.name + } + + /// Parse the file modification timestamp from the header. + #[inline] + pub fn date(&self) -> Option<u64> { + match &self.header { + MemberHeader::Common(header) => parse_u64_digits(&header.date, 10), + MemberHeader::AixBig(header) => parse_u64_digits(&header.date, 10), + } + } + + /// Parse the user ID from the header. + #[inline] + pub fn uid(&self) -> Option<u64> { + match &self.header { + MemberHeader::Common(header) => parse_u64_digits(&header.uid, 10), + MemberHeader::AixBig(header) => parse_u64_digits(&header.uid, 10), + } + } + + /// Parse the group ID from the header. + #[inline] + pub fn gid(&self) -> Option<u64> { + match &self.header { + MemberHeader::Common(header) => parse_u64_digits(&header.gid, 10), + MemberHeader::AixBig(header) => parse_u64_digits(&header.gid, 10), + } + } + + /// Parse the file mode from the header. + #[inline] + pub fn mode(&self) -> Option<u64> { + match &self.header { + MemberHeader::Common(header) => parse_u64_digits(&header.mode, 8), + MemberHeader::AixBig(header) => parse_u64_digits(&header.mode, 8), + } + } + + /// Return the offset and size of the file data. + pub fn file_range(&self) -> (u64, u64) { + (self.offset, self.size) + } + + /// Return the file data. + #[inline] + pub fn data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]> { + data.read_bytes_at(self.offset, self.size) + .read_error("Archive member size is too large") + } +} + +// Ignores bytes starting from the first space. +fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> { + if let [b' ', ..] = digits { + return None; + } + let mut result: u64 = 0; + for &c in digits { + if c == b' ' { + return Some(result); + } else { + let x = (c as char).to_digit(radix)?; + result = result + .checked_mul(u64::from(radix))? + .checked_add(u64::from(x))?; + } + } + Some(result) +} + +fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> { + let offset = parse_u64_digits(digits, 10).ok_or(())?; + let offset = offset.try_into().map_err(|_| ())?; + let name_data = names.get(offset..).ok_or(())?; + let name = match memchr::memchr2(b'/', b'\0', name_data) { + Some(len) => &name_data[..len], + None => name_data, + }; + Ok(name) +} + +/// Modifies `data` to start after the extended name. +fn parse_bsd_extended_name<'data, R: ReadRef<'data>>( + digits: &[u8], + data: R, + offset: &mut u64, + size: &mut u64, +) -> Result<&'data [u8], ()> { + let len = parse_u64_digits(digits, 10).ok_or(())?; + *size = size.checked_sub(len).ok_or(())?; + let name_data = data.read_bytes(offset, len)?; + let name = match memchr::memchr(b'\0', name_data) { + Some(len) => &name_data[..len], + None => name_data, + }; + Ok(name) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn kind() { + let data = b"!<arch>\n"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Unknown); + + let data = b"\ + !<arch>\n\ + / 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + + let data = b"\ + !<arch>\n\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + + let data = b"\ + !<arch>\n\ + / 4 `\n\ + 0000\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + + let data = b"\ + !<arch>\n\ + /SYM64/ 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu64); + + let data = b"\ + !<arch>\n\ + /SYM64/ 4 `\n\ + 0000\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu64); + + let data = b"\ + !<arch>\n\ + __.SYMDEF 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd); + + let data = b"\ + !<arch>\n\ + #1/9 13 `\n\ + __.SYMDEF0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd); + + let data = b"\ + !<arch>\n\ + #1/16 20 `\n\ + __.SYMDEF SORTED0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd); + + let data = b"\ + !<arch>\n\ + __.SYMDEF_64 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd64); + + let data = b"\ + !<arch>\n\ + #1/12 16 `\n\ + __.SYMDEF_640000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd64); + + let data = b"\ + !<arch>\n\ + #1/19 23 `\n\ + __.SYMDEF_64 SORTED0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Bsd64); + + let data = b"\ + !<arch>\n\ + / 4 `\n\ + 0000\ + / 4 `\n\ + 0000\ + // 4 `\n\ + 0000"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Coff); + + let data = b"\ + <bigaf>\n\ + 0 0 \ + 0 0 \ + 0 128 \ + 6 0 \ + 0 \0\0\0\0\0\0\0\0\0\0\0\0\ + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ + \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + let archive = ArchiveFile::parse(&data[..]).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::AixBig); + } + + #[test] + fn gnu_names() { + let data = b"\ + !<arch>\n\ + // 18 `\n\ + 0123456789abcdef/\n\ + s p a c e/ 0 0 0 644 4 `\n\ + 0000\ + 0123456789abcde/0 0 0 644 3 `\n\ + odd\n\ + /0 0 0 0 644 4 `\n\ + even"; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Gnu); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"s p a c e"); + assert_eq!(member.data(data).unwrap(), &b"0000"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcde"); + assert_eq!(member.data(data).unwrap(), &b"odd"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcdef"); + assert_eq!(member.data(data).unwrap(), &b"even"[..]); + + assert!(members.next().is_none()); + } + + #[test] + fn bsd_names() { + let data = b"\ + !<arch>\n\ + 0123456789abcde 0 0 0 644 3 `\n\ + odd\n\ + #1/16 0 0 0 644 20 `\n\ + 0123456789abcdefeven"; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::Unknown); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcde"); + assert_eq!(member.data(data).unwrap(), &b"odd"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcdef"); + assert_eq!(member.data(data).unwrap(), &b"even"[..]); + + assert!(members.next().is_none()); + } + + #[test] + fn aix_names() { + let data = b"\ + <bigaf>\n\ + 396 0 0 \ + 128 262 0 \ + 4 262 0 \ + 1662610370 223 1 644 16 \ + 0123456789abcdef`\nord\n\ + 4 396 128 \ + 1662610374 223 1 644 16 \ + fedcba9876543210`\nrev\n\ + 94 0 262 \ + 0 0 0 0 0 \ + `\n2 128 \ + 262 0123456789abcdef\0fedcba9876543210\0"; + let data = &data[..]; + let archive = ArchiveFile::parse(data).unwrap(); + assert_eq!(archive.kind(), ArchiveKind::AixBig); + let mut members = archive.members(); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"0123456789abcdef"); + assert_eq!(member.data(data).unwrap(), &b"ord\n"[..]); + + let member = members.next().unwrap().unwrap(); + assert_eq!(member.name(), b"fedcba9876543210"); + assert_eq!(member.data(data).unwrap(), &b"rev\n"[..]); + + assert!(members.next().is_none()); + } +} diff --git a/third_party/rust/object/src/read/coff/comdat.rs b/third_party/rust/object/src/read/coff/comdat.rs new file mode 100644 index 0000000000..3be69ecc2b --- /dev/null +++ b/third_party/rust/object/src/read/coff/comdat.rs @@ -0,0 +1,167 @@ +use core::str; + +use crate::endian::LittleEndian as LE; +use crate::pe; +use crate::read::{ + self, ComdatKind, ObjectComdat, ReadError, ReadRef, Result, SectionIndex, SymbolIndex, +}; + +use super::CoffFile; + +/// An iterator over the COMDAT section groups of a `CoffFile`. +#[derive(Debug)] +pub struct CoffComdatIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file CoffFile<'data, R>, + pub(super) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for CoffComdatIterator<'data, 'file, R> { + type Item = CoffComdat<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + loop { + let index = self.index; + let symbol = self.file.common.symbols.symbol(index).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + if let Some(comdat) = CoffComdat::parse(self.file, symbol, index) { + return Some(comdat); + } + } + } +} + +/// A COMDAT section group of a `CoffFile`. +#[derive(Debug)] +pub struct CoffComdat<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + file: &'file CoffFile<'data, R>, + symbol_index: SymbolIndex, + symbol: &'data pe::ImageSymbol, + selection: u8, +} + +impl<'data, 'file, R: ReadRef<'data>> CoffComdat<'data, 'file, R> { + fn parse( + file: &'file CoffFile<'data, R>, + section_symbol: &'data pe::ImageSymbol, + index: usize, + ) -> Option<CoffComdat<'data, 'file, R>> { + // Must be a section symbol. + if !section_symbol.has_aux_section() { + return None; + } + + // Auxiliary record must have a non-associative selection. + let aux = file.common.symbols.aux_section(index).ok()?; + let selection = aux.selection; + if selection == 0 || selection == pe::IMAGE_COMDAT_SELECT_ASSOCIATIVE { + return None; + } + + // Find the COMDAT symbol. + let mut symbol_index = index; + let mut symbol = section_symbol; + let section_number = section_symbol.section_number.get(LE); + loop { + symbol_index += 1 + symbol.number_of_aux_symbols as usize; + symbol = file.common.symbols.symbol(symbol_index).ok()?; + if section_number == symbol.section_number.get(LE) { + break; + } + } + + Some(CoffComdat { + file, + symbol_index: SymbolIndex(symbol_index), + symbol, + selection, + }) + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for CoffComdat<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for CoffComdat<'data, 'file, R> { + type SectionIterator = CoffComdatSectionIterator<'data, 'file, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + match self.selection { + pe::IMAGE_COMDAT_SELECT_NODUPLICATES => ComdatKind::NoDuplicates, + pe::IMAGE_COMDAT_SELECT_ANY => ComdatKind::Any, + pe::IMAGE_COMDAT_SELECT_SAME_SIZE => ComdatKind::SameSize, + pe::IMAGE_COMDAT_SELECT_EXACT_MATCH => ComdatKind::ExactMatch, + pe::IMAGE_COMDAT_SELECT_LARGEST => ComdatKind::Largest, + pe::IMAGE_COMDAT_SELECT_NEWEST => ComdatKind::Newest, + _ => ComdatKind::Unknown, + } + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + self.symbol_index + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + // Find the name of first symbol referring to the section. + self.symbol.name(self.file.common.symbols.strings()) + } + + #[inline] + fn name(&self) -> Result<&str> { + let bytes = self.name_bytes()?; + str::from_utf8(bytes) + .ok() + .read_error("Non UTF-8 COFF COMDAT name") + } + + #[inline] + fn sections(&self) -> Self::SectionIterator { + CoffComdatSectionIterator { + file: self.file, + section_number: self.symbol.section_number.get(LE), + index: 0, + } + } +} + +/// An iterator over the sections in a COMDAT section group of a `CoffFile`. +#[derive(Debug)] +pub struct CoffComdatSectionIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + file: &'file CoffFile<'data, R>, + section_number: u16, + index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for CoffComdatSectionIterator<'data, 'file, R> { + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + // Find associated COMDAT symbols. + // TODO: it seems gcc doesn't use associated symbols for this + loop { + let index = self.index; + let symbol = self.file.common.symbols.symbol(index).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + + // Must be a section symbol. + if !symbol.has_aux_section() { + continue; + } + + let section_number = symbol.section_number.get(LE); + + let aux = self.file.common.symbols.aux_section(index).ok()?; + if aux.selection == pe::IMAGE_COMDAT_SELECT_ASSOCIATIVE { + // TODO: use high_number for bigobj + if aux.number.get(LE) == self.section_number { + return Some(SectionIndex(section_number as usize)); + } + } else if aux.selection != 0 { + if section_number == self.section_number { + return Some(SectionIndex(section_number as usize)); + } + } + } + } +} diff --git a/third_party/rust/object/src/read/coff/file.rs b/third_party/rust/object/src/read/coff/file.rs new file mode 100644 index 0000000000..c6cc9f8460 --- /dev/null +++ b/third_party/rust/object/src/read/coff/file.rs @@ -0,0 +1,247 @@ +use alloc::vec::Vec; + +use crate::read::{ + self, Architecture, Export, FileFlags, Import, NoDynamicRelocationIterator, Object, ObjectKind, + ObjectSection, ReadError, ReadRef, Result, SectionIndex, SymbolIndex, +}; +use crate::{pe, LittleEndian as LE}; + +use super::{ + CoffComdat, CoffComdatIterator, CoffSection, CoffSectionIterator, CoffSegment, + CoffSegmentIterator, CoffSymbol, CoffSymbolIterator, CoffSymbolTable, SectionTable, + SymbolTable, +}; + +/// The common parts of `PeFile` and `CoffFile`. +#[derive(Debug)] +pub(crate) struct CoffCommon<'data, R: ReadRef<'data>> { + pub(crate) sections: SectionTable<'data>, + // TODO: ImageSymbolExBytes + pub(crate) symbols: SymbolTable<'data, R>, + pub(crate) image_base: u64, +} + +/// A COFF object file. +#[derive(Debug)] +pub struct CoffFile<'data, R: ReadRef<'data> = &'data [u8]> { + pub(super) header: &'data pe::ImageFileHeader, + pub(super) common: CoffCommon<'data, R>, + pub(super) data: R, +} + +impl<'data, R: ReadRef<'data>> CoffFile<'data, R> { + /// Parse the raw COFF file data. + pub fn parse(data: R) -> Result<Self> { + let mut offset = 0; + let header = pe::ImageFileHeader::parse(data, &mut offset)?; + let sections = header.sections(data, offset)?; + let symbols = header.symbols(data)?; + + Ok(CoffFile { + header, + common: CoffCommon { + sections, + symbols, + image_base: 0, + }, + data, + }) + } +} + +impl<'data, R: ReadRef<'data>> read::private::Sealed for CoffFile<'data, R> {} + +impl<'data, 'file, R> Object<'data, 'file> for CoffFile<'data, R> +where + 'data: 'file, + R: 'file + ReadRef<'data>, +{ + type Segment = CoffSegment<'data, 'file, R>; + type SegmentIterator = CoffSegmentIterator<'data, 'file, R>; + type Section = CoffSection<'data, 'file, R>; + type SectionIterator = CoffSectionIterator<'data, 'file, R>; + type Comdat = CoffComdat<'data, 'file, R>; + type ComdatIterator = CoffComdatIterator<'data, 'file, R>; + type Symbol = CoffSymbol<'data, 'file, R>; + type SymbolIterator = CoffSymbolIterator<'data, 'file, R>; + type SymbolTable = CoffSymbolTable<'data, 'file, R>; + type DynamicRelocationIterator = NoDynamicRelocationIterator; + + fn architecture(&self) -> Architecture { + match self.header.machine.get(LE) { + pe::IMAGE_FILE_MACHINE_ARMNT => Architecture::Arm, + pe::IMAGE_FILE_MACHINE_ARM64 => Architecture::Aarch64, + pe::IMAGE_FILE_MACHINE_I386 => Architecture::I386, + pe::IMAGE_FILE_MACHINE_AMD64 => Architecture::X86_64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + true + } + + #[inline] + fn is_64(&self) -> bool { + // Windows COFF is always 32-bit, even for 64-bit architectures. This could be confusing. + false + } + + fn kind(&self) -> ObjectKind { + ObjectKind::Relocatable + } + + fn segments(&'file self) -> CoffSegmentIterator<'data, 'file, R> { + CoffSegmentIterator { + file: self, + iter: self.common.sections.iter(), + } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option<CoffSection<'data, 'file, R>> { + self.sections() + .find(|section| section.name_bytes() == Ok(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Result<CoffSection<'data, 'file, R>> { + let section = self.common.sections.section(index.0)?; + Ok(CoffSection { + file: self, + index, + section, + }) + } + + fn sections(&'file self) -> CoffSectionIterator<'data, 'file, R> { + CoffSectionIterator { + file: self, + iter: self.common.sections.iter().enumerate(), + } + } + + fn comdats(&'file self) -> CoffComdatIterator<'data, 'file, R> { + CoffComdatIterator { + file: self, + index: 0, + } + } + + fn symbol_by_index(&'file self, index: SymbolIndex) -> Result<CoffSymbol<'data, 'file, R>> { + let symbol = self.common.symbols.symbol(index.0)?; + Ok(CoffSymbol { + file: &self.common, + index, + symbol, + }) + } + + fn symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R> { + CoffSymbolIterator { + file: &self.common, + index: 0, + } + } + + #[inline] + fn symbol_table(&'file self) -> Option<CoffSymbolTable<'data, 'file, R>> { + Some(CoffSymbolTable { file: &self.common }) + } + + fn dynamic_symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R> { + CoffSymbolIterator { + file: &self.common, + // Hack: don't return any. + index: self.common.symbols.len(), + } + } + + #[inline] + fn dynamic_symbol_table(&'file self) -> Option<CoffSymbolTable<'data, 'file, R>> { + None + } + + #[inline] + fn dynamic_relocations(&'file self) -> Option<NoDynamicRelocationIterator> { + None + } + + #[inline] + fn imports(&self) -> Result<Vec<Import<'data>>> { + // TODO: this could return undefined symbols, but not needed yet. + Ok(Vec::new()) + } + + #[inline] + fn exports(&self) -> Result<Vec<Export<'data>>> { + // TODO: this could return global symbols, but not needed yet. + Ok(Vec::new()) + } + + fn has_debug_symbols(&self) -> bool { + self.section_by_name(".debug_info").is_some() + } + + fn relative_address_base(&self) -> u64 { + 0 + } + + #[inline] + fn entry(&self) -> u64 { + 0 + } + + fn flags(&self) -> FileFlags { + FileFlags::Coff { + characteristics: self.header.characteristics.get(LE), + } + } +} + +impl pe::ImageFileHeader { + /// Read the file header. + /// + /// `data` must be the entire file data. + /// `offset` must be the file header offset. It is updated to point after the optional header, + /// which is where the section headers are located. + pub fn parse<'data, R: ReadRef<'data>>(data: R, offset: &mut u64) -> read::Result<&'data Self> { + let header = data + .read::<pe::ImageFileHeader>(offset) + .read_error("Invalid COFF file header size or alignment")?; + + // Skip over the optional header. + *offset = offset + .checked_add(header.size_of_optional_header.get(LE).into()) + .read_error("Invalid COFF optional header size")?; + + // TODO: maybe validate that the machine is known? + Ok(header) + } + + /// Read the section table. + /// + /// `data` must be the entire file data. + /// `offset` must be after the optional file header. + #[inline] + pub fn sections<'data, R: ReadRef<'data>>( + &self, + data: R, + offset: u64, + ) -> read::Result<SectionTable<'data>> { + SectionTable::parse(self, data, offset) + } + + /// Read the symbol table and string table. + /// + /// `data` must be the entire file data. + #[inline] + pub fn symbols<'data, R: ReadRef<'data>>( + &self, + data: R, + ) -> read::Result<SymbolTable<'data, R>> { + SymbolTable::parse(self, data) + } +} diff --git a/third_party/rust/object/src/read/coff/mod.rs b/third_party/rust/object/src/read/coff/mod.rs new file mode 100644 index 0000000000..d5b3caf32a --- /dev/null +++ b/third_party/rust/object/src/read/coff/mod.rs @@ -0,0 +1,18 @@ +//! Support for reading Windows COFF files. +//! +//! Provides `CoffFile` and related types which implement the `Object` trait. + +mod file; +pub use file::*; + +mod section; +pub use section::*; + +mod symbol; +pub use symbol::*; + +mod relocation; +pub use relocation::*; + +mod comdat; +pub use comdat::*; diff --git a/third_party/rust/object/src/read/coff/relocation.rs b/third_party/rust/object/src/read/coff/relocation.rs new file mode 100644 index 0000000000..9a1fcb6180 --- /dev/null +++ b/third_party/rust/object/src/read/coff/relocation.rs @@ -0,0 +1,91 @@ +use alloc::fmt; +use core::slice; + +use crate::endian::LittleEndian as LE; +use crate::pe; +use crate::read::{ + ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, SymbolIndex, +}; + +use super::CoffFile; + +/// An iterator over the relocations in a `CoffSection`. +pub struct CoffRelocationIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file CoffFile<'data, R>, + pub(super) iter: slice::Iter<'data, pe::ImageRelocation>, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for CoffRelocationIterator<'data, 'file, R> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|relocation| { + let (kind, size, addend) = match self.file.header.machine.get(LE) { + pe::IMAGE_FILE_MACHINE_ARMNT => match relocation.typ.get(LE) { + pe::IMAGE_REL_ARM_ADDR32 => (RelocationKind::Absolute, 32, 0), + pe::IMAGE_REL_ARM_ADDR32NB => (RelocationKind::ImageOffset, 32, 0), + pe::IMAGE_REL_ARM_REL32 => (RelocationKind::Relative, 32, -4), + pe::IMAGE_REL_ARM_SECTION => (RelocationKind::SectionIndex, 16, 0), + pe::IMAGE_REL_ARM_SECREL => (RelocationKind::SectionOffset, 32, 0), + typ => (RelocationKind::Coff(typ), 0, 0), + }, + pe::IMAGE_FILE_MACHINE_ARM64 => match relocation.typ.get(LE) { + pe::IMAGE_REL_ARM64_ADDR32 => (RelocationKind::Absolute, 32, 0), + pe::IMAGE_REL_ARM64_ADDR32NB => (RelocationKind::ImageOffset, 32, 0), + pe::IMAGE_REL_ARM64_SECREL => (RelocationKind::SectionOffset, 32, 0), + pe::IMAGE_REL_ARM64_SECTION => (RelocationKind::SectionIndex, 16, 0), + pe::IMAGE_REL_ARM64_ADDR64 => (RelocationKind::Absolute, 64, 0), + pe::IMAGE_REL_ARM64_REL32 => (RelocationKind::Relative, 32, -4), + typ => (RelocationKind::Coff(typ), 0, 0), + }, + pe::IMAGE_FILE_MACHINE_I386 => match relocation.typ.get(LE) { + pe::IMAGE_REL_I386_DIR16 => (RelocationKind::Absolute, 16, 0), + pe::IMAGE_REL_I386_REL16 => (RelocationKind::Relative, 16, 0), + pe::IMAGE_REL_I386_DIR32 => (RelocationKind::Absolute, 32, 0), + pe::IMAGE_REL_I386_DIR32NB => (RelocationKind::ImageOffset, 32, 0), + pe::IMAGE_REL_I386_SECTION => (RelocationKind::SectionIndex, 16, 0), + pe::IMAGE_REL_I386_SECREL => (RelocationKind::SectionOffset, 32, 0), + pe::IMAGE_REL_I386_SECREL7 => (RelocationKind::SectionOffset, 7, 0), + pe::IMAGE_REL_I386_REL32 => (RelocationKind::Relative, 32, -4), + typ => (RelocationKind::Coff(typ), 0, 0), + }, + pe::IMAGE_FILE_MACHINE_AMD64 => match relocation.typ.get(LE) { + pe::IMAGE_REL_AMD64_ADDR64 => (RelocationKind::Absolute, 64, 0), + pe::IMAGE_REL_AMD64_ADDR32 => (RelocationKind::Absolute, 32, 0), + pe::IMAGE_REL_AMD64_ADDR32NB => (RelocationKind::ImageOffset, 32, 0), + pe::IMAGE_REL_AMD64_REL32 => (RelocationKind::Relative, 32, -4), + pe::IMAGE_REL_AMD64_REL32_1 => (RelocationKind::Relative, 32, -5), + pe::IMAGE_REL_AMD64_REL32_2 => (RelocationKind::Relative, 32, -6), + pe::IMAGE_REL_AMD64_REL32_3 => (RelocationKind::Relative, 32, -7), + pe::IMAGE_REL_AMD64_REL32_4 => (RelocationKind::Relative, 32, -8), + pe::IMAGE_REL_AMD64_REL32_5 => (RelocationKind::Relative, 32, -9), + pe::IMAGE_REL_AMD64_SECTION => (RelocationKind::SectionIndex, 16, 0), + pe::IMAGE_REL_AMD64_SECREL => (RelocationKind::SectionOffset, 32, 0), + pe::IMAGE_REL_AMD64_SECREL7 => (RelocationKind::SectionOffset, 7, 0), + typ => (RelocationKind::Coff(typ), 0, 0), + }, + _ => (RelocationKind::Coff(relocation.typ.get(LE)), 0, 0), + }; + let target = RelocationTarget::Symbol(SymbolIndex( + relocation.symbol_table_index.get(LE) as usize, + )); + ( + u64::from(relocation.virtual_address.get(LE)), + Relocation { + kind, + encoding: RelocationEncoding::Generic, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for CoffRelocationIterator<'data, 'file, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CoffRelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/coff/section.rs b/third_party/rust/object/src/read/coff/section.rs new file mode 100644 index 0000000000..731e37ca8b --- /dev/null +++ b/third_party/rust/object/src/read/coff/section.rs @@ -0,0 +1,524 @@ +use core::convert::TryFrom; +use core::{iter, result, slice, str}; + +use crate::endian::LittleEndian as LE; +use crate::pe; +use crate::read::util::StringTable; +use crate::read::{ + self, CompressedData, CompressedFileRange, Error, ObjectSection, ObjectSegment, ReadError, + ReadRef, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, +}; + +use super::{CoffFile, CoffRelocationIterator}; + +/// The table of section headers in a COFF or PE file. +#[derive(Debug, Default, Clone, Copy)] +pub struct SectionTable<'data> { + sections: &'data [pe::ImageSectionHeader], +} + +impl<'data> SectionTable<'data> { + /// Parse the section table. + /// + /// `data` must be the entire file data. + /// `offset` must be after the optional file header. + pub fn parse<R: ReadRef<'data>>( + header: &pe::ImageFileHeader, + data: R, + offset: u64, + ) -> Result<Self> { + let sections = data + .read_slice_at(offset, header.number_of_sections.get(LE).into()) + .read_error("Invalid COFF/PE section headers")?; + Ok(SectionTable { sections }) + } + + /// Iterate over the section headers. + /// + /// Warning: sections indices start at 1. + #[inline] + pub fn iter(&self) -> slice::Iter<'data, pe::ImageSectionHeader> { + self.sections.iter() + } + + /// Return true if the section table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.sections.is_empty() + } + + /// The number of section headers. + #[inline] + pub fn len(&self) -> usize { + self.sections.len() + } + + /// Return the section header at the given index. + /// + /// The index is 1-based. + pub fn section(&self, index: usize) -> read::Result<&'data pe::ImageSectionHeader> { + self.sections + .get(index.wrapping_sub(1)) + .read_error("Invalid COFF/PE section index") + } + + /// Return the section header with the given name. + /// + /// The returned index is 1-based. + /// + /// Ignores sections with invalid names. + pub fn section_by_name<R: ReadRef<'data>>( + &self, + strings: StringTable<'data, R>, + name: &[u8], + ) -> Option<(usize, &'data pe::ImageSectionHeader)> { + self.sections + .iter() + .enumerate() + .find(|(_, section)| section.name(strings) == Ok(name)) + .map(|(index, section)| (index + 1, section)) + } + + /// Compute the maximum file offset used by sections. + /// + /// This will usually match the end of file, unless the PE file has a + /// [data overlay](https://security.stackexchange.com/questions/77336/how-is-the-file-overlay-read-by-an-exe-virus) + pub fn max_section_file_offset(&self) -> u64 { + let mut max = 0; + for section in self.iter() { + match (section.pointer_to_raw_data.get(LE) as u64) + .checked_add(section.size_of_raw_data.get(LE) as u64) + { + None => { + // This cannot happen, we're suming two u32 into a u64 + continue; + } + Some(end_of_section) => { + if end_of_section > max { + max = end_of_section; + } + } + } + } + max + } +} + +/// An iterator over the loadable sections of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSegmentIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file CoffFile<'data, R>, + pub(super) iter: slice::Iter<'data, pe::ImageSectionHeader>, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for CoffSegmentIterator<'data, 'file, R> { + type Item = CoffSegment<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|section| CoffSegment { + file: self.file, + section, + }) + } +} + +/// A loadable section of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSegment<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file CoffFile<'data, R>, + pub(super) section: &'data pe::ImageSectionHeader, +} + +impl<'data, 'file, R: ReadRef<'data>> CoffSegment<'data, 'file, R> { + fn bytes(&self) -> Result<&'data [u8]> { + self.section + .coff_data(self.file.data) + .read_error("Invalid COFF section offset or size") + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for CoffSegment<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSegment<'data> for CoffSegment<'data, 'file, R> { + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address.get(LE)) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size.get(LE)) + } + + #[inline] + fn align(&self) -> u64 { + self.section.coff_alignment() + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + let (offset, size) = self.section.coff_file_range().unwrap_or((0, 0)); + (u64::from(offset), u64::from(size)) + } + + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn name_bytes(&self) -> Result<Option<&[u8]>> { + self.section + .name(self.file.common.symbols.strings()) + .map(Some) + } + + #[inline] + fn name(&self) -> Result<Option<&str>> { + let name = self.section.name(self.file.common.symbols.strings())?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 COFF section name") + .map(Some) + } + + #[inline] + fn flags(&self) -> SegmentFlags { + let characteristics = self.section.characteristics.get(LE); + SegmentFlags::Coff { characteristics } + } +} + +/// An iterator over the sections of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSectionIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file CoffFile<'data, R>, + pub(super) iter: iter::Enumerate<slice::Iter<'data, pe::ImageSectionHeader>>, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for CoffSectionIterator<'data, 'file, R> { + type Item = CoffSection<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| CoffSection { + file: self.file, + index: SectionIndex(index + 1), + section, + }) + } +} + +/// A section of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSection<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file CoffFile<'data, R>, + pub(super) index: SectionIndex, + pub(super) section: &'data pe::ImageSectionHeader, +} + +impl<'data, 'file, R: ReadRef<'data>> CoffSection<'data, 'file, R> { + fn bytes(&self) -> Result<&'data [u8]> { + self.section + .coff_data(self.file.data) + .read_error("Invalid COFF section offset or size") + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for CoffSection<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for CoffSection<'data, 'file, R> { + type RelocationIterator = CoffRelocationIterator<'data, 'file, R>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address.get(LE)) + } + + #[inline] + fn size(&self) -> u64 { + // TODO: This may need to be the length from the auxiliary symbol for this section. + u64::from(self.section.size_of_raw_data.get(LE)) + } + + #[inline] + fn align(&self) -> u64 { + self.section.coff_alignment() + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + let (offset, size) = self.section.coff_file_range()?; + Some((u64::from(offset), u64::from(size))) + } + + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn compressed_file_range(&self) -> Result<CompressedFileRange> { + Ok(CompressedFileRange::none(self.file_range())) + } + + #[inline] + fn compressed_data(&self) -> Result<CompressedData<'data>> { + self.data().map(CompressedData::none) + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + self.section.name(self.file.common.symbols.strings()) + } + + #[inline] + fn name(&self) -> Result<&str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 COFF section name") + } + + #[inline] + fn segment_name_bytes(&self) -> Result<Option<&[u8]>> { + Ok(None) + } + + #[inline] + fn segment_name(&self) -> Result<Option<&str>> { + Ok(None) + } + + #[inline] + fn kind(&self) -> SectionKind { + self.section.kind() + } + + fn relocations(&self) -> CoffRelocationIterator<'data, 'file, R> { + let relocations = self.section.coff_relocations(self.file.data).unwrap_or(&[]); + CoffRelocationIterator { + file: self.file, + iter: relocations.iter(), + } + } + + fn flags(&self) -> SectionFlags { + SectionFlags::Coff { + characteristics: self.section.characteristics.get(LE), + } + } +} + +impl pe::ImageSectionHeader { + pub(crate) fn kind(&self) -> SectionKind { + let characteristics = self.characteristics.get(LE); + if characteristics & (pe::IMAGE_SCN_CNT_CODE | pe::IMAGE_SCN_MEM_EXECUTE) != 0 { + SectionKind::Text + } else if characteristics & pe::IMAGE_SCN_CNT_INITIALIZED_DATA != 0 { + if characteristics & pe::IMAGE_SCN_MEM_DISCARDABLE != 0 { + SectionKind::Other + } else if characteristics & pe::IMAGE_SCN_MEM_WRITE != 0 { + SectionKind::Data + } else { + SectionKind::ReadOnlyData + } + } else if characteristics & pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0 { + SectionKind::UninitializedData + } else if characteristics & pe::IMAGE_SCN_LNK_INFO != 0 { + SectionKind::Linker + } else { + SectionKind::Unknown + } + } +} + +impl pe::ImageSectionHeader { + /// Return the string table offset of the section name. + /// + /// Returns `Ok(None)` if the name doesn't use the string table + /// and can be obtained with `raw_name` instead. + pub fn name_offset(&self) -> Result<Option<u32>> { + let bytes = &self.name; + if bytes[0] != b'/' { + return Ok(None); + } + + if bytes[1] == b'/' { + let mut offset = 0; + for byte in bytes[2..].iter() { + let digit = match byte { + b'A'..=b'Z' => byte - b'A', + b'a'..=b'z' => byte - b'a' + 26, + b'0'..=b'9' => byte - b'0' + 52, + b'+' => 62, + b'/' => 63, + _ => return Err(Error("Invalid COFF section name base-64 offset")), + }; + offset = offset * 64 + digit as u64; + } + u32::try_from(offset) + .ok() + .read_error("Invalid COFF section name base-64 offset") + .map(Some) + } else { + let mut offset = 0; + for byte in bytes[1..].iter() { + let digit = match byte { + b'0'..=b'9' => byte - b'0', + 0 => break, + _ => return Err(Error("Invalid COFF section name base-10 offset")), + }; + offset = offset * 10 + digit as u32; + } + Ok(Some(offset)) + } + } + + /// Return the section name. + /// + /// This handles decoding names that are offsets into the symbol string table. + pub fn name<'data, R: ReadRef<'data>>( + &'data self, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + if let Some(offset) = self.name_offset()? { + strings + .get(offset) + .read_error("Invalid COFF section name offset") + } else { + Ok(self.raw_name()) + } + } + + /// Return the raw section name. + pub fn raw_name(&self) -> &[u8] { + let bytes = &self.name; + match memchr::memchr(b'\0', bytes) { + Some(end) => &bytes[..end], + None => &bytes[..], + } + } + + /// Return the offset and size of the section in a COFF file. + /// + /// Returns `None` for sections that have no data in the file. + pub fn coff_file_range(&self) -> Option<(u32, u32)> { + if self.characteristics.get(LE) & pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0 { + None + } else { + let offset = self.pointer_to_raw_data.get(LE); + // Note: virtual size is not used for COFF. + let size = self.size_of_raw_data.get(LE); + Some((offset, size)) + } + } + + /// Return the section data in a COFF file. + /// + /// Returns `Ok(&[])` if the section has no data. + /// Returns `Err` for invalid values. + pub fn coff_data<'data, R: ReadRef<'data>>(&self, data: R) -> result::Result<&'data [u8], ()> { + if let Some((offset, size)) = self.coff_file_range() { + data.read_bytes_at(offset.into(), size.into()) + } else { + Ok(&[]) + } + } + + /// Return the section alignment in bytes. + /// + /// This is only valid for sections in a COFF file. + pub fn coff_alignment(&self) -> u64 { + match self.characteristics.get(LE) & pe::IMAGE_SCN_ALIGN_MASK { + pe::IMAGE_SCN_ALIGN_1BYTES => 1, + pe::IMAGE_SCN_ALIGN_2BYTES => 2, + pe::IMAGE_SCN_ALIGN_4BYTES => 4, + pe::IMAGE_SCN_ALIGN_8BYTES => 8, + pe::IMAGE_SCN_ALIGN_16BYTES => 16, + pe::IMAGE_SCN_ALIGN_32BYTES => 32, + pe::IMAGE_SCN_ALIGN_64BYTES => 64, + pe::IMAGE_SCN_ALIGN_128BYTES => 128, + pe::IMAGE_SCN_ALIGN_256BYTES => 256, + pe::IMAGE_SCN_ALIGN_512BYTES => 512, + pe::IMAGE_SCN_ALIGN_1024BYTES => 1024, + pe::IMAGE_SCN_ALIGN_2048BYTES => 2048, + pe::IMAGE_SCN_ALIGN_4096BYTES => 4096, + pe::IMAGE_SCN_ALIGN_8192BYTES => 8192, + _ => 16, + } + } + + /// Read the relocations in a COFF file. + /// + /// `data` must be the entire file data. + pub fn coff_relocations<'data, R: ReadRef<'data>>( + &self, + data: R, + ) -> read::Result<&'data [pe::ImageRelocation]> { + let mut pointer = self.pointer_to_relocations.get(LE).into(); + let mut number: usize = self.number_of_relocations.get(LE).into(); + if number == core::u16::MAX.into() + && self.characteristics.get(LE) & pe::IMAGE_SCN_LNK_NRELOC_OVFL != 0 + { + // Extended relocations. Read first relocation (which contains extended count) & adjust + // relocations pointer. + let extended_relocation_info = data + .read_at::<pe::ImageRelocation>(pointer) + .read_error("Invalid COFF relocation offset or number")?; + number = extended_relocation_info.virtual_address.get(LE) as usize; + if number == 0 { + return Err(Error("Invalid COFF relocation number")); + } + pointer += core::mem::size_of::<pe::ImageRelocation>() as u64; + // Extended relocation info does not contribute to the count of sections. + number -= 1; + } + data.read_slice_at(pointer, number) + .read_error("Invalid COFF relocation offset or number") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn name_offset() { + let mut section = pe::ImageSectionHeader::default(); + section.name = *b"xxxxxxxx"; + assert_eq!(section.name_offset(), Ok(None)); + section.name = *b"/0\0\0\0\0\0\0"; + assert_eq!(section.name_offset(), Ok(Some(0))); + section.name = *b"/9999999"; + assert_eq!(section.name_offset(), Ok(Some(999_9999))); + section.name = *b"//AAAAAA"; + assert_eq!(section.name_offset(), Ok(Some(0))); + section.name = *b"//D/////"; + assert_eq!(section.name_offset(), Ok(Some(0xffff_ffff))); + section.name = *b"//EAAAAA"; + assert!(section.name_offset().is_err()); + section.name = *b"////////"; + assert!(section.name_offset().is_err()); + } +} diff --git a/third_party/rust/object/src/read/coff/symbol.rs b/third_party/rust/object/src/read/coff/symbol.rs new file mode 100644 index 0000000000..c954c8a292 --- /dev/null +++ b/third_party/rust/object/src/read/coff/symbol.rs @@ -0,0 +1,518 @@ +use alloc::fmt; +use alloc::vec::Vec; +use core::convert::TryInto; +use core::str; + +use super::{CoffCommon, SectionTable}; +use crate::endian::{LittleEndian as LE, U32Bytes}; +use crate::pe; +use crate::pod::{bytes_of_slice, Pod}; +use crate::read::util::StringTable; +use crate::read::{ + self, Bytes, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Result, SectionIndex, + SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapEntry, SymbolScope, SymbolSection, +}; + +/// A table of symbol entries in a COFF or PE file. +/// +/// Also includes the string table used for the symbol names. +#[derive(Debug)] +pub struct SymbolTable<'data, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + symbols: &'data [pe::ImageSymbolBytes], + strings: StringTable<'data, R>, +} + +impl<'data, R: ReadRef<'data>> Default for SymbolTable<'data, R> { + fn default() -> Self { + Self { + symbols: &[], + strings: StringTable::default(), + } + } +} + +impl<'data, R: ReadRef<'data>> SymbolTable<'data, R> { + /// Read the symbol table. + pub fn parse(header: &pe::ImageFileHeader, data: R) -> Result<Self> { + // The symbol table may not be present. + let mut offset = header.pointer_to_symbol_table.get(LE).into(); + let (symbols, strings) = if offset != 0 { + let symbols = data + .read_slice(&mut offset, header.number_of_symbols.get(LE) as usize) + .read_error("Invalid COFF symbol table offset or size")?; + + // Note: don't update data when reading length; the length includes itself. + let length = data + .read_at::<U32Bytes<_>>(offset) + .read_error("Missing COFF string table")? + .get(LE); + let str_end = offset + .checked_add(length as u64) + .read_error("Invalid COFF string table length")?; + let strings = StringTable::new(data, offset, str_end); + + (symbols, strings) + } else { + (&[][..], StringTable::default()) + }; + + Ok(SymbolTable { symbols, strings }) + } + + /// Return the string table used for the symbol names. + #[inline] + pub fn strings(&self) -> StringTable<'data, R> { + self.strings + } + + /// Return true if the symbol table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + + /// The number of symbol table entries. + /// + /// This includes auxiliary symbol table entries. + #[inline] + pub fn len(&self) -> usize { + self.symbols.len() + } + + /// Iterate over the symbols. + #[inline] + pub fn iter<'table>(&'table self) -> SymbolIterator<'data, 'table, R> { + SymbolIterator { + symbols: self, + index: 0, + } + } + + /// Return the symbol table entry at the given index. + #[inline] + pub fn symbol(&self, index: usize) -> Result<&'data pe::ImageSymbol> { + self.get::<pe::ImageSymbol>(index, 0) + } + + /// Return the auxiliary function symbol for the symbol table entry at the given index. + /// + /// Note that the index is of the symbol, not the first auxiliary record. + #[inline] + pub fn aux_function(&self, index: usize) -> Result<&'data pe::ImageAuxSymbolFunction> { + self.get::<pe::ImageAuxSymbolFunction>(index, 1) + } + + /// Return the auxiliary section symbol for the symbol table entry at the given index. + /// + /// Note that the index is of the symbol, not the first auxiliary record. + #[inline] + pub fn aux_section(&self, index: usize) -> Result<&'data pe::ImageAuxSymbolSection> { + self.get::<pe::ImageAuxSymbolSection>(index, 1) + } + + /// Return the auxiliary file name for the symbol table entry at the given index. + /// + /// Note that the index is of the symbol, not the first auxiliary record. + pub fn aux_file_name(&self, index: usize, aux_count: u8) -> Result<&'data [u8]> { + let entries = index + .checked_add(1) + .and_then(|x| Some(x..x.checked_add(aux_count.into())?)) + .and_then(|x| self.symbols.get(x)) + .read_error("Invalid COFF symbol index")?; + let bytes = bytes_of_slice(entries); + // The name is padded with nulls. + Ok(match memchr::memchr(b'\0', bytes) { + Some(end) => &bytes[..end], + None => bytes, + }) + } + + /// Return the symbol table entry or auxiliary record at the given index and offset. + pub fn get<T: Pod>(&self, index: usize, offset: usize) -> Result<&'data T> { + let bytes = index + .checked_add(offset) + .and_then(|x| self.symbols.get(x)) + .read_error("Invalid COFF symbol index")?; + Bytes(&bytes.0[..]) + .read() + .read_error("Invalid COFF symbol data") + } + + /// Construct a map from addresses to a user-defined map entry. + pub fn map<Entry: SymbolMapEntry, F: Fn(&'data pe::ImageSymbol) -> Option<Entry>>( + &self, + f: F, + ) -> SymbolMap<Entry> { + let mut symbols = Vec::with_capacity(self.symbols.len()); + for (_, symbol) in self.iter() { + if !symbol.is_definition() { + continue; + } + if let Some(entry) = f(symbol) { + symbols.push(entry); + } + } + SymbolMap::new(symbols) + } +} + +/// An iterator for symbol entries in a COFF or PE file. +/// +/// Yields the index and symbol structure for each symbol. +#[derive(Debug)] +pub struct SymbolIterator<'data, 'table, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + symbols: &'table SymbolTable<'data, R>, + index: usize, +} + +impl<'data, 'table, R: ReadRef<'data>> Iterator for SymbolIterator<'data, 'table, R> { + type Item = (usize, &'data pe::ImageSymbol); + + fn next(&mut self) -> Option<Self::Item> { + let index = self.index; + let symbol = self.symbols.symbol(index).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + Some((index, symbol)) + } +} + +impl pe::ImageSymbol { + /// Parse a COFF symbol name. + /// + /// `strings` must be the string table used for symbol names. + pub fn name<'data, R: ReadRef<'data>>( + &'data self, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + if self.name[0] == 0 { + // If the name starts with 0 then the last 4 bytes are a string table offset. + let offset = u32::from_le_bytes(self.name[4..8].try_into().unwrap()); + strings + .get(offset) + .read_error("Invalid COFF symbol name offset") + } else { + // The name is inline and padded with nulls. + Ok(match memchr::memchr(b'\0', &self.name) { + Some(end) => &self.name[..end], + None => &self.name[..], + }) + } + } + + /// Return the symbol address. + /// + /// This takes into account the image base and the section address. + pub fn address(&self, image_base: u64, sections: &SectionTable) -> Result<u64> { + let section_number = self.section_number.get(LE) as usize; + let section = sections.section(section_number)?; + let virtual_address = u64::from(section.virtual_address.get(LE)); + let value = u64::from(self.value.get(LE)); + Ok(image_base + virtual_address + value) + } + + /// Return true if the symbol is a definition of a function or data object. + pub fn is_definition(&self) -> bool { + let section_number = self.section_number.get(LE); + if section_number == pe::IMAGE_SYM_UNDEFINED { + return false; + } + match self.storage_class { + pe::IMAGE_SYM_CLASS_STATIC => { + // Exclude section symbols. + !(self.value.get(LE) == 0 && self.number_of_aux_symbols > 0) + } + pe::IMAGE_SYM_CLASS_EXTERNAL | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL => true, + _ => false, + } + } + + /// Return true if the symbol has an auxiliary file name. + pub fn has_aux_file_name(&self) -> bool { + self.number_of_aux_symbols > 0 && self.storage_class == pe::IMAGE_SYM_CLASS_FILE + } + + /// Return true if the symbol has an auxiliary function symbol. + pub fn has_aux_function(&self) -> bool { + self.number_of_aux_symbols > 0 && self.derived_type() == pe::IMAGE_SYM_DTYPE_FUNCTION + } + + /// Return true if the symbol has an auxiliary section symbol. + pub fn has_aux_section(&self) -> bool { + self.number_of_aux_symbols > 0 + && self.storage_class == pe::IMAGE_SYM_CLASS_STATIC + && self.value.get(LE) == 0 + } +} + +/// A symbol table of a `CoffFile`. +#[derive(Debug, Clone, Copy)] +pub struct CoffSymbolTable<'data, 'file, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + pub(crate) file: &'file CoffCommon<'data, R>, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for CoffSymbolTable<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSymbolTable<'data> + for CoffSymbolTable<'data, 'file, R> +{ + type Symbol = CoffSymbol<'data, 'file, R>; + type SymbolIterator = CoffSymbolIterator<'data, 'file, R>; + + fn symbols(&self) -> Self::SymbolIterator { + CoffSymbolIterator { + file: self.file, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> { + let symbol = self.file.symbols.symbol(index.0)?; + Ok(CoffSymbol { + file: self.file, + index, + symbol, + }) + } +} + +/// An iterator over the symbols of a `CoffFile`. +pub struct CoffSymbolIterator<'data, 'file, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + pub(crate) file: &'file CoffCommon<'data, R>, + pub(crate) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for CoffSymbolIterator<'data, 'file, R> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CoffSymbolIterator").finish() + } +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for CoffSymbolIterator<'data, 'file, R> { + type Item = CoffSymbol<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + let index = self.index; + let symbol = self.file.symbols.symbol(index).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + Some(CoffSymbol { + file: self.file, + index: SymbolIndex(index), + symbol, + }) + } +} + +/// A symbol of a `CoffFile`. +#[derive(Debug, Clone, Copy)] +pub struct CoffSymbol<'data, 'file, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + pub(crate) file: &'file CoffCommon<'data, R>, + pub(crate) index: SymbolIndex, + pub(crate) symbol: &'data pe::ImageSymbol, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for CoffSymbol<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSymbol<'data> for CoffSymbol<'data, 'file, R> { + #[inline] + fn index(&self) -> SymbolIndex { + self.index + } + + fn name_bytes(&self) -> read::Result<&'data [u8]> { + if self.symbol.has_aux_file_name() { + self.file + .symbols + .aux_file_name(self.index.0, self.symbol.number_of_aux_symbols) + } else { + self.symbol.name(self.file.symbols.strings()) + } + } + + fn name(&self) -> read::Result<&'data str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 COFF symbol name") + } + + fn address(&self) -> u64 { + // Only return an address for storage classes that we know use an address. + match self.symbol.storage_class { + pe::IMAGE_SYM_CLASS_STATIC + | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL + | pe::IMAGE_SYM_CLASS_LABEL => {} + pe::IMAGE_SYM_CLASS_EXTERNAL => { + if self.symbol.section_number.get(LE) == pe::IMAGE_SYM_UNDEFINED { + // Undefined or common data, neither of which have an address. + return 0; + } + } + _ => return 0, + } + self.symbol + .address(self.file.image_base, &self.file.sections) + .unwrap_or(0) + } + + fn size(&self) -> u64 { + match self.symbol.storage_class { + pe::IMAGE_SYM_CLASS_STATIC => { + // Section symbols may duplicate the size from the section table. + if self.symbol.has_aux_section() { + if let Ok(aux) = self.file.symbols.aux_section(self.index.0) { + u64::from(aux.length.get(LE)) + } else { + 0 + } + } else { + 0 + } + } + pe::IMAGE_SYM_CLASS_EXTERNAL => { + if self.symbol.section_number.get(LE) == pe::IMAGE_SYM_UNDEFINED { + // For undefined symbols, symbol.value is 0 and the size is 0. + // For common data, symbol.value is the size. + u64::from(self.symbol.value.get(LE)) + } else if self.symbol.has_aux_function() { + // Function symbols may have a size. + if let Ok(aux) = self.file.symbols.aux_function(self.index.0) { + u64::from(aux.total_size.get(LE)) + } else { + 0 + } + } else { + 0 + } + } + // Most symbols don't have sizes. + _ => 0, + } + } + + fn kind(&self) -> SymbolKind { + let derived_kind = if self.symbol.derived_type() == pe::IMAGE_SYM_DTYPE_FUNCTION { + SymbolKind::Text + } else { + SymbolKind::Data + }; + match self.symbol.storage_class { + pe::IMAGE_SYM_CLASS_STATIC => { + if self.symbol.value.get(LE) == 0 && self.symbol.number_of_aux_symbols > 0 { + SymbolKind::Section + } else { + derived_kind + } + } + pe::IMAGE_SYM_CLASS_EXTERNAL | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL => derived_kind, + pe::IMAGE_SYM_CLASS_SECTION => SymbolKind::Section, + pe::IMAGE_SYM_CLASS_FILE => SymbolKind::File, + pe::IMAGE_SYM_CLASS_LABEL => SymbolKind::Label, + _ => SymbolKind::Unknown, + } + } + + fn section(&self) -> SymbolSection { + match self.symbol.section_number.get(LE) { + pe::IMAGE_SYM_UNDEFINED => { + if self.symbol.storage_class == pe::IMAGE_SYM_CLASS_EXTERNAL + && self.symbol.value.get(LE) == 0 + { + SymbolSection::Undefined + } else { + SymbolSection::Common + } + } + pe::IMAGE_SYM_ABSOLUTE => SymbolSection::Absolute, + pe::IMAGE_SYM_DEBUG => { + if self.symbol.storage_class == pe::IMAGE_SYM_CLASS_FILE { + SymbolSection::None + } else { + SymbolSection::Unknown + } + } + index if index > 0 => SymbolSection::Section(SectionIndex(index.into())), + _ => SymbolSection::Unknown, + } + } + + #[inline] + fn is_undefined(&self) -> bool { + self.symbol.storage_class == pe::IMAGE_SYM_CLASS_EXTERNAL + && self.symbol.section_number.get(LE) == pe::IMAGE_SYM_UNDEFINED + && self.symbol.value.get(LE) == 0 + } + + #[inline] + fn is_definition(&self) -> bool { + self.symbol.is_definition() + } + + #[inline] + fn is_common(&self) -> bool { + self.symbol.storage_class == pe::IMAGE_SYM_CLASS_EXTERNAL + && self.symbol.section_number.get(LE) == pe::IMAGE_SYM_UNDEFINED + && self.symbol.value.get(LE) != 0 + } + + #[inline] + fn is_weak(&self) -> bool { + self.symbol.storage_class == pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL + } + + #[inline] + fn scope(&self) -> SymbolScope { + match self.symbol.storage_class { + pe::IMAGE_SYM_CLASS_EXTERNAL | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL => { + // TODO: determine if symbol is exported + SymbolScope::Linkage + } + _ => SymbolScope::Compilation, + } + } + + #[inline] + fn is_global(&self) -> bool { + match self.symbol.storage_class { + pe::IMAGE_SYM_CLASS_EXTERNAL | pe::IMAGE_SYM_CLASS_WEAK_EXTERNAL => true, + _ => false, + } + } + + #[inline] + fn is_local(&self) -> bool { + !self.is_global() + } + + fn flags(&self) -> SymbolFlags<SectionIndex> { + if self.symbol.has_aux_section() { + if let Ok(aux) = self.file.symbols.aux_section(self.index.0) { + // TODO: use high_number for bigobj + let number = aux.number.get(LE) as usize; + return SymbolFlags::CoffSection { + selection: aux.selection, + associative_section: if number == 0 { + None + } else { + Some(SectionIndex(number)) + }, + }; + } + } + SymbolFlags::None + } +} diff --git a/third_party/rust/object/src/read/elf/comdat.rs b/third_party/rust/object/src/read/elf/comdat.rs new file mode 100644 index 0000000000..7cee85bb44 --- /dev/null +++ b/third_party/rust/object/src/read/elf/comdat.rs @@ -0,0 +1,162 @@ +use core::fmt::Debug; +use core::{iter, slice, str}; + +use crate::elf; +use crate::endian::{Endianness, U32Bytes}; +use crate::read::{self, ComdatKind, ObjectComdat, ReadError, ReadRef, SectionIndex, SymbolIndex}; + +use super::{ElfFile, FileHeader, SectionHeader, Sym}; + +/// An iterator over the COMDAT section groups of an `ElfFile32`. +pub type ElfComdatIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfComdatIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the COMDAT section groups of an `ElfFile64`. +pub type ElfComdatIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfComdatIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the COMDAT section groups of an `ElfFile`. +#[derive(Debug)] +pub struct ElfComdatIterator<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) iter: iter::Enumerate<slice::Iter<'data, Elf::SectionHeader>>, +} + +impl<'data, 'file, Elf, R> Iterator for ElfComdatIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type Item = ElfComdat<'data, 'file, Elf, R>; + + fn next(&mut self) -> Option<Self::Item> { + for (_index, section) in self.iter.by_ref() { + if let Some(comdat) = ElfComdat::parse(self.file, section) { + return Some(comdat); + } + } + None + } +} + +/// A COMDAT section group of an `ElfFile32`. +pub type ElfComdat32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfComdat<'data, 'file, elf::FileHeader32<Endian>, R>; +/// A COMDAT section group of an `ElfFile64`. +pub type ElfComdat64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfComdat<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// A COMDAT section group of an `ElfFile`. +#[derive(Debug)] +pub struct ElfComdat<'data, 'file, Elf, R = &'data [u8]> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + file: &'file ElfFile<'data, Elf, R>, + section: &'data Elf::SectionHeader, + sections: &'data [U32Bytes<Elf::Endian>], +} + +impl<'data, 'file, Elf, R> ElfComdat<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + fn parse( + file: &'file ElfFile<'data, Elf, R>, + section: &'data Elf::SectionHeader, + ) -> Option<ElfComdat<'data, 'file, Elf, R>> { + let (flag, sections) = section.group(file.endian, file.data).ok()??; + if flag != elf::GRP_COMDAT { + return None; + } + Some(ElfComdat { + file, + section, + sections, + }) + } +} + +impl<'data, 'file, Elf, R> read::private::Sealed for ElfComdat<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Elf, R> ObjectComdat<'data> for ElfComdat<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type SectionIterator = ElfComdatSectionIterator<'data, 'file, Elf, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + ComdatKind::Any + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + SymbolIndex(self.section.sh_info(self.file.endian) as usize) + } + + fn name_bytes(&self) -> read::Result<&[u8]> { + // FIXME: check sh_link + let index = self.section.sh_info(self.file.endian) as usize; + let symbol = self.file.symbols.symbol(index)?; + symbol.name(self.file.endian, self.file.symbols.strings()) + } + + fn name(&self) -> read::Result<&str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 ELF COMDAT name") + } + + fn sections(&self) -> Self::SectionIterator { + ElfComdatSectionIterator { + file: self.file, + sections: self.sections.iter(), + } + } +} + +/// An iterator over the sections in a COMDAT section group of an `ElfFile32`. +pub type ElfComdatSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfComdatSectionIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the sections in a COMDAT section group of an `ElfFile64`. +pub type ElfComdatSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfComdatSectionIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the sections in a COMDAT section group of an `ElfFile`. +#[derive(Debug)] +pub struct ElfComdatSectionIterator<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + file: &'file ElfFile<'data, Elf, R>, + sections: slice::Iter<'data, U32Bytes<Elf::Endian>>, +} + +impl<'data, 'file, Elf, R> Iterator for ElfComdatSectionIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + let index = self.sections.next()?; + Some(SectionIndex(index.get(self.file.endian) as usize)) + } +} diff --git a/third_party/rust/object/src/read/elf/compression.rs b/third_party/rust/object/src/read/elf/compression.rs new file mode 100644 index 0000000000..7242dd39c8 --- /dev/null +++ b/third_party/rust/object/src/read/elf/compression.rs @@ -0,0 +1,56 @@ +use core::fmt::Debug; + +use crate::elf; +use crate::endian; +use crate::pod::Pod; + +/// A trait for generic access to `CompressionHeader32` and `CompressionHeader64`. +#[allow(missing_docs)] +pub trait CompressionHeader: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + + fn ch_type(&self, endian: Self::Endian) -> u32; + fn ch_size(&self, endian: Self::Endian) -> Self::Word; + fn ch_addralign(&self, endian: Self::Endian) -> Self::Word; +} + +impl<Endian: endian::Endian> CompressionHeader for elf::CompressionHeader32<Endian> { + type Word = u32; + type Endian = Endian; + + #[inline] + fn ch_type(&self, endian: Self::Endian) -> u32 { + self.ch_type.get(endian) + } + + #[inline] + fn ch_size(&self, endian: Self::Endian) -> Self::Word { + self.ch_size.get(endian) + } + + #[inline] + fn ch_addralign(&self, endian: Self::Endian) -> Self::Word { + self.ch_addralign.get(endian) + } +} + +impl<Endian: endian::Endian> CompressionHeader for elf::CompressionHeader64<Endian> { + type Word = u64; + type Endian = Endian; + + #[inline] + fn ch_type(&self, endian: Self::Endian) -> u32 { + self.ch_type.get(endian) + } + + #[inline] + fn ch_size(&self, endian: Self::Endian) -> Self::Word { + self.ch_size.get(endian) + } + + #[inline] + fn ch_addralign(&self, endian: Self::Endian) -> Self::Word { + self.ch_addralign.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/dynamic.rs b/third_party/rust/object/src/read/elf/dynamic.rs new file mode 100644 index 0000000000..5fe15b560d --- /dev/null +++ b/third_party/rust/object/src/read/elf/dynamic.rs @@ -0,0 +1,117 @@ +use core::convert::TryInto; +use core::fmt::Debug; + +use crate::elf; +use crate::endian; +use crate::pod::Pod; +use crate::read::{ReadError, Result, StringTable}; + +/// A trait for generic access to `Dyn32` and `Dyn64`. +#[allow(missing_docs)] +pub trait Dyn: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + + fn d_tag(&self, endian: Self::Endian) -> Self::Word; + fn d_val(&self, endian: Self::Endian) -> Self::Word; + + /// Try to convert the tag to a `u32`. + fn tag32(&self, endian: Self::Endian) -> Option<u32> { + self.d_tag(endian).into().try_into().ok() + } + + /// Try to convert the value to a `u32`. + fn val32(&self, endian: Self::Endian) -> Option<u32> { + self.d_val(endian).into().try_into().ok() + } + + /// Return true if the value is an offset in the dynamic string table. + fn is_string(&self, endian: Self::Endian) -> bool { + if let Some(tag) = self.tag32(endian) { + match tag { + elf::DT_NEEDED + | elf::DT_SONAME + | elf::DT_RPATH + | elf::DT_RUNPATH + | elf::DT_AUXILIARY + | elf::DT_FILTER => true, + _ => false, + } + } else { + false + } + } + + /// Use the value to get a string in a string table. + /// + /// Does not check for an appropriate tag. + fn string<'data>( + &self, + endian: Self::Endian, + strings: StringTable<'data>, + ) -> Result<&'data [u8]> { + self.val32(endian) + .and_then(|val| strings.get(val).ok()) + .read_error("Invalid ELF dyn string") + } + + /// Return true if the value is an address. + fn is_address(&self, endian: Self::Endian) -> bool { + if let Some(tag) = self.tag32(endian) { + match tag { + elf::DT_PLTGOT + | elf::DT_HASH + | elf::DT_STRTAB + | elf::DT_SYMTAB + | elf::DT_RELA + | elf::DT_INIT + | elf::DT_FINI + | elf::DT_SYMBOLIC + | elf::DT_REL + | elf::DT_DEBUG + | elf::DT_JMPREL + | elf::DT_FINI_ARRAY + | elf::DT_INIT_ARRAY + | elf::DT_PREINIT_ARRAY + | elf::DT_SYMTAB_SHNDX + | elf::DT_VERDEF + | elf::DT_VERNEED + | elf::DT_VERSYM + | elf::DT_ADDRRNGLO..=elf::DT_ADDRRNGHI => true, + _ => false, + } + } else { + false + } + } +} + +impl<Endian: endian::Endian> Dyn for elf::Dyn32<Endian> { + type Word = u32; + type Endian = Endian; + + #[inline] + fn d_tag(&self, endian: Self::Endian) -> Self::Word { + self.d_tag.get(endian) + } + + #[inline] + fn d_val(&self, endian: Self::Endian) -> Self::Word { + self.d_val.get(endian) + } +} + +impl<Endian: endian::Endian> Dyn for elf::Dyn64<Endian> { + type Word = u64; + type Endian = Endian; + + #[inline] + fn d_tag(&self, endian: Self::Endian) -> Self::Word { + self.d_tag.get(endian) + } + + #[inline] + fn d_val(&self, endian: Self::Endian) -> Self::Word { + self.d_val.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/file.rs b/third_party/rust/object/src/read/elf/file.rs new file mode 100644 index 0000000000..a00dd0ae31 --- /dev/null +++ b/third_party/rust/object/src/read/elf/file.rs @@ -0,0 +1,883 @@ +use alloc::vec::Vec; +use core::convert::TryInto; +use core::fmt::Debug; +use core::mem; + +use crate::read::{ + self, util, Architecture, ByteString, Bytes, Error, Export, FileFlags, Import, Object, + ObjectKind, ReadError, ReadRef, SectionIndex, StringTable, SymbolIndex, +}; +use crate::{elf, endian, Endian, Endianness, Pod, U32}; + +use super::{ + CompressionHeader, Dyn, ElfComdat, ElfComdatIterator, ElfDynamicRelocationIterator, ElfSection, + ElfSectionIterator, ElfSegment, ElfSegmentIterator, ElfSymbol, ElfSymbolIterator, + ElfSymbolTable, NoteHeader, ProgramHeader, Rel, Rela, RelocationSections, SectionHeader, + SectionTable, Sym, SymbolTable, +}; + +/// A 32-bit ELF object file. +pub type ElfFile32<'data, Endian = Endianness, R = &'data [u8]> = + ElfFile<'data, elf::FileHeader32<Endian>, R>; +/// A 64-bit ELF object file. +pub type ElfFile64<'data, Endian = Endianness, R = &'data [u8]> = + ElfFile<'data, elf::FileHeader64<Endian>, R>; + +/// A partially parsed ELF file. +/// +/// Most of the functionality of this type is provided by the `Object` trait implementation. +#[derive(Debug)] +pub struct ElfFile<'data, Elf, R = &'data [u8]> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) endian: Elf::Endian, + pub(super) data: R, + pub(super) header: &'data Elf, + pub(super) segments: &'data [Elf::ProgramHeader], + pub(super) sections: SectionTable<'data, Elf, R>, + pub(super) relocations: RelocationSections, + pub(super) symbols: SymbolTable<'data, Elf, R>, + pub(super) dynamic_symbols: SymbolTable<'data, Elf, R>, +} + +impl<'data, Elf, R> ElfFile<'data, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + /// Parse the raw ELF file data. + pub fn parse(data: R) -> read::Result<Self> { + let header = Elf::parse(data)?; + let endian = header.endian()?; + let segments = header.program_headers(endian, data)?; + let sections = header.sections(endian, data)?; + let symbols = sections.symbols(endian, data, elf::SHT_SYMTAB)?; + // TODO: get dynamic symbols from DT_SYMTAB if there are no sections + let dynamic_symbols = sections.symbols(endian, data, elf::SHT_DYNSYM)?; + // The API we provide requires a mapping from section to relocations, so build it now. + let relocations = sections.relocation_sections(endian, symbols.section())?; + + Ok(ElfFile { + endian, + data, + header, + segments, + sections, + relocations, + symbols, + dynamic_symbols, + }) + } + + /// Returns the endianness. + pub fn endian(&self) -> Elf::Endian { + self.endian + } + + /// Returns the raw data. + pub fn data(&self) -> R { + self.data + } + + /// Returns the raw ELF file header. + pub fn raw_header(&self) -> &'data Elf { + self.header + } + + /// Returns the raw ELF segments. + pub fn raw_segments(&self) -> &'data [Elf::ProgramHeader] { + self.segments + } + + fn raw_section_by_name<'file>( + &'file self, + section_name: &[u8], + ) -> Option<ElfSection<'data, 'file, Elf, R>> { + self.sections + .section_by_name(self.endian, section_name) + .map(|(index, section)| ElfSection { + file: self, + index: SectionIndex(index), + section, + }) + } + + #[cfg(feature = "compression")] + fn zdebug_section_by_name<'file>( + &'file self, + section_name: &[u8], + ) -> Option<ElfSection<'data, 'file, Elf, R>> { + if !section_name.starts_with(b".debug_") { + return None; + } + let mut name = Vec::with_capacity(section_name.len() + 1); + name.extend_from_slice(b".zdebug_"); + name.extend_from_slice(§ion_name[7..]); + self.raw_section_by_name(&name) + } + + #[cfg(not(feature = "compression"))] + fn zdebug_section_by_name<'file>( + &'file self, + _section_name: &[u8], + ) -> Option<ElfSection<'data, 'file, Elf, R>> { + None + } +} + +impl<'data, Elf, R> read::private::Sealed for ElfFile<'data, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Elf, R> Object<'data, 'file> for ElfFile<'data, Elf, R> +where + 'data: 'file, + Elf: FileHeader, + R: 'file + ReadRef<'data>, +{ + type Segment = ElfSegment<'data, 'file, Elf, R>; + type SegmentIterator = ElfSegmentIterator<'data, 'file, Elf, R>; + type Section = ElfSection<'data, 'file, Elf, R>; + type SectionIterator = ElfSectionIterator<'data, 'file, Elf, R>; + type Comdat = ElfComdat<'data, 'file, Elf, R>; + type ComdatIterator = ElfComdatIterator<'data, 'file, Elf, R>; + type Symbol = ElfSymbol<'data, 'file, Elf, R>; + type SymbolIterator = ElfSymbolIterator<'data, 'file, Elf, R>; + type SymbolTable = ElfSymbolTable<'data, 'file, Elf, R>; + type DynamicRelocationIterator = ElfDynamicRelocationIterator<'data, 'file, Elf, R>; + + fn architecture(&self) -> Architecture { + match ( + self.header.e_machine(self.endian), + self.header.is_class_64(), + ) { + (elf::EM_AARCH64, _) => Architecture::Aarch64, + (elf::EM_ARM, _) => Architecture::Arm, + (elf::EM_AVR, _) => Architecture::Avr, + (elf::EM_BPF, _) => Architecture::Bpf, + (elf::EM_386, _) => Architecture::I386, + (elf::EM_X86_64, false) => Architecture::X86_64_X32, + (elf::EM_X86_64, true) => Architecture::X86_64, + (elf::EM_HEXAGON, _) => Architecture::Hexagon, + (elf::EM_LOONGARCH, true) => Architecture::LoongArch64, + (elf::EM_MIPS, false) => Architecture::Mips, + (elf::EM_MIPS, true) => Architecture::Mips64, + (elf::EM_MSP430, _) => Architecture::Msp430, + (elf::EM_PPC, _) => Architecture::PowerPc, + (elf::EM_PPC64, _) => Architecture::PowerPc64, + (elf::EM_RISCV, false) => Architecture::Riscv32, + (elf::EM_RISCV, true) => Architecture::Riscv64, + // This is either s390 or s390x, depending on the ELF class. + // We only support the 64-bit variant s390x here. + (elf::EM_S390, true) => Architecture::S390x, + (elf::EM_SPARCV9, true) => Architecture::Sparc64, + (elf::EM_XTENSA, false) => Architecture::Xtensa, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.header.is_little_endian() + } + + #[inline] + fn is_64(&self) -> bool { + self.header.is_class_64() + } + + fn kind(&self) -> ObjectKind { + match self.header.e_type(self.endian) { + elf::ET_REL => ObjectKind::Relocatable, + elf::ET_EXEC => ObjectKind::Executable, + // TODO: check for `DF_1_PIE`? + elf::ET_DYN => ObjectKind::Dynamic, + elf::ET_CORE => ObjectKind::Core, + _ => ObjectKind::Unknown, + } + } + + fn segments(&'file self) -> ElfSegmentIterator<'data, 'file, Elf, R> { + ElfSegmentIterator { + file: self, + iter: self.segments.iter(), + } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option<ElfSection<'data, 'file, Elf, R>> { + self.raw_section_by_name(section_name) + .or_else(|| self.zdebug_section_by_name(section_name)) + } + + fn section_by_index( + &'file self, + index: SectionIndex, + ) -> read::Result<ElfSection<'data, 'file, Elf, R>> { + let section = self.sections.section(index)?; + Ok(ElfSection { + file: self, + index, + section, + }) + } + + fn sections(&'file self) -> ElfSectionIterator<'data, 'file, Elf, R> { + ElfSectionIterator { + file: self, + iter: self.sections.iter().enumerate(), + } + } + + fn comdats(&'file self) -> ElfComdatIterator<'data, 'file, Elf, R> { + ElfComdatIterator { + file: self, + iter: self.sections.iter().enumerate(), + } + } + + fn symbol_by_index( + &'file self, + index: SymbolIndex, + ) -> read::Result<ElfSymbol<'data, 'file, Elf, R>> { + let symbol = self.symbols.symbol(index.0)?; + Ok(ElfSymbol { + endian: self.endian, + symbols: &self.symbols, + index, + symbol, + }) + } + + fn symbols(&'file self) -> ElfSymbolIterator<'data, 'file, Elf, R> { + ElfSymbolIterator { + endian: self.endian, + symbols: &self.symbols, + index: 0, + } + } + + fn symbol_table(&'file self) -> Option<ElfSymbolTable<'data, 'file, Elf, R>> { + if self.symbols.is_empty() { + return None; + } + Some(ElfSymbolTable { + endian: self.endian, + symbols: &self.symbols, + }) + } + + fn dynamic_symbols(&'file self) -> ElfSymbolIterator<'data, 'file, Elf, R> { + ElfSymbolIterator { + endian: self.endian, + symbols: &self.dynamic_symbols, + index: 0, + } + } + + fn dynamic_symbol_table(&'file self) -> Option<ElfSymbolTable<'data, 'file, Elf, R>> { + if self.dynamic_symbols.is_empty() { + return None; + } + Some(ElfSymbolTable { + endian: self.endian, + symbols: &self.dynamic_symbols, + }) + } + + fn dynamic_relocations( + &'file self, + ) -> Option<ElfDynamicRelocationIterator<'data, 'file, Elf, R>> { + Some(ElfDynamicRelocationIterator { + section_index: SectionIndex(1), + file: self, + relocations: None, + }) + } + + /// Get the imported symbols. + fn imports(&self) -> read::Result<Vec<Import<'data>>> { + let mut imports = Vec::new(); + for symbol in self.dynamic_symbols.iter() { + if symbol.is_undefined(self.endian) { + let name = symbol.name(self.endian, self.dynamic_symbols.strings())?; + if !name.is_empty() { + // TODO: use symbol versioning to determine library + imports.push(Import { + name: ByteString(name), + library: ByteString(&[]), + }); + } + } + } + Ok(imports) + } + + /// Get the exported symbols. + fn exports(&self) -> read::Result<Vec<Export<'data>>> { + let mut exports = Vec::new(); + for symbol in self.dynamic_symbols.iter() { + if symbol.is_definition(self.endian) { + let name = symbol.name(self.endian, self.dynamic_symbols.strings())?; + let address = symbol.st_value(self.endian).into(); + exports.push(Export { + name: ByteString(name), + address, + }); + } + } + Ok(exports) + } + + fn has_debug_symbols(&self) -> bool { + for section in self.sections.iter() { + if let Ok(name) = self.sections.section_name(self.endian, section) { + if name == b".debug_info" || name == b".zdebug_info" { + return true; + } + } + } + false + } + + fn build_id(&self) -> read::Result<Option<&'data [u8]>> { + let endian = self.endian; + // Use section headers if present, otherwise use program headers. + if !self.sections.is_empty() { + for section in self.sections.iter() { + if let Some(mut notes) = section.notes(endian, self.data)? { + while let Some(note) = notes.next()? { + if note.name() == elf::ELF_NOTE_GNU + && note.n_type(endian) == elf::NT_GNU_BUILD_ID + { + return Ok(Some(note.desc())); + } + } + } + } + } else { + for segment in self.segments { + if let Some(mut notes) = segment.notes(endian, self.data)? { + while let Some(note) = notes.next()? { + if note.name() == elf::ELF_NOTE_GNU + && note.n_type(endian) == elf::NT_GNU_BUILD_ID + { + return Ok(Some(note.desc())); + } + } + } + } + } + Ok(None) + } + + fn gnu_debuglink(&self) -> read::Result<Option<(&'data [u8], u32)>> { + let section = match self.raw_section_by_name(b".gnu_debuglink") { + Some(section) => section, + None => return Ok(None), + }; + let data = section + .section + .data(self.endian, self.data) + .read_error("Invalid ELF .gnu_debuglink section offset or size") + .map(Bytes)?; + let filename = data + .read_string_at(0) + .read_error("Missing ELF .gnu_debuglink filename")?; + let crc_offset = util::align(filename.len() + 1, 4); + let crc = data + .read_at::<U32<_>>(crc_offset) + .read_error("Missing ELF .gnu_debuglink crc")? + .get(self.endian); + Ok(Some((filename, crc))) + } + + fn gnu_debugaltlink(&self) -> read::Result<Option<(&'data [u8], &'data [u8])>> { + let section = match self.raw_section_by_name(b".gnu_debugaltlink") { + Some(section) => section, + None => return Ok(None), + }; + let mut data = section + .section + .data(self.endian, self.data) + .read_error("Invalid ELF .gnu_debugaltlink section offset or size") + .map(Bytes)?; + let filename = data + .read_string() + .read_error("Missing ELF .gnu_debugaltlink filename")?; + let build_id = data.0; + Ok(Some((filename, build_id))) + } + + fn relative_address_base(&self) -> u64 { + 0 + } + + fn entry(&self) -> u64 { + self.header.e_entry(self.endian).into() + } + + fn flags(&self) -> FileFlags { + FileFlags::Elf { + os_abi: self.header.e_ident().os_abi, + abi_version: self.header.e_ident().abi_version, + e_flags: self.header.e_flags(self.endian), + } + } +} + +/// A trait for generic access to `FileHeader32` and `FileHeader64`. +#[allow(missing_docs)] +pub trait FileHeader: Debug + Pod { + // Ideally this would be a `u64: From<Word>`, but can't express that. + type Word: Into<u64>; + type Sword: Into<i64>; + type Endian: endian::Endian; + type ProgramHeader: ProgramHeader<Elf = Self, Endian = Self::Endian, Word = Self::Word>; + type SectionHeader: SectionHeader<Elf = Self, Endian = Self::Endian, Word = Self::Word>; + type CompressionHeader: CompressionHeader<Endian = Self::Endian, Word = Self::Word>; + type NoteHeader: NoteHeader<Endian = Self::Endian>; + type Dyn: Dyn<Endian = Self::Endian, Word = Self::Word>; + type Sym: Sym<Endian = Self::Endian, Word = Self::Word>; + type Rel: Rel<Endian = Self::Endian, Word = Self::Word>; + type Rela: Rela<Endian = Self::Endian, Word = Self::Word> + From<Self::Rel>; + + /// Return true if this type is a 64-bit header. + /// + /// This is a property of the type, not a value in the header data. + fn is_type_64(&self) -> bool; + + fn e_ident(&self) -> &elf::Ident; + fn e_type(&self, endian: Self::Endian) -> u16; + fn e_machine(&self, endian: Self::Endian) -> u16; + fn e_version(&self, endian: Self::Endian) -> u32; + fn e_entry(&self, endian: Self::Endian) -> Self::Word; + fn e_phoff(&self, endian: Self::Endian) -> Self::Word; + fn e_shoff(&self, endian: Self::Endian) -> Self::Word; + fn e_flags(&self, endian: Self::Endian) -> u32; + fn e_ehsize(&self, endian: Self::Endian) -> u16; + fn e_phentsize(&self, endian: Self::Endian) -> u16; + fn e_phnum(&self, endian: Self::Endian) -> u16; + fn e_shentsize(&self, endian: Self::Endian) -> u16; + fn e_shnum(&self, endian: Self::Endian) -> u16; + fn e_shstrndx(&self, endian: Self::Endian) -> u16; + + // Provided methods. + + /// Read the file header. + /// + /// Also checks that the ident field in the file header is a supported format. + fn parse<'data, R: ReadRef<'data>>(data: R) -> read::Result<&'data Self> { + let header = data + .read_at::<Self>(0) + .read_error("Invalid ELF header size or alignment")?; + if !header.is_supported() { + return Err(Error("Unsupported ELF header")); + } + // TODO: Check self.e_ehsize? + Ok(header) + } + + /// Check that the ident field in the file header is a supported format. + /// + /// This checks the magic number, version, class, and endianness. + fn is_supported(&self) -> bool { + let ident = self.e_ident(); + // TODO: Check self.e_version too? Requires endian though. + ident.magic == elf::ELFMAG + && (self.is_type_64() || self.is_class_32()) + && (!self.is_type_64() || self.is_class_64()) + && (self.is_little_endian() || self.is_big_endian()) + && ident.version == elf::EV_CURRENT + } + + fn is_class_32(&self) -> bool { + self.e_ident().class == elf::ELFCLASS32 + } + + fn is_class_64(&self) -> bool { + self.e_ident().class == elf::ELFCLASS64 + } + + fn is_little_endian(&self) -> bool { + self.e_ident().data == elf::ELFDATA2LSB + } + + fn is_big_endian(&self) -> bool { + self.e_ident().data == elf::ELFDATA2MSB + } + + fn endian(&self) -> read::Result<Self::Endian> { + Self::Endian::from_big_endian(self.is_big_endian()).read_error("Unsupported ELF endian") + } + + /// Return the first section header, if present. + /// + /// Section 0 is a special case because getting the section headers normally + /// requires `shnum`, but `shnum` may be in the first section header. + fn section_0<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<&'data Self::SectionHeader>> { + let shoff: u64 = self.e_shoff(endian).into(); + if shoff == 0 { + // No section headers is ok. + return Ok(None); + } + let shentsize = usize::from(self.e_shentsize(endian)); + if shentsize != mem::size_of::<Self::SectionHeader>() { + // Section header size must match. + return Err(Error("Invalid ELF section header entry size")); + } + data.read_at(shoff) + .map(Some) + .read_error("Invalid ELF section header offset or size") + } + + /// Return the `e_phnum` field of the header. Handles extended values. + /// + /// Returns `Err` for invalid values. + fn phnum<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<usize> { + let e_phnum = self.e_phnum(endian); + if e_phnum < elf::PN_XNUM { + Ok(e_phnum as usize) + } else if let Some(section_0) = self.section_0(endian, data)? { + Ok(section_0.sh_info(endian) as usize) + } else { + // Section 0 must exist if e_phnum overflows. + Err(Error("Missing ELF section headers for e_phnum overflow")) + } + } + + /// Return the `e_shnum` field of the header. Handles extended values. + /// + /// Returns `Err` for invalid values. + fn shnum<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<usize> { + let e_shnum = self.e_shnum(endian); + if e_shnum > 0 { + Ok(e_shnum as usize) + } else if let Some(section_0) = self.section_0(endian, data)? { + section_0 + .sh_size(endian) + .into() + .try_into() + .ok() + .read_error("Invalid ELF extended e_shnum") + } else { + // No section headers is ok. + Ok(0) + } + } + + /// Return the `e_shstrndx` field of the header. Handles extended values. + /// + /// Returns `Err` for invalid values (including if the index is 0). + fn shstrndx<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<u32> { + let e_shstrndx = self.e_shstrndx(endian); + let index = if e_shstrndx != elf::SHN_XINDEX { + e_shstrndx.into() + } else if let Some(section_0) = self.section_0(endian, data)? { + section_0.sh_link(endian) + } else { + // Section 0 must exist if we're trying to read e_shstrndx. + return Err(Error("Missing ELF section headers for e_shstrndx overflow")); + }; + if index == 0 { + return Err(Error("Missing ELF e_shstrndx")); + } + Ok(index) + } + + /// Return the slice of program headers. + /// + /// Returns `Ok(&[])` if there are no program headers. + /// Returns `Err` for invalid values. + fn program_headers<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<&'data [Self::ProgramHeader]> { + let phoff: u64 = self.e_phoff(endian).into(); + if phoff == 0 { + // No program headers is ok. + return Ok(&[]); + } + let phnum = self.phnum(endian, data)?; + if phnum == 0 { + // No program headers is ok. + return Ok(&[]); + } + let phentsize = self.e_phentsize(endian) as usize; + if phentsize != mem::size_of::<Self::ProgramHeader>() { + // Program header size must match. + return Err(Error("Invalid ELF program header entry size")); + } + data.read_slice_at(phoff, phnum) + .read_error("Invalid ELF program header size or alignment") + } + + /// Return the slice of section headers. + /// + /// Returns `Ok(&[])` if there are no section headers. + /// Returns `Err` for invalid values. + fn section_headers<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<&'data [Self::SectionHeader]> { + let shoff: u64 = self.e_shoff(endian).into(); + if shoff == 0 { + // No section headers is ok. + return Ok(&[]); + } + let shnum = self.shnum(endian, data)?; + if shnum == 0 { + // No section headers is ok. + return Ok(&[]); + } + let shentsize = usize::from(self.e_shentsize(endian)); + if shentsize != mem::size_of::<Self::SectionHeader>() { + // Section header size must match. + return Err(Error("Invalid ELF section header entry size")); + } + data.read_slice_at(shoff, shnum) + .read_error("Invalid ELF section header offset/size/alignment") + } + + /// Return the string table for the section headers. + fn section_strings<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + sections: &[Self::SectionHeader], + ) -> read::Result<StringTable<'data, R>> { + if sections.is_empty() { + return Ok(StringTable::default()); + } + let index = self.shstrndx(endian, data)? as usize; + let shstrtab = sections.get(index).read_error("Invalid ELF e_shstrndx")?; + let strings = if let Some((shstrtab_offset, shstrtab_size)) = shstrtab.file_range(endian) { + let shstrtab_end = shstrtab_offset + .checked_add(shstrtab_size) + .read_error("Invalid ELF shstrtab size")?; + StringTable::new(data, shstrtab_offset, shstrtab_end) + } else { + StringTable::default() + }; + Ok(strings) + } + + /// Return the section table. + fn sections<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<SectionTable<'data, Self, R>> { + let sections = self.section_headers(endian, data)?; + let strings = self.section_strings(endian, data, sections)?; + Ok(SectionTable::new(sections, strings)) + } + + /// Returns whether this is a mips64el elf file. + fn is_mips64el(&self, endian: Self::Endian) -> bool { + self.is_class_64() && self.is_little_endian() && self.e_machine(endian) == elf::EM_MIPS + } +} + +impl<Endian: endian::Endian> FileHeader for elf::FileHeader32<Endian> { + type Word = u32; + type Sword = i32; + type Endian = Endian; + type ProgramHeader = elf::ProgramHeader32<Endian>; + type SectionHeader = elf::SectionHeader32<Endian>; + type CompressionHeader = elf::CompressionHeader32<Endian>; + type NoteHeader = elf::NoteHeader32<Endian>; + type Dyn = elf::Dyn32<Endian>; + type Sym = elf::Sym32<Endian>; + type Rel = elf::Rel32<Endian>; + type Rela = elf::Rela32<Endian>; + + #[inline] + fn is_type_64(&self) -> bool { + false + } + + #[inline] + fn e_ident(&self) -> &elf::Ident { + &self.e_ident + } + + #[inline] + fn e_type(&self, endian: Self::Endian) -> u16 { + self.e_type.get(endian) + } + + #[inline] + fn e_machine(&self, endian: Self::Endian) -> u16 { + self.e_machine.get(endian) + } + + #[inline] + fn e_version(&self, endian: Self::Endian) -> u32 { + self.e_version.get(endian) + } + + #[inline] + fn e_entry(&self, endian: Self::Endian) -> Self::Word { + self.e_entry.get(endian) + } + + #[inline] + fn e_phoff(&self, endian: Self::Endian) -> Self::Word { + self.e_phoff.get(endian) + } + + #[inline] + fn e_shoff(&self, endian: Self::Endian) -> Self::Word { + self.e_shoff.get(endian) + } + + #[inline] + fn e_flags(&self, endian: Self::Endian) -> u32 { + self.e_flags.get(endian) + } + + #[inline] + fn e_ehsize(&self, endian: Self::Endian) -> u16 { + self.e_ehsize.get(endian) + } + + #[inline] + fn e_phentsize(&self, endian: Self::Endian) -> u16 { + self.e_phentsize.get(endian) + } + + #[inline] + fn e_phnum(&self, endian: Self::Endian) -> u16 { + self.e_phnum.get(endian) + } + + #[inline] + fn e_shentsize(&self, endian: Self::Endian) -> u16 { + self.e_shentsize.get(endian) + } + + #[inline] + fn e_shnum(&self, endian: Self::Endian) -> u16 { + self.e_shnum.get(endian) + } + + #[inline] + fn e_shstrndx(&self, endian: Self::Endian) -> u16 { + self.e_shstrndx.get(endian) + } +} + +impl<Endian: endian::Endian> FileHeader for elf::FileHeader64<Endian> { + type Word = u64; + type Sword = i64; + type Endian = Endian; + type ProgramHeader = elf::ProgramHeader64<Endian>; + type SectionHeader = elf::SectionHeader64<Endian>; + type CompressionHeader = elf::CompressionHeader64<Endian>; + type NoteHeader = elf::NoteHeader32<Endian>; + type Dyn = elf::Dyn64<Endian>; + type Sym = elf::Sym64<Endian>; + type Rel = elf::Rel64<Endian>; + type Rela = elf::Rela64<Endian>; + + #[inline] + fn is_type_64(&self) -> bool { + true + } + + #[inline] + fn e_ident(&self) -> &elf::Ident { + &self.e_ident + } + + #[inline] + fn e_type(&self, endian: Self::Endian) -> u16 { + self.e_type.get(endian) + } + + #[inline] + fn e_machine(&self, endian: Self::Endian) -> u16 { + self.e_machine.get(endian) + } + + #[inline] + fn e_version(&self, endian: Self::Endian) -> u32 { + self.e_version.get(endian) + } + + #[inline] + fn e_entry(&self, endian: Self::Endian) -> Self::Word { + self.e_entry.get(endian) + } + + #[inline] + fn e_phoff(&self, endian: Self::Endian) -> Self::Word { + self.e_phoff.get(endian) + } + + #[inline] + fn e_shoff(&self, endian: Self::Endian) -> Self::Word { + self.e_shoff.get(endian) + } + + #[inline] + fn e_flags(&self, endian: Self::Endian) -> u32 { + self.e_flags.get(endian) + } + + #[inline] + fn e_ehsize(&self, endian: Self::Endian) -> u16 { + self.e_ehsize.get(endian) + } + + #[inline] + fn e_phentsize(&self, endian: Self::Endian) -> u16 { + self.e_phentsize.get(endian) + } + + #[inline] + fn e_phnum(&self, endian: Self::Endian) -> u16 { + self.e_phnum.get(endian) + } + + #[inline] + fn e_shentsize(&self, endian: Self::Endian) -> u16 { + self.e_shentsize.get(endian) + } + + #[inline] + fn e_shnum(&self, endian: Self::Endian) -> u16 { + self.e_shnum.get(endian) + } + + #[inline] + fn e_shstrndx(&self, endian: Self::Endian) -> u16 { + self.e_shstrndx.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/hash.rs b/third_party/rust/object/src/read/elf/hash.rs new file mode 100644 index 0000000000..aa1039ac10 --- /dev/null +++ b/third_party/rust/object/src/read/elf/hash.rs @@ -0,0 +1,220 @@ +use core::mem; + +use crate::elf; +use crate::read::{ReadError, ReadRef, Result}; +use crate::{U32, U64}; + +use super::{FileHeader, Sym, SymbolTable, Version, VersionTable}; + +/// A SysV symbol hash table in an ELF file. +#[derive(Debug)] +pub struct HashTable<'data, Elf: FileHeader> { + buckets: &'data [U32<Elf::Endian>], + chains: &'data [U32<Elf::Endian>], +} + +impl<'data, Elf: FileHeader> HashTable<'data, Elf> { + /// Parse a SysV hash table. + /// + /// `data` should be from a `SHT_HASH` section, or from a + /// segment pointed to via the `DT_HASH` entry. + /// + /// The header is read at offset 0 in the given `data`. + pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result<Self> { + let mut offset = 0; + let header = data + .read::<elf::HashHeader<Elf::Endian>>(&mut offset) + .read_error("Invalid hash header")?; + let buckets = data + .read_slice(&mut offset, header.bucket_count.get(endian) as usize) + .read_error("Invalid hash buckets")?; + let chains = data + .read_slice(&mut offset, header.chain_count.get(endian) as usize) + .read_error("Invalid hash chains")?; + Ok(HashTable { buckets, chains }) + } + + /// Return the symbol table length. + pub fn symbol_table_length(&self) -> u32 { + self.chains.len() as u32 + } + + /// Use the hash table to find the symbol table entry with the given name, hash and version. + pub fn find<R: ReadRef<'data>>( + &self, + endian: Elf::Endian, + name: &[u8], + hash: u32, + version: Option<&Version>, + symbols: &SymbolTable<'data, Elf, R>, + versions: &VersionTable<'data, Elf>, + ) -> Option<(usize, &'data Elf::Sym)> { + // Get the chain start from the bucket for this hash. + let mut index = self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize; + // Avoid infinite loop. + let mut i = 0; + let strings = symbols.strings(); + while index != 0 && i < self.chains.len() { + if let Ok(symbol) = symbols.symbol(index) { + if symbol.name(endian, strings) == Ok(name) + && versions.matches(endian, index, version) + { + return Some((index, symbol)); + } + } + index = self.chains.get(index)?.get(endian) as usize; + i += 1; + } + None + } +} + +/// A GNU symbol hash table in an ELF file. +#[derive(Debug)] +pub struct GnuHashTable<'data, Elf: FileHeader> { + symbol_base: u32, + bloom_shift: u32, + bloom_filters: &'data [u8], + buckets: &'data [U32<Elf::Endian>], + values: &'data [U32<Elf::Endian>], +} + +impl<'data, Elf: FileHeader> GnuHashTable<'data, Elf> { + /// Parse a GNU hash table. + /// + /// `data` should be from a `SHT_GNU_HASH` section, or from a + /// segment pointed to via the `DT_GNU_HASH` entry. + /// + /// The header is read at offset 0 in the given `data`. + /// + /// The header does not contain a length field, and so all of `data` + /// will be used as the hash table values. It does not matter if this + /// is longer than needed, and this will often the case when accessing + /// the hash table via the `DT_GNU_HASH` entry. + pub fn parse(endian: Elf::Endian, data: &'data [u8]) -> Result<Self> { + let mut offset = 0; + let header = data + .read::<elf::GnuHashHeader<Elf::Endian>>(&mut offset) + .read_error("Invalid GNU hash header")?; + let bloom_len = + u64::from(header.bloom_count.get(endian)) * mem::size_of::<Elf::Word>() as u64; + let bloom_filters = data + .read_bytes(&mut offset, bloom_len) + .read_error("Invalid GNU hash bloom filters")?; + let buckets = data + .read_slice(&mut offset, header.bucket_count.get(endian) as usize) + .read_error("Invalid GNU hash buckets")?; + let chain_count = (data.len() - offset as usize) / 4; + let values = data + .read_slice(&mut offset, chain_count) + .read_error("Invalid GNU hash values")?; + Ok(GnuHashTable { + symbol_base: header.symbol_base.get(endian), + bloom_shift: header.bloom_shift.get(endian), + bloom_filters, + buckets, + values, + }) + } + + /// Return the symbol table index of the first symbol in the hash table. + pub fn symbol_base(&self) -> u32 { + self.symbol_base + } + + /// Determine the symbol table length by finding the last entry in the hash table. + /// + /// Returns `None` if the hash table is empty or invalid. + pub fn symbol_table_length(&self, endian: Elf::Endian) -> Option<u32> { + // Ensure we find a non-empty bucket. + if self.symbol_base == 0 { + return None; + } + + // Find the highest chain index in a bucket. + let mut max_symbol = 0; + for bucket in self.buckets { + let bucket = bucket.get(endian); + if max_symbol < bucket { + max_symbol = bucket; + } + } + + // Find the end of the chain. + for value in self + .values + .get(max_symbol.checked_sub(self.symbol_base)? as usize..)? + { + max_symbol += 1; + if value.get(endian) & 1 != 0 { + return Some(max_symbol); + } + } + + None + } + + /// Use the hash table to find the symbol table entry with the given name, hash, and version. + pub fn find<R: ReadRef<'data>>( + &self, + endian: Elf::Endian, + name: &[u8], + hash: u32, + version: Option<&Version>, + symbols: &SymbolTable<'data, Elf, R>, + versions: &VersionTable<'data, Elf>, + ) -> Option<(usize, &'data Elf::Sym)> { + let word_bits = mem::size_of::<Elf::Word>() as u32 * 8; + + // Test against bloom filter. + let bloom_count = self.bloom_filters.len() / mem::size_of::<Elf::Word>(); + let offset = + ((hash / word_bits) & (bloom_count as u32 - 1)) * mem::size_of::<Elf::Word>() as u32; + let filter = if word_bits == 64 { + self.bloom_filters + .read_at::<U64<Elf::Endian>>(offset.into()) + .ok()? + .get(endian) + } else { + self.bloom_filters + .read_at::<U32<Elf::Endian>>(offset.into()) + .ok()? + .get(endian) + .into() + }; + if filter & (1 << (hash % word_bits)) == 0 { + return None; + } + if filter & (1 << ((hash >> self.bloom_shift) % word_bits)) == 0 { + return None; + } + + // Get the chain start from the bucket for this hash. + let mut index = self.buckets[(hash as usize) % self.buckets.len()].get(endian) as usize; + if index == 0 { + return None; + } + + // Test symbols in the chain. + let strings = symbols.strings(); + let symbols = symbols.symbols().get(index..)?; + let values = self + .values + .get(index.checked_sub(self.symbol_base as usize)?..)?; + for (symbol, value) in symbols.iter().zip(values.iter()) { + let value = value.get(endian); + if value | 1 == hash | 1 { + if symbol.name(endian, strings) == Ok(name) + && versions.matches(endian, index, version) + { + return Some((index, symbol)); + } + } + if value & 1 != 0 { + break; + } + index += 1; + } + None + } +} diff --git a/third_party/rust/object/src/read/elf/mod.rs b/third_party/rust/object/src/read/elf/mod.rs new file mode 100644 index 0000000000..5b7d7f9f7f --- /dev/null +++ b/third_party/rust/object/src/read/elf/mod.rs @@ -0,0 +1,39 @@ +//! Support for reading ELF files. +//! +//! Defines traits to abstract over the difference between ELF32/ELF64, +//! and implements read functionality in terms of these traits. +//! +//! Also provides `ElfFile` and related types which implement the `Object` trait. + +mod file; +pub use file::*; + +mod segment; +pub use segment::*; + +mod section; +pub use section::*; + +mod symbol; +pub use symbol::*; + +mod relocation; +pub use relocation::*; + +mod comdat; +pub use comdat::*; + +mod dynamic; +pub use dynamic::*; + +mod compression; +pub use compression::*; + +mod note; +pub use note::*; + +mod hash; +pub use hash::*; + +mod version; +pub use version::*; diff --git a/third_party/rust/object/src/read/elf/note.rs b/third_party/rust/object/src/read/elf/note.rs new file mode 100644 index 0000000000..34024dbb89 --- /dev/null +++ b/third_party/rust/object/src/read/elf/note.rs @@ -0,0 +1,185 @@ +use core::fmt::Debug; +use core::mem; + +use crate::elf; +use crate::endian; +use crate::pod::Pod; +use crate::read::util; +use crate::read::{self, Bytes, Error, ReadError}; + +use super::FileHeader; + +/// An iterator over the notes in an ELF section or segment. +#[derive(Debug)] +pub struct NoteIterator<'data, Elf> +where + Elf: FileHeader, +{ + endian: Elf::Endian, + align: usize, + data: Bytes<'data>, +} + +impl<'data, Elf> NoteIterator<'data, Elf> +where + Elf: FileHeader, +{ + /// Returns `Err` if `align` is invalid. + pub(super) fn new( + endian: Elf::Endian, + align: Elf::Word, + data: &'data [u8], + ) -> read::Result<Self> { + let align = match align.into() { + 0u64..=4 => 4, + 8 => 8, + _ => return Err(Error("Invalid ELF note alignment")), + }; + // TODO: check data alignment? + Ok(NoteIterator { + endian, + align, + data: Bytes(data), + }) + } + + /// Returns the next note. + pub fn next(&mut self) -> read::Result<Option<Note<'data, Elf>>> { + let mut data = self.data; + if data.is_empty() { + return Ok(None); + } + + let header = data + .read_at::<Elf::NoteHeader>(0) + .read_error("ELF note is too short")?; + + // The name has no alignment requirement. + let offset = mem::size_of::<Elf::NoteHeader>(); + let namesz = header.n_namesz(self.endian) as usize; + let name = data + .read_bytes_at(offset, namesz) + .read_error("Invalid ELF note namesz")? + .0; + + // The descriptor must be aligned. + let offset = util::align(offset + namesz, self.align); + let descsz = header.n_descsz(self.endian) as usize; + let desc = data + .read_bytes_at(offset, descsz) + .read_error("Invalid ELF note descsz")? + .0; + + // The next note (if any) must be aligned. + let offset = util::align(offset + descsz, self.align); + if data.skip(offset).is_err() { + data = Bytes(&[]); + } + self.data = data; + + Ok(Some(Note { header, name, desc })) + } +} + +/// A parsed `NoteHeader`. +#[derive(Debug)] +pub struct Note<'data, Elf> +where + Elf: FileHeader, +{ + header: &'data Elf::NoteHeader, + name: &'data [u8], + desc: &'data [u8], +} + +impl<'data, Elf: FileHeader> Note<'data, Elf> { + /// Return the `n_type` field of the `NoteHeader`. + /// + /// The meaning of this field is determined by `name`. + pub fn n_type(&self, endian: Elf::Endian) -> u32 { + self.header.n_type(endian) + } + + /// Return the `n_namesz` field of the `NoteHeader`. + pub fn n_namesz(&self, endian: Elf::Endian) -> u32 { + self.header.n_namesz(endian) + } + + /// Return the `n_descsz` field of the `NoteHeader`. + pub fn n_descsz(&self, endian: Elf::Endian) -> u32 { + self.header.n_descsz(endian) + } + + /// Return the bytes for the name field following the `NoteHeader`, + /// excluding any null terminator. + /// + /// This field is usually a string including a null terminator + /// (but it is not required to be). + /// + /// The length of this field (including any null terminator) is given by + /// `n_namesz`. + pub fn name(&self) -> &'data [u8] { + if let Some((last, name)) = self.name.split_last() { + if *last == 0 { + return name; + } + } + self.name + } + + /// Return the bytes for the desc field following the `NoteHeader`. + /// + /// The length of this field is given by `n_descsz`. The meaning + /// of this field is determined by `name` and `n_type`. + pub fn desc(&self) -> &'data [u8] { + self.desc + } +} + +/// A trait for generic access to `NoteHeader32` and `NoteHeader64`. +#[allow(missing_docs)] +pub trait NoteHeader: Debug + Pod { + type Endian: endian::Endian; + + fn n_namesz(&self, endian: Self::Endian) -> u32; + fn n_descsz(&self, endian: Self::Endian) -> u32; + fn n_type(&self, endian: Self::Endian) -> u32; +} + +impl<Endian: endian::Endian> NoteHeader for elf::NoteHeader32<Endian> { + type Endian = Endian; + + #[inline] + fn n_namesz(&self, endian: Self::Endian) -> u32 { + self.n_namesz.get(endian) + } + + #[inline] + fn n_descsz(&self, endian: Self::Endian) -> u32 { + self.n_descsz.get(endian) + } + + #[inline] + fn n_type(&self, endian: Self::Endian) -> u32 { + self.n_type.get(endian) + } +} + +impl<Endian: endian::Endian> NoteHeader for elf::NoteHeader64<Endian> { + type Endian = Endian; + + #[inline] + fn n_namesz(&self, endian: Self::Endian) -> u32 { + self.n_namesz.get(endian) + } + + #[inline] + fn n_descsz(&self, endian: Self::Endian) -> u32 { + self.n_descsz.get(endian) + } + + #[inline] + fn n_type(&self, endian: Self::Endian) -> u32 { + self.n_type.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/relocation.rs b/third_party/rust/object/src/read/elf/relocation.rs new file mode 100644 index 0000000000..0c20be34cb --- /dev/null +++ b/third_party/rust/object/src/read/elf/relocation.rs @@ -0,0 +1,557 @@ +use alloc::fmt; +use alloc::vec::Vec; +use core::fmt::Debug; +use core::slice; + +use crate::elf; +use crate::endian::{self, Endianness}; +use crate::pod::Pod; +use crate::read::{ + self, Error, ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, + SectionIndex, SymbolIndex, +}; + +use super::{ElfFile, FileHeader, SectionHeader, SectionTable}; + +/// A mapping from section index to associated relocation sections. +#[derive(Debug)] +pub struct RelocationSections { + relocations: Vec<usize>, +} + +impl RelocationSections { + /// Create a new mapping using the section table. + /// + /// Skips relocation sections that do not use the given symbol table section. + pub fn parse<'data, Elf: FileHeader, R: ReadRef<'data>>( + endian: Elf::Endian, + sections: &SectionTable<'data, Elf, R>, + symbol_section: SectionIndex, + ) -> read::Result<Self> { + let mut relocations = vec![0; sections.len()]; + for (index, section) in sections.iter().enumerate().rev() { + let sh_type = section.sh_type(endian); + if sh_type == elf::SHT_REL || sh_type == elf::SHT_RELA { + // The symbol indices used in relocations must be for the symbol table + // we are expecting to use. + let sh_link = SectionIndex(section.sh_link(endian) as usize); + if sh_link != symbol_section { + continue; + } + + let sh_info = section.sh_info(endian) as usize; + if sh_info == 0 { + // Skip dynamic relocations. + continue; + } + if sh_info >= relocations.len() { + return Err(Error("Invalid ELF sh_info for relocation section")); + } + + // Handle multiple relocation sections by chaining them. + let next = relocations[sh_info]; + relocations[sh_info] = index; + relocations[index] = next; + } + } + Ok(Self { relocations }) + } + + /// Given a section index, return the section index of the associated relocation section. + /// + /// This may also be called with a relocation section index, and it will return the + /// next associated relocation section. + pub fn get(&self, index: usize) -> Option<usize> { + self.relocations.get(index).cloned().filter(|x| *x != 0) + } +} + +pub(super) enum ElfRelaIterator<'data, Elf: FileHeader> { + Rel(slice::Iter<'data, Elf::Rel>), + Rela(slice::Iter<'data, Elf::Rela>), +} + +impl<'data, Elf: FileHeader> ElfRelaIterator<'data, Elf> { + fn is_rel(&self) -> bool { + match self { + ElfRelaIterator::Rel(_) => true, + ElfRelaIterator::Rela(_) => false, + } + } +} + +impl<'data, Elf: FileHeader> Iterator for ElfRelaIterator<'data, Elf> { + type Item = Elf::Rela; + + fn next(&mut self) -> Option<Self::Item> { + match self { + ElfRelaIterator::Rel(ref mut i) => i.next().cloned().map(Self::Item::from), + ElfRelaIterator::Rela(ref mut i) => i.next().cloned(), + } + } +} + +/// An iterator over the dynamic relocations for an `ElfFile32`. +pub type ElfDynamicRelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfDynamicRelocationIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the dynamic relocations for an `ElfFile64`. +pub type ElfDynamicRelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfDynamicRelocationIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the dynamic relocations for an `ElfFile`. +pub struct ElfDynamicRelocationIterator<'data, 'file, Elf, R = &'data [u8]> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + /// The current relocation section index. + pub(super) section_index: SectionIndex, + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) relocations: Option<ElfRelaIterator<'data, Elf>>, +} + +impl<'data, 'file, Elf, R> Iterator for ElfDynamicRelocationIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + let endian = self.file.endian; + loop { + if let Some(ref mut relocations) = self.relocations { + if let Some(reloc) = relocations.next() { + let relocation = + parse_relocation(self.file.header, endian, reloc, relocations.is_rel()); + return Some((reloc.r_offset(endian).into(), relocation)); + } + self.relocations = None; + } + + let section = self.file.sections.section(self.section_index).ok()?; + self.section_index.0 += 1; + + let sh_link = SectionIndex(section.sh_link(endian) as usize); + if sh_link != self.file.dynamic_symbols.section() { + continue; + } + + match section.sh_type(endian) { + elf::SHT_REL => { + if let Ok(relocations) = section.data_as_array(endian, self.file.data) { + self.relocations = Some(ElfRelaIterator::Rel(relocations.iter())); + } + } + elf::SHT_RELA => { + if let Ok(relocations) = section.data_as_array(endian, self.file.data) { + self.relocations = Some(ElfRelaIterator::Rela(relocations.iter())); + } + } + _ => {} + } + } + } +} + +impl<'data, 'file, Elf, R> fmt::Debug for ElfDynamicRelocationIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfDynamicRelocationIterator").finish() + } +} + +/// An iterator over the relocations for an `ElfSection32`. +pub type ElfSectionRelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSectionRelocationIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the relocations for an `ElfSection64`. +pub type ElfSectionRelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSectionRelocationIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the relocations for an `ElfSection`. +pub struct ElfSectionRelocationIterator<'data, 'file, Elf, R = &'data [u8]> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + /// The current pointer in the chain of relocation sections. + pub(super) section_index: SectionIndex, + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) relocations: Option<ElfRelaIterator<'data, Elf>>, +} + +impl<'data, 'file, Elf, R> Iterator for ElfSectionRelocationIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + let endian = self.file.endian; + loop { + if let Some(ref mut relocations) = self.relocations { + if let Some(reloc) = relocations.next() { + let relocation = + parse_relocation(self.file.header, endian, reloc, relocations.is_rel()); + return Some((reloc.r_offset(endian).into(), relocation)); + } + self.relocations = None; + } + self.section_index = SectionIndex(self.file.relocations.get(self.section_index.0)?); + // The construction of RelocationSections ensures section_index is valid. + let section = self.file.sections.section(self.section_index).unwrap(); + match section.sh_type(endian) { + elf::SHT_REL => { + if let Ok(relocations) = section.data_as_array(endian, self.file.data) { + self.relocations = Some(ElfRelaIterator::Rel(relocations.iter())); + } + } + elf::SHT_RELA => { + if let Ok(relocations) = section.data_as_array(endian, self.file.data) { + self.relocations = Some(ElfRelaIterator::Rela(relocations.iter())); + } + } + _ => {} + } + } + } +} + +impl<'data, 'file, Elf, R> fmt::Debug for ElfSectionRelocationIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfSectionRelocationIterator").finish() + } +} + +fn parse_relocation<Elf: FileHeader>( + header: &Elf, + endian: Elf::Endian, + reloc: Elf::Rela, + implicit_addend: bool, +) -> Relocation { + let mut encoding = RelocationEncoding::Generic; + let is_mips64el = header.is_mips64el(endian); + let (kind, size) = match header.e_machine(endian) { + elf::EM_AARCH64 => match reloc.r_type(endian, false) { + elf::R_AARCH64_ABS64 => (RelocationKind::Absolute, 64), + elf::R_AARCH64_ABS32 => (RelocationKind::Absolute, 32), + elf::R_AARCH64_ABS16 => (RelocationKind::Absolute, 16), + elf::R_AARCH64_PREL64 => (RelocationKind::Relative, 64), + elf::R_AARCH64_PREL32 => (RelocationKind::Relative, 32), + elf::R_AARCH64_PREL16 => (RelocationKind::Relative, 16), + elf::R_AARCH64_CALL26 => { + encoding = RelocationEncoding::AArch64Call; + (RelocationKind::PltRelative, 26) + } + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_ARM => match reloc.r_type(endian, false) { + elf::R_ARM_ABS32 => (RelocationKind::Absolute, 32), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_AVR => match reloc.r_type(endian, false) { + elf::R_AVR_32 => (RelocationKind::Absolute, 32), + elf::R_AVR_16 => (RelocationKind::Absolute, 16), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_BPF => match reloc.r_type(endian, false) { + elf::R_BPF_64_64 => (RelocationKind::Absolute, 64), + elf::R_BPF_64_32 => (RelocationKind::Absolute, 32), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_386 => match reloc.r_type(endian, false) { + elf::R_386_32 => (RelocationKind::Absolute, 32), + elf::R_386_PC32 => (RelocationKind::Relative, 32), + elf::R_386_GOT32 => (RelocationKind::Got, 32), + elf::R_386_PLT32 => (RelocationKind::PltRelative, 32), + elf::R_386_GOTOFF => (RelocationKind::GotBaseOffset, 32), + elf::R_386_GOTPC => (RelocationKind::GotBaseRelative, 32), + elf::R_386_16 => (RelocationKind::Absolute, 16), + elf::R_386_PC16 => (RelocationKind::Relative, 16), + elf::R_386_8 => (RelocationKind::Absolute, 8), + elf::R_386_PC8 => (RelocationKind::Relative, 8), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_X86_64 => match reloc.r_type(endian, false) { + elf::R_X86_64_64 => (RelocationKind::Absolute, 64), + elf::R_X86_64_PC32 => (RelocationKind::Relative, 32), + elf::R_X86_64_GOT32 => (RelocationKind::Got, 32), + elf::R_X86_64_PLT32 => (RelocationKind::PltRelative, 32), + elf::R_X86_64_GOTPCREL => (RelocationKind::GotRelative, 32), + elf::R_X86_64_32 => (RelocationKind::Absolute, 32), + elf::R_X86_64_32S => { + encoding = RelocationEncoding::X86Signed; + (RelocationKind::Absolute, 32) + } + elf::R_X86_64_16 => (RelocationKind::Absolute, 16), + elf::R_X86_64_PC16 => (RelocationKind::Relative, 16), + elf::R_X86_64_8 => (RelocationKind::Absolute, 8), + elf::R_X86_64_PC8 => (RelocationKind::Relative, 8), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_HEXAGON => match reloc.r_type(endian, false) { + elf::R_HEX_32 => (RelocationKind::Absolute, 32), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_LOONGARCH => match reloc.r_type(endian, false) { + elf::R_LARCH_32 => (RelocationKind::Absolute, 32), + elf::R_LARCH_64 => (RelocationKind::Absolute, 64), + elf::R_LARCH_32_PCREL => (RelocationKind::Relative, 32), + elf::R_LARCH_B16 => { + encoding = RelocationEncoding::LoongArchBranch; + (RelocationKind::Relative, 16) + } + elf::R_LARCH_B21 => { + encoding = RelocationEncoding::LoongArchBranch; + (RelocationKind::Relative, 21) + } + elf::R_LARCH_B26 => { + encoding = RelocationEncoding::LoongArchBranch; + (RelocationKind::Relative, 26) + } + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_MIPS => match reloc.r_type(endian, is_mips64el) { + elf::R_MIPS_16 => (RelocationKind::Absolute, 16), + elf::R_MIPS_32 => (RelocationKind::Absolute, 32), + elf::R_MIPS_64 => (RelocationKind::Absolute, 64), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_MSP430 => match reloc.r_type(endian, false) { + elf::R_MSP430_32 => (RelocationKind::Absolute, 32), + elf::R_MSP430_16_BYTE => (RelocationKind::Absolute, 16), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_PPC => match reloc.r_type(endian, false) { + elf::R_PPC_ADDR32 => (RelocationKind::Absolute, 32), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_PPC64 => match reloc.r_type(endian, false) { + elf::R_PPC64_ADDR32 => (RelocationKind::Absolute, 32), + elf::R_PPC64_ADDR64 => (RelocationKind::Absolute, 64), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_RISCV => match reloc.r_type(endian, false) { + elf::R_RISCV_32 => (RelocationKind::Absolute, 32), + elf::R_RISCV_64 => (RelocationKind::Absolute, 64), + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_S390 => match reloc.r_type(endian, false) { + elf::R_390_8 => (RelocationKind::Absolute, 8), + elf::R_390_16 => (RelocationKind::Absolute, 16), + elf::R_390_32 => (RelocationKind::Absolute, 32), + elf::R_390_64 => (RelocationKind::Absolute, 64), + elf::R_390_PC16 => (RelocationKind::Relative, 16), + elf::R_390_PC32 => (RelocationKind::Relative, 32), + elf::R_390_PC64 => (RelocationKind::Relative, 64), + elf::R_390_PC16DBL => { + encoding = RelocationEncoding::S390xDbl; + (RelocationKind::Relative, 16) + } + elf::R_390_PC32DBL => { + encoding = RelocationEncoding::S390xDbl; + (RelocationKind::Relative, 32) + } + elf::R_390_PLT16DBL => { + encoding = RelocationEncoding::S390xDbl; + (RelocationKind::PltRelative, 16) + } + elf::R_390_PLT32DBL => { + encoding = RelocationEncoding::S390xDbl; + (RelocationKind::PltRelative, 32) + } + elf::R_390_GOT16 => (RelocationKind::Got, 16), + elf::R_390_GOT32 => (RelocationKind::Got, 32), + elf::R_390_GOT64 => (RelocationKind::Got, 64), + elf::R_390_GOTENT => { + encoding = RelocationEncoding::S390xDbl; + (RelocationKind::GotRelative, 32) + } + elf::R_390_GOTOFF16 => (RelocationKind::GotBaseOffset, 16), + elf::R_390_GOTOFF32 => (RelocationKind::GotBaseOffset, 32), + elf::R_390_GOTOFF64 => (RelocationKind::GotBaseOffset, 64), + elf::R_390_GOTPC => (RelocationKind::GotBaseRelative, 64), + elf::R_390_GOTPCDBL => { + encoding = RelocationEncoding::S390xDbl; + (RelocationKind::GotBaseRelative, 32) + } + r_type => (RelocationKind::Elf(r_type), 0), + }, + elf::EM_SPARC | elf::EM_SPARC32PLUS | elf::EM_SPARCV9 => { + match reloc.r_type(endian, false) { + elf::R_SPARC_32 | elf::R_SPARC_UA32 => (RelocationKind::Absolute, 32), + elf::R_SPARC_64 | elf::R_SPARC_UA64 => (RelocationKind::Absolute, 64), + r_type => (RelocationKind::Elf(r_type), 0), + } + } + elf::EM_XTENSA => match reloc.r_type(endian, false) { + elf::R_XTENSA_32 => (RelocationKind::Absolute, 32), + elf::R_XTENSA_32_PCREL => (RelocationKind::Relative, 32), + r_type => (RelocationKind::Elf(r_type), 0), + }, + _ => (RelocationKind::Elf(reloc.r_type(endian, false)), 0), + }; + let sym = reloc.r_sym(endian, is_mips64el) as usize; + let target = if sym == 0 { + RelocationTarget::Absolute + } else { + RelocationTarget::Symbol(SymbolIndex(sym)) + }; + Relocation { + kind, + encoding, + size, + target, + addend: reloc.r_addend(endian).into(), + implicit_addend, + } +} + +/// A trait for generic access to `Rel32` and `Rel64`. +#[allow(missing_docs)] +pub trait Rel: Debug + Pod + Clone { + type Word: Into<u64>; + type Sword: Into<i64>; + type Endian: endian::Endian; + + fn r_offset(&self, endian: Self::Endian) -> Self::Word; + fn r_info(&self, endian: Self::Endian) -> Self::Word; + fn r_sym(&self, endian: Self::Endian) -> u32; + fn r_type(&self, endian: Self::Endian) -> u32; +} + +impl<Endian: endian::Endian> Rel for elf::Rel32<Endian> { + type Word = u32; + type Sword = i32; + type Endian = Endian; + + #[inline] + fn r_offset(&self, endian: Self::Endian) -> Self::Word { + self.r_offset.get(endian) + } + + #[inline] + fn r_info(&self, endian: Self::Endian) -> Self::Word { + self.r_info.get(endian) + } + + #[inline] + fn r_sym(&self, endian: Self::Endian) -> u32 { + self.r_sym(endian) + } + + #[inline] + fn r_type(&self, endian: Self::Endian) -> u32 { + self.r_type(endian) + } +} + +impl<Endian: endian::Endian> Rel for elf::Rel64<Endian> { + type Word = u64; + type Sword = i64; + type Endian = Endian; + + #[inline] + fn r_offset(&self, endian: Self::Endian) -> Self::Word { + self.r_offset.get(endian) + } + + #[inline] + fn r_info(&self, endian: Self::Endian) -> Self::Word { + self.r_info.get(endian) + } + + #[inline] + fn r_sym(&self, endian: Self::Endian) -> u32 { + self.r_sym(endian) + } + + #[inline] + fn r_type(&self, endian: Self::Endian) -> u32 { + self.r_type(endian) + } +} + +/// A trait for generic access to `Rela32` and `Rela64`. +#[allow(missing_docs)] +pub trait Rela: Debug + Pod + Clone { + type Word: Into<u64>; + type Sword: Into<i64>; + type Endian: endian::Endian; + + fn r_offset(&self, endian: Self::Endian) -> Self::Word; + fn r_info(&self, endian: Self::Endian, is_mips64el: bool) -> Self::Word; + fn r_addend(&self, endian: Self::Endian) -> Self::Sword; + fn r_sym(&self, endian: Self::Endian, is_mips64el: bool) -> u32; + fn r_type(&self, endian: Self::Endian, is_mips64el: bool) -> u32; +} + +impl<Endian: endian::Endian> Rela for elf::Rela32<Endian> { + type Word = u32; + type Sword = i32; + type Endian = Endian; + + #[inline] + fn r_offset(&self, endian: Self::Endian) -> Self::Word { + self.r_offset.get(endian) + } + + #[inline] + fn r_info(&self, endian: Self::Endian, _is_mips64el: bool) -> Self::Word { + self.r_info.get(endian) + } + + #[inline] + fn r_addend(&self, endian: Self::Endian) -> Self::Sword { + self.r_addend.get(endian) + } + + #[inline] + fn r_sym(&self, endian: Self::Endian, _is_mips64el: bool) -> u32 { + self.r_sym(endian) + } + + #[inline] + fn r_type(&self, endian: Self::Endian, _is_mips64el: bool) -> u32 { + self.r_type(endian) + } +} + +impl<Endian: endian::Endian> Rela for elf::Rela64<Endian> { + type Word = u64; + type Sword = i64; + type Endian = Endian; + + #[inline] + fn r_offset(&self, endian: Self::Endian) -> Self::Word { + self.r_offset.get(endian) + } + + #[inline] + fn r_info(&self, endian: Self::Endian, is_mips64el: bool) -> Self::Word { + self.get_r_info(endian, is_mips64el) + } + + #[inline] + fn r_addend(&self, endian: Self::Endian) -> Self::Sword { + self.r_addend.get(endian) + } + + #[inline] + fn r_sym(&self, endian: Self::Endian, is_mips64el: bool) -> u32 { + self.r_sym(endian, is_mips64el) + } + + #[inline] + fn r_type(&self, endian: Self::Endian, is_mips64el: bool) -> u32 { + self.r_type(endian, is_mips64el) + } +} diff --git a/third_party/rust/object/src/read/elf/section.rs b/third_party/rust/object/src/read/elf/section.rs new file mode 100644 index 0000000000..3f8a082165 --- /dev/null +++ b/third_party/rust/object/src/read/elf/section.rs @@ -0,0 +1,1090 @@ +use core::fmt::Debug; +use core::{iter, mem, slice, str}; + +use crate::elf; +use crate::endian::{self, Endianness, U32Bytes}; +use crate::pod::Pod; +use crate::read::{ + self, Bytes, CompressedData, CompressedFileRange, CompressionFormat, Error, ObjectSection, + ReadError, ReadRef, SectionFlags, SectionIndex, SectionKind, StringTable, +}; + +use super::{ + CompressionHeader, ElfFile, ElfSectionRelocationIterator, FileHeader, GnuHashTable, HashTable, + NoteIterator, RelocationSections, SymbolTable, VerdefIterator, VerneedIterator, VersionTable, +}; + +/// The table of section headers in an ELF file. +/// +/// Also includes the string table used for the section names. +#[derive(Debug, Default, Clone, Copy)] +pub struct SectionTable<'data, Elf: FileHeader, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + sections: &'data [Elf::SectionHeader], + strings: StringTable<'data, R>, +} + +impl<'data, Elf: FileHeader, R: ReadRef<'data>> SectionTable<'data, Elf, R> { + /// Create a new section table. + #[inline] + pub fn new(sections: &'data [Elf::SectionHeader], strings: StringTable<'data, R>) -> Self { + SectionTable { sections, strings } + } + + /// Iterate over the section headers. + #[inline] + pub fn iter(&self) -> slice::Iter<'data, Elf::SectionHeader> { + self.sections.iter() + } + + /// Return true if the section table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.sections.is_empty() + } + + /// The number of section headers. + #[inline] + pub fn len(&self) -> usize { + self.sections.len() + } + + /// Return the section header at the given index. + pub fn section(&self, index: SectionIndex) -> read::Result<&'data Elf::SectionHeader> { + self.sections + .get(index.0) + .read_error("Invalid ELF section index") + } + + /// Return the section header with the given name. + /// + /// Ignores sections with invalid names. + pub fn section_by_name( + &self, + endian: Elf::Endian, + name: &[u8], + ) -> Option<(usize, &'data Elf::SectionHeader)> { + self.sections + .iter() + .enumerate() + .find(|(_, section)| self.section_name(endian, section) == Ok(name)) + } + + /// Return the section name for the given section header. + pub fn section_name( + &self, + endian: Elf::Endian, + section: &'data Elf::SectionHeader, + ) -> read::Result<&'data [u8]> { + section.name(endian, self.strings) + } + + /// Return the string table at the given section index. + /// + /// Returns an error if the section is not a string table. + #[inline] + pub fn strings( + &self, + endian: Elf::Endian, + data: R, + index: SectionIndex, + ) -> read::Result<StringTable<'data, R>> { + self.section(index)? + .strings(endian, data)? + .read_error("Invalid ELF string section type") + } + + /// Return the symbol table of the given section type. + /// + /// Returns an empty symbol table if the symbol table does not exist. + #[inline] + pub fn symbols( + &self, + endian: Elf::Endian, + data: R, + sh_type: u32, + ) -> read::Result<SymbolTable<'data, Elf, R>> { + debug_assert!(sh_type == elf::SHT_DYNSYM || sh_type == elf::SHT_SYMTAB); + + let (index, section) = match self + .iter() + .enumerate() + .find(|s| s.1.sh_type(endian) == sh_type) + { + Some(s) => s, + None => return Ok(SymbolTable::default()), + }; + + SymbolTable::parse(endian, data, self, SectionIndex(index), section) + } + + /// Return the symbol table at the given section index. + /// + /// Returns an error if the section is not a symbol table. + #[inline] + pub fn symbol_table_by_index( + &self, + endian: Elf::Endian, + data: R, + index: SectionIndex, + ) -> read::Result<SymbolTable<'data, Elf, R>> { + let section = self.section(index)?; + match section.sh_type(endian) { + elf::SHT_DYNSYM | elf::SHT_SYMTAB => {} + _ => return Err(Error("Invalid ELF symbol table section type")), + } + SymbolTable::parse(endian, data, self, index, section) + } + + /// Create a mapping from section index to associated relocation sections. + #[inline] + pub fn relocation_sections( + &self, + endian: Elf::Endian, + symbol_section: SectionIndex, + ) -> read::Result<RelocationSections> { + RelocationSections::parse(endian, self, symbol_section) + } + + /// Return the contents of a dynamic section. + /// + /// Also returns the linked string table index. + /// + /// Returns `Ok(None)` if there is no `SHT_DYNAMIC` section. + /// Returns `Err` for invalid values. + pub fn dynamic( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<(&'data [Elf::Dyn], SectionIndex)>> { + for section in self.sections { + if let Some(dynamic) = section.dynamic(endian, data)? { + return Ok(Some(dynamic)); + } + } + Ok(None) + } + + /// Return the header of a SysV hash section. + /// + /// Returns `Ok(None)` if there is no SysV GNU hash section. + /// Returns `Err` for invalid values. + pub fn hash_header( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<&'data elf::HashHeader<Elf::Endian>>> { + for section in self.sections { + if let Some(hash) = section.hash_header(endian, data)? { + return Ok(Some(hash)); + } + } + Ok(None) + } + + /// Return the contents of a SysV hash section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if there is no SysV hash section. + /// Returns `Err` for invalid values. + pub fn hash( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<(HashTable<'data, Elf>, SectionIndex)>> { + for section in self.sections { + if let Some(hash) = section.hash(endian, data)? { + return Ok(Some(hash)); + } + } + Ok(None) + } + + /// Return the header of a GNU hash section. + /// + /// Returns `Ok(None)` if there is no GNU hash section. + /// Returns `Err` for invalid values. + pub fn gnu_hash_header( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<&'data elf::GnuHashHeader<Elf::Endian>>> { + for section in self.sections { + if let Some(hash) = section.gnu_hash_header(endian, data)? { + return Ok(Some(hash)); + } + } + Ok(None) + } + + /// Return the contents of a GNU hash section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if there is no GNU hash section. + /// Returns `Err` for invalid values. + pub fn gnu_hash( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<(GnuHashTable<'data, Elf>, SectionIndex)>> { + for section in self.sections { + if let Some(hash) = section.gnu_hash(endian, data)? { + return Ok(Some(hash)); + } + } + Ok(None) + } + + /// Return the contents of a `SHT_GNU_VERSYM` section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if there is no `SHT_GNU_VERSYM` section. + /// Returns `Err` for invalid values. + pub fn gnu_versym( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<(&'data [elf::Versym<Elf::Endian>], SectionIndex)>> { + for section in self.sections { + if let Some(syms) = section.gnu_versym(endian, data)? { + return Ok(Some(syms)); + } + } + Ok(None) + } + + /// Return the contents of a `SHT_GNU_VERDEF` section. + /// + /// Also returns the linked string table index. + /// + /// Returns `Ok(None)` if there is no `SHT_GNU_VERDEF` section. + /// Returns `Err` for invalid values. + pub fn gnu_verdef( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<(VerdefIterator<'data, Elf>, SectionIndex)>> { + for section in self.sections { + if let Some(defs) = section.gnu_verdef(endian, data)? { + return Ok(Some(defs)); + } + } + Ok(None) + } + + /// Return the contents of a `SHT_GNU_VERNEED` section. + /// + /// Also returns the linked string table index. + /// + /// Returns `Ok(None)` if there is no `SHT_GNU_VERNEED` section. + /// Returns `Err` for invalid values. + pub fn gnu_verneed( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<(VerneedIterator<'data, Elf>, SectionIndex)>> { + for section in self.sections { + if let Some(needs) = section.gnu_verneed(endian, data)? { + return Ok(Some(needs)); + } + } + Ok(None) + } + + /// Returns the symbol version table. + /// + /// Returns `Ok(None)` if there is no `SHT_GNU_VERSYM` section. + /// Returns `Err` for invalid values. + pub fn versions( + &self, + endian: Elf::Endian, + data: R, + ) -> read::Result<Option<VersionTable<'data, Elf>>> { + let (versyms, link) = match self.gnu_versym(endian, data)? { + Some(val) => val, + None => return Ok(None), + }; + let strings = self.symbol_table_by_index(endian, data, link)?.strings(); + // TODO: check links? + let verdefs = self.gnu_verdef(endian, data)?.map(|x| x.0); + let verneeds = self.gnu_verneed(endian, data)?.map(|x| x.0); + VersionTable::parse(endian, versyms, verdefs, verneeds, strings).map(Some) + } +} + +/// An iterator over the sections of an `ElfFile32`. +pub type ElfSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSectionIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the sections of an `ElfFile64`. +pub type ElfSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSectionIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the sections of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSectionIterator<'data, 'file, Elf, R = &'data [u8]> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) iter: iter::Enumerate<slice::Iter<'data, Elf::SectionHeader>>, +} + +impl<'data, 'file, Elf, R> Iterator for ElfSectionIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type Item = ElfSection<'data, 'file, Elf, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| ElfSection { + index: SectionIndex(index), + file: self.file, + section, + }) + } +} + +/// A section of an `ElfFile32`. +pub type ElfSection32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSection<'data, 'file, elf::FileHeader32<Endian>, R>; +/// A section of an `ElfFile64`. +pub type ElfSection64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSection<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// A section of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSection<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) index: SectionIndex, + pub(super) section: &'data Elf::SectionHeader, +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSection<'data, 'file, Elf, R> { + fn bytes(&self) -> read::Result<&'data [u8]> { + self.section + .data(self.file.endian, self.file.data) + .read_error("Invalid ELF section size or offset") + } + + fn maybe_compressed(&self) -> read::Result<Option<CompressedFileRange>> { + let endian = self.file.endian; + if (self.section.sh_flags(endian).into() & u64::from(elf::SHF_COMPRESSED)) == 0 { + return Ok(None); + } + let (section_offset, section_size) = self + .section + .file_range(endian) + .read_error("Invalid ELF compressed section type")?; + let mut offset = section_offset; + let header = self + .file + .data + .read::<Elf::CompressionHeader>(&mut offset) + .read_error("Invalid ELF compressed section offset")?; + if header.ch_type(endian) != elf::ELFCOMPRESS_ZLIB { + return Err(Error("Unsupported ELF compression type")); + } + let uncompressed_size = header.ch_size(endian).into(); + let compressed_size = section_size + .checked_sub(offset - section_offset) + .read_error("Invalid ELF compressed section size")?; + Ok(Some(CompressedFileRange { + format: CompressionFormat::Zlib, + offset, + compressed_size, + uncompressed_size, + })) + } + + /// Try GNU-style "ZLIB" header decompression. + fn maybe_compressed_gnu(&self) -> read::Result<Option<CompressedFileRange>> { + let name = match self.name() { + Ok(name) => name, + // I think it's ok to ignore this error? + Err(_) => return Ok(None), + }; + if !name.starts_with(".zdebug_") { + return Ok(None); + } + let (section_offset, section_size) = self + .section + .file_range(self.file.endian) + .read_error("Invalid ELF GNU compressed section type")?; + let mut offset = section_offset; + let data = self.file.data; + // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally + // huge allocations. This also reduces the chance of accidentally matching on a + // .debug_str that happens to start with "ZLIB". + if data + .read_bytes(&mut offset, 8) + .read_error("ELF GNU compressed section is too short")? + != b"ZLIB\0\0\0\0" + { + return Err(Error("Invalid ELF GNU compressed section header")); + } + let uncompressed_size = data + .read::<U32Bytes<_>>(&mut offset) + .read_error("ELF GNU compressed section is too short")? + .get(endian::BigEndian) + .into(); + let compressed_size = section_size + .checked_sub(offset - section_offset) + .read_error("ELF GNU compressed section is too short")?; + Ok(Some(CompressedFileRange { + format: CompressionFormat::Zlib, + offset, + compressed_size, + uncompressed_size, + })) + } +} + +impl<'data, 'file, Elf, R> read::private::Sealed for ElfSection<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Elf, R> ObjectSection<'data> for ElfSection<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type RelocationIterator = ElfSectionRelocationIterator<'data, 'file, Elf, R>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + self.section.sh_addr(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.section.sh_size(self.file.endian).into() + } + + #[inline] + fn align(&self) -> u64 { + self.section.sh_addralign(self.file.endian).into() + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + self.section.file_range(self.file.endian) + } + + #[inline] + fn data(&self) -> read::Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> read::Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + fn compressed_file_range(&self) -> read::Result<CompressedFileRange> { + Ok(if let Some(data) = self.maybe_compressed()? { + data + } else if let Some(data) = self.maybe_compressed_gnu()? { + data + } else { + CompressedFileRange::none(self.file_range()) + }) + } + + fn compressed_data(&self) -> read::Result<CompressedData<'data>> { + self.compressed_file_range()?.data(self.file.data) + } + + fn name_bytes(&self) -> read::Result<&[u8]> { + self.file + .sections + .section_name(self.file.endian, self.section) + } + + fn name(&self) -> read::Result<&str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 ELF section name") + } + + #[inline] + fn segment_name_bytes(&self) -> read::Result<Option<&[u8]>> { + Ok(None) + } + + #[inline] + fn segment_name(&self) -> read::Result<Option<&str>> { + Ok(None) + } + + fn kind(&self) -> SectionKind { + let flags = self.section.sh_flags(self.file.endian).into(); + let sh_type = self.section.sh_type(self.file.endian); + match sh_type { + elf::SHT_PROGBITS => { + if flags & u64::from(elf::SHF_ALLOC) != 0 { + if flags & u64::from(elf::SHF_EXECINSTR) != 0 { + SectionKind::Text + } else if flags & u64::from(elf::SHF_TLS) != 0 { + SectionKind::Tls + } else if flags & u64::from(elf::SHF_WRITE) != 0 { + SectionKind::Data + } else if flags & u64::from(elf::SHF_STRINGS) != 0 { + SectionKind::ReadOnlyString + } else { + SectionKind::ReadOnlyData + } + } else if flags & u64::from(elf::SHF_STRINGS) != 0 { + SectionKind::OtherString + } else { + SectionKind::Other + } + } + elf::SHT_NOBITS => { + if flags & u64::from(elf::SHF_TLS) != 0 { + SectionKind::UninitializedTls + } else { + SectionKind::UninitializedData + } + } + elf::SHT_NOTE => SectionKind::Note, + elf::SHT_NULL + | elf::SHT_SYMTAB + | elf::SHT_STRTAB + | elf::SHT_RELA + | elf::SHT_HASH + | elf::SHT_DYNAMIC + | elf::SHT_REL + | elf::SHT_DYNSYM + | elf::SHT_GROUP => SectionKind::Metadata, + _ => SectionKind::Elf(sh_type), + } + } + + fn relocations(&self) -> ElfSectionRelocationIterator<'data, 'file, Elf, R> { + ElfSectionRelocationIterator { + section_index: self.index, + file: self.file, + relocations: None, + } + } + + fn flags(&self) -> SectionFlags { + SectionFlags::Elf { + sh_flags: self.section.sh_flags(self.file.endian).into(), + } + } +} + +/// A trait for generic access to `SectionHeader32` and `SectionHeader64`. +#[allow(missing_docs)] +pub trait SectionHeader: Debug + Pod { + type Elf: FileHeader<SectionHeader = Self, Endian = Self::Endian, Word = Self::Word>; + type Word: Into<u64>; + type Endian: endian::Endian; + + fn sh_name(&self, endian: Self::Endian) -> u32; + fn sh_type(&self, endian: Self::Endian) -> u32; + fn sh_flags(&self, endian: Self::Endian) -> Self::Word; + fn sh_addr(&self, endian: Self::Endian) -> Self::Word; + fn sh_offset(&self, endian: Self::Endian) -> Self::Word; + fn sh_size(&self, endian: Self::Endian) -> Self::Word; + fn sh_link(&self, endian: Self::Endian) -> u32; + fn sh_info(&self, endian: Self::Endian) -> u32; + fn sh_addralign(&self, endian: Self::Endian) -> Self::Word; + fn sh_entsize(&self, endian: Self::Endian) -> Self::Word; + + /// Parse the section name from the string table. + fn name<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + strings: StringTable<'data, R>, + ) -> read::Result<&'data [u8]> { + strings + .get(self.sh_name(endian)) + .read_error("Invalid ELF section name offset") + } + + /// Return the offset and size of the section in the file. + /// + /// Returns `None` for sections that have no data in the file. + fn file_range(&self, endian: Self::Endian) -> Option<(u64, u64)> { + if self.sh_type(endian) == elf::SHT_NOBITS { + None + } else { + Some((self.sh_offset(endian).into(), self.sh_size(endian).into())) + } + } + + /// Return the section data. + /// + /// Returns `Ok(&[])` if the section has no data. + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<&'data [u8]> { + if let Some((offset, size)) = self.file_range(endian) { + data.read_bytes_at(offset, size) + .read_error("Invalid ELF section size or offset") + } else { + Ok(&[]) + } + } + + /// Return the section data as a slice of the given type. + /// + /// Allows padding at the end of the data. + /// Returns `Ok(&[])` if the section has no data. + /// Returns `Err` for invalid values, including bad alignment. + fn data_as_array<'data, T: Pod, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<&'data [T]> { + let mut data = self.data(endian, data).map(Bytes)?; + data.read_slice(data.len() / mem::size_of::<T>()) + .read_error("Invalid ELF section size or offset") + } + + /// Return the strings in the section. + /// + /// Returns `Ok(None)` if the section does not contain strings. + /// Returns `Err` for invalid values. + fn strings<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<StringTable<'data, R>>> { + if self.sh_type(endian) != elf::SHT_STRTAB { + return Ok(None); + } + let str_offset = self.sh_offset(endian).into(); + let str_size = self.sh_size(endian).into(); + let str_end = str_offset + .checked_add(str_size) + .read_error("Invalid ELF string section offset or size")?; + Ok(Some(StringTable::new(data, str_offset, str_end))) + } + + /// Return the symbols in the section. + /// + /// Also finds the linked string table in `sections`. + /// + /// `section_index` must be the 0-based index of this section, and is used + /// to find the corresponding extended section index table in `sections`. + /// + /// Returns `Ok(None)` if the section does not contain symbols. + /// Returns `Err` for invalid values. + fn symbols<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + sections: &SectionTable<'data, Self::Elf, R>, + section_index: SectionIndex, + ) -> read::Result<Option<SymbolTable<'data, Self::Elf, R>>> { + let sh_type = self.sh_type(endian); + if sh_type != elf::SHT_SYMTAB && sh_type != elf::SHT_DYNSYM { + return Ok(None); + } + SymbolTable::parse(endian, data, sections, section_index, self).map(Some) + } + + /// Return the `Elf::Rel` entries in the section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if the section does not contain relocations. + /// Returns `Err` for invalid values. + fn rel<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(&'data [<Self::Elf as FileHeader>::Rel], SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_REL { + return Ok(None); + } + let rel = self + .data_as_array(endian, data) + .read_error("Invalid ELF relocation section offset or size")?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((rel, link))) + } + + /// Return the `Elf::Rela` entries in the section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if the section does not contain relocations. + /// Returns `Err` for invalid values. + fn rela<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(&'data [<Self::Elf as FileHeader>::Rela], SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_RELA { + return Ok(None); + } + let rela = self + .data_as_array(endian, data) + .read_error("Invalid ELF relocation section offset or size")?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((rela, link))) + } + + /// Return entries in a dynamic section. + /// + /// Also returns the linked string table index. + /// + /// Returns `Ok(None)` if the section type is not `SHT_DYNAMIC`. + /// Returns `Err` for invalid values. + fn dynamic<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(&'data [<Self::Elf as FileHeader>::Dyn], SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_DYNAMIC { + return Ok(None); + } + let dynamic = self + .data_as_array(endian, data) + .read_error("Invalid ELF dynamic section offset or size")?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((dynamic, link))) + } + + /// Return a note iterator for the section data. + /// + /// Returns `Ok(None)` if the section does not contain notes. + /// Returns `Err` for invalid values. + fn notes<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<NoteIterator<'data, Self::Elf>>> { + if self.sh_type(endian) != elf::SHT_NOTE { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF note section offset or size")?; + let notes = NoteIterator::new(endian, self.sh_addralign(endian), data)?; + Ok(Some(notes)) + } + + /// Return the contents of a group section. + /// + /// The first value is a `GRP_*` value, and the remaining values + /// are section indices. + /// + /// Returns `Ok(None)` if the section does not define a group. + /// Returns `Err` for invalid values. + fn group<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(u32, &'data [U32Bytes<Self::Endian>])>> { + if self.sh_type(endian) != elf::SHT_GROUP { + return Ok(None); + } + let mut data = self + .data(endian, data) + .read_error("Invalid ELF group section offset or size") + .map(Bytes)?; + let flag = data + .read::<U32Bytes<_>>() + .read_error("Invalid ELF group section offset or size")? + .get(endian); + let count = data.len() / mem::size_of::<U32Bytes<Self::Endian>>(); + let sections = data + .read_slice(count) + .read_error("Invalid ELF group section offset or size")?; + Ok(Some((flag, sections))) + } + + /// Return the header of a SysV hash section. + /// + /// Returns `Ok(None)` if the section does not contain a SysV hash. + /// Returns `Err` for invalid values. + fn hash_header<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<&'data elf::HashHeader<Self::Endian>>> { + if self.sh_type(endian) != elf::SHT_HASH { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF hash section offset or size")?; + let header = data + .read_at::<elf::HashHeader<Self::Endian>>(0) + .read_error("Invalid hash header")?; + Ok(Some(header)) + } + + /// Return the contents of a SysV hash section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if the section does not contain a SysV hash. + /// Returns `Err` for invalid values. + fn hash<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(HashTable<'data, Self::Elf>, SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_HASH { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF hash section offset or size")?; + let hash = HashTable::parse(endian, data)?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((hash, link))) + } + + /// Return the header of a GNU hash section. + /// + /// Returns `Ok(None)` if the section does not contain a GNU hash. + /// Returns `Err` for invalid values. + fn gnu_hash_header<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<&'data elf::GnuHashHeader<Self::Endian>>> { + if self.sh_type(endian) != elf::SHT_GNU_HASH { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF GNU hash section offset or size")?; + let header = data + .read_at::<elf::GnuHashHeader<Self::Endian>>(0) + .read_error("Invalid GNU hash header")?; + Ok(Some(header)) + } + + /// Return the contents of a GNU hash section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if the section does not contain a GNU hash. + /// Returns `Err` for invalid values. + fn gnu_hash<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(GnuHashTable<'data, Self::Elf>, SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_GNU_HASH { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF GNU hash section offset or size")?; + let hash = GnuHashTable::parse(endian, data)?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((hash, link))) + } + + /// Return the contents of a `SHT_GNU_VERSYM` section. + /// + /// Also returns the linked symbol table index. + /// + /// Returns `Ok(None)` if the section type is not `SHT_GNU_VERSYM`. + /// Returns `Err` for invalid values. + fn gnu_versym<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(&'data [elf::Versym<Self::Endian>], SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_GNU_VERSYM { + return Ok(None); + } + let versym = self + .data_as_array(endian, data) + .read_error("Invalid ELF GNU versym section offset or size")?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((versym, link))) + } + + /// Return an iterator for the entries of a `SHT_GNU_VERDEF` section. + /// + /// Also returns the linked string table index. + /// + /// Returns `Ok(None)` if the section type is not `SHT_GNU_VERDEF`. + /// Returns `Err` for invalid values. + fn gnu_verdef<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(VerdefIterator<'data, Self::Elf>, SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_GNU_VERDEF { + return Ok(None); + } + let verdef = self + .data(endian, data) + .read_error("Invalid ELF GNU verdef section offset or size")?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((VerdefIterator::new(endian, verdef), link))) + } + + /// Return an iterator for the entries of a `SHT_GNU_VERNEED` section. + /// + /// Also returns the linked string table index. + /// + /// Returns `Ok(None)` if the section type is not `SHT_GNU_VERNEED`. + /// Returns `Err` for invalid values. + fn gnu_verneed<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<(VerneedIterator<'data, Self::Elf>, SectionIndex)>> { + if self.sh_type(endian) != elf::SHT_GNU_VERNEED { + return Ok(None); + } + let verneed = self + .data(endian, data) + .read_error("Invalid ELF GNU verneed section offset or size")?; + let link = SectionIndex(self.sh_link(endian) as usize); + Ok(Some((VerneedIterator::new(endian, verneed), link))) + } +} + +impl<Endian: endian::Endian> SectionHeader for elf::SectionHeader32<Endian> { + type Elf = elf::FileHeader32<Endian>; + type Word = u32; + type Endian = Endian; + + #[inline] + fn sh_name(&self, endian: Self::Endian) -> u32 { + self.sh_name.get(endian) + } + + #[inline] + fn sh_type(&self, endian: Self::Endian) -> u32 { + self.sh_type.get(endian) + } + + #[inline] + fn sh_flags(&self, endian: Self::Endian) -> Self::Word { + self.sh_flags.get(endian) + } + + #[inline] + fn sh_addr(&self, endian: Self::Endian) -> Self::Word { + self.sh_addr.get(endian) + } + + #[inline] + fn sh_offset(&self, endian: Self::Endian) -> Self::Word { + self.sh_offset.get(endian) + } + + #[inline] + fn sh_size(&self, endian: Self::Endian) -> Self::Word { + self.sh_size.get(endian) + } + + #[inline] + fn sh_link(&self, endian: Self::Endian) -> u32 { + self.sh_link.get(endian) + } + + #[inline] + fn sh_info(&self, endian: Self::Endian) -> u32 { + self.sh_info.get(endian) + } + + #[inline] + fn sh_addralign(&self, endian: Self::Endian) -> Self::Word { + self.sh_addralign.get(endian) + } + + #[inline] + fn sh_entsize(&self, endian: Self::Endian) -> Self::Word { + self.sh_entsize.get(endian) + } +} + +impl<Endian: endian::Endian> SectionHeader for elf::SectionHeader64<Endian> { + type Word = u64; + type Endian = Endian; + type Elf = elf::FileHeader64<Endian>; + + #[inline] + fn sh_name(&self, endian: Self::Endian) -> u32 { + self.sh_name.get(endian) + } + + #[inline] + fn sh_type(&self, endian: Self::Endian) -> u32 { + self.sh_type.get(endian) + } + + #[inline] + fn sh_flags(&self, endian: Self::Endian) -> Self::Word { + self.sh_flags.get(endian) + } + + #[inline] + fn sh_addr(&self, endian: Self::Endian) -> Self::Word { + self.sh_addr.get(endian) + } + + #[inline] + fn sh_offset(&self, endian: Self::Endian) -> Self::Word { + self.sh_offset.get(endian) + } + + #[inline] + fn sh_size(&self, endian: Self::Endian) -> Self::Word { + self.sh_size.get(endian) + } + + #[inline] + fn sh_link(&self, endian: Self::Endian) -> u32 { + self.sh_link.get(endian) + } + + #[inline] + fn sh_info(&self, endian: Self::Endian) -> u32 { + self.sh_info.get(endian) + } + + #[inline] + fn sh_addralign(&self, endian: Self::Endian) -> Self::Word { + self.sh_addralign.get(endian) + } + + #[inline] + fn sh_entsize(&self, endian: Self::Endian) -> Self::Word { + self.sh_entsize.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/segment.rs b/third_party/rust/object/src/read/elf/segment.rs new file mode 100644 index 0000000000..445893c8d1 --- /dev/null +++ b/third_party/rust/object/src/read/elf/segment.rs @@ -0,0 +1,333 @@ +use core::fmt::Debug; +use core::{mem, slice, str}; + +use crate::elf; +use crate::endian::{self, Endianness}; +use crate::pod::Pod; +use crate::read::{self, Bytes, ObjectSegment, ReadError, ReadRef, SegmentFlags}; + +use super::{ElfFile, FileHeader, NoteIterator}; + +/// An iterator over the segments of an `ElfFile32`. +pub type ElfSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSegmentIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the segments of an `ElfFile64`. +pub type ElfSegmentIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSegmentIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the segments of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSegmentIterator<'data, 'file, Elf, R = &'data [u8]> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) iter: slice::Iter<'data, Elf::ProgramHeader>, +} + +impl<'data, 'file, Elf, R> Iterator for ElfSegmentIterator<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + type Item = ElfSegment<'data, 'file, Elf, R>; + + fn next(&mut self) -> Option<Self::Item> { + for segment in self.iter.by_ref() { + if segment.p_type(self.file.endian) == elf::PT_LOAD { + return Some(ElfSegment { + file: self.file, + segment, + }); + } + } + None + } +} + +/// A segment of an `ElfFile32`. +pub type ElfSegment32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSegment<'data, 'file, elf::FileHeader32<Endian>, R>; +/// A segment of an `ElfFile64`. +pub type ElfSegment64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSegment<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// A segment of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSegment<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file ElfFile<'data, Elf, R>, + pub(super) segment: &'data Elf::ProgramHeader, +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ElfSegment<'data, 'file, Elf, R> { + fn bytes(&self) -> read::Result<&'data [u8]> { + self.segment + .data(self.file.endian, self.file.data) + .read_error("Invalid ELF segment size or offset") + } +} + +impl<'data, 'file, Elf, R> read::private::Sealed for ElfSegment<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Elf, R> ObjectSegment<'data> for ElfSegment<'data, 'file, Elf, R> +where + Elf: FileHeader, + R: ReadRef<'data>, +{ + #[inline] + fn address(&self) -> u64 { + self.segment.p_vaddr(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.segment.p_memsz(self.file.endian).into() + } + + #[inline] + fn align(&self) -> u64 { + self.segment.p_align(self.file.endian).into() + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + self.segment.file_range(self.file.endian) + } + + #[inline] + fn data(&self) -> read::Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> read::Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn name_bytes(&self) -> read::Result<Option<&[u8]>> { + Ok(None) + } + + #[inline] + fn name(&self) -> read::Result<Option<&str>> { + Ok(None) + } + + #[inline] + fn flags(&self) -> SegmentFlags { + let p_flags = self.segment.p_flags(self.file.endian); + SegmentFlags::Elf { p_flags } + } +} + +/// A trait for generic access to `ProgramHeader32` and `ProgramHeader64`. +#[allow(missing_docs)] +pub trait ProgramHeader: Debug + Pod { + type Elf: FileHeader<ProgramHeader = Self, Endian = Self::Endian, Word = Self::Word>; + type Word: Into<u64>; + type Endian: endian::Endian; + + fn p_type(&self, endian: Self::Endian) -> u32; + fn p_flags(&self, endian: Self::Endian) -> u32; + fn p_offset(&self, endian: Self::Endian) -> Self::Word; + fn p_vaddr(&self, endian: Self::Endian) -> Self::Word; + fn p_paddr(&self, endian: Self::Endian) -> Self::Word; + fn p_filesz(&self, endian: Self::Endian) -> Self::Word; + fn p_memsz(&self, endian: Self::Endian) -> Self::Word; + fn p_align(&self, endian: Self::Endian) -> Self::Word; + + /// Return the offset and size of the segment in the file. + fn file_range(&self, endian: Self::Endian) -> (u64, u64) { + (self.p_offset(endian).into(), self.p_filesz(endian).into()) + } + + /// Return the segment data. + /// + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> Result<&'data [u8], ()> { + let (offset, size) = self.file_range(endian); + data.read_bytes_at(offset, size) + } + + /// Return the segment data as a slice of the given type. + /// + /// Allows padding at the end of the data. + /// Returns `Ok(&[])` if the segment has no data. + /// Returns `Err` for invalid values, including bad alignment. + fn data_as_array<'data, T: Pod, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> Result<&'data [T], ()> { + let mut data = self.data(endian, data).map(Bytes)?; + data.read_slice(data.len() / mem::size_of::<T>()) + } + + /// Return the segment data in the given virtual address range + /// + /// Returns `Ok(None)` if the segment does not contain the address. + /// Returns `Err` for invalid values. + fn data_range<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + address: u64, + size: u64, + ) -> Result<Option<&'data [u8]>, ()> { + Ok(read::util::data_range( + self.data(endian, data)?, + self.p_vaddr(endian).into(), + address, + size, + )) + } + + /// Return entries in a dynamic segment. + /// + /// Returns `Ok(None)` if the segment is not `PT_DYNAMIC`. + /// Returns `Err` for invalid values. + fn dynamic<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<&'data [<Self::Elf as FileHeader>::Dyn]>> { + if self.p_type(endian) != elf::PT_DYNAMIC { + return Ok(None); + } + let dynamic = self + .data_as_array(endian, data) + .read_error("Invalid ELF dynamic segment offset or size")?; + Ok(Some(dynamic)) + } + + /// Return a note iterator for the segment data. + /// + /// Returns `Ok(None)` if the segment does not contain notes. + /// Returns `Err` for invalid values. + fn notes<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> read::Result<Option<NoteIterator<'data, Self::Elf>>> { + if self.p_type(endian) != elf::PT_NOTE { + return Ok(None); + } + let data = self + .data(endian, data) + .read_error("Invalid ELF note segment offset or size")?; + let notes = NoteIterator::new(endian, self.p_align(endian), data)?; + Ok(Some(notes)) + } +} + +impl<Endian: endian::Endian> ProgramHeader for elf::ProgramHeader32<Endian> { + type Word = u32; + type Endian = Endian; + type Elf = elf::FileHeader32<Endian>; + + #[inline] + fn p_type(&self, endian: Self::Endian) -> u32 { + self.p_type.get(endian) + } + + #[inline] + fn p_flags(&self, endian: Self::Endian) -> u32 { + self.p_flags.get(endian) + } + + #[inline] + fn p_offset(&self, endian: Self::Endian) -> Self::Word { + self.p_offset.get(endian) + } + + #[inline] + fn p_vaddr(&self, endian: Self::Endian) -> Self::Word { + self.p_vaddr.get(endian) + } + + #[inline] + fn p_paddr(&self, endian: Self::Endian) -> Self::Word { + self.p_paddr.get(endian) + } + + #[inline] + fn p_filesz(&self, endian: Self::Endian) -> Self::Word { + self.p_filesz.get(endian) + } + + #[inline] + fn p_memsz(&self, endian: Self::Endian) -> Self::Word { + self.p_memsz.get(endian) + } + + #[inline] + fn p_align(&self, endian: Self::Endian) -> Self::Word { + self.p_align.get(endian) + } +} + +impl<Endian: endian::Endian> ProgramHeader for elf::ProgramHeader64<Endian> { + type Word = u64; + type Endian = Endian; + type Elf = elf::FileHeader64<Endian>; + + #[inline] + fn p_type(&self, endian: Self::Endian) -> u32 { + self.p_type.get(endian) + } + + #[inline] + fn p_flags(&self, endian: Self::Endian) -> u32 { + self.p_flags.get(endian) + } + + #[inline] + fn p_offset(&self, endian: Self::Endian) -> Self::Word { + self.p_offset.get(endian) + } + + #[inline] + fn p_vaddr(&self, endian: Self::Endian) -> Self::Word { + self.p_vaddr.get(endian) + } + + #[inline] + fn p_paddr(&self, endian: Self::Endian) -> Self::Word { + self.p_paddr.get(endian) + } + + #[inline] + fn p_filesz(&self, endian: Self::Endian) -> Self::Word { + self.p_filesz.get(endian) + } + + #[inline] + fn p_memsz(&self, endian: Self::Endian) -> Self::Word { + self.p_memsz.get(endian) + } + + #[inline] + fn p_align(&self, endian: Self::Endian) -> Self::Word { + self.p_align.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/symbol.rs b/third_party/rust/object/src/read/elf/symbol.rs new file mode 100644 index 0000000000..390aa466f9 --- /dev/null +++ b/third_party/rust/object/src/read/elf/symbol.rs @@ -0,0 +1,579 @@ +use alloc::fmt; +use alloc::vec::Vec; +use core::fmt::Debug; +use core::slice; +use core::str; + +use crate::endian::{self, Endianness}; +use crate::pod::Pod; +use crate::read::util::StringTable; +use crate::read::{ + self, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, SectionIndex, SymbolFlags, + SymbolIndex, SymbolKind, SymbolMap, SymbolMapEntry, SymbolScope, SymbolSection, +}; +use crate::{elf, U32}; + +use super::{FileHeader, SectionHeader, SectionTable}; + +/// A table of symbol entries in an ELF file. +/// +/// Also includes the string table used for the symbol names. +#[derive(Debug, Clone, Copy)] +pub struct SymbolTable<'data, Elf: FileHeader, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + section: SectionIndex, + string_section: SectionIndex, + shndx_section: SectionIndex, + symbols: &'data [Elf::Sym], + strings: StringTable<'data, R>, + shndx: &'data [U32<Elf::Endian>], +} + +impl<'data, Elf: FileHeader, R: ReadRef<'data>> Default for SymbolTable<'data, Elf, R> { + fn default() -> Self { + SymbolTable { + section: SectionIndex(0), + string_section: SectionIndex(0), + shndx_section: SectionIndex(0), + symbols: &[], + strings: Default::default(), + shndx: &[], + } + } +} + +impl<'data, Elf: FileHeader, R: ReadRef<'data>> SymbolTable<'data, Elf, R> { + /// Parse the given symbol table section. + pub fn parse( + endian: Elf::Endian, + data: R, + sections: &SectionTable<'data, Elf, R>, + section_index: SectionIndex, + section: &Elf::SectionHeader, + ) -> read::Result<SymbolTable<'data, Elf, R>> { + debug_assert!( + section.sh_type(endian) == elf::SHT_DYNSYM + || section.sh_type(endian) == elf::SHT_SYMTAB + ); + + let symbols = section + .data_as_array(endian, data) + .read_error("Invalid ELF symbol table data")?; + + let link = SectionIndex(section.sh_link(endian) as usize); + let strings = sections.strings(endian, data, link)?; + + let mut shndx_section = SectionIndex(0); + let mut shndx = &[][..]; + for (i, s) in sections.iter().enumerate() { + if s.sh_type(endian) == elf::SHT_SYMTAB_SHNDX + && s.sh_link(endian) as usize == section_index.0 + { + shndx_section = SectionIndex(i); + shndx = s + .data_as_array(endian, data) + .read_error("Invalid ELF symtab_shndx data")?; + } + } + + Ok(SymbolTable { + section: section_index, + string_section: link, + symbols, + strings, + shndx, + shndx_section, + }) + } + + /// Return the section index of this symbol table. + #[inline] + pub fn section(&self) -> SectionIndex { + self.section + } + + /// Return the section index of the shndx table. + #[inline] + pub fn shndx_section(&self) -> SectionIndex { + self.shndx_section + } + + /// Return the section index of the linked string table. + #[inline] + pub fn string_section(&self) -> SectionIndex { + self.string_section + } + + /// Return the string table used for the symbol names. + #[inline] + pub fn strings(&self) -> StringTable<'data, R> { + self.strings + } + + /// Return the symbol table. + #[inline] + pub fn symbols(&self) -> &'data [Elf::Sym] { + self.symbols + } + + /// Iterate over the symbols. + #[inline] + pub fn iter(&self) -> slice::Iter<'data, Elf::Sym> { + self.symbols.iter() + } + + /// Return true if the symbol table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + + /// The number of symbols. + #[inline] + pub fn len(&self) -> usize { + self.symbols.len() + } + + /// Return the symbol at the given index. + pub fn symbol(&self, index: usize) -> read::Result<&'data Elf::Sym> { + self.symbols + .get(index) + .read_error("Invalid ELF symbol index") + } + + /// Return the extended section index for the given symbol if present. + #[inline] + pub fn shndx(&self, endian: Elf::Endian, index: usize) -> Option<u32> { + self.shndx.get(index).map(|x| x.get(endian)) + } + + /// Return the section index for the given symbol. + /// + /// This uses the extended section index if present. + pub fn symbol_section( + &self, + endian: Elf::Endian, + symbol: &'data Elf::Sym, + index: usize, + ) -> read::Result<Option<SectionIndex>> { + match symbol.st_shndx(endian) { + elf::SHN_UNDEF => Ok(None), + elf::SHN_XINDEX => self + .shndx(endian, index) + .read_error("Missing ELF symbol extended index") + .map(|index| Some(SectionIndex(index as usize))), + shndx if shndx < elf::SHN_LORESERVE => Ok(Some(SectionIndex(shndx.into()))), + _ => Ok(None), + } + } + + /// Return the symbol name for the given symbol. + pub fn symbol_name( + &self, + endian: Elf::Endian, + symbol: &'data Elf::Sym, + ) -> read::Result<&'data [u8]> { + symbol.name(endian, self.strings) + } + + /// Construct a map from addresses to a user-defined map entry. + pub fn map<Entry: SymbolMapEntry, F: Fn(&'data Elf::Sym) -> Option<Entry>>( + &self, + endian: Elf::Endian, + f: F, + ) -> SymbolMap<Entry> { + let mut symbols = Vec::with_capacity(self.symbols.len()); + for symbol in self.symbols { + if !symbol.is_definition(endian) { + continue; + } + if let Some(entry) = f(symbol) { + symbols.push(entry); + } + } + SymbolMap::new(symbols) + } +} + +/// A symbol table of an `ElfFile32`. +pub type ElfSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSymbolTable<'data, 'file, elf::FileHeader32<Endian>, R>; +/// A symbol table of an `ElfFile32`. +pub type ElfSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSymbolTable<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// A symbol table of an `ElfFile`. +#[derive(Debug, Clone, Copy)] +pub struct ElfSymbolTable<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) endian: Elf::Endian, + pub(super) symbols: &'file SymbolTable<'data, Elf, R>, +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> read::private::Sealed + for ElfSymbolTable<'data, 'file, Elf, R> +{ +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbolTable<'data> + for ElfSymbolTable<'data, 'file, Elf, R> +{ + type Symbol = ElfSymbol<'data, 'file, Elf, R>; + type SymbolIterator = ElfSymbolIterator<'data, 'file, Elf, R>; + + fn symbols(&self) -> Self::SymbolIterator { + ElfSymbolIterator { + endian: self.endian, + symbols: self.symbols, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> read::Result<Self::Symbol> { + let symbol = self.symbols.symbol(index.0)?; + Ok(ElfSymbol { + endian: self.endian, + symbols: self.symbols, + index, + symbol, + }) + } +} + +/// An iterator over the symbols of an `ElfFile32`. +pub type ElfSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSymbolIterator<'data, 'file, elf::FileHeader32<Endian>, R>; +/// An iterator over the symbols of an `ElfFile64`. +pub type ElfSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSymbolIterator<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// An iterator over the symbols of an `ElfFile`. +pub struct ElfSymbolIterator<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) endian: Elf::Endian, + pub(super) symbols: &'file SymbolTable<'data, Elf, R>, + pub(super) index: usize, +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> fmt::Debug + for ElfSymbolIterator<'data, 'file, Elf, R> +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfSymbolIterator").finish() + } +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> Iterator + for ElfSymbolIterator<'data, 'file, Elf, R> +{ + type Item = ElfSymbol<'data, 'file, Elf, R>; + + fn next(&mut self) -> Option<Self::Item> { + let index = self.index; + let symbol = self.symbols.symbols.get(index)?; + self.index += 1; + Some(ElfSymbol { + endian: self.endian, + symbols: self.symbols, + index: SymbolIndex(index), + symbol, + }) + } +} + +/// A symbol of an `ElfFile32`. +pub type ElfSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSymbol<'data, 'file, elf::FileHeader32<Endian>, R>; +/// A symbol of an `ElfFile64`. +pub type ElfSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + ElfSymbol<'data, 'file, elf::FileHeader64<Endian>, R>; + +/// A symbol of an `ElfFile`. +#[derive(Debug, Clone, Copy)] +pub struct ElfSymbol<'data, 'file, Elf, R = &'data [u8]> +where + 'data: 'file, + Elf: FileHeader, + R: ReadRef<'data>, +{ + pub(super) endian: Elf::Endian, + pub(super) symbols: &'file SymbolTable<'data, Elf, R>, + pub(super) index: SymbolIndex, + pub(super) symbol: &'data Elf::Sym, +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> read::private::Sealed + for ElfSymbol<'data, 'file, Elf, R> +{ +} + +impl<'data, 'file, Elf: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> + for ElfSymbol<'data, 'file, Elf, R> +{ + #[inline] + fn index(&self) -> SymbolIndex { + self.index + } + + fn name_bytes(&self) -> read::Result<&'data [u8]> { + self.symbol.name(self.endian, self.symbols.strings()) + } + + fn name(&self) -> read::Result<&'data str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 ELF symbol name") + } + + #[inline] + fn address(&self) -> u64 { + self.symbol.st_value(self.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.symbol.st_size(self.endian).into() + } + + fn kind(&self) -> SymbolKind { + match self.symbol.st_type() { + elf::STT_NOTYPE if self.index.0 == 0 => SymbolKind::Null, + elf::STT_OBJECT | elf::STT_COMMON => SymbolKind::Data, + elf::STT_FUNC => SymbolKind::Text, + elf::STT_SECTION => SymbolKind::Section, + elf::STT_FILE => SymbolKind::File, + elf::STT_TLS => SymbolKind::Tls, + _ => SymbolKind::Unknown, + } + } + + fn section(&self) -> SymbolSection { + match self.symbol.st_shndx(self.endian) { + elf::SHN_UNDEF => SymbolSection::Undefined, + elf::SHN_ABS => { + if self.symbol.st_type() == elf::STT_FILE { + SymbolSection::None + } else { + SymbolSection::Absolute + } + } + elf::SHN_COMMON => SymbolSection::Common, + elf::SHN_XINDEX => match self.symbols.shndx(self.endian, self.index.0) { + Some(index) => SymbolSection::Section(SectionIndex(index as usize)), + None => SymbolSection::Unknown, + }, + index if index < elf::SHN_LORESERVE => { + SymbolSection::Section(SectionIndex(index as usize)) + } + _ => SymbolSection::Unknown, + } + } + + #[inline] + fn is_undefined(&self) -> bool { + self.symbol.st_shndx(self.endian) == elf::SHN_UNDEF + } + + #[inline] + fn is_definition(&self) -> bool { + self.symbol.is_definition(self.endian) + } + + #[inline] + fn is_common(&self) -> bool { + self.symbol.st_shndx(self.endian) == elf::SHN_COMMON + } + + #[inline] + fn is_weak(&self) -> bool { + self.symbol.st_bind() == elf::STB_WEAK + } + + fn scope(&self) -> SymbolScope { + if self.symbol.st_shndx(self.endian) == elf::SHN_UNDEF { + SymbolScope::Unknown + } else { + match self.symbol.st_bind() { + elf::STB_LOCAL => SymbolScope::Compilation, + elf::STB_GLOBAL | elf::STB_WEAK => { + if self.symbol.st_visibility() == elf::STV_HIDDEN { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + } + } + _ => SymbolScope::Unknown, + } + } + } + + #[inline] + fn is_global(&self) -> bool { + self.symbol.st_bind() != elf::STB_LOCAL + } + + #[inline] + fn is_local(&self) -> bool { + self.symbol.st_bind() == elf::STB_LOCAL + } + + #[inline] + fn flags(&self) -> SymbolFlags<SectionIndex> { + SymbolFlags::Elf { + st_info: self.symbol.st_info(), + st_other: self.symbol.st_other(), + } + } +} + +/// A trait for generic access to `Sym32` and `Sym64`. +#[allow(missing_docs)] +pub trait Sym: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + + fn st_name(&self, endian: Self::Endian) -> u32; + fn st_info(&self) -> u8; + fn st_bind(&self) -> u8; + fn st_type(&self) -> u8; + fn st_other(&self) -> u8; + fn st_visibility(&self) -> u8; + fn st_shndx(&self, endian: Self::Endian) -> u16; + fn st_value(&self, endian: Self::Endian) -> Self::Word; + fn st_size(&self, endian: Self::Endian) -> Self::Word; + + /// Parse the symbol name from the string table. + fn name<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + strings: StringTable<'data, R>, + ) -> read::Result<&'data [u8]> { + strings + .get(self.st_name(endian)) + .read_error("Invalid ELF symbol name offset") + } + + /// Return true if the symbol is undefined. + #[inline] + fn is_undefined(&self, endian: Self::Endian) -> bool { + self.st_shndx(endian) == elf::SHN_UNDEF + } + + /// Return true if the symbol is a definition of a function or data object. + fn is_definition(&self, endian: Self::Endian) -> bool { + let st_type = self.st_type(); + (st_type == elf::STT_NOTYPE || st_type == elf::STT_FUNC || st_type == elf::STT_OBJECT) + && self.st_shndx(endian) != elf::SHN_UNDEF + } +} + +impl<Endian: endian::Endian> Sym for elf::Sym32<Endian> { + type Word = u32; + type Endian = Endian; + + #[inline] + fn st_name(&self, endian: Self::Endian) -> u32 { + self.st_name.get(endian) + } + + #[inline] + fn st_info(&self) -> u8 { + self.st_info + } + + #[inline] + fn st_bind(&self) -> u8 { + self.st_bind() + } + + #[inline] + fn st_type(&self) -> u8 { + self.st_type() + } + + #[inline] + fn st_other(&self) -> u8 { + self.st_other + } + + #[inline] + fn st_visibility(&self) -> u8 { + self.st_visibility() + } + + #[inline] + fn st_shndx(&self, endian: Self::Endian) -> u16 { + self.st_shndx.get(endian) + } + + #[inline] + fn st_value(&self, endian: Self::Endian) -> Self::Word { + self.st_value.get(endian) + } + + #[inline] + fn st_size(&self, endian: Self::Endian) -> Self::Word { + self.st_size.get(endian) + } +} + +impl<Endian: endian::Endian> Sym for elf::Sym64<Endian> { + type Word = u64; + type Endian = Endian; + + #[inline] + fn st_name(&self, endian: Self::Endian) -> u32 { + self.st_name.get(endian) + } + + #[inline] + fn st_info(&self) -> u8 { + self.st_info + } + + #[inline] + fn st_bind(&self) -> u8 { + self.st_bind() + } + + #[inline] + fn st_type(&self) -> u8 { + self.st_type() + } + + #[inline] + fn st_other(&self) -> u8 { + self.st_other + } + + #[inline] + fn st_visibility(&self) -> u8 { + self.st_visibility() + } + + #[inline] + fn st_shndx(&self, endian: Self::Endian) -> u16 { + self.st_shndx.get(endian) + } + + #[inline] + fn st_value(&self, endian: Self::Endian) -> Self::Word { + self.st_value.get(endian) + } + + #[inline] + fn st_size(&self, endian: Self::Endian) -> Self::Word { + self.st_size.get(endian) + } +} diff --git a/third_party/rust/object/src/read/elf/version.rs b/third_party/rust/object/src/read/elf/version.rs new file mode 100644 index 0000000000..6d80ba1e35 --- /dev/null +++ b/third_party/rust/object/src/read/elf/version.rs @@ -0,0 +1,421 @@ +use alloc::vec::Vec; + +use crate::read::{Bytes, ReadError, ReadRef, Result, StringTable}; +use crate::{elf, endian}; + +use super::FileHeader; + +/// A version index. +#[derive(Debug, Default, Clone, Copy)] +pub struct VersionIndex(pub u16); + +impl VersionIndex { + /// Return the version index. + pub fn index(&self) -> u16 { + self.0 & elf::VERSYM_VERSION + } + + /// Return true if it is the local index. + pub fn is_local(&self) -> bool { + self.index() == elf::VER_NDX_LOCAL + } + + /// Return true if it is the global index. + pub fn is_global(&self) -> bool { + self.index() == elf::VER_NDX_GLOBAL + } + + /// Return the hidden flag. + pub fn is_hidden(&self) -> bool { + self.0 & elf::VERSYM_HIDDEN != 0 + } +} + +/// A version definition or requirement. +/// +/// This is derived from entries in the `SHT_GNU_verdef` and `SHT_GNU_verneed` sections. +#[derive(Debug, Default, Clone, Copy)] +pub struct Version<'data> { + name: &'data [u8], + hash: u32, + // Used to keep track of valid indices in `VersionTable`. + valid: bool, +} + +impl<'data> Version<'data> { + /// Return the version name. + pub fn name(&self) -> &'data [u8] { + self.name + } + + /// Return hash of the version name. + pub fn hash(&self) -> u32 { + self.hash + } +} + +/// A table of version definitions and requirements. +/// +/// It allows looking up the version information for a given symbol index. +/// +/// This is derived from entries in the `SHT_GNU_versym`, `SHT_GNU_verdef` and `SHT_GNU_verneed` sections. +#[derive(Debug, Clone)] +pub struct VersionTable<'data, Elf: FileHeader> { + symbols: &'data [elf::Versym<Elf::Endian>], + versions: Vec<Version<'data>>, +} + +impl<'data, Elf: FileHeader> Default for VersionTable<'data, Elf> { + fn default() -> Self { + VersionTable { + symbols: &[], + versions: Vec::new(), + } + } +} + +impl<'data, Elf: FileHeader> VersionTable<'data, Elf> { + /// Parse the version sections. + pub fn parse<R: ReadRef<'data>>( + endian: Elf::Endian, + versyms: &'data [elf::Versym<Elf::Endian>], + verdefs: Option<VerdefIterator<'data, Elf>>, + verneeds: Option<VerneedIterator<'data, Elf>>, + strings: StringTable<'data, R>, + ) -> Result<Self> { + let mut max_index = 0; + if let Some(mut verdefs) = verdefs.clone() { + while let Some((verdef, _)) = verdefs.next()? { + if verdef.vd_flags.get(endian) & elf::VER_FLG_BASE != 0 { + continue; + } + let index = verdef.vd_ndx.get(endian) & elf::VERSYM_VERSION; + if max_index < index { + max_index = index; + } + } + } + if let Some(mut verneeds) = verneeds.clone() { + while let Some((_, mut vernauxs)) = verneeds.next()? { + while let Some(vernaux) = vernauxs.next()? { + let index = vernaux.vna_other.get(endian) & elf::VERSYM_VERSION; + if max_index < index { + max_index = index; + } + } + } + } + + // Indices should be sequential, but this could be up to + // 32k * size_of::<Version>() if max_index is bad. + let mut versions = vec![Version::default(); max_index as usize + 1]; + + if let Some(mut verdefs) = verdefs { + while let Some((verdef, mut verdauxs)) = verdefs.next()? { + if verdef.vd_flags.get(endian) & elf::VER_FLG_BASE != 0 { + continue; + } + let index = verdef.vd_ndx.get(endian) & elf::VERSYM_VERSION; + if index <= elf::VER_NDX_GLOBAL { + // TODO: return error? + continue; + } + if let Some(verdaux) = verdauxs.next()? { + versions[usize::from(index)] = Version { + name: verdaux.name(endian, strings)?, + hash: verdef.vd_hash.get(endian), + valid: true, + }; + } + } + } + if let Some(mut verneeds) = verneeds { + while let Some((_, mut vernauxs)) = verneeds.next()? { + while let Some(vernaux) = vernauxs.next()? { + let index = vernaux.vna_other.get(endian) & elf::VERSYM_VERSION; + if index <= elf::VER_NDX_GLOBAL { + // TODO: return error? + continue; + } + versions[usize::from(index)] = Version { + name: vernaux.name(endian, strings)?, + hash: vernaux.vna_hash.get(endian), + valid: true, + }; + } + } + } + + Ok(VersionTable { + symbols: versyms, + versions, + }) + } + + /// Return true if the version table is empty. + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + + /// Return version index for a given symbol index. + pub fn version_index(&self, endian: Elf::Endian, index: usize) -> VersionIndex { + let version_index = match self.symbols.get(index) { + Some(x) => x.0.get(endian), + // Ideally this would be VER_NDX_LOCAL for undefined symbols, + // but currently there are no checks that need this distinction. + None => elf::VER_NDX_GLOBAL, + }; + VersionIndex(version_index) + } + + /// Return version information for a given symbol version index. + /// + /// Returns `Ok(None)` for local and global versions. + /// Returns `Err(_)` if index is invalid. + pub fn version(&self, index: VersionIndex) -> Result<Option<&Version<'data>>> { + if index.index() <= elf::VER_NDX_GLOBAL { + return Ok(None); + } + self.versions + .get(usize::from(index.index())) + .filter(|version| version.valid) + .read_error("Invalid ELF symbol version index") + .map(Some) + } + + /// Return true if the given symbol index satisifies the requirements of `need`. + /// + /// Returns false for any error. + /// + /// Note: this function hasn't been fully tested and is likely to be incomplete. + pub fn matches(&self, endian: Elf::Endian, index: usize, need: Option<&Version>) -> bool { + let version_index = self.version_index(endian, index); + let def = match self.version(version_index) { + Ok(def) => def, + Err(_) => return false, + }; + match (def, need) { + (Some(def), Some(need)) => need.hash == def.hash && need.name == def.name, + (None, Some(_need)) => { + // Version must be present if needed. + false + } + (Some(_def), None) => { + // For a dlsym call, use the newest version. + // TODO: if not a dlsym call, then use the oldest version. + !version_index.is_hidden() + } + (None, None) => true, + } + } +} + +/// An iterator over the entries in an ELF `SHT_GNU_verdef` section. +#[derive(Debug, Clone)] +pub struct VerdefIterator<'data, Elf: FileHeader> { + endian: Elf::Endian, + data: Bytes<'data>, +} + +impl<'data, Elf: FileHeader> VerdefIterator<'data, Elf> { + pub(super) fn new(endian: Elf::Endian, data: &'data [u8]) -> Self { + VerdefIterator { + endian, + data: Bytes(data), + } + } + + /// Return the next `Verdef` entry. + pub fn next( + &mut self, + ) -> Result<Option<(&'data elf::Verdef<Elf::Endian>, VerdauxIterator<'data, Elf>)>> { + if self.data.is_empty() { + return Ok(None); + } + + let verdef = self + .data + .read_at::<elf::Verdef<_>>(0) + .read_error("ELF verdef is too short")?; + + let mut verdaux_data = self.data; + verdaux_data + .skip(verdef.vd_aux.get(self.endian) as usize) + .read_error("Invalid ELF vd_aux")?; + let verdaux = + VerdauxIterator::new(self.endian, verdaux_data.0, verdef.vd_cnt.get(self.endian)); + + let next = verdef.vd_next.get(self.endian); + if next != 0 { + self.data + .skip(next as usize) + .read_error("Invalid ELF vd_next")?; + } else { + self.data = Bytes(&[]); + } + Ok(Some((verdef, verdaux))) + } +} + +/// An iterator over the auxiliary records for an entry in an ELF `SHT_GNU_verdef` section. +#[derive(Debug, Clone)] +pub struct VerdauxIterator<'data, Elf: FileHeader> { + endian: Elf::Endian, + data: Bytes<'data>, + count: u16, +} + +impl<'data, Elf: FileHeader> VerdauxIterator<'data, Elf> { + pub(super) fn new(endian: Elf::Endian, data: &'data [u8], count: u16) -> Self { + VerdauxIterator { + endian, + data: Bytes(data), + count, + } + } + + /// Return the next `Verdaux` entry. + pub fn next(&mut self) -> Result<Option<&'data elf::Verdaux<Elf::Endian>>> { + if self.count == 0 { + return Ok(None); + } + + let verdaux = self + .data + .read_at::<elf::Verdaux<_>>(0) + .read_error("ELF verdaux is too short")?; + + self.data + .skip(verdaux.vda_next.get(self.endian) as usize) + .read_error("Invalid ELF vda_next")?; + self.count -= 1; + Ok(Some(verdaux)) + } +} + +/// An iterator over the entries in an ELF `SHT_GNU_verneed` section. +#[derive(Debug, Clone)] +pub struct VerneedIterator<'data, Elf: FileHeader> { + endian: Elf::Endian, + data: Bytes<'data>, +} + +impl<'data, Elf: FileHeader> VerneedIterator<'data, Elf> { + pub(super) fn new(endian: Elf::Endian, data: &'data [u8]) -> Self { + VerneedIterator { + endian, + data: Bytes(data), + } + } + + /// Return the next `Verneed` entry. + pub fn next( + &mut self, + ) -> Result< + Option<( + &'data elf::Verneed<Elf::Endian>, + VernauxIterator<'data, Elf>, + )>, + > { + if self.data.is_empty() { + return Ok(None); + } + + let verneed = self + .data + .read_at::<elf::Verneed<_>>(0) + .read_error("ELF verneed is too short")?; + + let mut vernaux_data = self.data; + vernaux_data + .skip(verneed.vn_aux.get(self.endian) as usize) + .read_error("Invalid ELF vn_aux")?; + let vernaux = + VernauxIterator::new(self.endian, vernaux_data.0, verneed.vn_cnt.get(self.endian)); + + let next = verneed.vn_next.get(self.endian); + if next != 0 { + self.data + .skip(next as usize) + .read_error("Invalid ELF vn_next")?; + } else { + self.data = Bytes(&[]); + } + Ok(Some((verneed, vernaux))) + } +} + +/// An iterator over the auxiliary records for an entry in an ELF `SHT_GNU_verneed` section. +#[derive(Debug, Clone)] +pub struct VernauxIterator<'data, Elf: FileHeader> { + endian: Elf::Endian, + data: Bytes<'data>, + count: u16, +} + +impl<'data, Elf: FileHeader> VernauxIterator<'data, Elf> { + pub(super) fn new(endian: Elf::Endian, data: &'data [u8], count: u16) -> Self { + VernauxIterator { + endian, + data: Bytes(data), + count, + } + } + + /// Return the next `Vernaux` entry. + pub fn next(&mut self) -> Result<Option<&'data elf::Vernaux<Elf::Endian>>> { + if self.count == 0 { + return Ok(None); + } + + let vernaux = self + .data + .read_at::<elf::Vernaux<_>>(0) + .read_error("ELF vernaux is too short")?; + + self.data + .skip(vernaux.vna_next.get(self.endian) as usize) + .read_error("Invalid ELF vna_next")?; + self.count -= 1; + Ok(Some(vernaux)) + } +} + +impl<Endian: endian::Endian> elf::Verdaux<Endian> { + /// Parse the version name from the string table. + pub fn name<'data, R: ReadRef<'data>>( + &self, + endian: Endian, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + strings + .get(self.vda_name.get(endian)) + .read_error("Invalid ELF vda_name") + } +} + +impl<Endian: endian::Endian> elf::Verneed<Endian> { + /// Parse the file from the string table. + pub fn file<'data, R: ReadRef<'data>>( + &self, + endian: Endian, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + strings + .get(self.vn_file.get(endian)) + .read_error("Invalid ELF vn_file") + } +} + +impl<Endian: endian::Endian> elf::Vernaux<Endian> { + /// Parse the version name from the string table. + pub fn name<'data, R: ReadRef<'data>>( + &self, + endian: Endian, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + strings + .get(self.vna_name.get(endian)) + .read_error("Invalid ELF vna_name") + } +} diff --git a/third_party/rust/object/src/read/macho/dyld_cache.rs b/third_party/rust/object/src/read/macho/dyld_cache.rs new file mode 100644 index 0000000000..0839ded7d7 --- /dev/null +++ b/third_party/rust/object/src/read/macho/dyld_cache.rs @@ -0,0 +1,343 @@ +use alloc::vec::Vec; +use core::slice; + +use crate::read::{Error, File, ReadError, ReadRef, Result}; +use crate::{macho, Architecture, Endian, Endianness}; + +/// A parsed representation of the dyld shared cache. +#[derive(Debug)] +pub struct DyldCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + endian: E, + data: R, + subcaches: Vec<DyldSubCache<'data, E, R>>, + mappings: &'data [macho::DyldCacheMappingInfo<E>], + images: &'data [macho::DyldCacheImageInfo<E>], + arch: Architecture, +} + +/// Information about a subcache. +#[derive(Debug)] +pub struct DyldSubCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + data: R, + mappings: &'data [macho::DyldCacheMappingInfo<E>], +} + +// This is the offset of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; + +impl<'data, E, R> DyldCache<'data, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// Parse the raw dyld shared cache data. + /// For shared caches from macOS 12 / iOS 15 and above, the subcache files need to be + /// supplied as well, in the correct order, with the .symbols subcache last (if present). + /// For example, data would be the data for dyld_shared_cache_x86_64, + /// and subcache_data would be the data for [dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...] + pub fn parse(data: R, subcache_data: &[R]) -> Result<Self> { + let header = macho::DyldCacheHeader::parse(data)?; + let (arch, endian) = header.parse_magic()?; + let mappings = header.mappings(endian, data)?; + + let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); + let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); + + if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + return Err(Error("Incorrect number of SubCaches")); + } + + // Split out the .symbols subcache data from the other subcaches. + let (symbols_subcache_data_and_uuid, subcache_data) = + if let Some(symbols_uuid) = symbols_subcache_uuid { + let (sym_data, rest_data) = subcache_data.split_last().unwrap(); + (Some((*sym_data, symbols_uuid)), rest_data) + } else { + (None, subcache_data) + }; + + // Read the regular SubCaches (.1, .2, ...), if present. + let mut subcaches = Vec::new(); + for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { + let sc_header = macho::DyldCacheHeader::<E>::parse(data)?; + if sc_header.uuid != info.uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); + } + + // Read the .symbols SubCache, if present. + // Other than the UUID verification, the symbols SubCache is currently unused. + let _symbols_subcache = match symbols_subcache_data_and_uuid { + Some((data, uuid)) => { + let sc_header = macho::DyldCacheHeader::<E>::parse(data)?; + if sc_header.uuid != uuid { + return Err(Error("Unexpected .symbols SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + Some(DyldSubCache { data, mappings }) + } + None => None, + }; + + let images = header.images(endian, data)?; + Ok(DyldCache { + endian, + data, + subcaches, + mappings, + images, + arch, + }) + } + + /// Get the architecture type of the file. + pub fn architecture(&self) -> Architecture { + self.arch + } + + /// Get the endianness of the file. + #[inline] + pub fn endianness(&self) -> Endianness { + if self.is_little_endian() { + Endianness::Little + } else { + Endianness::Big + } + } + + /// Return true if the file is little endian, false if it is big endian. + pub fn is_little_endian(&self) -> bool { + self.endian.is_little_endian() + } + + /// Iterate over the images in this cache. + pub fn images<'cache>(&'cache self) -> DyldCacheImageIterator<'data, 'cache, E, R> { + DyldCacheImageIterator { + cache: self, + iter: self.images.iter(), + } + } + + /// Find the address in a mapping and return the cache or subcache data it was found in, + /// together with the translated file offset. + pub fn data_and_offset_for_address(&self, address: u64) -> Option<(R, u64)> { + if let Some(file_offset) = address_to_file_offset(address, self.endian, self.mappings) { + return Some((self.data, file_offset)); + } + for subcache in &self.subcaches { + if let Some(file_offset) = + address_to_file_offset(address, self.endian, subcache.mappings) + { + return Some((subcache.data, file_offset)); + } + } + None + } +} + +/// An iterator over all the images (dylibs) in the dyld shared cache. +#[derive(Debug)] +pub struct DyldCacheImageIterator<'data, 'cache, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + cache: &'cache DyldCache<'data, E, R>, + iter: slice::Iter<'data, macho::DyldCacheImageInfo<E>>, +} + +impl<'data, 'cache, E, R> Iterator for DyldCacheImageIterator<'data, 'cache, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + type Item = DyldCacheImage<'data, 'cache, E, R>; + + fn next(&mut self) -> Option<DyldCacheImage<'data, 'cache, E, R>> { + let image_info = self.iter.next()?; + Some(DyldCacheImage { + cache: self.cache, + image_info, + }) + } +} + +/// One image (dylib) from inside the dyld shared cache. +#[derive(Debug)] +pub struct DyldCacheImage<'data, 'cache, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + pub(crate) cache: &'cache DyldCache<'data, E, R>, + image_info: &'data macho::DyldCacheImageInfo<E>, +} + +impl<'data, 'cache, E, R> DyldCacheImage<'data, 'cache, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// The file system path of this image. + pub fn path(&self) -> Result<&'data str> { + let path = self.image_info.path(self.cache.endian, self.cache.data)?; + // The path should always be ascii, so from_utf8 should alway succeed. + let path = core::str::from_utf8(path).map_err(|_| Error("Path string not valid utf-8"))?; + Ok(path) + } + + /// The subcache data which contains the Mach-O header for this image, + /// together with the file offset at which this image starts. + pub fn image_data_and_offset(&self) -> Result<(R, u64)> { + let address = self.image_info.address.get(self.cache.endian); + self.cache + .data_and_offset_for_address(address) + .ok_or(Error("Address not found in any mapping")) + } + + /// Parse this image into an Object. + pub fn parse_object(&self) -> Result<File<'data, R>> { + File::parse_dyld_cache_image(self) + } +} + +impl<E: Endian> macho::DyldCacheHeader<E> { + /// Read the dyld cache header. + pub fn parse<'data, R: ReadRef<'data>>(data: R) -> Result<&'data Self> { + data.read_at::<macho::DyldCacheHeader<E>>(0) + .read_error("Invalid dyld cache header size or alignment") + } + + /// Returns (arch, endian) based on the magic string. + pub fn parse_magic(&self) -> Result<(Architecture, E)> { + let (arch, is_big_endian) = match &self.magic { + b"dyld_v1 i386\0" => (Architecture::I386, false), + b"dyld_v1 x86_64\0" => (Architecture::X86_64, false), + b"dyld_v1 x86_64h\0" => (Architecture::X86_64, false), + b"dyld_v1 ppc\0" => (Architecture::PowerPc, true), + b"dyld_v1 armv6\0" => (Architecture::Arm, false), + b"dyld_v1 armv7\0" => (Architecture::Arm, false), + b"dyld_v1 armv7f\0" => (Architecture::Arm, false), + b"dyld_v1 armv7s\0" => (Architecture::Arm, false), + b"dyld_v1 armv7k\0" => (Architecture::Arm, false), + b"dyld_v1 arm64\0" => (Architecture::Aarch64, false), + b"dyld_v1 arm64e\0" => (Architecture::Aarch64, false), + _ => return Err(Error("Unrecognized dyld cache magic")), + }; + let endian = + E::from_big_endian(is_big_endian).read_error("Unsupported dyld cache endian")?; + Ok((arch, endian)) + } + + /// Return the mapping information table. + pub fn mappings<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result<&'data [macho::DyldCacheMappingInfo<E>]> { + data.read_slice_at::<macho::DyldCacheMappingInfo<E>>( + self.mapping_offset.get(endian).into(), + self.mapping_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache mapping size or alignment") + } + + /// Return the information about subcaches, if present. + pub fn subcaches<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result<Option<&'data [macho::DyldSubCacheInfo<E>]>> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let subcaches = data + .read_slice_at::<macho::DyldSubCacheInfo<E>>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(subcaches)) + } else { + Ok(None) + } + } + + /// Return the UUID for the .symbols subcache, if present. + pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let uuid = self.symbols_subcache_uuid; + if uuid != [0; 16] { + return Some(uuid); + } + } + None + } + + /// Return the image information table. + pub fn images<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result<&'data [macho::DyldCacheImageInfo<E>]> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + data.read_slice_at::<macho::DyldCacheImageInfo<E>>( + self.images_across_all_subcaches_offset.get(endian).into(), + self.images_across_all_subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } else { + data.read_slice_at::<macho::DyldCacheImageInfo<E>>( + self.images_offset.get(endian).into(), + self.images_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } + } +} + +impl<E: Endian> macho::DyldCacheImageInfo<E> { + /// The file system path of this image. + pub fn path<'data, R: ReadRef<'data>>(&self, endian: E, data: R) -> Result<&'data [u8]> { + let r_start = self.path_file_offset.get(endian).into(); + let r_end = data.len().read_error("Couldn't get data len()")?; + data.read_bytes_at_until(r_start..r_end, 0) + .read_error("Couldn't read dyld cache image path") + } + + /// Find the file offset of the image by looking up its address in the mappings. + pub fn file_offset( + &self, + endian: E, + mappings: &[macho::DyldCacheMappingInfo<E>], + ) -> Result<u64> { + let address = self.address.get(endian); + address_to_file_offset(address, endian, mappings) + .read_error("Invalid dyld cache image address") + } +} + +/// Find the file offset of the image by looking up its address in the mappings. +pub fn address_to_file_offset<E: Endian>( + address: u64, + endian: E, + mappings: &[macho::DyldCacheMappingInfo<E>], +) -> Option<u64> { + for mapping in mappings { + let mapping_address = mapping.address.get(endian); + if address >= mapping_address + && address < mapping_address.wrapping_add(mapping.size.get(endian)) + { + return Some(address - mapping_address + mapping.file_offset.get(endian)); + } + } + None +} diff --git a/third_party/rust/object/src/read/macho/fat.rs b/third_party/rust/object/src/read/macho/fat.rs new file mode 100644 index 0000000000..d4301b7e11 --- /dev/null +++ b/third_party/rust/object/src/read/macho/fat.rs @@ -0,0 +1,122 @@ +use crate::read::{Architecture, Error, ReadError, ReadRef, Result}; +use crate::{macho, BigEndian, Pod}; + +pub use macho::{FatArch32, FatArch64, FatHeader}; + +impl FatHeader { + /// Attempt to parse a fat header. + /// + /// Does not validate the magic value. + pub fn parse<'data, R: ReadRef<'data>>(file: R) -> Result<&'data FatHeader> { + file.read_at::<FatHeader>(0) + .read_error("Invalid fat header size or alignment") + } + + /// Attempt to parse a fat header and 32-bit fat arches. + pub fn parse_arch32<'data, R: ReadRef<'data>>(file: R) -> Result<&'data [FatArch32]> { + let mut offset = 0; + let header = file + .read::<FatHeader>(&mut offset) + .read_error("Invalid fat header size or alignment")?; + if header.magic.get(BigEndian) != macho::FAT_MAGIC { + return Err(Error("Invalid 32-bit fat magic")); + } + file.read_slice::<FatArch32>(&mut offset, header.nfat_arch.get(BigEndian) as usize) + .read_error("Invalid nfat_arch") + } + + /// Attempt to parse a fat header and 64-bit fat arches. + pub fn parse_arch64<'data, R: ReadRef<'data>>(file: R) -> Result<&'data [FatArch64]> { + let mut offset = 0; + let header = file + .read::<FatHeader>(&mut offset) + .read_error("Invalid fat header size or alignment")?; + if header.magic.get(BigEndian) != macho::FAT_MAGIC_64 { + return Err(Error("Invalid 64-bit fat magic")); + } + file.read_slice::<FatArch64>(&mut offset, header.nfat_arch.get(BigEndian) as usize) + .read_error("Invalid nfat_arch") + } +} + +/// A trait for generic access to `FatArch32` and `FatArch64`. +#[allow(missing_docs)] +pub trait FatArch: Pod { + type Word: Into<u64>; + + fn cputype(&self) -> u32; + fn cpusubtype(&self) -> u32; + fn offset(&self) -> Self::Word; + fn size(&self) -> Self::Word; + fn align(&self) -> u32; + + fn architecture(&self) -> Architecture { + match self.cputype() { + macho::CPU_TYPE_ARM => Architecture::Arm, + macho::CPU_TYPE_ARM64 => Architecture::Aarch64, + macho::CPU_TYPE_X86 => Architecture::I386, + macho::CPU_TYPE_X86_64 => Architecture::X86_64, + macho::CPU_TYPE_MIPS => Architecture::Mips, + macho::CPU_TYPE_POWERPC => Architecture::PowerPc, + macho::CPU_TYPE_POWERPC64 => Architecture::PowerPc64, + _ => Architecture::Unknown, + } + } + + fn file_range(&self) -> (u64, u64) { + (self.offset().into(), self.size().into()) + } + + fn data<'data, R: ReadRef<'data>>(&self, file: R) -> Result<&'data [u8]> { + file.read_bytes_at(self.offset().into(), self.size().into()) + .read_error("Invalid fat arch offset or size") + } +} + +impl FatArch for FatArch32 { + type Word = u32; + + fn cputype(&self) -> u32 { + self.cputype.get(BigEndian) + } + + fn cpusubtype(&self) -> u32 { + self.cpusubtype.get(BigEndian) + } + + fn offset(&self) -> Self::Word { + self.offset.get(BigEndian) + } + + fn size(&self) -> Self::Word { + self.size.get(BigEndian) + } + + fn align(&self) -> u32 { + self.align.get(BigEndian) + } +} + +impl FatArch for FatArch64 { + type Word = u64; + + fn cputype(&self) -> u32 { + self.cputype.get(BigEndian) + } + + fn cpusubtype(&self) -> u32 { + self.cpusubtype.get(BigEndian) + } + + fn offset(&self) -> Self::Word { + self.offset.get(BigEndian) + } + + fn size(&self) -> Self::Word { + self.size.get(BigEndian) + } + + fn align(&self) -> u32 { + self.align.get(BigEndian) + } +} diff --git a/third_party/rust/object/src/read/macho/file.rs b/third_party/rust/object/src/read/macho/file.rs new file mode 100644 index 0000000000..ab8c05757f --- /dev/null +++ b/third_party/rust/object/src/read/macho/file.rs @@ -0,0 +1,731 @@ +use alloc::vec::Vec; +use core::fmt::Debug; +use core::{mem, str}; + +use crate::read::{ + self, Architecture, ComdatKind, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, + Object, ObjectComdat, ObjectKind, ObjectMap, ObjectSection, ReadError, ReadRef, Result, + SectionIndex, SymbolIndex, +}; +use crate::{endian, macho, BigEndian, ByteString, Endian, Endianness, Pod}; + +use super::{ + DyldCacheImage, LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, + MachOSegment, MachOSegmentInternal, MachOSegmentIterator, MachOSymbol, MachOSymbolIterator, + MachOSymbolTable, Nlist, Section, Segment, SymbolTable, +}; + +/// A 32-bit Mach-O object file. +pub type MachOFile32<'data, Endian = Endianness, R = &'data [u8]> = + MachOFile<'data, macho::MachHeader32<Endian>, R>; +/// A 64-bit Mach-O object file. +pub type MachOFile64<'data, Endian = Endianness, R = &'data [u8]> = + MachOFile<'data, macho::MachHeader64<Endian>, R>; + +/// A partially parsed Mach-O file. +/// +/// Most of the functionality of this type is provided by the `Object` trait implementation. +#[derive(Debug)] +pub struct MachOFile<'data, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) endian: Mach::Endian, + pub(super) data: R, + pub(super) header_offset: u64, + pub(super) header: &'data Mach, + pub(super) segments: Vec<MachOSegmentInternal<'data, Mach, R>>, + pub(super) sections: Vec<MachOSectionInternal<'data, Mach>>, + pub(super) symbols: SymbolTable<'data, Mach, R>, +} + +impl<'data, Mach, R> MachOFile<'data, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + /// Parse the raw Mach-O file data. + pub fn parse(data: R) -> Result<Self> { + let header = Mach::parse(data, 0)?; + let endian = header.endian()?; + + // Build a list of segments and sections to make some operations more efficient. + let mut segments = Vec::new(); + let mut sections = Vec::new(); + let mut symbols = SymbolTable::default(); + if let Ok(mut commands) = header.load_commands(endian, data, 0) { + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + for section in segment.sections(endian, section_data)? { + let index = SectionIndex(sections.len() + 1); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); + } + } else if let Some(symtab) = command.symtab()? { + symbols = symtab.symbols(endian, data)?; + } + } + } + + Ok(MachOFile { + endian, + data, + header_offset: 0, + header, + segments, + sections, + symbols, + }) + } + + /// Parse the Mach-O file for the given image from the dyld shared cache. + /// This will read different sections from different subcaches, if necessary. + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &DyldCacheImage<'data, 'cache, E, R>, + ) -> Result<Self> { + let (data, header_offset) = image.image_data_and_offset()?; + let header = Mach::parse(data, header_offset)?; + let endian = header.endian()?; + + // Build a list of sections to make some operations more efficient. + // Also build a list of segments, because we need to remember which ReadRef + // to read each section's data from. Only the DyldCache knows this information, + // and we won't have access to it once we've exited this function. + let mut segments = Vec::new(); + let mut sections = Vec::new(); + let mut linkedit_data: Option<R> = None; + let mut symtab = None; + if let Ok(mut commands) = header.load_commands(endian, data, header_offset) { + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + // Each segment can be stored in a different subcache. Get the segment's + // address and look it up in the cache mappings, to find the correct cache data. + let addr = segment.vmaddr(endian).into(); + let (data, _offset) = image + .cache + .data_and_offset_for_address(addr) + .read_error("Could not find segment data in dyld shared cache")?; + if segment.name() == macho::SEG_LINKEDIT.as_bytes() { + linkedit_data = Some(data); + } + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + + for section in segment.sections(endian, section_data)? { + let index = SectionIndex(sections.len() + 1); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); + } + } else if let Some(st) = command.symtab()? { + symtab = Some(st); + } + } + } + + // The symbols are found in the __LINKEDIT segment, so make sure to read them from the + // correct subcache. + let symbols = match (symtab, linkedit_data) { + (Some(symtab), Some(linkedit_data)) => symtab.symbols(endian, linkedit_data)?, + _ => SymbolTable::default(), + }; + + Ok(MachOFile { + endian, + data, + header_offset, + header, + segments, + sections, + symbols, + }) + } + + /// Return the section at the given index. + #[inline] + pub(super) fn section_internal( + &self, + index: SectionIndex, + ) -> Result<&MachOSectionInternal<'data, Mach>> { + index + .0 + .checked_sub(1) + .and_then(|index| self.sections.get(index)) + .read_error("Invalid Mach-O section index") + } + + pub(super) fn segment_internal( + &self, + index: usize, + ) -> Result<&MachOSegmentInternal<'data, Mach, R>> { + self.segments + .get(index) + .read_error("Invalid Mach-O segment index") + } +} + +impl<'data, Mach, R> read::private::Sealed for MachOFile<'data, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> Object<'data, 'file> for MachOFile<'data, Mach, R> +where + 'data: 'file, + Mach: MachHeader, + R: 'file + ReadRef<'data>, +{ + type Segment = MachOSegment<'data, 'file, Mach, R>; + type SegmentIterator = MachOSegmentIterator<'data, 'file, Mach, R>; + type Section = MachOSection<'data, 'file, Mach, R>; + type SectionIterator = MachOSectionIterator<'data, 'file, Mach, R>; + type Comdat = MachOComdat<'data, 'file, Mach, R>; + type ComdatIterator = MachOComdatIterator<'data, 'file, Mach, R>; + type Symbol = MachOSymbol<'data, 'file, Mach, R>; + type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>; + type SymbolTable = MachOSymbolTable<'data, 'file, Mach, R>; + type DynamicRelocationIterator = NoDynamicRelocationIterator; + + fn architecture(&self) -> Architecture { + match self.header.cputype(self.endian) { + macho::CPU_TYPE_ARM => Architecture::Arm, + macho::CPU_TYPE_ARM64 => Architecture::Aarch64, + macho::CPU_TYPE_X86 => Architecture::I386, + macho::CPU_TYPE_X86_64 => Architecture::X86_64, + macho::CPU_TYPE_MIPS => Architecture::Mips, + macho::CPU_TYPE_POWERPC => Architecture::PowerPc, + macho::CPU_TYPE_POWERPC64 => Architecture::PowerPc64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.header.is_little_endian() + } + + #[inline] + fn is_64(&self) -> bool { + self.header.is_type_64() + } + + fn kind(&self) -> ObjectKind { + match self.header.filetype(self.endian) { + macho::MH_OBJECT => ObjectKind::Relocatable, + macho::MH_EXECUTE => ObjectKind::Executable, + macho::MH_CORE => ObjectKind::Core, + macho::MH_DYLIB => ObjectKind::Dynamic, + _ => ObjectKind::Unknown, + } + } + + fn segments(&'file self) -> MachOSegmentIterator<'data, 'file, Mach, R> { + MachOSegmentIterator { + file: self, + iter: self.segments.iter(), + } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option<MachOSection<'data, 'file, Mach, R>> { + // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg + // ".debug_info" to "__debug_info", and limit to 16 bytes total. + let system_name = if section_name.starts_with(b".") { + if section_name.len() > 15 { + Some(§ion_name[1..15]) + } else { + Some(§ion_name[1..]) + } + } else { + None + }; + let cmp_section_name = |section: &MachOSection<'data, 'file, Mach, R>| { + section + .name_bytes() + .map(|name| { + section_name == name + || system_name + .filter(|system_name| { + name.starts_with(b"__") && name[2..] == **system_name + }) + .is_some() + }) + .unwrap_or(false) + }; + + self.sections().find(cmp_section_name) + } + + fn section_by_index( + &'file self, + index: SectionIndex, + ) -> Result<MachOSection<'data, 'file, Mach, R>> { + let internal = *self.section_internal(index)?; + Ok(MachOSection { + file: self, + internal, + }) + } + + fn sections(&'file self) -> MachOSectionIterator<'data, 'file, Mach, R> { + MachOSectionIterator { + file: self, + iter: self.sections.iter(), + } + } + + fn comdats(&'file self) -> MachOComdatIterator<'data, 'file, Mach, R> { + MachOComdatIterator { file: self } + } + + fn symbol_by_index( + &'file self, + index: SymbolIndex, + ) -> Result<MachOSymbol<'data, 'file, Mach, R>> { + let nlist = self.symbols.symbol(index.0)?; + MachOSymbol::new(self, index, nlist).read_error("Unsupported Mach-O symbol index") + } + + fn symbols(&'file self) -> MachOSymbolIterator<'data, 'file, Mach, R> { + MachOSymbolIterator { + file: self, + index: 0, + } + } + + #[inline] + fn symbol_table(&'file self) -> Option<MachOSymbolTable<'data, 'file, Mach, R>> { + Some(MachOSymbolTable { file: self }) + } + + fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data, 'file, Mach, R> { + MachOSymbolIterator { + file: self, + index: self.symbols.len(), + } + } + + #[inline] + fn dynamic_symbol_table(&'file self) -> Option<MachOSymbolTable<'data, 'file, Mach, R>> { + None + } + + fn object_map(&'file self) -> ObjectMap<'data> { + self.symbols.object_map(self.endian) + } + + fn imports(&self) -> Result<Vec<Import<'data>>> { + let mut dysymtab = None; + let mut libraries = Vec::new(); + let twolevel = self.header.flags(self.endian) & macho::MH_TWOLEVEL != 0; + if twolevel { + libraries.push(&[][..]); + } + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; + while let Some(command) = commands.next()? { + if let Some(command) = command.dysymtab()? { + dysymtab = Some(command); + } + if twolevel { + if let Some(dylib) = command.dylib()? { + libraries.push(command.string(self.endian, dylib.dylib.name)?); + } + } + } + + let mut imports = Vec::new(); + if let Some(dysymtab) = dysymtab { + let index = dysymtab.iundefsym.get(self.endian) as usize; + let number = dysymtab.nundefsym.get(self.endian) as usize; + for i in index..(index.wrapping_add(number)) { + let symbol = self.symbols.symbol(i)?; + let name = symbol.name(self.endian, self.symbols.strings())?; + let library = if twolevel { + libraries + .get(symbol.library_ordinal(self.endian) as usize) + .copied() + .read_error("Invalid Mach-O symbol library ordinal")? + } else { + &[] + }; + imports.push(Import { + name: ByteString(name), + library: ByteString(library), + }); + } + } + Ok(imports) + } + + fn exports(&self) -> Result<Vec<Export<'data>>> { + let mut dysymtab = None; + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; + while let Some(command) = commands.next()? { + if let Some(command) = command.dysymtab()? { + dysymtab = Some(command); + break; + } + } + + let mut exports = Vec::new(); + if let Some(dysymtab) = dysymtab { + let index = dysymtab.iextdefsym.get(self.endian) as usize; + let number = dysymtab.nextdefsym.get(self.endian) as usize; + for i in index..(index.wrapping_add(number)) { + let symbol = self.symbols.symbol(i)?; + let name = symbol.name(self.endian, self.symbols.strings())?; + let address = symbol.n_value(self.endian).into(); + exports.push(Export { + name: ByteString(name), + address, + }); + } + } + Ok(exports) + } + + #[inline] + fn dynamic_relocations(&'file self) -> Option<NoDynamicRelocationIterator> { + None + } + + fn has_debug_symbols(&self) -> bool { + self.section_by_name(".debug_info").is_some() + } + + fn mach_uuid(&self) -> Result<Option<[u8; 16]>> { + self.header.uuid(self.endian, self.data, self.header_offset) + } + + fn relative_address_base(&self) -> u64 { + 0 + } + + fn entry(&self) -> u64 { + if let Ok(mut commands) = + self.header + .load_commands(self.endian, self.data, self.header_offset) + { + while let Ok(Some(command)) = commands.next() { + if let Ok(Some(command)) = command.entry_point() { + return command.entryoff.get(self.endian); + } + } + } + 0 + } + + fn flags(&self) -> FileFlags { + FileFlags::MachO { + flags: self.header.flags(self.endian), + } + } +} + +/// An iterator over the COMDAT section groups of a `MachOFile64`. +pub type MachOComdatIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatIterator<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the COMDAT section groups of a `MachOFile64`. +pub type MachOComdatIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatIterator<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// An iterator over the COMDAT section groups of a `MachOFile`. +#[derive(Debug)] +pub struct MachOComdatIterator<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> Iterator for MachOComdatIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOComdat<'data, 'file, Mach, R>; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A COMDAT section group of a `MachOFile32`. +pub type MachOComdat32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdat<'data, 'file, macho::MachHeader32<Endian>, R>; + +/// A COMDAT section group of a `MachOFile64`. +pub type MachOComdat64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdat<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// A COMDAT section group of a `MachOFile`. +#[derive(Debug)] +pub struct MachOComdat<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOComdat<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectComdat<'data> for MachOComdat<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type SectionIterator = MachOComdatSectionIterator<'data, 'file, Mach, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + unreachable!(); + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + unreachable!(); + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + unreachable!(); + } + + #[inline] + fn name(&self) -> Result<&str> { + unreachable!(); + } + + #[inline] + fn sections(&self) -> Self::SectionIterator { + unreachable!(); + } +} + +/// An iterator over the sections in a COMDAT section group of a `MachOFile32`. +pub type MachOComdatSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatSectionIterator<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the sections in a COMDAT section group of a `MachOFile64`. +pub type MachOComdatSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOComdatSectionIterator<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// An iterator over the sections in a COMDAT section group of a `MachOFile`. +#[derive(Debug)] +pub struct MachOComdatSectionIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> Iterator for MachOComdatSectionIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A trait for generic access to `MachHeader32` and `MachHeader64`. +#[allow(missing_docs)] +pub trait MachHeader: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + type Segment: Segment<Endian = Self::Endian, Section = Self::Section>; + type Section: Section<Endian = Self::Endian>; + type Nlist: Nlist<Endian = Self::Endian>; + + /// Return true if this type is a 64-bit header. + /// + /// This is a property of the type, not a value in the header data. + fn is_type_64(&self) -> bool; + + /// Return true if the `magic` field signifies big-endian. + fn is_big_endian(&self) -> bool; + + /// Return true if the `magic` field signifies little-endian. + fn is_little_endian(&self) -> bool; + + fn magic(&self) -> u32; + fn cputype(&self, endian: Self::Endian) -> u32; + fn cpusubtype(&self, endian: Self::Endian) -> u32; + fn filetype(&self, endian: Self::Endian) -> u32; + fn ncmds(&self, endian: Self::Endian) -> u32; + fn sizeofcmds(&self, endian: Self::Endian) -> u32; + fn flags(&self, endian: Self::Endian) -> u32; + + // Provided methods. + + /// Read the file header. + /// + /// Also checks that the magic field in the file header is a supported format. + fn parse<'data, R: ReadRef<'data>>(data: R, offset: u64) -> read::Result<&'data Self> { + let header = data + .read_at::<Self>(offset) + .read_error("Invalid Mach-O header size or alignment")?; + if !header.is_supported() { + return Err(Error("Unsupported Mach-O header")); + } + Ok(header) + } + + fn is_supported(&self) -> bool { + self.is_little_endian() || self.is_big_endian() + } + + fn endian(&self) -> Result<Self::Endian> { + Self::Endian::from_big_endian(self.is_big_endian()).read_error("Unsupported Mach-O endian") + } + + fn load_commands<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + header_offset: u64, + ) -> Result<LoadCommandIterator<'data, Self::Endian>> { + let data = data + .read_bytes_at( + header_offset + mem::size_of::<Self>() as u64, + self.sizeofcmds(endian).into(), + ) + .read_error("Invalid Mach-O load command table size")?; + Ok(LoadCommandIterator::new(endian, data, self.ncmds(endian))) + } + + /// Return the UUID from the `LC_UUID` load command, if one is present. + fn uuid<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + header_offset: u64, + ) -> Result<Option<[u8; 16]>> { + let mut commands = self.load_commands(endian, data, header_offset)?; + while let Some(command) = commands.next()? { + if let Ok(Some(uuid)) = command.uuid() { + return Ok(Some(uuid.uuid)); + } + } + Ok(None) + } +} + +impl<Endian: endian::Endian> MachHeader for macho::MachHeader32<Endian> { + type Word = u32; + type Endian = Endian; + type Segment = macho::SegmentCommand32<Endian>; + type Section = macho::Section32<Endian>; + type Nlist = macho::Nlist32<Endian>; + + fn is_type_64(&self) -> bool { + false + } + + fn is_big_endian(&self) -> bool { + self.magic() == macho::MH_MAGIC + } + + fn is_little_endian(&self) -> bool { + self.magic() == macho::MH_CIGAM + } + + fn magic(&self) -> u32 { + self.magic.get(BigEndian) + } + + fn cputype(&self, endian: Self::Endian) -> u32 { + self.cputype.get(endian) + } + + fn cpusubtype(&self, endian: Self::Endian) -> u32 { + self.cpusubtype.get(endian) + } + + fn filetype(&self, endian: Self::Endian) -> u32 { + self.filetype.get(endian) + } + + fn ncmds(&self, endian: Self::Endian) -> u32 { + self.ncmds.get(endian) + } + + fn sizeofcmds(&self, endian: Self::Endian) -> u32 { + self.sizeofcmds.get(endian) + } + + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} + +impl<Endian: endian::Endian> MachHeader for macho::MachHeader64<Endian> { + type Word = u64; + type Endian = Endian; + type Segment = macho::SegmentCommand64<Endian>; + type Section = macho::Section64<Endian>; + type Nlist = macho::Nlist64<Endian>; + + fn is_type_64(&self) -> bool { + true + } + + fn is_big_endian(&self) -> bool { + self.magic() == macho::MH_MAGIC_64 + } + + fn is_little_endian(&self) -> bool { + self.magic() == macho::MH_CIGAM_64 + } + + fn magic(&self) -> u32 { + self.magic.get(BigEndian) + } + + fn cputype(&self, endian: Self::Endian) -> u32 { + self.cputype.get(endian) + } + + fn cpusubtype(&self, endian: Self::Endian) -> u32 { + self.cpusubtype.get(endian) + } + + fn filetype(&self, endian: Self::Endian) -> u32 { + self.filetype.get(endian) + } + + fn ncmds(&self, endian: Self::Endian) -> u32 { + self.ncmds.get(endian) + } + + fn sizeofcmds(&self, endian: Self::Endian) -> u32 { + self.sizeofcmds.get(endian) + } + + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} diff --git a/third_party/rust/object/src/read/macho/load_command.rs b/third_party/rust/object/src/read/macho/load_command.rs new file mode 100644 index 0000000000..10daf4ed10 --- /dev/null +++ b/third_party/rust/object/src/read/macho/load_command.rs @@ -0,0 +1,353 @@ +use core::marker::PhantomData; + +use crate::endian::Endian; +use crate::macho; +use crate::pod::Pod; +use crate::read::macho::{MachHeader, SymbolTable}; +use crate::read::{Bytes, ReadError, ReadRef, Result, StringTable}; + +/// An iterator over the load commands of a `MachHeader`. +#[derive(Debug, Default, Clone, Copy)] +pub struct LoadCommandIterator<'data, E: Endian> { + endian: E, + data: Bytes<'data>, + ncmds: u32, +} + +impl<'data, E: Endian> LoadCommandIterator<'data, E> { + pub(super) fn new(endian: E, data: &'data [u8], ncmds: u32) -> Self { + LoadCommandIterator { + endian, + data: Bytes(data), + ncmds, + } + } + + /// Return the next load command. + pub fn next(&mut self) -> Result<Option<LoadCommandData<'data, E>>> { + if self.ncmds == 0 { + return Ok(None); + } + let header = self + .data + .read_at::<macho::LoadCommand<E>>(0) + .read_error("Invalid Mach-O load command header")?; + let cmd = header.cmd.get(self.endian); + let cmdsize = header.cmdsize.get(self.endian) as usize; + let data = self + .data + .read_bytes(cmdsize) + .read_error("Invalid Mach-O load command size")?; + self.ncmds -= 1; + Ok(Some(LoadCommandData { + cmd, + data, + marker: Default::default(), + })) + } +} + +/// The data for a `LoadCommand`. +#[derive(Debug, Clone, Copy)] +pub struct LoadCommandData<'data, E: Endian> { + cmd: u32, + // Includes the header. + data: Bytes<'data>, + marker: PhantomData<E>, +} + +impl<'data, E: Endian> LoadCommandData<'data, E> { + /// Return the `cmd` field of the `LoadCommand`. + /// + /// This is one of the `LC_` constants. + pub fn cmd(&self) -> u32 { + self.cmd + } + + /// Return the `cmdsize` field of the `LoadCommand`. + pub fn cmdsize(&self) -> u32 { + self.data.len() as u32 + } + + /// Parse the data as the given type. + #[inline] + pub fn data<T: Pod>(&self) -> Result<&'data T> { + self.data + .read_at(0) + .read_error("Invalid Mach-O command size") + } + + /// Raw bytes of this LoadCommand structure. + pub fn raw_data(&self) -> &'data [u8] { + self.data.0 + } + + /// Parse a load command string value. + /// + /// Strings used by load commands are specified by offsets that are + /// relative to the load command header. + pub fn string(&self, endian: E, s: macho::LcStr<E>) -> Result<&'data [u8]> { + self.data + .read_string_at(s.offset.get(endian) as usize) + .read_error("Invalid load command string offset") + } + + /// Parse the command data according to the `cmd` field. + pub fn variant(&self) -> Result<LoadCommandVariant<'data, E>> { + Ok(match self.cmd { + macho::LC_SEGMENT => { + let mut data = self.data; + let segment = data.read().read_error("Invalid Mach-O command size")?; + LoadCommandVariant::Segment32(segment, data.0) + } + macho::LC_SYMTAB => LoadCommandVariant::Symtab(self.data()?), + macho::LC_THREAD | macho::LC_UNIXTHREAD => { + let mut data = self.data; + let thread = data.read().read_error("Invalid Mach-O command size")?; + LoadCommandVariant::Thread(thread, data.0) + } + macho::LC_DYSYMTAB => LoadCommandVariant::Dysymtab(self.data()?), + macho::LC_LOAD_DYLIB + | macho::LC_LOAD_WEAK_DYLIB + | macho::LC_REEXPORT_DYLIB + | macho::LC_LAZY_LOAD_DYLIB + | macho::LC_LOAD_UPWARD_DYLIB => LoadCommandVariant::Dylib(self.data()?), + macho::LC_ID_DYLIB => LoadCommandVariant::IdDylib(self.data()?), + macho::LC_LOAD_DYLINKER => LoadCommandVariant::LoadDylinker(self.data()?), + macho::LC_ID_DYLINKER => LoadCommandVariant::IdDylinker(self.data()?), + macho::LC_PREBOUND_DYLIB => LoadCommandVariant::PreboundDylib(self.data()?), + macho::LC_ROUTINES => LoadCommandVariant::Routines32(self.data()?), + macho::LC_SUB_FRAMEWORK => LoadCommandVariant::SubFramework(self.data()?), + macho::LC_SUB_UMBRELLA => LoadCommandVariant::SubUmbrella(self.data()?), + macho::LC_SUB_CLIENT => LoadCommandVariant::SubClient(self.data()?), + macho::LC_SUB_LIBRARY => LoadCommandVariant::SubLibrary(self.data()?), + macho::LC_TWOLEVEL_HINTS => LoadCommandVariant::TwolevelHints(self.data()?), + macho::LC_PREBIND_CKSUM => LoadCommandVariant::PrebindCksum(self.data()?), + macho::LC_SEGMENT_64 => { + let mut data = self.data; + let segment = data.read().read_error("Invalid Mach-O command size")?; + LoadCommandVariant::Segment64(segment, data.0) + } + macho::LC_ROUTINES_64 => LoadCommandVariant::Routines64(self.data()?), + macho::LC_UUID => LoadCommandVariant::Uuid(self.data()?), + macho::LC_RPATH => LoadCommandVariant::Rpath(self.data()?), + macho::LC_CODE_SIGNATURE + | macho::LC_SEGMENT_SPLIT_INFO + | macho::LC_FUNCTION_STARTS + | macho::LC_DATA_IN_CODE + | macho::LC_DYLIB_CODE_SIGN_DRS + | macho::LC_LINKER_OPTIMIZATION_HINT + | macho::LC_DYLD_EXPORTS_TRIE + | macho::LC_DYLD_CHAINED_FIXUPS => LoadCommandVariant::LinkeditData(self.data()?), + macho::LC_ENCRYPTION_INFO => LoadCommandVariant::EncryptionInfo32(self.data()?), + macho::LC_DYLD_INFO | macho::LC_DYLD_INFO_ONLY => { + LoadCommandVariant::DyldInfo(self.data()?) + } + macho::LC_VERSION_MIN_MACOSX + | macho::LC_VERSION_MIN_IPHONEOS + | macho::LC_VERSION_MIN_TVOS + | macho::LC_VERSION_MIN_WATCHOS => LoadCommandVariant::VersionMin(self.data()?), + macho::LC_DYLD_ENVIRONMENT => LoadCommandVariant::DyldEnvironment(self.data()?), + macho::LC_MAIN => LoadCommandVariant::EntryPoint(self.data()?), + macho::LC_SOURCE_VERSION => LoadCommandVariant::SourceVersion(self.data()?), + macho::LC_ENCRYPTION_INFO_64 => LoadCommandVariant::EncryptionInfo64(self.data()?), + macho::LC_LINKER_OPTION => LoadCommandVariant::LinkerOption(self.data()?), + macho::LC_NOTE => LoadCommandVariant::Note(self.data()?), + macho::LC_BUILD_VERSION => LoadCommandVariant::BuildVersion(self.data()?), + macho::LC_FILESET_ENTRY => LoadCommandVariant::FilesetEntry(self.data()?), + _ => LoadCommandVariant::Other, + }) + } + + /// Try to parse this command as a `SegmentCommand32`. + /// + /// Returns the segment command and the data containing the sections. + pub fn segment_32(self) -> Result<Option<(&'data macho::SegmentCommand32<E>, &'data [u8])>> { + if self.cmd == macho::LC_SEGMENT { + let mut data = self.data; + let segment = data.read().read_error("Invalid Mach-O command size")?; + Ok(Some((segment, data.0))) + } else { + Ok(None) + } + } + + /// Try to parse this command as a `SymtabCommand`. + /// + /// Returns the segment command and the data containing the sections. + pub fn symtab(self) -> Result<Option<&'data macho::SymtabCommand<E>>> { + if self.cmd == macho::LC_SYMTAB { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `DysymtabCommand`. + pub fn dysymtab(self) -> Result<Option<&'data macho::DysymtabCommand<E>>> { + if self.cmd == macho::LC_DYSYMTAB { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `DylibCommand`. + pub fn dylib(self) -> Result<Option<&'data macho::DylibCommand<E>>> { + if self.cmd == macho::LC_LOAD_DYLIB + || self.cmd == macho::LC_LOAD_WEAK_DYLIB + || self.cmd == macho::LC_REEXPORT_DYLIB + || self.cmd == macho::LC_LAZY_LOAD_DYLIB + || self.cmd == macho::LC_LOAD_UPWARD_DYLIB + { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `UuidCommand`. + pub fn uuid(self) -> Result<Option<&'data macho::UuidCommand<E>>> { + if self.cmd == macho::LC_UUID { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as a `SegmentCommand64`. + pub fn segment_64(self) -> Result<Option<(&'data macho::SegmentCommand64<E>, &'data [u8])>> { + if self.cmd == macho::LC_SEGMENT_64 { + let mut data = self.data; + let command = data.read().read_error("Invalid Mach-O command size")?; + Ok(Some((command, data.0))) + } else { + Ok(None) + } + } + + /// Try to parse this command as a `DyldInfoCommand`. + pub fn dyld_info(self) -> Result<Option<&'data macho::DyldInfoCommand<E>>> { + if self.cmd == macho::LC_DYLD_INFO || self.cmd == macho::LC_DYLD_INFO_ONLY { + Some(self.data()).transpose() + } else { + Ok(None) + } + } + + /// Try to parse this command as an `EntryPointCommand`. + pub fn entry_point(self) -> Result<Option<&'data macho::EntryPointCommand<E>>> { + if self.cmd == macho::LC_MAIN { + Some(self.data()).transpose() + } else { + Ok(None) + } + } +} + +/// A `LoadCommand` that has been interpreted according to its `cmd` field. +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum LoadCommandVariant<'data, E: Endian> { + /// `LC_SEGMENT` + Segment32(&'data macho::SegmentCommand32<E>, &'data [u8]), + /// `LC_SYMTAB` + Symtab(&'data macho::SymtabCommand<E>), + // obsolete: `LC_SYMSEG` + //Symseg(&'data macho::SymsegCommand<E>), + /// `LC_THREAD` or `LC_UNIXTHREAD` + Thread(&'data macho::ThreadCommand<E>, &'data [u8]), + // obsolete: `LC_IDFVMLIB` or `LC_LOADFVMLIB` + //Fvmlib(&'data macho::FvmlibCommand<E>), + // obsolete: `LC_IDENT` + //Ident(&'data macho::IdentCommand<E>), + // internal: `LC_FVMFILE` + //Fvmfile(&'data macho::FvmfileCommand<E>), + // internal: `LC_PREPAGE` + /// `LC_DYSYMTAB` + Dysymtab(&'data macho::DysymtabCommand<E>), + /// `LC_LOAD_DYLIB`, `LC_LOAD_WEAK_DYLIB`, `LC_REEXPORT_DYLIB`, + /// `LC_LAZY_LOAD_DYLIB`, or `LC_LOAD_UPWARD_DYLIB` + Dylib(&'data macho::DylibCommand<E>), + /// `LC_ID_DYLIB` + IdDylib(&'data macho::DylibCommand<E>), + /// `LC_LOAD_DYLINKER` + LoadDylinker(&'data macho::DylinkerCommand<E>), + /// `LC_ID_DYLINKER` + IdDylinker(&'data macho::DylinkerCommand<E>), + /// `LC_PREBOUND_DYLIB` + PreboundDylib(&'data macho::PreboundDylibCommand<E>), + /// `LC_ROUTINES` + Routines32(&'data macho::RoutinesCommand32<E>), + /// `LC_SUB_FRAMEWORK` + SubFramework(&'data macho::SubFrameworkCommand<E>), + /// `LC_SUB_UMBRELLA` + SubUmbrella(&'data macho::SubUmbrellaCommand<E>), + /// `LC_SUB_CLIENT` + SubClient(&'data macho::SubClientCommand<E>), + /// `LC_SUB_LIBRARY` + SubLibrary(&'data macho::SubLibraryCommand<E>), + /// `LC_TWOLEVEL_HINTS` + TwolevelHints(&'data macho::TwolevelHintsCommand<E>), + /// `LC_PREBIND_CKSUM` + PrebindCksum(&'data macho::PrebindCksumCommand<E>), + /// `LC_SEGMENT_64` + Segment64(&'data macho::SegmentCommand64<E>, &'data [u8]), + /// `LC_ROUTINES_64` + Routines64(&'data macho::RoutinesCommand64<E>), + /// `LC_UUID` + Uuid(&'data macho::UuidCommand<E>), + /// `LC_RPATH` + Rpath(&'data macho::RpathCommand<E>), + /// `LC_CODE_SIGNATURE`, `LC_SEGMENT_SPLIT_INFO`, `LC_FUNCTION_STARTS`, + /// `LC_DATA_IN_CODE`, `LC_DYLIB_CODE_SIGN_DRS`, `LC_LINKER_OPTIMIZATION_HINT`, + /// `LC_DYLD_EXPORTS_TRIE`, or `LC_DYLD_CHAINED_FIXUPS`. + LinkeditData(&'data macho::LinkeditDataCommand<E>), + /// `LC_ENCRYPTION_INFO` + EncryptionInfo32(&'data macho::EncryptionInfoCommand32<E>), + /// `LC_DYLD_INFO` or `LC_DYLD_INFO_ONLY` + DyldInfo(&'data macho::DyldInfoCommand<E>), + /// `LC_VERSION_MIN_MACOSX`, `LC_VERSION_MIN_IPHONEOS`, `LC_VERSION_MIN_WATCHOS`, + /// or `LC_VERSION_MIN_TVOS` + VersionMin(&'data macho::VersionMinCommand<E>), + /// `LC_DYLD_ENVIRONMENT` + DyldEnvironment(&'data macho::DylinkerCommand<E>), + /// `LC_MAIN` + EntryPoint(&'data macho::EntryPointCommand<E>), + /// `LC_SOURCE_VERSION` + SourceVersion(&'data macho::SourceVersionCommand<E>), + /// `LC_ENCRYPTION_INFO_64` + EncryptionInfo64(&'data macho::EncryptionInfoCommand64<E>), + /// `LC_LINKER_OPTION` + LinkerOption(&'data macho::LinkerOptionCommand<E>), + /// `LC_NOTE` + Note(&'data macho::NoteCommand<E>), + /// `LC_BUILD_VERSION` + BuildVersion(&'data macho::BuildVersionCommand<E>), + /// `LC_FILESET_ENTRY` + FilesetEntry(&'data macho::FilesetEntryCommand<E>), + /// An unrecognized or obsolete load command. + Other, +} + +impl<E: Endian> macho::SymtabCommand<E> { + /// Return the symbol table that this command references. + pub fn symbols<'data, Mach: MachHeader<Endian = E>, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result<SymbolTable<'data, Mach, R>> { + let symbols = data + .read_slice_at( + self.symoff.get(endian).into(), + self.nsyms.get(endian) as usize, + ) + .read_error("Invalid Mach-O symbol table offset or size")?; + let str_start: u64 = self.stroff.get(endian).into(); + let str_end = str_start + .checked_add(self.strsize.get(endian).into()) + .read_error("Invalid Mach-O string table length")?; + let strings = StringTable::new(data, str_start, str_end); + Ok(SymbolTable::new(symbols, strings)) + } +} diff --git a/third_party/rust/object/src/read/macho/mod.rs b/third_party/rust/object/src/read/macho/mod.rs new file mode 100644 index 0000000000..f07ed581b6 --- /dev/null +++ b/third_party/rust/object/src/read/macho/mod.rs @@ -0,0 +1,30 @@ +//! Support for reading Mach-O files. +//! +//! Defines traits to abstract over the difference between 32-bit and 64-bit +//! Mach-O files, and implements read functionality in terms of these traits. +//! +//! Also provides `MachOFile` and related types which implement the `Object` trait. + +mod dyld_cache; +pub use dyld_cache::*; + +mod fat; +pub use fat::*; + +mod file; +pub use file::*; + +mod load_command; +pub use load_command::*; + +mod segment; +pub use segment::*; + +mod section; +pub use section::*; + +mod symbol; +pub use symbol::*; + +mod relocation; +pub use relocation::*; diff --git a/third_party/rust/object/src/read/macho/relocation.rs b/third_party/rust/object/src/read/macho/relocation.rs new file mode 100644 index 0000000000..5dd7df8966 --- /dev/null +++ b/third_party/rust/object/src/read/macho/relocation.rs @@ -0,0 +1,126 @@ +use core::{fmt, slice}; + +use crate::endian::Endianness; +use crate::macho; +use crate::read::{ + ReadRef, Relocation, RelocationEncoding, RelocationKind, RelocationTarget, SectionIndex, + SymbolIndex, +}; + +use super::{MachHeader, MachOFile}; + +/// An iterator over the relocations in a `MachOSection32`. +pub type MachORelocationIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachORelocationIterator<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the relocations in a `MachOSection64`. +pub type MachORelocationIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachORelocationIterator<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// An iterator over the relocations in a `MachOSection`. +pub struct MachORelocationIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) relocations: slice::Iter<'data, macho::Relocation<Mach::Endian>>, +} + +impl<'data, 'file, Mach, R> Iterator for MachORelocationIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + loop { + let reloc = self.relocations.next()?; + let endian = self.file.endian; + let cputype = self.file.header.cputype(endian); + if reloc.r_scattered(endian, cputype) { + // FIXME: handle scattered relocations + // We need to add `RelocationTarget::Address` for this. + continue; + } + let reloc = reloc.info(self.file.endian); + let mut encoding = RelocationEncoding::Generic; + let kind = match cputype { + macho::CPU_TYPE_ARM => match (reloc.r_type, reloc.r_pcrel) { + (macho::ARM_RELOC_VANILLA, false) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + macho::CPU_TYPE_ARM64 => match (reloc.r_type, reloc.r_pcrel) { + (macho::ARM64_RELOC_UNSIGNED, false) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + macho::CPU_TYPE_X86 => match (reloc.r_type, reloc.r_pcrel) { + (macho::GENERIC_RELOC_VANILLA, false) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + macho::CPU_TYPE_X86_64 => match (reloc.r_type, reloc.r_pcrel) { + (macho::X86_64_RELOC_UNSIGNED, false) => RelocationKind::Absolute, + (macho::X86_64_RELOC_SIGNED, true) => { + encoding = RelocationEncoding::X86RipRelative; + RelocationKind::Relative + } + (macho::X86_64_RELOC_BRANCH, true) => { + encoding = RelocationEncoding::X86Branch; + RelocationKind::Relative + } + (macho::X86_64_RELOC_GOT, true) => RelocationKind::GotRelative, + (macho::X86_64_RELOC_GOT_LOAD, true) => { + encoding = RelocationEncoding::X86RipRelativeMovq; + RelocationKind::GotRelative + } + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }, + _ => RelocationKind::MachO { + value: reloc.r_type, + relative: reloc.r_pcrel, + }, + }; + let size = 8 << reloc.r_length; + let target = if reloc.r_extern { + RelocationTarget::Symbol(SymbolIndex(reloc.r_symbolnum as usize)) + } else { + RelocationTarget::Section(SectionIndex(reloc.r_symbolnum as usize)) + }; + let addend = if reloc.r_pcrel { -4 } else { 0 }; + return Some(( + reloc.r_address as u64, + Relocation { + kind, + encoding, + size, + target, + addend, + implicit_addend: true, + }, + )); + } + } +} + +impl<'data, 'file, Mach, R> fmt::Debug for MachORelocationIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachORelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/macho/section.rs b/third_party/rust/object/src/read/macho/section.rs new file mode 100644 index 0000000000..9e71aa8fd4 --- /dev/null +++ b/third_party/rust/object/src/read/macho/section.rs @@ -0,0 +1,384 @@ +use core::fmt::Debug; +use core::{fmt, result, slice, str}; + +use crate::endian::{self, Endianness}; +use crate::macho; +use crate::pod::Pod; +use crate::read::{ + self, CompressedData, CompressedFileRange, ObjectSection, ReadError, ReadRef, Result, + SectionFlags, SectionIndex, SectionKind, +}; + +use super::{MachHeader, MachOFile, MachORelocationIterator}; + +/// An iterator over the sections of a `MachOFile32`. +pub type MachOSectionIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSectionIterator<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the sections of a `MachOFile64`. +pub type MachOSectionIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSectionIterator<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// An iterator over the sections of a `MachOFile`. +pub struct MachOSectionIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) iter: slice::Iter<'file, MachOSectionInternal<'data, Mach>>, +} + +impl<'data, 'file, Mach, R> fmt::Debug for MachOSectionIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("MachOSectionIterator").finish() + } +} + +impl<'data, 'file, Mach, R> Iterator for MachOSectionIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOSection<'data, 'file, Mach, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|&internal| MachOSection { + file: self.file, + internal, + }) + } +} + +/// A section of a `MachOFile32`. +pub type MachOSection32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSection<'data, 'file, macho::MachHeader32<Endian>, R>; +/// A section of a `MachOFile64`. +pub type MachOSection64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSection<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// A section of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSection<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) internal: MachOSectionInternal<'data, Mach>, +} + +impl<'data, 'file, Mach, R> MachOSection<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn bytes(&self) -> Result<&'data [u8]> { + let segment_index = self.internal.segment_index; + let segment = self.file.segment_internal(segment_index)?; + self.internal + .section + .data(self.file.endian, segment.data) + .read_error("Invalid Mach-O section size or offset") + } +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSection<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSection<'data> for MachOSection<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type RelocationIterator = MachORelocationIterator<'data, 'file, Mach, R>; + + #[inline] + fn index(&self) -> SectionIndex { + self.internal.index + } + + #[inline] + fn address(&self) -> u64 { + self.internal.section.addr(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.internal.section.size(self.file.endian).into() + } + + #[inline] + fn align(&self) -> u64 { + 1 << self.internal.section.align(self.file.endian) + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + self.internal.section.file_range(self.file.endian) + } + + #[inline] + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn compressed_file_range(&self) -> Result<CompressedFileRange> { + Ok(CompressedFileRange::none(self.file_range())) + } + + #[inline] + fn compressed_data(&self) -> Result<CompressedData<'data>> { + self.data().map(CompressedData::none) + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + Ok(self.internal.section.name()) + } + + #[inline] + fn name(&self) -> Result<&str> { + str::from_utf8(self.internal.section.name()) + .ok() + .read_error("Non UTF-8 Mach-O section name") + } + + #[inline] + fn segment_name_bytes(&self) -> Result<Option<&[u8]>> { + Ok(Some(self.internal.section.segment_name())) + } + + #[inline] + fn segment_name(&self) -> Result<Option<&str>> { + Ok(Some( + str::from_utf8(self.internal.section.segment_name()) + .ok() + .read_error("Non UTF-8 Mach-O segment name")?, + )) + } + + fn kind(&self) -> SectionKind { + self.internal.kind + } + + fn relocations(&self) -> MachORelocationIterator<'data, 'file, Mach, R> { + MachORelocationIterator { + file: self.file, + relocations: self + .internal + .section + .relocations(self.file.endian, self.file.data) + .unwrap_or(&[]) + .iter(), + } + } + + fn flags(&self) -> SectionFlags { + SectionFlags::MachO { + flags: self.internal.section.flags(self.file.endian), + } + } +} + +#[derive(Debug, Clone, Copy)] +pub(super) struct MachOSectionInternal<'data, Mach: MachHeader> { + pub index: SectionIndex, + pub segment_index: usize, + pub kind: SectionKind, + pub section: &'data Mach::Section, +} + +impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { + pub(super) fn parse( + index: SectionIndex, + segment_index: usize, + section: &'data Mach::Section, + ) -> Self { + // TODO: we don't validate flags, should we? + let kind = match (section.segment_name(), section.name()) { + (b"__TEXT", b"__text") => SectionKind::Text, + (b"__TEXT", b"__const") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__cstring") => SectionKind::ReadOnlyString, + (b"__TEXT", b"__literal4") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__literal8") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__literal16") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__eh_frame") => SectionKind::ReadOnlyData, + (b"__TEXT", b"__gcc_except_tab") => SectionKind::ReadOnlyData, + (b"__DATA", b"__data") => SectionKind::Data, + (b"__DATA", b"__const") => SectionKind::ReadOnlyData, + (b"__DATA", b"__bss") => SectionKind::UninitializedData, + (b"__DATA", b"__common") => SectionKind::Common, + (b"__DATA", b"__thread_data") => SectionKind::Tls, + (b"__DATA", b"__thread_bss") => SectionKind::UninitializedTls, + (b"__DATA", b"__thread_vars") => SectionKind::TlsVariables, + (b"__DWARF", _) => SectionKind::Debug, + _ => SectionKind::Unknown, + }; + MachOSectionInternal { + index, + segment_index, + kind, + section, + } + } +} + +/// A trait for generic access to `Section32` and `Section64`. +#[allow(missing_docs)] +pub trait Section: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + + fn sectname(&self) -> &[u8; 16]; + fn segname(&self) -> &[u8; 16]; + fn addr(&self, endian: Self::Endian) -> Self::Word; + fn size(&self, endian: Self::Endian) -> Self::Word; + fn offset(&self, endian: Self::Endian) -> u32; + fn align(&self, endian: Self::Endian) -> u32; + fn reloff(&self, endian: Self::Endian) -> u32; + fn nreloc(&self, endian: Self::Endian) -> u32; + fn flags(&self, endian: Self::Endian) -> u32; + + /// Return the `sectname` bytes up until the null terminator. + fn name(&self) -> &[u8] { + let sectname = &self.sectname()[..]; + match memchr::memchr(b'\0', sectname) { + Some(end) => §name[..end], + None => sectname, + } + } + + /// Return the `segname` bytes up until the null terminator. + fn segment_name(&self) -> &[u8] { + let segname = &self.segname()[..]; + match memchr::memchr(b'\0', segname) { + Some(end) => &segname[..end], + None => segname, + } + } + + /// Return the offset and size of the section in the file. + /// + /// Returns `None` for sections that have no data in the file. + fn file_range(&self, endian: Self::Endian) -> Option<(u64, u64)> { + match self.flags(endian) & macho::SECTION_TYPE { + macho::S_ZEROFILL | macho::S_GB_ZEROFILL | macho::S_THREAD_LOCAL_ZEROFILL => None, + _ => Some((self.offset(endian).into(), self.size(endian).into())), + } + } + + /// Return the section data. + /// + /// Returns `Ok(&[])` if the section has no data. + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> result::Result<&'data [u8], ()> { + if let Some((offset, size)) = self.file_range(endian) { + data.read_bytes_at(offset, size) + } else { + Ok(&[]) + } + } + + /// Return the relocation array. + /// + /// Returns `Err` for invalid values. + fn relocations<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> Result<&'data [macho::Relocation<Self::Endian>]> { + data.read_slice_at(self.reloff(endian).into(), self.nreloc(endian) as usize) + .read_error("Invalid Mach-O relocations offset or number") + } +} + +impl<Endian: endian::Endian> Section for macho::Section32<Endian> { + type Word = u32; + type Endian = Endian; + + fn sectname(&self) -> &[u8; 16] { + &self.sectname + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn addr(&self, endian: Self::Endian) -> Self::Word { + self.addr.get(endian) + } + fn size(&self, endian: Self::Endian) -> Self::Word { + self.size.get(endian) + } + fn offset(&self, endian: Self::Endian) -> u32 { + self.offset.get(endian) + } + fn align(&self, endian: Self::Endian) -> u32 { + self.align.get(endian) + } + fn reloff(&self, endian: Self::Endian) -> u32 { + self.reloff.get(endian) + } + fn nreloc(&self, endian: Self::Endian) -> u32 { + self.nreloc.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} + +impl<Endian: endian::Endian> Section for macho::Section64<Endian> { + type Word = u64; + type Endian = Endian; + + fn sectname(&self) -> &[u8; 16] { + &self.sectname + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn addr(&self, endian: Self::Endian) -> Self::Word { + self.addr.get(endian) + } + fn size(&self, endian: Self::Endian) -> Self::Word { + self.size.get(endian) + } + fn offset(&self, endian: Self::Endian) -> u32 { + self.offset.get(endian) + } + fn align(&self, endian: Self::Endian) -> u32 { + self.align.get(endian) + } + fn reloff(&self, endian: Self::Endian) -> u32 { + self.reloff.get(endian) + } + fn nreloc(&self, endian: Self::Endian) -> u32 { + self.nreloc.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} diff --git a/third_party/rust/object/src/read/macho/segment.rs b/third_party/rust/object/src/read/macho/segment.rs new file mode 100644 index 0000000000..c7eaa6fff7 --- /dev/null +++ b/third_party/rust/object/src/read/macho/segment.rs @@ -0,0 +1,303 @@ +use core::fmt::Debug; +use core::{result, slice, str}; + +use crate::endian::{self, Endianness}; +use crate::macho; +use crate::pod::Pod; +use crate::read::{self, ObjectSegment, ReadError, ReadRef, Result, SegmentFlags}; + +use super::{LoadCommandData, MachHeader, MachOFile, Section}; + +/// An iterator over the segments of a `MachOFile32`. +pub type MachOSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegmentIterator<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the segments of a `MachOFile64`. +pub type MachOSegmentIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegmentIterator<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// An iterator over the segments of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegmentIterator<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) iter: slice::Iter<'file, MachOSegmentInternal<'data, Mach, R>>, +} + +impl<'data, 'file, Mach, R> Iterator for MachOSegmentIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOSegment<'data, 'file, Mach, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|internal| MachOSegment { + file: self.file, + internal, + }) + } +} + +/// A segment of a `MachOFile32`. +pub type MachOSegment32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegment<'data, 'file, macho::MachHeader32<Endian>, R>; +/// A segment of a `MachOFile64`. +pub type MachOSegment64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSegment<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// A segment of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegment<'data, 'file, Mach, R = &'data [u8]> +where + 'data: 'file, + Mach: MachHeader, + R: ReadRef<'data>, +{ + file: &'file MachOFile<'data, Mach, R>, + internal: &'file MachOSegmentInternal<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> MachOSegment<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn bytes(&self) -> Result<&'data [u8]> { + self.internal + .segment + .data(self.file.endian, self.file.data) + .read_error("Invalid Mach-O segment size or offset") + } +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSegment<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSegment<'data> for MachOSegment<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[inline] + fn address(&self) -> u64 { + self.internal.segment.vmaddr(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + self.internal.segment.vmsize(self.file.endian).into() + } + + #[inline] + fn align(&self) -> u64 { + // Page size. + 0x1000 + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + self.internal.segment.file_range(self.file.endian) + } + + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn name_bytes(&self) -> Result<Option<&[u8]>> { + Ok(Some(self.internal.segment.name())) + } + + #[inline] + fn name(&self) -> Result<Option<&str>> { + Ok(Some( + str::from_utf8(self.internal.segment.name()) + .ok() + .read_error("Non UTF-8 Mach-O segment name")?, + )) + } + + #[inline] + fn flags(&self) -> SegmentFlags { + let flags = self.internal.segment.flags(self.file.endian); + let maxprot = self.internal.segment.maxprot(self.file.endian); + let initprot = self.internal.segment.initprot(self.file.endian); + SegmentFlags::MachO { + flags, + maxprot, + initprot, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub(super) struct MachOSegmentInternal<'data, Mach: MachHeader, R: ReadRef<'data>> { + pub data: R, + pub segment: &'data Mach::Segment, +} + +/// A trait for generic access to `SegmentCommand32` and `SegmentCommand64`. +#[allow(missing_docs)] +pub trait Segment: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + type Section: Section<Endian = Self::Endian>; + + fn from_command(command: LoadCommandData<Self::Endian>) -> Result<Option<(&Self, &[u8])>>; + + fn cmd(&self, endian: Self::Endian) -> u32; + fn cmdsize(&self, endian: Self::Endian) -> u32; + fn segname(&self) -> &[u8; 16]; + fn vmaddr(&self, endian: Self::Endian) -> Self::Word; + fn vmsize(&self, endian: Self::Endian) -> Self::Word; + fn fileoff(&self, endian: Self::Endian) -> Self::Word; + fn filesize(&self, endian: Self::Endian) -> Self::Word; + fn maxprot(&self, endian: Self::Endian) -> u32; + fn initprot(&self, endian: Self::Endian) -> u32; + fn nsects(&self, endian: Self::Endian) -> u32; + fn flags(&self, endian: Self::Endian) -> u32; + + /// Return the `segname` bytes up until the null terminator. + fn name(&self) -> &[u8] { + let segname = &self.segname()[..]; + match memchr::memchr(b'\0', segname) { + Some(end) => &segname[..end], + None => segname, + } + } + + /// Return the offset and size of the segment in the file. + fn file_range(&self, endian: Self::Endian) -> (u64, u64) { + (self.fileoff(endian).into(), self.filesize(endian).into()) + } + + /// Get the segment data from the file data. + /// + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + data: R, + ) -> result::Result<&'data [u8], ()> { + let (offset, size) = self.file_range(endian); + data.read_bytes_at(offset, size) + } + + /// Get the array of sections from the data following the segment command. + /// + /// Returns `Err` for invalid values. + fn sections<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + section_data: R, + ) -> Result<&'data [Self::Section]> { + section_data + .read_slice_at(0, self.nsects(endian) as usize) + .read_error("Invalid Mach-O number of sections") + } +} + +impl<Endian: endian::Endian> Segment for macho::SegmentCommand32<Endian> { + type Word = u32; + type Endian = Endian; + type Section = macho::Section32<Self::Endian>; + + fn from_command(command: LoadCommandData<Self::Endian>) -> Result<Option<(&Self, &[u8])>> { + command.segment_32() + } + + fn cmd(&self, endian: Self::Endian) -> u32 { + self.cmd.get(endian) + } + fn cmdsize(&self, endian: Self::Endian) -> u32 { + self.cmdsize.get(endian) + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn vmaddr(&self, endian: Self::Endian) -> Self::Word { + self.vmaddr.get(endian) + } + fn vmsize(&self, endian: Self::Endian) -> Self::Word { + self.vmsize.get(endian) + } + fn fileoff(&self, endian: Self::Endian) -> Self::Word { + self.fileoff.get(endian) + } + fn filesize(&self, endian: Self::Endian) -> Self::Word { + self.filesize.get(endian) + } + fn maxprot(&self, endian: Self::Endian) -> u32 { + self.maxprot.get(endian) + } + fn initprot(&self, endian: Self::Endian) -> u32 { + self.initprot.get(endian) + } + fn nsects(&self, endian: Self::Endian) -> u32 { + self.nsects.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} + +impl<Endian: endian::Endian> Segment for macho::SegmentCommand64<Endian> { + type Word = u64; + type Endian = Endian; + type Section = macho::Section64<Self::Endian>; + + fn from_command(command: LoadCommandData<Self::Endian>) -> Result<Option<(&Self, &[u8])>> { + command.segment_64() + } + + fn cmd(&self, endian: Self::Endian) -> u32 { + self.cmd.get(endian) + } + fn cmdsize(&self, endian: Self::Endian) -> u32 { + self.cmdsize.get(endian) + } + fn segname(&self) -> &[u8; 16] { + &self.segname + } + fn vmaddr(&self, endian: Self::Endian) -> Self::Word { + self.vmaddr.get(endian) + } + fn vmsize(&self, endian: Self::Endian) -> Self::Word { + self.vmsize.get(endian) + } + fn fileoff(&self, endian: Self::Endian) -> Self::Word { + self.fileoff.get(endian) + } + fn filesize(&self, endian: Self::Endian) -> Self::Word { + self.filesize.get(endian) + } + fn maxprot(&self, endian: Self::Endian) -> u32 { + self.maxprot.get(endian) + } + fn initprot(&self, endian: Self::Endian) -> u32 { + self.initprot.get(endian) + } + fn nsects(&self, endian: Self::Endian) -> u32 { + self.nsects.get(endian) + } + fn flags(&self, endian: Self::Endian) -> u32 { + self.flags.get(endian) + } +} diff --git a/third_party/rust/object/src/read/macho/symbol.rs b/third_party/rust/object/src/read/macho/symbol.rs new file mode 100644 index 0000000000..e102c5d0b6 --- /dev/null +++ b/third_party/rust/object/src/read/macho/symbol.rs @@ -0,0 +1,488 @@ +use alloc::vec::Vec; +use core::fmt::Debug; +use core::{fmt, slice, str}; + +use crate::endian::{self, Endianness}; +use crate::macho; +use crate::pod::Pod; +use crate::read::util::StringTable; +use crate::read::{ + self, ObjectMap, ObjectMapEntry, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Result, + SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapEntry, + SymbolScope, SymbolSection, +}; + +use super::{MachHeader, MachOFile}; + +/// A table of symbol entries in a Mach-O file. +/// +/// Also includes the string table used for the symbol names. +#[derive(Debug, Clone, Copy)] +pub struct SymbolTable<'data, Mach: MachHeader, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + symbols: &'data [Mach::Nlist], + strings: StringTable<'data, R>, +} + +impl<'data, Mach: MachHeader, R: ReadRef<'data>> Default for SymbolTable<'data, Mach, R> { + fn default() -> Self { + SymbolTable { + symbols: &[], + strings: Default::default(), + } + } +} + +impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> { + #[inline] + pub(super) fn new(symbols: &'data [Mach::Nlist], strings: StringTable<'data, R>) -> Self { + SymbolTable { symbols, strings } + } + + /// Return the string table used for the symbol names. + #[inline] + pub fn strings(&self) -> StringTable<'data, R> { + self.strings + } + + /// Iterate over the symbols. + #[inline] + pub fn iter(&self) -> slice::Iter<'data, Mach::Nlist> { + self.symbols.iter() + } + + /// Return true if the symbol table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + + /// The number of symbols. + #[inline] + pub fn len(&self) -> usize { + self.symbols.len() + } + + /// Return the symbol at the given index. + pub fn symbol(&self, index: usize) -> Result<&'data Mach::Nlist> { + self.symbols + .get(index) + .read_error("Invalid Mach-O symbol index") + } + + /// Construct a map from addresses to a user-defined map entry. + pub fn map<Entry: SymbolMapEntry, F: Fn(&'data Mach::Nlist) -> Option<Entry>>( + &self, + f: F, + ) -> SymbolMap<Entry> { + let mut symbols = Vec::new(); + for nlist in self.symbols { + if !nlist.is_definition() { + continue; + } + if let Some(entry) = f(nlist) { + symbols.push(entry); + } + } + SymbolMap::new(symbols) + } + + /// Construct a map from addresses to symbol names and object file names. + pub fn object_map(&self, endian: Mach::Endian) -> ObjectMap<'data> { + let mut symbols = Vec::new(); + let mut objects = Vec::new(); + let mut object = None; + let mut current_function = None; + // Each module starts with one or two N_SO symbols (path, or directory + filename) + // and one N_OSO symbol. The module is terminated by an empty N_SO symbol. + for nlist in self.symbols { + let n_type = nlist.n_type(); + if n_type & macho::N_STAB == 0 { + continue; + } + // TODO: includes variables too (N_GSYM, N_STSYM). These may need to get their + // address from regular symbols though. + match n_type { + macho::N_SO => { + object = None; + } + macho::N_OSO => { + object = None; + if let Ok(name) = nlist.name(endian, self.strings) { + if !name.is_empty() { + object = Some(objects.len()); + objects.push(name); + } + } + } + macho::N_FUN => { + if let Ok(name) = nlist.name(endian, self.strings) { + if !name.is_empty() { + current_function = Some((name, nlist.n_value(endian).into())) + } else if let Some((name, address)) = current_function.take() { + if let Some(object) = object { + symbols.push(ObjectMapEntry { + address, + size: nlist.n_value(endian).into(), + name, + object, + }); + } + } + } + } + _ => {} + } + } + ObjectMap { + symbols: SymbolMap::new(symbols), + objects, + } + } +} + +/// An iterator over the symbols of a `MachOFile32`. +pub type MachOSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolTable<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the symbols of a `MachOFile64`. +pub type MachOSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolTable<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// A symbol table of a `MachOFile`. +#[derive(Debug, Clone, Copy)] +pub struct MachOSymbolTable<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbolTable<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSymbolTable<'data> for MachOSymbolTable<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Symbol = MachOSymbol<'data, 'file, Mach, R>; + type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>; + + fn symbols(&self) -> Self::SymbolIterator { + MachOSymbolIterator { + file: self.file, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> { + let nlist = self.file.symbols.symbol(index.0)?; + MachOSymbol::new(self.file, index, nlist).read_error("Unsupported Mach-O symbol index") + } +} + +/// An iterator over the symbols of a `MachOFile32`. +pub type MachOSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolIterator<'data, 'file, macho::MachHeader32<Endian>, R>; +/// An iterator over the symbols of a `MachOFile64`. +pub type MachOSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbolIterator<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// An iterator over the symbols of a `MachOFile`. +pub struct MachOSymbolIterator<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file MachOFile<'data, Mach, R>, + pub(super) index: usize, +} + +impl<'data, 'file, Mach, R> fmt::Debug for MachOSymbolIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachOSymbolIterator").finish() + } +} + +impl<'data, 'file, Mach, R> Iterator for MachOSymbolIterator<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + type Item = MachOSymbol<'data, 'file, Mach, R>; + + fn next(&mut self) -> Option<Self::Item> { + loop { + let index = self.index; + let nlist = self.file.symbols.symbols.get(index)?; + self.index += 1; + if let Some(symbol) = MachOSymbol::new(self.file, SymbolIndex(index), nlist) { + return Some(symbol); + } + } + } +} + +/// A symbol of a `MachOFile32`. +pub type MachOSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbol<'data, 'file, macho::MachHeader32<Endian>, R>; +/// A symbol of a `MachOFile64`. +pub type MachOSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> = + MachOSymbol<'data, 'file, macho::MachHeader64<Endian>, R>; + +/// A symbol of a `MachOFile`. +#[derive(Debug, Clone, Copy)] +pub struct MachOSymbol<'data, 'file, Mach, R = &'data [u8]> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + file: &'file MachOFile<'data, Mach, R>, + index: SymbolIndex, + nlist: &'data Mach::Nlist, +} + +impl<'data, 'file, Mach, R> MachOSymbol<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + pub(super) fn new( + file: &'file MachOFile<'data, Mach, R>, + index: SymbolIndex, + nlist: &'data Mach::Nlist, + ) -> Option<Self> { + if nlist.n_type() & macho::N_STAB != 0 { + return None; + } + Some(MachOSymbol { file, index, nlist }) + } +} + +impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbol<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Mach, R> ObjectSymbol<'data> for MachOSymbol<'data, 'file, Mach, R> +where + Mach: MachHeader, + R: ReadRef<'data>, +{ + #[inline] + fn index(&self) -> SymbolIndex { + self.index + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + self.nlist.name(self.file.endian, self.file.symbols.strings) + } + + fn name(&self) -> Result<&'data str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 Mach-O symbol name") + } + + #[inline] + fn address(&self) -> u64 { + self.nlist.n_value(self.file.endian).into() + } + + #[inline] + fn size(&self) -> u64 { + 0 + } + + fn kind(&self) -> SymbolKind { + self.section() + .index() + .and_then(|index| self.file.section_internal(index).ok()) + .map(|section| match section.kind { + SectionKind::Text => SymbolKind::Text, + SectionKind::Data + | SectionKind::ReadOnlyData + | SectionKind::ReadOnlyString + | SectionKind::UninitializedData + | SectionKind::Common => SymbolKind::Data, + SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => { + SymbolKind::Tls + } + _ => SymbolKind::Unknown, + }) + .unwrap_or(SymbolKind::Unknown) + } + + fn section(&self) -> SymbolSection { + match self.nlist.n_type() & macho::N_TYPE { + macho::N_UNDF => SymbolSection::Undefined, + macho::N_ABS => SymbolSection::Absolute, + macho::N_SECT => { + let n_sect = self.nlist.n_sect(); + if n_sect != 0 { + SymbolSection::Section(SectionIndex(n_sect as usize)) + } else { + SymbolSection::Unknown + } + } + _ => SymbolSection::Unknown, + } + } + + #[inline] + fn is_undefined(&self) -> bool { + self.nlist.n_type() & macho::N_TYPE == macho::N_UNDF + } + + #[inline] + fn is_definition(&self) -> bool { + self.nlist.is_definition() + } + + #[inline] + fn is_common(&self) -> bool { + // Mach-O common symbols are based on section, not symbol + false + } + + #[inline] + fn is_weak(&self) -> bool { + self.nlist.n_desc(self.file.endian) & (macho::N_WEAK_REF | macho::N_WEAK_DEF) != 0 + } + + fn scope(&self) -> SymbolScope { + let n_type = self.nlist.n_type(); + if n_type & macho::N_TYPE == macho::N_UNDF { + SymbolScope::Unknown + } else if n_type & macho::N_EXT == 0 { + SymbolScope::Compilation + } else if n_type & macho::N_PEXT != 0 { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + } + } + + #[inline] + fn is_global(&self) -> bool { + self.scope() != SymbolScope::Compilation + } + + #[inline] + fn is_local(&self) -> bool { + self.scope() == SymbolScope::Compilation + } + + #[inline] + fn flags(&self) -> SymbolFlags<SectionIndex> { + let n_desc = self.nlist.n_desc(self.file.endian); + SymbolFlags::MachO { n_desc } + } +} + +/// A trait for generic access to `Nlist32` and `Nlist64`. +#[allow(missing_docs)] +pub trait Nlist: Debug + Pod { + type Word: Into<u64>; + type Endian: endian::Endian; + + fn n_strx(&self, endian: Self::Endian) -> u32; + fn n_type(&self) -> u8; + fn n_sect(&self) -> u8; + fn n_desc(&self, endian: Self::Endian) -> u16; + fn n_value(&self, endian: Self::Endian) -> Self::Word; + + fn name<'data, R: ReadRef<'data>>( + &self, + endian: Self::Endian, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + strings + .get(self.n_strx(endian)) + .read_error("Invalid Mach-O symbol name offset") + } + + /// Return true if this is a STAB symbol. + /// + /// This determines the meaning of the `n_type` field. + fn is_stab(&self) -> bool { + self.n_type() & macho::N_STAB != 0 + } + + /// Return true if this is an undefined symbol. + fn is_undefined(&self) -> bool { + let n_type = self.n_type(); + n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_UNDF + } + + /// Return true if the symbol is a definition of a function or data object. + fn is_definition(&self) -> bool { + let n_type = self.n_type(); + n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE != macho::N_UNDF + } + + /// Return the library ordinal. + /// + /// This is either a 1-based index into the dylib load commands, + /// or a special ordinal. + #[inline] + fn library_ordinal(&self, endian: Self::Endian) -> u8 { + (self.n_desc(endian) >> 8) as u8 + } +} + +impl<Endian: endian::Endian> Nlist for macho::Nlist32<Endian> { + type Word = u32; + type Endian = Endian; + + fn n_strx(&self, endian: Self::Endian) -> u32 { + self.n_strx.get(endian) + } + fn n_type(&self) -> u8 { + self.n_type + } + fn n_sect(&self) -> u8 { + self.n_sect + } + fn n_desc(&self, endian: Self::Endian) -> u16 { + self.n_desc.get(endian) + } + fn n_value(&self, endian: Self::Endian) -> Self::Word { + self.n_value.get(endian) + } +} + +impl<Endian: endian::Endian> Nlist for macho::Nlist64<Endian> { + type Word = u64; + type Endian = Endian; + + fn n_strx(&self, endian: Self::Endian) -> u32 { + self.n_strx.get(endian) + } + fn n_type(&self) -> u8 { + self.n_type + } + fn n_sect(&self) -> u8 { + self.n_sect + } + fn n_desc(&self, endian: Self::Endian) -> u16 { + self.n_desc.get(endian) + } + fn n_value(&self, endian: Self::Endian) -> Self::Word { + self.n_value.get(endian) + } +} diff --git a/third_party/rust/object/src/read/mod.rs b/third_party/rust/object/src/read/mod.rs new file mode 100644 index 0000000000..91a5c05a58 --- /dev/null +++ b/third_party/rust/object/src/read/mod.rs @@ -0,0 +1,725 @@ +//! Interface for reading object files. + +use alloc::borrow::Cow; +use alloc::vec::Vec; +use core::{fmt, result}; + +use crate::common::*; + +mod read_ref; +pub use read_ref::*; + +#[cfg(feature = "std")] +mod read_cache; +#[cfg(feature = "std")] +pub use read_cache::*; + +mod util; +pub use util::*; + +#[cfg(any( + feature = "coff", + feature = "elf", + feature = "macho", + feature = "pe", + feature = "wasm", + feature = "xcoff" +))] +mod any; +#[cfg(any( + feature = "coff", + feature = "elf", + feature = "macho", + feature = "pe", + feature = "wasm", + feature = "xcoff" +))] +pub use any::*; + +#[cfg(feature = "archive")] +pub mod archive; + +#[cfg(feature = "coff")] +pub mod coff; + +#[cfg(feature = "elf")] +pub mod elf; + +#[cfg(feature = "macho")] +pub mod macho; + +#[cfg(feature = "pe")] +pub mod pe; + +#[cfg(feature = "wasm")] +pub mod wasm; + +#[cfg(feature = "xcoff")] +pub mod xcoff; + +mod traits; +pub use traits::*; + +mod private { + pub trait Sealed {} +} + +/// The error type used within the read module. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Error(&'static str); + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.0) + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Error {} + +/// The result type used within the read module. +pub type Result<T> = result::Result<T, Error>; + +trait ReadError<T> { + fn read_error(self, error: &'static str) -> Result<T>; +} + +impl<T> ReadError<T> for result::Result<T, ()> { + fn read_error(self, error: &'static str) -> Result<T> { + self.map_err(|()| Error(error)) + } +} + +impl<T> ReadError<T> for result::Result<T, Error> { + fn read_error(self, error: &'static str) -> Result<T> { + self.map_err(|_| Error(error)) + } +} + +impl<T> ReadError<T> for Option<T> { + fn read_error(self, error: &'static str) -> Result<T> { + self.ok_or(Error(error)) + } +} + +/// The native executable file for the target platform. +#[cfg(all( + unix, + not(target_os = "macos"), + target_pointer_width = "32", + feature = "elf" +))] +pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile32<'data, crate::Endianness, R>; + +/// The native executable file for the target platform. +#[cfg(all( + unix, + not(target_os = "macos"), + target_pointer_width = "64", + feature = "elf" +))] +pub type NativeFile<'data, R = &'data [u8]> = elf::ElfFile64<'data, crate::Endianness, R>; + +/// The native executable file for the target platform. +#[cfg(all(target_os = "macos", target_pointer_width = "32", feature = "macho"))] +pub type NativeFile<'data, R = &'data [u8]> = macho::MachOFile32<'data, crate::Endianness, R>; + +/// The native executable file for the target platform. +#[cfg(all(target_os = "macos", target_pointer_width = "64", feature = "macho"))] +pub type NativeFile<'data, R = &'data [u8]> = macho::MachOFile64<'data, crate::Endianness, R>; + +/// The native executable file for the target platform. +#[cfg(all(target_os = "windows", target_pointer_width = "32", feature = "pe"))] +pub type NativeFile<'data, R = &'data [u8]> = pe::PeFile32<'data, R>; + +/// The native executable file for the target platform. +#[cfg(all(target_os = "windows", target_pointer_width = "64", feature = "pe"))] +pub type NativeFile<'data, R = &'data [u8]> = pe::PeFile64<'data, R>; + +/// The native executable file for the target platform. +#[cfg(all(feature = "wasm", target_arch = "wasm32", feature = "wasm"))] +pub type NativeFile<'data, R = &'data [u8]> = wasm::WasmFile<'data, R>; + +/// A file format kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum FileKind { + /// A Unix archive. + #[cfg(feature = "archive")] + Archive, + /// A COFF object file. + #[cfg(feature = "coff")] + Coff, + /// A dyld cache file containing Mach-O images. + #[cfg(feature = "macho")] + DyldCache, + /// A 32-bit ELF file. + #[cfg(feature = "elf")] + Elf32, + /// A 64-bit ELF file. + #[cfg(feature = "elf")] + Elf64, + /// A 32-bit Mach-O file. + #[cfg(feature = "macho")] + MachO32, + /// A 64-bit Mach-O file. + #[cfg(feature = "macho")] + MachO64, + /// A 32-bit Mach-O fat binary. + #[cfg(feature = "macho")] + MachOFat32, + /// A 64-bit Mach-O fat binary. + #[cfg(feature = "macho")] + MachOFat64, + /// A 32-bit PE file. + #[cfg(feature = "pe")] + Pe32, + /// A 64-bit PE file. + #[cfg(feature = "pe")] + Pe64, + /// A Wasm file. + #[cfg(feature = "wasm")] + Wasm, + /// A 32-bit XCOFF file. + #[cfg(feature = "xcoff")] + Xcoff32, + /// A 64-bit XCOFF file. + #[cfg(feature = "xcoff")] + Xcoff64, +} + +impl FileKind { + /// Determine a file kind by parsing the start of the file. + pub fn parse<'data, R: ReadRef<'data>>(data: R) -> Result<FileKind> { + Self::parse_at(data, 0) + } + + /// Determine a file kind by parsing at the given offset. + pub fn parse_at<'data, R: ReadRef<'data>>(data: R, offset: u64) -> Result<FileKind> { + let magic = data + .read_bytes_at(offset, 16) + .read_error("Could not read file magic")?; + if magic.len() < 16 { + return Err(Error("File too short")); + } + + let kind = match [magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7]] { + #[cfg(feature = "archive")] + [b'!', b'<', b'a', b'r', b'c', b'h', b'>', b'\n'] => FileKind::Archive, + #[cfg(feature = "macho")] + [b'd', b'y', b'l', b'd', b'_', b'v', b'1', b' '] => FileKind::DyldCache, + #[cfg(feature = "elf")] + [0x7f, b'E', b'L', b'F', 1, ..] => FileKind::Elf32, + #[cfg(feature = "elf")] + [0x7f, b'E', b'L', b'F', 2, ..] => FileKind::Elf64, + #[cfg(feature = "macho")] + [0xfe, 0xed, 0xfa, 0xce, ..] + | [0xce, 0xfa, 0xed, 0xfe, ..] => FileKind::MachO32, + #[cfg(feature = "macho")] + | [0xfe, 0xed, 0xfa, 0xcf, ..] + | [0xcf, 0xfa, 0xed, 0xfe, ..] => FileKind::MachO64, + #[cfg(feature = "macho")] + [0xca, 0xfe, 0xba, 0xbe, ..] => FileKind::MachOFat32, + #[cfg(feature = "macho")] + [0xca, 0xfe, 0xba, 0xbf, ..] => FileKind::MachOFat64, + #[cfg(feature = "wasm")] + [0x00, b'a', b's', b'm', ..] => FileKind::Wasm, + #[cfg(feature = "pe")] + [b'M', b'Z', ..] => { + match pe::optional_header_magic(data) { + Ok(crate::pe::IMAGE_NT_OPTIONAL_HDR32_MAGIC) => { + FileKind::Pe32 + } + Ok(crate::pe::IMAGE_NT_OPTIONAL_HDR64_MAGIC) => { + FileKind::Pe64 + } + _ => return Err(Error("Unknown MS-DOS file")), + } + } + // TODO: more COFF machines + #[cfg(feature = "coff")] + // COFF arm + [0xc4, 0x01, ..] + // COFF arm64 + | [0x64, 0xaa, ..] + // COFF x86 + | [0x4c, 0x01, ..] + // COFF x86-64 + | [0x64, 0x86, ..] => FileKind::Coff, + #[cfg(feature = "xcoff")] + [0x01, 0xDF, ..] => FileKind::Xcoff32, + #[cfg(feature = "xcoff")] + [0x01, 0xF7, ..] => FileKind::Xcoff64, + _ => return Err(Error("Unknown file magic")), + }; + Ok(kind) + } +} + +/// An object kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum ObjectKind { + /// The object kind is unknown. + Unknown, + /// Relocatable object. + Relocatable, + /// Executable. + Executable, + /// Dynamic shared object. + Dynamic, + /// Core. + Core, +} + +/// The index used to identify a section of a file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SectionIndex(pub usize); + +/// The index used to identify a symbol of a file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SymbolIndex(pub usize); + +/// The section where a symbol is defined. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum SymbolSection { + /// The section is unknown. + Unknown, + /// The section is not applicable for this symbol (such as file symbols). + None, + /// The symbol is undefined. + Undefined, + /// The symbol has an absolute value. + Absolute, + /// The symbol is a zero-initialized symbol that will be combined with duplicate definitions. + Common, + /// The symbol is defined in the given section. + Section(SectionIndex), +} + +impl SymbolSection { + /// Returns the section index for the section where the symbol is defined. + /// + /// May return `None` if the symbol is not defined in a section. + #[inline] + pub fn index(self) -> Option<SectionIndex> { + if let SymbolSection::Section(index) = self { + Some(index) + } else { + None + } + } +} + +/// An entry in a `SymbolMap`. +pub trait SymbolMapEntry { + /// The symbol address. + fn address(&self) -> u64; +} + +/// A map from addresses to symbols. +#[derive(Debug, Default, Clone)] +pub struct SymbolMap<T: SymbolMapEntry> { + symbols: Vec<T>, +} + +impl<T: SymbolMapEntry> SymbolMap<T> { + /// Construct a new symbol map. + /// + /// This function will sort the symbols by address. + pub fn new(mut symbols: Vec<T>) -> Self { + symbols.sort_unstable_by_key(|s| s.address()); + SymbolMap { symbols } + } + + /// Get the symbol before the given address. + pub fn get(&self, address: u64) -> Option<&T> { + let index = match self + .symbols + .binary_search_by_key(&address, |symbol| symbol.address()) + { + Ok(index) => index, + Err(index) => index.checked_sub(1)?, + }; + self.symbols.get(index) + } + + /// Get all symbols in the map. + #[inline] + pub fn symbols(&self) -> &[T] { + &self.symbols + } +} + +/// A `SymbolMap` entry for symbol names. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SymbolMapName<'data> { + address: u64, + name: &'data str, +} + +impl<'data> SymbolMapName<'data> { + /// Construct a `SymbolMapName`. + pub fn new(address: u64, name: &'data str) -> Self { + SymbolMapName { address, name } + } + + /// The symbol address. + #[inline] + pub fn address(&self) -> u64 { + self.address + } + + /// The symbol name. + #[inline] + pub fn name(&self) -> &'data str { + self.name + } +} + +impl<'data> SymbolMapEntry for SymbolMapName<'data> { + #[inline] + fn address(&self) -> u64 { + self.address + } +} + +/// A map from addresses to symbol names and object files. +/// +/// This is derived from STAB entries in Mach-O files. +#[derive(Debug, Default, Clone)] +pub struct ObjectMap<'data> { + symbols: SymbolMap<ObjectMapEntry<'data>>, + objects: Vec<&'data [u8]>, +} + +impl<'data> ObjectMap<'data> { + /// Get the entry containing the given address. + pub fn get(&self, address: u64) -> Option<&ObjectMapEntry<'data>> { + self.symbols + .get(address) + .filter(|entry| entry.size == 0 || address.wrapping_sub(entry.address) < entry.size) + } + + /// Get all symbols in the map. + #[inline] + pub fn symbols(&self) -> &[ObjectMapEntry<'data>] { + self.symbols.symbols() + } + + /// Get all objects in the map. + #[inline] + pub fn objects(&self) -> &[&'data [u8]] { + &self.objects + } +} + +/// A `ObjectMap` entry. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ObjectMapEntry<'data> { + address: u64, + size: u64, + name: &'data [u8], + object: usize, +} + +impl<'data> ObjectMapEntry<'data> { + /// Get the symbol address. + #[inline] + pub fn address(&self) -> u64 { + self.address + } + + /// Get the symbol size. + /// + /// This may be 0 if the size is unknown. + #[inline] + pub fn size(&self) -> u64 { + self.size + } + + /// Get the symbol name. + #[inline] + pub fn name(&self) -> &'data [u8] { + self.name + } + + /// Get the index of the object file name. + #[inline] + pub fn object_index(&self) -> usize { + self.object + } + + /// Get the object file name. + #[inline] + pub fn object(&self, map: &ObjectMap<'data>) -> &'data [u8] { + map.objects[self.object] + } +} + +impl<'data> SymbolMapEntry for ObjectMapEntry<'data> { + #[inline] + fn address(&self) -> u64 { + self.address + } +} + +/// An imported symbol. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Import<'data> { + library: ByteString<'data>, + // TODO: or ordinal + name: ByteString<'data>, +} + +impl<'data> Import<'data> { + /// The symbol name. + #[inline] + pub fn name(&self) -> &'data [u8] { + self.name.0 + } + + /// The name of the library to import the symbol from. + #[inline] + pub fn library(&self) -> &'data [u8] { + self.library.0 + } +} + +/// An exported symbol. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Export<'data> { + // TODO: and ordinal? + name: ByteString<'data>, + address: u64, +} + +impl<'data> Export<'data> { + /// The symbol name. + #[inline] + pub fn name(&self) -> &'data [u8] { + self.name.0 + } + + /// The virtual address of the symbol. + #[inline] + pub fn address(&self) -> u64 { + self.address + } +} + +/// PDB Information +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct CodeView<'data> { + guid: [u8; 16], + path: ByteString<'data>, + age: u32, +} + +impl<'data> CodeView<'data> { + /// The path to the PDB as stored in CodeView + #[inline] + pub fn path(&self) -> &'data [u8] { + self.path.0 + } + + /// The age of the PDB + #[inline] + pub fn age(&self) -> u32 { + self.age + } + + /// The GUID of the PDB. + #[inline] + pub fn guid(&self) -> [u8; 16] { + self.guid + } +} + +/// The target referenced by a relocation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum RelocationTarget { + /// The target is a symbol. + Symbol(SymbolIndex), + /// The target is a section. + Section(SectionIndex), + /// The offset is an absolute address. + Absolute, +} + +/// A relocation entry. +#[derive(Debug)] +pub struct Relocation { + kind: RelocationKind, + encoding: RelocationEncoding, + size: u8, + target: RelocationTarget, + addend: i64, + implicit_addend: bool, +} + +impl Relocation { + /// The operation used to calculate the result of the relocation. + #[inline] + pub fn kind(&self) -> RelocationKind { + self.kind + } + + /// Information about how the result of the relocation operation is encoded in the place. + #[inline] + pub fn encoding(&self) -> RelocationEncoding { + self.encoding + } + + /// The size in bits of the place of the relocation. + /// + /// If 0, then the size is determined by the relocation kind. + #[inline] + pub fn size(&self) -> u8 { + self.size + } + + /// The target of the relocation. + #[inline] + pub fn target(&self) -> RelocationTarget { + self.target + } + + /// The addend to use in the relocation calculation. + #[inline] + pub fn addend(&self) -> i64 { + self.addend + } + + /// Set the addend to use in the relocation calculation. + #[inline] + pub fn set_addend(&mut self, addend: i64) { + self.addend = addend + } + + /// Returns true if there is an implicit addend stored in the data at the offset + /// to be relocated. + #[inline] + pub fn has_implicit_addend(&self) -> bool { + self.implicit_addend + } +} + +/// A data compression format. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum CompressionFormat { + /// The data is uncompressed. + None, + /// The data is compressed, but the compression format is unknown. + Unknown, + /// ZLIB/DEFLATE. + /// + /// Used for ELF compression and GNU compressed debug information. + Zlib, +} + +/// A range in a file that may be compressed. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct CompressedFileRange { + /// The data compression format. + pub format: CompressionFormat, + /// The file offset of the compressed data. + pub offset: u64, + /// The compressed data size. + pub compressed_size: u64, + /// The uncompressed data size. + pub uncompressed_size: u64, +} + +impl CompressedFileRange { + /// Data that is uncompressed. + #[inline] + pub fn none(range: Option<(u64, u64)>) -> Self { + if let Some((offset, size)) = range { + CompressedFileRange { + format: CompressionFormat::None, + offset, + compressed_size: size, + uncompressed_size: size, + } + } else { + CompressedFileRange { + format: CompressionFormat::None, + offset: 0, + compressed_size: 0, + uncompressed_size: 0, + } + } + } + + /// Convert to `CompressedData` by reading from the file. + pub fn data<'data, R: ReadRef<'data>>(self, file: R) -> Result<CompressedData<'data>> { + let data = file + .read_bytes_at(self.offset, self.compressed_size) + .read_error("Invalid compressed data size or offset")?; + Ok(CompressedData { + format: self.format, + data, + uncompressed_size: self.uncompressed_size, + }) + } +} + +/// Data that may be compressed. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct CompressedData<'data> { + /// The data compression format. + pub format: CompressionFormat, + /// The compressed data. + pub data: &'data [u8], + /// The uncompressed data size. + pub uncompressed_size: u64, +} + +impl<'data> CompressedData<'data> { + /// Data that is uncompressed. + #[inline] + pub fn none(data: &'data [u8]) -> Self { + CompressedData { + format: CompressionFormat::None, + data, + uncompressed_size: data.len() as u64, + } + } + + /// Return the uncompressed data. + /// + /// Returns an error for invalid data or unsupported compression. + /// This includes if the data is compressed but the `compression` feature + /// for this crate is disabled. + pub fn decompress(self) -> Result<Cow<'data, [u8]>> { + match self.format { + CompressionFormat::None => Ok(Cow::Borrowed(self.data)), + #[cfg(feature = "compression")] + CompressionFormat::Zlib => { + use core::convert::TryInto; + let size = self + .uncompressed_size + .try_into() + .ok() + .read_error("Uncompressed data size is too large.")?; + let mut decompressed = Vec::with_capacity(size); + let mut decompress = flate2::Decompress::new(true); + decompress + .decompress_vec( + self.data, + &mut decompressed, + flate2::FlushDecompress::Finish, + ) + .ok() + .read_error("Invalid zlib compressed data")?; + Ok(Cow::Owned(decompressed)) + } + _ => Err(Error("Unsupported compressed data.")), + } + } +} diff --git a/third_party/rust/object/src/read/pe/data_directory.rs b/third_party/rust/object/src/read/pe/data_directory.rs new file mode 100644 index 0000000000..f5d98774e3 --- /dev/null +++ b/third_party/rust/object/src/read/pe/data_directory.rs @@ -0,0 +1,211 @@ +use core::slice; + +use crate::read::{Error, ReadError, ReadRef, Result}; +use crate::{pe, LittleEndian as LE}; + +use super::{ + DelayLoadImportTable, ExportTable, ImportTable, RelocationBlockIterator, ResourceDirectory, + SectionTable, +}; + +/// The table of data directories in a PE file. +#[derive(Debug, Clone, Copy)] +pub struct DataDirectories<'data> { + entries: &'data [pe::ImageDataDirectory], +} + +impl<'data> DataDirectories<'data> { + /// Parse the data directory table. + /// + /// `data` must be the remaining optional data following the + /// [optional header](pe::ImageOptionalHeader64). `number` must be from the + /// [`number_of_rva_and_sizes`](pe::ImageOptionalHeader64::number_of_rva_and_sizes) + /// field of the optional header. + pub fn parse(data: &'data [u8], number: u32) -> Result<Self> { + let entries = data + .read_slice_at(0, number as usize) + .read_error("Invalid PE number of RVA and sizes")?; + Ok(DataDirectories { entries }) + } + + /// The number of data directories. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Iterator over the data directories. + pub fn iter(&self) -> slice::Iter<'data, pe::ImageDataDirectory> { + self.entries.iter() + } + + /// Iterator which gives the directories as well as their index (one of the IMAGE_DIRECTORY_ENTRY_* constants). + pub fn enumerate(&self) -> core::iter::Enumerate<slice::Iter<'data, pe::ImageDataDirectory>> { + self.entries.iter().enumerate() + } + + /// Returns the data directory at the given index. + /// + /// Index should be one of the `IMAGE_DIRECTORY_ENTRY_*` constants. + /// + /// Returns `None` if the index is larger than the table size, + /// or if the entry at the index has a zero virtual address. + pub fn get(&self, index: usize) -> Option<&'data pe::ImageDataDirectory> { + self.entries + .get(index) + .filter(|d| d.virtual_address.get(LE) != 0) + } + + /// Returns the unparsed export directory. + /// + /// `data` must be the entire file data. + pub fn export_directory<R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<Option<&'data pe::ImageExportDirectory>> { + let data_dir = match self.get(pe::IMAGE_DIRECTORY_ENTRY_EXPORT) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let export_data = data_dir.data(data, sections)?; + ExportTable::parse_directory(export_data).map(Some) + } + + /// Returns the partially parsed export directory. + /// + /// `data` must be the entire file data. + pub fn export_table<R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<Option<ExportTable<'data>>> { + let data_dir = match self.get(pe::IMAGE_DIRECTORY_ENTRY_EXPORT) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let export_va = data_dir.virtual_address.get(LE); + let export_data = data_dir.data(data, sections)?; + ExportTable::parse(export_data, export_va).map(Some) + } + + /// Returns the partially parsed import directory. + /// + /// `data` must be the entire file data. + pub fn import_table<R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<Option<ImportTable<'data>>> { + let data_dir = match self.get(pe::IMAGE_DIRECTORY_ENTRY_IMPORT) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let import_va = data_dir.virtual_address.get(LE); + let (section_data, section_va) = sections + .pe_data_containing(data, import_va) + .read_error("Invalid import data dir virtual address")?; + Ok(Some(ImportTable::new(section_data, section_va, import_va))) + } + + /// Returns the partially parsed delay-load import directory. + /// + /// `data` must be the entire file data. + pub fn delay_load_import_table<R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<Option<DelayLoadImportTable<'data>>> { + let data_dir = match self.get(pe::IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let import_va = data_dir.virtual_address.get(LE); + let (section_data, section_va) = sections + .pe_data_containing(data, import_va) + .read_error("Invalid import data dir virtual address")?; + Ok(Some(DelayLoadImportTable::new( + section_data, + section_va, + import_va, + ))) + } + + /// Returns the blocks in the base relocation directory. + /// + /// `data` must be the entire file data. + pub fn relocation_blocks<R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<Option<RelocationBlockIterator<'data>>> { + let data_dir = match self.get(pe::IMAGE_DIRECTORY_ENTRY_BASERELOC) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let reloc_data = data_dir.data(data, sections)?; + Ok(Some(RelocationBlockIterator::new(reloc_data))) + } + + /// Returns the resource directory. + /// + /// `data` must be the entire file data. + pub fn resource_directory<R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<Option<ResourceDirectory<'data>>> { + let data_dir = match self.get(pe::IMAGE_DIRECTORY_ENTRY_RESOURCE) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let rsrc_data = data_dir.data(data, sections)?; + Ok(Some(ResourceDirectory::new(rsrc_data))) + } +} + +impl pe::ImageDataDirectory { + /// Return the virtual address range of this directory entry. + pub fn address_range(&self) -> (u32, u32) { + (self.virtual_address.get(LE), self.size.get(LE)) + } + + /// Return the file offset and size of this directory entry. + /// + /// This function has some limitations: + /// - It requires that the data is contained in a single section. + /// - It uses the size field of the directory entry, which is + /// not desirable for all data directories. + /// - It uses the `virtual_address` of the directory entry as an address, + /// which is not valid for `IMAGE_DIRECTORY_ENTRY_SECURITY`. + pub fn file_range<'data>(&self, sections: &SectionTable<'data>) -> Result<(u32, u32)> { + let (offset, section_size) = sections + .pe_file_range_at(self.virtual_address.get(LE)) + .read_error("Invalid data dir virtual address")?; + let size = self.size.get(LE); + if size > section_size { + return Err(Error("Invalid data dir size")); + } + Ok((offset, size)) + } + + /// Get the data referenced by this directory entry. + /// + /// This function has some limitations: + /// - It requires that the data is contained in a single section. + /// - It uses the size field of the directory entry, which is + /// not desirable for all data directories. + /// - It uses the `virtual_address` of the directory entry as an address, + /// which is not valid for `IMAGE_DIRECTORY_ENTRY_SECURITY`. + pub fn data<'data, R: ReadRef<'data>>( + &self, + data: R, + sections: &SectionTable<'data>, + ) -> Result<&'data [u8]> { + sections + .pe_data_at(data, self.virtual_address.get(LE)) + .read_error("Invalid data dir virtual address")? + .get(..self.size.get(LE) as usize) + .read_error("Invalid data dir size") + } +} diff --git a/third_party/rust/object/src/read/pe/export.rs b/third_party/rust/object/src/read/pe/export.rs new file mode 100644 index 0000000000..88dc78d50b --- /dev/null +++ b/third_party/rust/object/src/read/pe/export.rs @@ -0,0 +1,331 @@ +use alloc::vec::Vec; +use core::fmt::Debug; + +use crate::read::{ByteString, Bytes, Error, ReadError, ReadRef, Result}; +use crate::{pe, LittleEndian as LE, U16Bytes, U32Bytes}; + +/// Where an export is pointing to. +#[derive(Clone, Copy)] +pub enum ExportTarget<'data> { + /// The address of the export, relative to the image base. + Address(u32), + /// Forwarded to an export ordinal in another DLL. + /// + /// This gives the name of the DLL, and the ordinal. + ForwardByOrdinal(&'data [u8], u32), + /// Forwarded to an export name in another DLL. + /// + /// This gives the name of the DLL, and the export name. + ForwardByName(&'data [u8], &'data [u8]), +} + +impl<'data> ExportTarget<'data> { + /// Returns true if the target is an address. + pub fn is_address(&self) -> bool { + match self { + ExportTarget::Address(_) => true, + _ => false, + } + } + + /// Returns true if the export is forwarded to another DLL. + pub fn is_forward(&self) -> bool { + !self.is_address() + } +} + +/// An export from a PE file. +/// +/// There are multiple kinds of PE exports (with or without a name, and local or forwarded). +#[derive(Clone, Copy)] +pub struct Export<'data> { + /// The ordinal of the export. + /// + /// These are sequential, starting at a base specified in the DLL. + pub ordinal: u32, + /// The name of the export, if known. + pub name: Option<&'data [u8]>, + /// The target of this export. + pub target: ExportTarget<'data>, +} + +impl<'a> Debug for Export<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::result::Result<(), core::fmt::Error> { + f.debug_struct("Export") + .field("ordinal", &self.ordinal) + .field("name", &self.name.map(ByteString)) + .field("target", &self.target) + .finish() + } +} + +impl<'a> Debug for ExportTarget<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::result::Result<(), core::fmt::Error> { + match self { + ExportTarget::Address(address) => write!(f, "Address({:#x})", address), + ExportTarget::ForwardByOrdinal(library, ordinal) => write!( + f, + "ForwardByOrdinal({:?}.#{})", + ByteString(library), + ordinal + ), + ExportTarget::ForwardByName(library, name) => write!( + f, + "ForwardByName({:?}.{:?})", + ByteString(library), + ByteString(name) + ), + } + } +} + +/// A partially parsed PE export table. +#[derive(Debug, Clone)] +pub struct ExportTable<'data> { + data: Bytes<'data>, + virtual_address: u32, + directory: &'data pe::ImageExportDirectory, + addresses: &'data [U32Bytes<LE>], + names: &'data [U32Bytes<LE>], + name_ordinals: &'data [U16Bytes<LE>], +} + +impl<'data> ExportTable<'data> { + /// Parse the export table given its section data and address. + pub fn parse(data: &'data [u8], virtual_address: u32) -> Result<Self> { + let directory = Self::parse_directory(data)?; + let data = Bytes(data); + + let mut addresses = &[][..]; + let address_of_functions = directory.address_of_functions.get(LE); + if address_of_functions != 0 { + addresses = data + .read_slice_at::<U32Bytes<_>>( + address_of_functions.wrapping_sub(virtual_address) as usize, + directory.number_of_functions.get(LE) as usize, + ) + .read_error("Invalid PE export address table")?; + } + + let mut names = &[][..]; + let mut name_ordinals = &[][..]; + let address_of_names = directory.address_of_names.get(LE); + let address_of_name_ordinals = directory.address_of_name_ordinals.get(LE); + if address_of_names != 0 { + if address_of_name_ordinals == 0 { + return Err(Error("Missing PE export ordinal table")); + } + + let number = directory.number_of_names.get(LE) as usize; + names = data + .read_slice_at::<U32Bytes<_>>( + address_of_names.wrapping_sub(virtual_address) as usize, + number, + ) + .read_error("Invalid PE export name pointer table")?; + name_ordinals = data + .read_slice_at::<U16Bytes<_>>( + address_of_name_ordinals.wrapping_sub(virtual_address) as usize, + number, + ) + .read_error("Invalid PE export ordinal table")?; + } + + Ok(ExportTable { + data, + virtual_address, + directory, + addresses, + names, + name_ordinals, + }) + } + + /// Parse the export directory given its section data. + pub fn parse_directory(data: &'data [u8]) -> Result<&'data pe::ImageExportDirectory> { + data.read_at::<pe::ImageExportDirectory>(0) + .read_error("Invalid PE export dir size") + } + + /// Returns the header of the export table. + pub fn directory(&self) -> &'data pe::ImageExportDirectory { + self.directory + } + + /// Returns the base value of ordinals. + /// + /// Adding this to an address index will give an ordinal. + pub fn ordinal_base(&self) -> u32 { + self.directory.base.get(LE) + } + + /// Returns the unparsed address table. + /// + /// An address table entry may be a local address, or the address of a forwarded export entry. + /// See [`Self::is_forward`] and [`Self::target_from_address`]. + pub fn addresses(&self) -> &'data [U32Bytes<LE>] { + self.addresses + } + + /// Returns the unparsed name pointer table. + /// + /// A name pointer table entry can be used with [`Self::name_from_pointer`]. + pub fn name_pointers(&self) -> &'data [U32Bytes<LE>] { + self.names + } + + /// Returns the unparsed ordinal table. + /// + /// An ordinal table entry is a 0-based index into the address table. + /// See [`Self::address_by_index`] and [`Self::target_by_index`]. + pub fn name_ordinals(&self) -> &'data [U16Bytes<LE>] { + self.name_ordinals + } + + /// Returns an iterator for the entries in the name pointer table and ordinal table. + /// + /// A name pointer table entry can be used with [`Self::name_from_pointer`]. + /// + /// An ordinal table entry is a 0-based index into the address table. + /// See [`Self::address_by_index`] and [`Self::target_by_index`]. + pub fn name_iter(&self) -> impl Iterator<Item = (u32, u16)> + 'data { + self.names + .iter() + .map(|x| x.get(LE)) + .zip(self.name_ordinals.iter().map(|x| x.get(LE))) + } + + /// Returns the export address table entry at the given address index. + /// + /// This may be a local address, or the address of a forwarded export entry. + /// See [`Self::is_forward`] and [`Self::target_from_address`]. + /// + /// `index` is a 0-based index into the export address table. + pub fn address_by_index(&self, index: u32) -> Result<u32> { + Ok(self + .addresses + .get(index as usize) + .read_error("Invalid PE export address index")? + .get(LE)) + } + + /// Returns the export address table entry at the given ordinal. + /// + /// This may be a local address, or the address of a forwarded export entry. + /// See [`Self::is_forward`] and [`Self::target_from_address`]. + pub fn address_by_ordinal(&self, ordinal: u32) -> Result<u32> { + self.address_by_index(ordinal.wrapping_sub(self.ordinal_base())) + } + + /// Returns the target of the export at the given address index. + /// + /// `index` is a 0-based index into the export address table. + pub fn target_by_index(&self, index: u32) -> Result<ExportTarget<'data>> { + self.target_from_address(self.address_by_index(index)?) + } + + /// Returns the target of the export at the given ordinal. + pub fn target_by_ordinal(&self, ordinal: u32) -> Result<ExportTarget<'data>> { + self.target_from_address(self.address_by_ordinal(ordinal)?) + } + + /// Convert an export address table entry into a target. + pub fn target_from_address(&self, address: u32) -> Result<ExportTarget<'data>> { + Ok(if let Some(forward) = self.forward_string(address)? { + let i = forward + .iter() + .position(|x| *x == b'.') + .read_error("Missing PE forwarded export separator")?; + let library = &forward[..i]; + match &forward[i + 1..] { + [b'#', digits @ ..] => { + let ordinal = + parse_ordinal(digits).read_error("Invalid PE forwarded export ordinal")?; + ExportTarget::ForwardByOrdinal(library, ordinal) + } + [] => { + return Err(Error("Missing PE forwarded export name")); + } + name => ExportTarget::ForwardByName(library, name), + } + } else { + ExportTarget::Address(address) + }) + } + + fn forward_offset(&self, address: u32) -> Option<usize> { + let offset = address.wrapping_sub(self.virtual_address) as usize; + if offset < self.data.len() { + Some(offset) + } else { + None + } + } + + /// Return true if the export address table entry is a forward. + pub fn is_forward(&self, address: u32) -> bool { + self.forward_offset(address).is_some() + } + + /// Return the forward string if the export address table entry is a forward. + pub fn forward_string(&self, address: u32) -> Result<Option<&'data [u8]>> { + if let Some(offset) = self.forward_offset(address) { + self.data + .read_string_at(offset) + .read_error("Invalid PE forwarded export address") + .map(Some) + } else { + Ok(None) + } + } + + /// Convert an export name pointer table entry into a name. + pub fn name_from_pointer(&self, name_pointer: u32) -> Result<&'data [u8]> { + let offset = name_pointer.wrapping_sub(self.virtual_address); + self.data + .read_string_at(offset as usize) + .read_error("Invalid PE export name pointer") + } + + /// Returns the parsed exports in this table. + pub fn exports(&self) -> Result<Vec<Export<'data>>> { + // First, let's list all exports. + let mut exports = Vec::new(); + let ordinal_base = self.ordinal_base(); + for (i, address) in self.addresses.iter().enumerate() { + // Convert from an array index to an ordinal. + let ordinal = ordinal_base.wrapping_add(i as u32); + let target = self.target_from_address(address.get(LE))?; + exports.push(Export { + ordinal, + target, + // Might be populated later. + name: None, + }); + } + + // Now, check whether some (or all) of them have an associated name. + // `ordinal_index` is a 0-based index into `addresses`. + for (name_pointer, ordinal_index) in self.name_iter() { + let name = self.name_from_pointer(name_pointer)?; + exports + .get_mut(ordinal_index as usize) + .read_error("Invalid PE export ordinal")? + .name = Some(name); + } + + Ok(exports) + } +} + +fn parse_ordinal(digits: &[u8]) -> Option<u32> { + if digits.is_empty() { + return None; + } + let mut result: u32 = 0; + for &c in digits { + let x = (c as char).to_digit(10)?; + result = result.checked_mul(10)?.checked_add(x)?; + } + Some(result) +} diff --git a/third_party/rust/object/src/read/pe/file.rs b/third_party/rust/object/src/read/pe/file.rs new file mode 100644 index 0000000000..8dd85131a4 --- /dev/null +++ b/third_party/rust/object/src/read/pe/file.rs @@ -0,0 +1,1029 @@ +use alloc::vec::Vec; +use core::fmt::Debug; +use core::{mem, str}; + +use core::convert::TryInto; + +use crate::read::coff::{CoffCommon, CoffSymbol, CoffSymbolIterator, CoffSymbolTable, SymbolTable}; +use crate::read::{ + self, Architecture, ComdatKind, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, + Object, ObjectComdat, ObjectKind, ReadError, ReadRef, Result, SectionIndex, SymbolIndex, +}; +use crate::{pe, ByteString, Bytes, CodeView, LittleEndian as LE, Pod, U32}; + +use super::{ + DataDirectories, ExportTable, ImageThunkData, ImportTable, PeSection, PeSectionIterator, + PeSegment, PeSegmentIterator, RichHeaderInfo, SectionTable, +}; + +/// A PE32 (32-bit) image file. +pub type PeFile32<'data, R = &'data [u8]> = PeFile<'data, pe::ImageNtHeaders32, R>; +/// A PE32+ (64-bit) image file. +pub type PeFile64<'data, R = &'data [u8]> = PeFile<'data, pe::ImageNtHeaders64, R>; + +/// A PE object file. +#[derive(Debug)] +pub struct PeFile<'data, Pe, R = &'data [u8]> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + pub(super) dos_header: &'data pe::ImageDosHeader, + pub(super) nt_headers: &'data Pe, + pub(super) data_directories: DataDirectories<'data>, + pub(super) common: CoffCommon<'data, R>, + pub(super) data: R, +} + +impl<'data, Pe, R> PeFile<'data, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + /// Parse the raw PE file data. + pub fn parse(data: R) -> Result<Self> { + let dos_header = pe::ImageDosHeader::parse(data)?; + let mut offset = dos_header.nt_headers_offset().into(); + let (nt_headers, data_directories) = Pe::parse(data, &mut offset)?; + let sections = nt_headers.sections(data, offset)?; + let coff_symbols = nt_headers.symbols(data); + let image_base = nt_headers.optional_header().image_base(); + + Ok(PeFile { + dos_header, + nt_headers, + data_directories, + common: CoffCommon { + sections, + // The PE file format deprecates the COFF symbol table (https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#coff-file-header-object-and-image) + // We do not want to prevent parsing the rest of the PE file for a corrupt COFF header, but rather return an empty symbol table + symbols: coff_symbols.unwrap_or_default(), + image_base, + }, + data, + }) + } + + /// Returns this binary data. + pub fn data(&self) -> R { + self.data + } + + /// Return the DOS header of this file. + pub fn dos_header(&self) -> &'data pe::ImageDosHeader { + self.dos_header + } + + /// Return the NT Headers of this file. + pub fn nt_headers(&self) -> &'data Pe { + self.nt_headers + } + + /// Returns information about the rich header of this file (if any). + pub fn rich_header_info(&self) -> Option<RichHeaderInfo> { + RichHeaderInfo::parse(self.data, self.dos_header.nt_headers_offset().into()) + } + + /// Returns the section table of this binary. + pub fn section_table(&self) -> SectionTable<'data> { + self.common.sections + } + + /// Returns the data directories of this file. + pub fn data_directories(&self) -> DataDirectories<'data> { + self.data_directories + } + + /// Returns the data directory at the given index. + pub fn data_directory(&self, id: usize) -> Option<&'data pe::ImageDataDirectory> { + self.data_directories.get(id) + } + + /// Returns the export table of this file. + /// + /// The export table is located using the data directory. + pub fn export_table(&self) -> Result<Option<ExportTable<'data>>> { + self.data_directories + .export_table(self.data, &self.common.sections) + } + + /// Returns the import table of this file. + /// + /// The import table is located using the data directory. + pub fn import_table(&self) -> Result<Option<ImportTable<'data>>> { + self.data_directories + .import_table(self.data, &self.common.sections) + } + + pub(super) fn section_alignment(&self) -> u64 { + u64::from(self.nt_headers.optional_header().section_alignment()) + } +} + +impl<'data, Pe, R> read::private::Sealed for PeFile<'data, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Pe, R> Object<'data, 'file> for PeFile<'data, Pe, R> +where + 'data: 'file, + Pe: ImageNtHeaders, + R: 'file + ReadRef<'data>, +{ + type Segment = PeSegment<'data, 'file, Pe, R>; + type SegmentIterator = PeSegmentIterator<'data, 'file, Pe, R>; + type Section = PeSection<'data, 'file, Pe, R>; + type SectionIterator = PeSectionIterator<'data, 'file, Pe, R>; + type Comdat = PeComdat<'data, 'file, Pe, R>; + type ComdatIterator = PeComdatIterator<'data, 'file, Pe, R>; + type Symbol = CoffSymbol<'data, 'file, R>; + type SymbolIterator = CoffSymbolIterator<'data, 'file, R>; + type SymbolTable = CoffSymbolTable<'data, 'file, R>; + type DynamicRelocationIterator = NoDynamicRelocationIterator; + + fn architecture(&self) -> Architecture { + match self.nt_headers.file_header().machine.get(LE) { + pe::IMAGE_FILE_MACHINE_ARMNT => Architecture::Arm, + pe::IMAGE_FILE_MACHINE_ARM64 => Architecture::Aarch64, + pe::IMAGE_FILE_MACHINE_I386 => Architecture::I386, + pe::IMAGE_FILE_MACHINE_AMD64 => Architecture::X86_64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + // Only little endian is supported. + true + } + + #[inline] + fn is_64(&self) -> bool { + self.nt_headers.is_type_64() + } + + fn kind(&self) -> ObjectKind { + let characteristics = self.nt_headers.file_header().characteristics.get(LE); + if characteristics & pe::IMAGE_FILE_DLL != 0 { + ObjectKind::Dynamic + } else if characteristics & pe::IMAGE_FILE_SYSTEM != 0 { + ObjectKind::Unknown + } else { + ObjectKind::Executable + } + } + + fn segments(&'file self) -> PeSegmentIterator<'data, 'file, Pe, R> { + PeSegmentIterator { + file: self, + iter: self.common.sections.iter(), + } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option<PeSection<'data, 'file, Pe, R>> { + self.common + .sections + .section_by_name(self.common.symbols.strings(), section_name) + .map(|(index, section)| PeSection { + file: self, + index: SectionIndex(index), + section, + }) + } + + fn section_by_index( + &'file self, + index: SectionIndex, + ) -> Result<PeSection<'data, 'file, Pe, R>> { + let section = self.common.sections.section(index.0)?; + Ok(PeSection { + file: self, + index, + section, + }) + } + + fn sections(&'file self) -> PeSectionIterator<'data, 'file, Pe, R> { + PeSectionIterator { + file: self, + iter: self.common.sections.iter().enumerate(), + } + } + + fn comdats(&'file self) -> PeComdatIterator<'data, 'file, Pe, R> { + PeComdatIterator { file: self } + } + + fn symbol_by_index(&'file self, index: SymbolIndex) -> Result<CoffSymbol<'data, 'file, R>> { + let symbol = self.common.symbols.symbol(index.0)?; + Ok(CoffSymbol { + file: &self.common, + index, + symbol, + }) + } + + fn symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R> { + CoffSymbolIterator { + file: &self.common, + index: 0, + } + } + + fn symbol_table(&'file self) -> Option<CoffSymbolTable<'data, 'file, R>> { + Some(CoffSymbolTable { file: &self.common }) + } + + fn dynamic_symbols(&'file self) -> CoffSymbolIterator<'data, 'file, R> { + CoffSymbolIterator { + file: &self.common, + // Hack: don't return any. + index: self.common.symbols.len(), + } + } + + fn dynamic_symbol_table(&'file self) -> Option<CoffSymbolTable<'data, 'file, R>> { + None + } + + fn dynamic_relocations(&'file self) -> Option<NoDynamicRelocationIterator> { + None + } + + fn imports(&self) -> Result<Vec<Import<'data>>> { + let mut imports = Vec::new(); + if let Some(import_table) = self.import_table()? { + let mut import_descs = import_table.descriptors()?; + while let Some(import_desc) = import_descs.next()? { + let library = import_table.name(import_desc.name.get(LE))?; + let mut first_thunk = import_desc.original_first_thunk.get(LE); + if first_thunk == 0 { + first_thunk = import_desc.first_thunk.get(LE); + } + let mut thunks = import_table.thunks(first_thunk)?; + while let Some(thunk) = thunks.next::<Pe>()? { + if !thunk.is_ordinal() { + let (_hint, name) = import_table.hint_name(thunk.address())?; + imports.push(Import { + library: ByteString(library), + name: ByteString(name), + }); + } + } + } + } + Ok(imports) + } + + fn exports(&self) -> Result<Vec<Export<'data>>> { + let mut exports = Vec::new(); + if let Some(export_table) = self.export_table()? { + for (name_pointer, address_index) in export_table.name_iter() { + let name = export_table.name_from_pointer(name_pointer)?; + let address = export_table.address_by_index(address_index.into())?; + if !export_table.is_forward(address) { + exports.push(Export { + name: ByteString(name), + address: self.common.image_base.wrapping_add(address.into()), + }) + } + } + } + Ok(exports) + } + + fn pdb_info(&self) -> Result<Option<CodeView>> { + let data_dir = match self.data_directory(pe::IMAGE_DIRECTORY_ENTRY_DEBUG) { + Some(data_dir) => data_dir, + None => return Ok(None), + }; + let debug_data = data_dir.data(self.data, &self.common.sections).map(Bytes)?; + let debug_data_size = data_dir.size.get(LE) as usize; + + let count = debug_data_size / mem::size_of::<pe::ImageDebugDirectory>(); + let rem = debug_data_size % mem::size_of::<pe::ImageDebugDirectory>(); + if rem != 0 || count < 1 { + return Err(Error("Invalid PE debug dir size")); + } + + let debug_dirs = debug_data + .read_slice_at::<pe::ImageDebugDirectory>(0, count) + .read_error("Invalid PE debug dir size")?; + + for debug_dir in debug_dirs { + if debug_dir.typ.get(LE) != pe::IMAGE_DEBUG_TYPE_CODEVIEW { + continue; + } + + let info = self + .data + .read_slice_at::<u8>( + debug_dir.pointer_to_raw_data.get(LE) as u64, + debug_dir.size_of_data.get(LE) as usize, + ) + .read_error("Invalid CodeView Info address")?; + + let mut info = Bytes(info); + + let sig = info + .read_bytes(4) + .read_error("Invalid CodeView signature")?; + if sig.0 != b"RSDS" { + continue; + } + + let guid: [u8; 16] = info + .read_bytes(16) + .read_error("Invalid CodeView GUID")? + .0 + .try_into() + .unwrap(); + + let age = info.read::<U32<LE>>().read_error("Invalid CodeView Age")?; + + let path = info + .read_string() + .read_error("Invalid CodeView file path")?; + + return Ok(Some(CodeView { + path: ByteString(path), + guid, + age: age.get(LE), + })); + } + Ok(None) + } + + fn has_debug_symbols(&self) -> bool { + self.section_by_name(".debug_info").is_some() + } + + fn relative_address_base(&self) -> u64 { + self.common.image_base + } + + fn entry(&self) -> u64 { + u64::from(self.nt_headers.optional_header().address_of_entry_point()) + .wrapping_add(self.common.image_base) + } + + fn flags(&self) -> FileFlags { + FileFlags::Coff { + characteristics: self.nt_headers.file_header().characteristics.get(LE), + } + } +} + +/// An iterator over the COMDAT section groups of a `PeFile32`. +pub type PeComdatIterator32<'data, 'file, R = &'data [u8]> = + PeComdatIterator<'data, 'file, pe::ImageNtHeaders32, R>; +/// An iterator over the COMDAT section groups of a `PeFile64`. +pub type PeComdatIterator64<'data, 'file, R = &'data [u8]> = + PeComdatIterator<'data, 'file, pe::ImageNtHeaders64, R>; + +/// An iterator over the COMDAT section groups of a `PeFile`. +#[derive(Debug)] +pub struct PeComdatIterator<'data, 'file, Pe, R = &'data [u8]> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file PeFile<'data, Pe, R>, +} + +impl<'data, 'file, Pe, R> Iterator for PeComdatIterator<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + type Item = PeComdat<'data, 'file, Pe, R>; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A COMDAT section group of a `PeFile32`. +pub type PeComdat32<'data, 'file, R = &'data [u8]> = + PeComdat<'data, 'file, pe::ImageNtHeaders32, R>; +/// A COMDAT section group of a `PeFile64`. +pub type PeComdat64<'data, 'file, R = &'data [u8]> = + PeComdat<'data, 'file, pe::ImageNtHeaders64, R>; + +/// A COMDAT section group of a `PeFile`. +#[derive(Debug)] +pub struct PeComdat<'data, 'file, Pe, R = &'data [u8]> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file PeFile<'data, Pe, R>, +} + +impl<'data, 'file, Pe, R> read::private::Sealed for PeComdat<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Pe, R> ObjectComdat<'data> for PeComdat<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + type SectionIterator = PeComdatSectionIterator<'data, 'file, Pe, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + unreachable!(); + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + unreachable!(); + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + unreachable!(); + } + + #[inline] + fn name(&self) -> Result<&str> { + unreachable!(); + } + + #[inline] + fn sections(&self) -> Self::SectionIterator { + unreachable!(); + } +} + +/// An iterator over the sections in a COMDAT section group of a `PeFile32`. +pub type PeComdatSectionIterator32<'data, 'file, R = &'data [u8]> = + PeComdatSectionIterator<'data, 'file, pe::ImageNtHeaders32, R>; +/// An iterator over the sections in a COMDAT section group of a `PeFile64`. +pub type PeComdatSectionIterator64<'data, 'file, R = &'data [u8]> = + PeComdatSectionIterator<'data, 'file, pe::ImageNtHeaders64, R>; + +/// An iterator over the sections in a COMDAT section group of a `PeFile`. +#[derive(Debug)] +pub struct PeComdatSectionIterator<'data, 'file, Pe, R = &'data [u8]> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file PeFile<'data, Pe, R>, +} + +impl<'data, 'file, Pe, R> Iterator for PeComdatSectionIterator<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +impl pe::ImageDosHeader { + /// Read the DOS header. + /// + /// Also checks that the `e_magic` field in the header is valid. + pub fn parse<'data, R: ReadRef<'data>>(data: R) -> read::Result<&'data Self> { + // DOS header comes first. + let dos_header = data + .read_at::<pe::ImageDosHeader>(0) + .read_error("Invalid DOS header size or alignment")?; + if dos_header.e_magic.get(LE) != pe::IMAGE_DOS_SIGNATURE { + return Err(Error("Invalid DOS magic")); + } + Ok(dos_header) + } + + /// Return the file offset of the nt_headers. + #[inline] + pub fn nt_headers_offset(&self) -> u32 { + self.e_lfanew.get(LE) + } +} + +/// Find the optional header and read the `optional_header.magic`. +/// +/// It can be useful to know this magic value before trying to +/// fully parse the NT headers. +pub fn optional_header_magic<'data, R: ReadRef<'data>>(data: R) -> Result<u16> { + let dos_header = pe::ImageDosHeader::parse(data)?; + // NT headers are at an offset specified in the DOS header. + let offset = dos_header.nt_headers_offset().into(); + // It doesn't matter which NT header type is used for the purpose + // of reading the optional header magic. + let nt_headers = data + .read_at::<pe::ImageNtHeaders32>(offset) + .read_error("Invalid NT headers offset, size, or alignment")?; + if nt_headers.signature() != pe::IMAGE_NT_SIGNATURE { + return Err(Error("Invalid PE magic")); + } + Ok(nt_headers.optional_header().magic()) +} + +/// A trait for generic access to `ImageNtHeaders32` and `ImageNtHeaders64`. +#[allow(missing_docs)] +pub trait ImageNtHeaders: Debug + Pod { + type ImageOptionalHeader: ImageOptionalHeader; + type ImageThunkData: ImageThunkData; + + /// Return true if this type is a 64-bit header. + /// + /// This is a property of the type, not a value in the header data. + fn is_type_64(&self) -> bool; + + /// Return true if the magic field in the optional header is valid. + fn is_valid_optional_magic(&self) -> bool; + + /// Return the signature + fn signature(&self) -> u32; + + /// Return the file header. + fn file_header(&self) -> &pe::ImageFileHeader; + + /// Return the optional header. + fn optional_header(&self) -> &Self::ImageOptionalHeader; + + // Provided methods. + + /// Read the NT headers, including the data directories. + /// + /// `data` must be for the entire file. + /// + /// `offset` must be headers offset, which can be obtained from `ImageDosHeader::nt_headers_offset`. + /// It is updated to point after the optional header, which is where the section headers are located. + /// + /// Also checks that the `signature` and `magic` fields in the headers are valid. + fn parse<'data, R: ReadRef<'data>>( + data: R, + offset: &mut u64, + ) -> read::Result<(&'data Self, DataDirectories<'data>)> { + // Note that this does not include the data directories in the optional header. + let nt_headers = data + .read::<Self>(offset) + .read_error("Invalid PE headers offset or size")?; + if nt_headers.signature() != pe::IMAGE_NT_SIGNATURE { + return Err(Error("Invalid PE magic")); + } + if !nt_headers.is_valid_optional_magic() { + return Err(Error("Invalid PE optional header magic")); + } + + // Read the rest of the optional header, and then read the data directories from that. + let optional_data_size = + u64::from(nt_headers.file_header().size_of_optional_header.get(LE)) + .checked_sub(mem::size_of::<Self::ImageOptionalHeader>() as u64) + .read_error("PE optional header size is too small")?; + let optional_data = data + .read_bytes(offset, optional_data_size) + .read_error("Invalid PE optional header size")?; + let data_directories = DataDirectories::parse( + optional_data, + nt_headers.optional_header().number_of_rva_and_sizes(), + )?; + + Ok((nt_headers, data_directories)) + } + + /// Read the section table. + /// + /// `data` must be for the entire file. + /// `offset` must be after the optional file header. + #[inline] + fn sections<'data, R: ReadRef<'data>>( + &self, + data: R, + offset: u64, + ) -> read::Result<SectionTable<'data>> { + SectionTable::parse(self.file_header(), data, offset) + } + + /// Read the COFF symbol table and string table. + /// + /// `data` must be the entire file data. + #[inline] + fn symbols<'data, R: ReadRef<'data>>(&self, data: R) -> read::Result<SymbolTable<'data, R>> { + SymbolTable::parse(self.file_header(), data) + } +} + +/// A trait for generic access to `ImageOptionalHeader32` and `ImageOptionalHeader64`. +#[allow(missing_docs)] +pub trait ImageOptionalHeader: Debug + Pod { + // Standard fields. + fn magic(&self) -> u16; + fn major_linker_version(&self) -> u8; + fn minor_linker_version(&self) -> u8; + fn size_of_code(&self) -> u32; + fn size_of_initialized_data(&self) -> u32; + fn size_of_uninitialized_data(&self) -> u32; + fn address_of_entry_point(&self) -> u32; + fn base_of_code(&self) -> u32; + fn base_of_data(&self) -> Option<u32>; + + // NT additional fields. + fn image_base(&self) -> u64; + fn section_alignment(&self) -> u32; + fn file_alignment(&self) -> u32; + fn major_operating_system_version(&self) -> u16; + fn minor_operating_system_version(&self) -> u16; + fn major_image_version(&self) -> u16; + fn minor_image_version(&self) -> u16; + fn major_subsystem_version(&self) -> u16; + fn minor_subsystem_version(&self) -> u16; + fn win32_version_value(&self) -> u32; + fn size_of_image(&self) -> u32; + fn size_of_headers(&self) -> u32; + fn check_sum(&self) -> u32; + fn subsystem(&self) -> u16; + fn dll_characteristics(&self) -> u16; + fn size_of_stack_reserve(&self) -> u64; + fn size_of_stack_commit(&self) -> u64; + fn size_of_heap_reserve(&self) -> u64; + fn size_of_heap_commit(&self) -> u64; + fn loader_flags(&self) -> u32; + fn number_of_rva_and_sizes(&self) -> u32; +} + +impl ImageNtHeaders for pe::ImageNtHeaders32 { + type ImageOptionalHeader = pe::ImageOptionalHeader32; + type ImageThunkData = pe::ImageThunkData32; + + #[inline] + fn is_type_64(&self) -> bool { + false + } + + #[inline] + fn is_valid_optional_magic(&self) -> bool { + self.optional_header.magic.get(LE) == pe::IMAGE_NT_OPTIONAL_HDR32_MAGIC + } + + #[inline] + fn signature(&self) -> u32 { + self.signature.get(LE) + } + + #[inline] + fn file_header(&self) -> &pe::ImageFileHeader { + &self.file_header + } + + #[inline] + fn optional_header(&self) -> &Self::ImageOptionalHeader { + &self.optional_header + } +} + +impl ImageOptionalHeader for pe::ImageOptionalHeader32 { + #[inline] + fn magic(&self) -> u16 { + self.magic.get(LE) + } + + #[inline] + fn major_linker_version(&self) -> u8 { + self.major_linker_version + } + + #[inline] + fn minor_linker_version(&self) -> u8 { + self.minor_linker_version + } + + #[inline] + fn size_of_code(&self) -> u32 { + self.size_of_code.get(LE) + } + + #[inline] + fn size_of_initialized_data(&self) -> u32 { + self.size_of_initialized_data.get(LE) + } + + #[inline] + fn size_of_uninitialized_data(&self) -> u32 { + self.size_of_uninitialized_data.get(LE) + } + + #[inline] + fn address_of_entry_point(&self) -> u32 { + self.address_of_entry_point.get(LE) + } + + #[inline] + fn base_of_code(&self) -> u32 { + self.base_of_code.get(LE) + } + + #[inline] + fn base_of_data(&self) -> Option<u32> { + Some(self.base_of_data.get(LE)) + } + + #[inline] + fn image_base(&self) -> u64 { + self.image_base.get(LE).into() + } + + #[inline] + fn section_alignment(&self) -> u32 { + self.section_alignment.get(LE) + } + + #[inline] + fn file_alignment(&self) -> u32 { + self.file_alignment.get(LE) + } + + #[inline] + fn major_operating_system_version(&self) -> u16 { + self.major_operating_system_version.get(LE) + } + + #[inline] + fn minor_operating_system_version(&self) -> u16 { + self.minor_operating_system_version.get(LE) + } + + #[inline] + fn major_image_version(&self) -> u16 { + self.major_image_version.get(LE) + } + + #[inline] + fn minor_image_version(&self) -> u16 { + self.minor_image_version.get(LE) + } + + #[inline] + fn major_subsystem_version(&self) -> u16 { + self.major_subsystem_version.get(LE) + } + + #[inline] + fn minor_subsystem_version(&self) -> u16 { + self.minor_subsystem_version.get(LE) + } + + #[inline] + fn win32_version_value(&self) -> u32 { + self.win32_version_value.get(LE) + } + + #[inline] + fn size_of_image(&self) -> u32 { + self.size_of_image.get(LE) + } + + #[inline] + fn size_of_headers(&self) -> u32 { + self.size_of_headers.get(LE) + } + + #[inline] + fn check_sum(&self) -> u32 { + self.check_sum.get(LE) + } + + #[inline] + fn subsystem(&self) -> u16 { + self.subsystem.get(LE) + } + + #[inline] + fn dll_characteristics(&self) -> u16 { + self.dll_characteristics.get(LE) + } + + #[inline] + fn size_of_stack_reserve(&self) -> u64 { + self.size_of_stack_reserve.get(LE).into() + } + + #[inline] + fn size_of_stack_commit(&self) -> u64 { + self.size_of_stack_commit.get(LE).into() + } + + #[inline] + fn size_of_heap_reserve(&self) -> u64 { + self.size_of_heap_reserve.get(LE).into() + } + + #[inline] + fn size_of_heap_commit(&self) -> u64 { + self.size_of_heap_commit.get(LE).into() + } + + #[inline] + fn loader_flags(&self) -> u32 { + self.loader_flags.get(LE) + } + + #[inline] + fn number_of_rva_and_sizes(&self) -> u32 { + self.number_of_rva_and_sizes.get(LE) + } +} + +impl ImageNtHeaders for pe::ImageNtHeaders64 { + type ImageOptionalHeader = pe::ImageOptionalHeader64; + type ImageThunkData = pe::ImageThunkData64; + + #[inline] + fn is_type_64(&self) -> bool { + true + } + + #[inline] + fn is_valid_optional_magic(&self) -> bool { + self.optional_header.magic.get(LE) == pe::IMAGE_NT_OPTIONAL_HDR64_MAGIC + } + + #[inline] + fn signature(&self) -> u32 { + self.signature.get(LE) + } + + #[inline] + fn file_header(&self) -> &pe::ImageFileHeader { + &self.file_header + } + + #[inline] + fn optional_header(&self) -> &Self::ImageOptionalHeader { + &self.optional_header + } +} + +impl ImageOptionalHeader for pe::ImageOptionalHeader64 { + #[inline] + fn magic(&self) -> u16 { + self.magic.get(LE) + } + + #[inline] + fn major_linker_version(&self) -> u8 { + self.major_linker_version + } + + #[inline] + fn minor_linker_version(&self) -> u8 { + self.minor_linker_version + } + + #[inline] + fn size_of_code(&self) -> u32 { + self.size_of_code.get(LE) + } + + #[inline] + fn size_of_initialized_data(&self) -> u32 { + self.size_of_initialized_data.get(LE) + } + + #[inline] + fn size_of_uninitialized_data(&self) -> u32 { + self.size_of_uninitialized_data.get(LE) + } + + #[inline] + fn address_of_entry_point(&self) -> u32 { + self.address_of_entry_point.get(LE) + } + + #[inline] + fn base_of_code(&self) -> u32 { + self.base_of_code.get(LE) + } + + #[inline] + fn base_of_data(&self) -> Option<u32> { + None + } + + #[inline] + fn image_base(&self) -> u64 { + self.image_base.get(LE) + } + + #[inline] + fn section_alignment(&self) -> u32 { + self.section_alignment.get(LE) + } + + #[inline] + fn file_alignment(&self) -> u32 { + self.file_alignment.get(LE) + } + + #[inline] + fn major_operating_system_version(&self) -> u16 { + self.major_operating_system_version.get(LE) + } + + #[inline] + fn minor_operating_system_version(&self) -> u16 { + self.minor_operating_system_version.get(LE) + } + + #[inline] + fn major_image_version(&self) -> u16 { + self.major_image_version.get(LE) + } + + #[inline] + fn minor_image_version(&self) -> u16 { + self.minor_image_version.get(LE) + } + + #[inline] + fn major_subsystem_version(&self) -> u16 { + self.major_subsystem_version.get(LE) + } + + #[inline] + fn minor_subsystem_version(&self) -> u16 { + self.minor_subsystem_version.get(LE) + } + + #[inline] + fn win32_version_value(&self) -> u32 { + self.win32_version_value.get(LE) + } + + #[inline] + fn size_of_image(&self) -> u32 { + self.size_of_image.get(LE) + } + + #[inline] + fn size_of_headers(&self) -> u32 { + self.size_of_headers.get(LE) + } + + #[inline] + fn check_sum(&self) -> u32 { + self.check_sum.get(LE) + } + + #[inline] + fn subsystem(&self) -> u16 { + self.subsystem.get(LE) + } + + #[inline] + fn dll_characteristics(&self) -> u16 { + self.dll_characteristics.get(LE) + } + + #[inline] + fn size_of_stack_reserve(&self) -> u64 { + self.size_of_stack_reserve.get(LE) + } + + #[inline] + fn size_of_stack_commit(&self) -> u64 { + self.size_of_stack_commit.get(LE) + } + + #[inline] + fn size_of_heap_reserve(&self) -> u64 { + self.size_of_heap_reserve.get(LE) + } + + #[inline] + fn size_of_heap_commit(&self) -> u64 { + self.size_of_heap_commit.get(LE) + } + + #[inline] + fn loader_flags(&self) -> u32 { + self.loader_flags.get(LE) + } + + #[inline] + fn number_of_rva_and_sizes(&self) -> u32 { + self.number_of_rva_and_sizes.get(LE) + } +} diff --git a/third_party/rust/object/src/read/pe/import.rs b/third_party/rust/object/src/read/pe/import.rs new file mode 100644 index 0000000000..a5535dc367 --- /dev/null +++ b/third_party/rust/object/src/read/pe/import.rs @@ -0,0 +1,332 @@ +use core::fmt::Debug; +use core::mem; + +use crate::read::{Bytes, ReadError, Result}; +use crate::{pe, LittleEndian as LE, Pod, U16Bytes}; + +use super::ImageNtHeaders; + +/// Information for parsing a PE import table. +#[derive(Debug, Clone)] +pub struct ImportTable<'data> { + section_data: Bytes<'data>, + section_address: u32, + import_address: u32, +} + +impl<'data> ImportTable<'data> { + /// Create a new import table parser. + /// + /// The import descriptors start at `import_address`. + /// The size declared in the `IMAGE_DIRECTORY_ENTRY_IMPORT` data directory is + /// ignored by the Windows loader, and so descriptors will be parsed until a null entry. + /// + /// `section_data` should be from the section containing `import_address`, and + /// `section_address` should be the address of that section. Pointers within the + /// descriptors and thunks may point to anywhere within the section data. + pub fn new(section_data: &'data [u8], section_address: u32, import_address: u32) -> Self { + ImportTable { + section_data: Bytes(section_data), + section_address, + import_address, + } + } + + /// Return an iterator for the import descriptors. + pub fn descriptors(&self) -> Result<ImportDescriptorIterator<'data>> { + let offset = self.import_address.wrapping_sub(self.section_address); + let mut data = self.section_data; + data.skip(offset as usize) + .read_error("Invalid PE import descriptor address")?; + Ok(ImportDescriptorIterator { data }) + } + + /// Return a library name given its address. + /// + /// This address may be from [`pe::ImageImportDescriptor::name`]. + pub fn name(&self, address: u32) -> Result<&'data [u8]> { + self.section_data + .read_string_at(address.wrapping_sub(self.section_address) as usize) + .read_error("Invalid PE import descriptor name") + } + + /// Return a list of thunks given its address. + /// + /// This address may be from [`pe::ImageImportDescriptor::original_first_thunk`] + /// or [`pe::ImageImportDescriptor::first_thunk`]. + pub fn thunks(&self, address: u32) -> Result<ImportThunkList<'data>> { + let offset = address.wrapping_sub(self.section_address); + let mut data = self.section_data; + data.skip(offset as usize) + .read_error("Invalid PE import thunk table address")?; + Ok(ImportThunkList { data }) + } + + /// Parse a thunk. + pub fn import<Pe: ImageNtHeaders>(&self, thunk: Pe::ImageThunkData) -> Result<Import<'data>> { + if thunk.is_ordinal() { + Ok(Import::Ordinal(thunk.ordinal())) + } else { + let (hint, name) = self.hint_name(thunk.address())?; + Ok(Import::Name(hint, name)) + } + } + + /// Return the hint and name at the given address. + /// + /// This address may be from [`pe::ImageThunkData32`] or [`pe::ImageThunkData64`]. + /// + /// The hint is an index into the export name pointer table in the target library. + pub fn hint_name(&self, address: u32) -> Result<(u16, &'data [u8])> { + let offset = address.wrapping_sub(self.section_address); + let mut data = self.section_data; + data.skip(offset as usize) + .read_error("Invalid PE import thunk address")?; + let hint = data + .read::<U16Bytes<LE>>() + .read_error("Missing PE import thunk hint")? + .get(LE); + let name = data + .read_string() + .read_error("Missing PE import thunk name")?; + Ok((hint, name)) + } +} + +/// A fallible iterator for the descriptors in the import data directory. +#[derive(Debug, Clone)] +pub struct ImportDescriptorIterator<'data> { + data: Bytes<'data>, +} + +impl<'data> ImportDescriptorIterator<'data> { + /// Return the next descriptor. + /// + /// Returns `Ok(None)` when a null descriptor is found. + pub fn next(&mut self) -> Result<Option<&'data pe::ImageImportDescriptor>> { + let import_desc = self + .data + .read::<pe::ImageImportDescriptor>() + .read_error("Missing PE null import descriptor")?; + if import_desc.is_null() { + Ok(None) + } else { + Ok(Some(import_desc)) + } + } +} + +/// A list of import thunks. +/// +/// These may be in the import lookup table, or the import address table. +#[derive(Debug, Clone)] +pub struct ImportThunkList<'data> { + data: Bytes<'data>, +} + +impl<'data> ImportThunkList<'data> { + /// Get the thunk at the given index. + pub fn get<Pe: ImageNtHeaders>(&self, index: usize) -> Result<Pe::ImageThunkData> { + let thunk = self + .data + .read_at(index * mem::size_of::<Pe::ImageThunkData>()) + .read_error("Invalid PE import thunk index")?; + Ok(*thunk) + } + + /// Return the first thunk in the list, and update `self` to point after it. + /// + /// Returns `Ok(None)` when a null thunk is found. + pub fn next<Pe: ImageNtHeaders>(&mut self) -> Result<Option<Pe::ImageThunkData>> { + let thunk = self + .data + .read::<Pe::ImageThunkData>() + .read_error("Missing PE null import thunk")?; + if thunk.address() == 0 { + Ok(None) + } else { + Ok(Some(*thunk)) + } + } +} + +/// A parsed import thunk. +#[derive(Debug, Clone, Copy)] +pub enum Import<'data> { + /// Import by ordinal. + Ordinal(u16), + /// Import by name. + /// + /// Includes a hint for the index into the export name pointer table in the target library. + Name(u16, &'data [u8]), +} + +/// A trait for generic access to [`pe::ImageThunkData32`] and [`pe::ImageThunkData64`]. +#[allow(missing_docs)] +pub trait ImageThunkData: Debug + Pod { + /// Return the raw thunk value. + fn raw(self) -> u64; + + /// Returns true if the ordinal flag is set. + fn is_ordinal(self) -> bool; + + /// Return the ordinal portion of the thunk. + /// + /// Does not check the ordinal flag. + fn ordinal(self) -> u16; + + /// Return the RVA portion of the thunk. + /// + /// Does not check the ordinal flag. + fn address(self) -> u32; +} + +impl ImageThunkData for pe::ImageThunkData64 { + fn raw(self) -> u64 { + self.0.get(LE) + } + + fn is_ordinal(self) -> bool { + self.0.get(LE) & pe::IMAGE_ORDINAL_FLAG64 != 0 + } + + fn ordinal(self) -> u16 { + self.0.get(LE) as u16 + } + + fn address(self) -> u32 { + self.0.get(LE) as u32 & 0x7fff_ffff + } +} + +impl ImageThunkData for pe::ImageThunkData32 { + fn raw(self) -> u64 { + self.0.get(LE).into() + } + + fn is_ordinal(self) -> bool { + self.0.get(LE) & pe::IMAGE_ORDINAL_FLAG32 != 0 + } + + fn ordinal(self) -> u16 { + self.0.get(LE) as u16 + } + + fn address(self) -> u32 { + self.0.get(LE) & 0x7fff_ffff + } +} + +/// Information for parsing a PE delay-load import table. +#[derive(Debug, Clone)] +pub struct DelayLoadImportTable<'data> { + section_data: Bytes<'data>, + section_address: u32, + import_address: u32, +} + +impl<'data> DelayLoadImportTable<'data> { + /// Create a new delay load import table parser. + /// + /// The import descriptors start at `import_address`. + /// This table works in the same way the import table does: descriptors will be + /// parsed until a null entry. + /// + /// `section_data` should be from the section containing `import_address`, and + /// `section_address` should be the address of that section. Pointers within the + /// descriptors and thunks may point to anywhere within the section data. + pub fn new(section_data: &'data [u8], section_address: u32, import_address: u32) -> Self { + DelayLoadImportTable { + section_data: Bytes(section_data), + section_address, + import_address, + } + } + + /// Return an iterator for the import descriptors. + pub fn descriptors(&self) -> Result<DelayLoadDescriptorIterator<'data>> { + let offset = self.import_address.wrapping_sub(self.section_address); + let mut data = self.section_data; + data.skip(offset as usize) + .read_error("Invalid PE delay-load import descriptor address")?; + Ok(DelayLoadDescriptorIterator { data }) + } + + /// Return a library name given its address. + /// + /// This address may be from [`pe::ImageDelayloadDescriptor::dll_name_rva`]. + pub fn name(&self, address: u32) -> Result<&'data [u8]> { + self.section_data + .read_string_at(address.wrapping_sub(self.section_address) as usize) + .read_error("Invalid PE import descriptor name") + } + + /// Return a list of thunks given its address. + /// + /// This address may be from the INT, i.e. from + /// [`pe::ImageDelayloadDescriptor::import_name_table_rva`]. + /// + /// Please note that others RVA values from [`pe::ImageDelayloadDescriptor`] are used + /// by the delay loader at runtime to store values, and thus do not point inside the same + /// section as the INT. Calling this function on those addresses will fail. + pub fn thunks(&self, address: u32) -> Result<ImportThunkList<'data>> { + let offset = address.wrapping_sub(self.section_address); + let mut data = self.section_data; + data.skip(offset as usize) + .read_error("Invalid PE delay load import thunk table address")?; + Ok(ImportThunkList { data }) + } + + /// Parse a thunk. + pub fn import<Pe: ImageNtHeaders>(&self, thunk: Pe::ImageThunkData) -> Result<Import<'data>> { + if thunk.is_ordinal() { + Ok(Import::Ordinal(thunk.ordinal())) + } else { + let (hint, name) = self.hint_name(thunk.address())?; + Ok(Import::Name(hint, name)) + } + } + + /// Return the hint and name at the given address. + /// + /// This address may be from [`pe::ImageThunkData32`] or [`pe::ImageThunkData64`]. + /// + /// The hint is an index into the export name pointer table in the target library. + pub fn hint_name(&self, address: u32) -> Result<(u16, &'data [u8])> { + let offset = address.wrapping_sub(self.section_address); + let mut data = self.section_data; + data.skip(offset as usize) + .read_error("Invalid PE delay load import thunk address")?; + let hint = data + .read::<U16Bytes<LE>>() + .read_error("Missing PE delay load import thunk hint")? + .get(LE); + let name = data + .read_string() + .read_error("Missing PE delay load import thunk name")?; + Ok((hint, name)) + } +} + +/// A fallible iterator for the descriptors in the delay-load data directory. +#[derive(Debug, Clone)] +pub struct DelayLoadDescriptorIterator<'data> { + data: Bytes<'data>, +} + +impl<'data> DelayLoadDescriptorIterator<'data> { + /// Return the next descriptor. + /// + /// Returns `Ok(None)` when a null descriptor is found. + pub fn next(&mut self) -> Result<Option<&'data pe::ImageDelayloadDescriptor>> { + let import_desc = self + .data + .read::<pe::ImageDelayloadDescriptor>() + .read_error("Missing PE null delay-load import descriptor")?; + if import_desc.is_null() { + Ok(None) + } else { + Ok(Some(import_desc)) + } + } +} diff --git a/third_party/rust/object/src/read/pe/mod.rs b/third_party/rust/object/src/read/pe/mod.rs new file mode 100644 index 0000000000..2b7cc5d7a0 --- /dev/null +++ b/third_party/rust/object/src/read/pe/mod.rs @@ -0,0 +1,34 @@ +//! Support for reading PE files. +//! +//! Defines traits to abstract over the difference between PE32/PE32+, +//! and implements read functionality in terms of these traits. +//! +//! This module reuses some of the COFF functionality. +//! +//! Also provides `PeFile` and related types which implement the `Object` trait. + +mod file; +pub use file::*; + +mod section; +pub use section::*; + +mod data_directory; +pub use data_directory::*; + +mod export; +pub use export::*; + +mod import; +pub use import::*; + +mod relocation; +pub use relocation::*; + +mod resource; +pub use resource::*; + +mod rich; +pub use rich::*; + +pub use super::coff::{SectionTable, SymbolTable}; diff --git a/third_party/rust/object/src/read/pe/relocation.rs b/third_party/rust/object/src/read/pe/relocation.rs new file mode 100644 index 0000000000..06215bd1a7 --- /dev/null +++ b/third_party/rust/object/src/read/pe/relocation.rs @@ -0,0 +1,90 @@ +use core::slice; + +use crate::endian::{LittleEndian as LE, U16}; +use crate::pe; +use crate::read::{Bytes, Error, ReadError, Result}; + +/// An iterator over the relocation blocks in the `.reloc` section of a PE file. +#[derive(Debug, Default, Clone, Copy)] +pub struct RelocationBlockIterator<'data> { + data: Bytes<'data>, +} + +impl<'data> RelocationBlockIterator<'data> { + /// Construct a new iterator from the data of the `.reloc` section. + pub fn new(data: &'data [u8]) -> Self { + RelocationBlockIterator { data: Bytes(data) } + } + + /// Read the next relocation page. + pub fn next(&mut self) -> Result<Option<RelocationIterator<'data>>> { + if self.data.is_empty() { + return Ok(None); + } + let header = self + .data + .read::<pe::ImageBaseRelocation>() + .read_error("Invalid PE reloc section size")?; + let virtual_address = header.virtual_address.get(LE); + let size = header.size_of_block.get(LE); + if size <= 8 || size & 3 != 0 { + return Err(Error("Invalid PE reloc block size")); + } + let count = (size - 8) / 2; + let relocs = self + .data + .read_slice::<U16<LE>>(count as usize) + .read_error("Invalid PE reloc block size")? + .iter(); + Ok(Some(RelocationIterator { + virtual_address, + size, + relocs, + })) + } +} + +/// An iterator of the relocations in a block in the `.reloc` section of a PE file. +#[derive(Debug, Clone)] +pub struct RelocationIterator<'data> { + virtual_address: u32, + size: u32, + relocs: slice::Iter<'data, U16<LE>>, +} + +impl<'data> RelocationIterator<'data> { + /// Return the virtual address of the page that this block of relocations applies to. + pub fn virtual_address(&self) -> u32 { + self.virtual_address + } + + /// Return the size in bytes of this block of relocations. + pub fn size(&self) -> u32 { + self.size + } +} + +impl<'data> Iterator for RelocationIterator<'data> { + type Item = Relocation; + + fn next(&mut self) -> Option<Relocation> { + loop { + let reloc = self.relocs.next()?.get(LE); + if reloc != 0 { + return Some(Relocation { + virtual_address: self.virtual_address.wrapping_add((reloc & 0xfff) as u32), + typ: reloc >> 12, + }); + } + } + } +} + +/// A relocation in the `.reloc` section of a PE file. +#[derive(Debug, Default, Clone, Copy)] +pub struct Relocation { + /// The virtual address of the relocation. + pub virtual_address: u32, + /// One of the `pe::IMAGE_REL_BASED_*` constants. + pub typ: u16, +} diff --git a/third_party/rust/object/src/read/pe/resource.rs b/third_party/rust/object/src/read/pe/resource.rs new file mode 100644 index 0000000000..e667f0d98b --- /dev/null +++ b/third_party/rust/object/src/read/pe/resource.rs @@ -0,0 +1,207 @@ +use alloc::string::String; +use core::char; + +use crate::read::{ReadError, ReadRef, Result}; +use crate::{pe, LittleEndian as LE, U16Bytes}; + +/// The `.rsrc` section of a PE file. +#[derive(Debug, Clone, Copy)] +pub struct ResourceDirectory<'data> { + data: &'data [u8], +} + +impl<'data> ResourceDirectory<'data> { + /// Construct from the data of the `.rsrc` section. + pub fn new(data: &'data [u8]) -> Self { + ResourceDirectory { data } + } + + /// Parses the root resource directory. + pub fn root(&self) -> Result<ResourceDirectoryTable<'data>> { + ResourceDirectoryTable::parse(self.data, 0) + } +} + +/// A table of resource entries. +#[derive(Debug, Clone)] +pub struct ResourceDirectoryTable<'data> { + /// The table header. + pub header: &'data pe::ImageResourceDirectory, + /// The table entries. + pub entries: &'data [pe::ImageResourceDirectoryEntry], +} + +impl<'data> ResourceDirectoryTable<'data> { + fn parse(data: &'data [u8], offset: u32) -> Result<Self> { + let mut offset = u64::from(offset); + let header = data + .read::<pe::ImageResourceDirectory>(&mut offset) + .read_error("Invalid resource table header")?; + let entries_count = header.number_of_id_entries.get(LE) as usize + + header.number_of_named_entries.get(LE) as usize; + let entries = data + .read_slice::<pe::ImageResourceDirectoryEntry>(&mut offset, entries_count) + .read_error("Invalid resource table entries")?; + Ok(Self { header, entries }) + } +} + +impl pe::ImageResourceDirectoryEntry { + /// Returns true if the entry has a name, rather than an ID. + pub fn has_name(&self) -> bool { + self.name_or_id.get(LE) & pe::IMAGE_RESOURCE_NAME_IS_STRING != 0 + } + + /// Returns the section offset of the name. + /// + /// Valid if `has_name()` returns true. + fn name(&self) -> ResourceName { + let offset = self.name_or_id.get(LE) & !pe::IMAGE_RESOURCE_NAME_IS_STRING; + ResourceName { offset } + } + + /// Returns the ID. + /// + /// Valid if `has_string_name()` returns false. + fn id(&self) -> u16 { + (self.name_or_id.get(LE) & 0x0000_FFFF) as u16 + } + + /// Returns the entry name + pub fn name_or_id(&self) -> ResourceNameOrId { + if self.has_name() { + ResourceNameOrId::Name(self.name()) + } else { + ResourceNameOrId::Id(self.id()) + } + } + + /// Returns true if the entry is a subtable. + pub fn is_table(&self) -> bool { + self.offset_to_data_or_directory.get(LE) & pe::IMAGE_RESOURCE_DATA_IS_DIRECTORY != 0 + } + + /// Returns the section offset of the associated table or data. + pub fn data_offset(&self) -> u32 { + self.offset_to_data_or_directory.get(LE) & !pe::IMAGE_RESOURCE_DATA_IS_DIRECTORY + } + + /// Returns the data associated to this directory entry. + pub fn data<'data>( + &self, + section: ResourceDirectory<'data>, + ) -> Result<ResourceDirectoryEntryData<'data>> { + if self.is_table() { + ResourceDirectoryTable::parse(section.data, self.data_offset()) + .map(ResourceDirectoryEntryData::Table) + } else { + section + .data + .read_at::<pe::ImageResourceDataEntry>(self.data_offset().into()) + .read_error("Invalid resource entry") + .map(ResourceDirectoryEntryData::Data) + } + } +} + +/// Data associated with a resource directory entry. +#[derive(Debug, Clone)] +pub enum ResourceDirectoryEntryData<'data> { + /// A subtable entry. + Table(ResourceDirectoryTable<'data>), + /// A resource data entry. + Data(&'data pe::ImageResourceDataEntry), +} + +impl<'data> ResourceDirectoryEntryData<'data> { + /// Converts to an option of table. + /// + /// Helper for iterator filtering. + pub fn table(self) -> Option<ResourceDirectoryTable<'data>> { + match self { + Self::Table(dir) => Some(dir), + _ => None, + } + } + + /// Converts to an option of data entry. + /// + /// Helper for iterator filtering. + pub fn data(self) -> Option<&'data pe::ImageResourceDataEntry> { + match self { + Self::Data(rsc) => Some(rsc), + _ => None, + } + } +} + +/// A resource name. +#[derive(Debug, Clone, Copy)] +pub struct ResourceName { + offset: u32, +} + +impl ResourceName { + /// Converts to a `String`. + pub fn to_string_lossy(&self, directory: ResourceDirectory) -> Result<String> { + let d = self.data(directory)?.iter().map(|c| c.get(LE)); + + Ok(char::decode_utf16(d) + .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER)) + .collect::<String>()) + } + + /// Returns the string unicode buffer. + pub fn data<'data>( + &self, + directory: ResourceDirectory<'data>, + ) -> Result<&'data [U16Bytes<LE>]> { + let mut offset = u64::from(self.offset); + let len = directory + .data + .read::<U16Bytes<LE>>(&mut offset) + .read_error("Invalid resource name offset")?; + directory + .data + .read_slice::<U16Bytes<LE>>(&mut offset, len.get(LE).into()) + .read_error("Invalid resource name length") + } + + /// Returns the string buffer as raw bytes. + pub fn raw_data<'data>(&self, directory: ResourceDirectory<'data>) -> Result<&'data [u8]> { + self.data(directory).map(crate::pod::bytes_of_slice) + } +} + +/// A resource name or ID. +/// +/// Can be either a string or a numeric ID. +#[derive(Debug)] +pub enum ResourceNameOrId { + /// A resource name. + Name(ResourceName), + /// A resource ID. + Id(u16), +} + +impl ResourceNameOrId { + /// Converts to an option of name. + /// + /// Helper for iterator filtering. + pub fn name(self) -> Option<ResourceName> { + match self { + Self::Name(name) => Some(name), + _ => None, + } + } + + /// Converts to an option of ID. + /// + /// Helper for iterator filtering. + pub fn id(self) -> Option<u16> { + match self { + Self::Id(id) => Some(id), + _ => None, + } + } +} diff --git a/third_party/rust/object/src/read/pe/rich.rs b/third_party/rust/object/src/read/pe/rich.rs new file mode 100644 index 0000000000..687dfc9950 --- /dev/null +++ b/third_party/rust/object/src/read/pe/rich.rs @@ -0,0 +1,91 @@ +//! PE rich header handling + +use core::mem; + +use crate::pod::bytes_of_slice; +use crate::read::Bytes; +use crate::{pe, LittleEndian as LE, ReadRef, U32}; + +/// Parsed information about a Rich Header. +#[derive(Debug, Clone, Copy)] +pub struct RichHeaderInfo<'data> { + /// The offset at which the rich header starts. + pub offset: usize, + /// The length (in bytes) of the rich header. + /// + /// This includes the payload, but also the 16-byte start sequence and the + /// 8-byte final "Rich" and XOR key. + pub length: usize, + /// The XOR key used to mask the rich header. + /// + /// Unless the file has been tampered with, it should be equal to a checksum + /// of the file header. + pub xor_key: u32, + masked_entries: &'data [pe::MaskedRichHeaderEntry], +} + +/// A PE rich header entry after it has been unmasked. +/// +/// See [`pe::MaskedRichHeaderEntry`]. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct RichHeaderEntry { + /// ID of the component. + pub comp_id: u32, + /// Number of times this component has been used when building this PE. + pub count: u32, +} + +impl<'data> RichHeaderInfo<'data> { + /// Try to locate a rich header and its entries in the current PE file. + pub fn parse<R: ReadRef<'data>>(data: R, nt_header_offset: u64) -> Option<Self> { + // Locate the rich header, if any. + // It ends with the "Rich" string and an XOR key, before the NT header. + let data = data.read_bytes_at(0, nt_header_offset).map(Bytes).ok()?; + let end_marker_offset = memmem(data.0, b"Rich", 4)?; + let xor_key = *data.read_at::<U32<LE>>(end_marker_offset + 4).ok()?; + + // It starts at the masked "DanS" string and 3 masked zeroes. + let masked_start_marker = U32::new(LE, 0x536e_6144 ^ xor_key.get(LE)); + let start_header = [masked_start_marker, xor_key, xor_key, xor_key]; + let start_sequence = bytes_of_slice(&start_header); + let start_marker_offset = memmem(&data.0[..end_marker_offset], start_sequence, 4)?; + + // Extract the items between the markers. + let items_offset = start_marker_offset + start_sequence.len(); + let items_len = end_marker_offset - items_offset; + let item_count = items_len / mem::size_of::<pe::MaskedRichHeaderEntry>(); + let items = data.read_slice_at(items_offset, item_count).ok()?; + Some(RichHeaderInfo { + offset: start_marker_offset, + // Includes "Rich" marker and the XOR key. + length: end_marker_offset - start_marker_offset + 8, + xor_key: xor_key.get(LE), + masked_entries: items, + }) + } + + /// Returns an iterator over the unmasked entries. + pub fn unmasked_entries(&self) -> impl Iterator<Item = RichHeaderEntry> + 'data { + let xor_key = self.xor_key; + self.masked_entries + .iter() + .map(move |entry| RichHeaderEntry { + comp_id: entry.masked_comp_id.get(LE) ^ xor_key, + count: entry.masked_count.get(LE) ^ xor_key, + }) + } +} + +/// Find the offset of the first occurence of needle in the data. +/// +/// The offset must have the given alignment. +fn memmem(data: &[u8], needle: &[u8], align: usize) -> Option<usize> { + let mut offset = 0; + loop { + if data.get(offset..)?.get(..needle.len())? == needle { + return Some(offset); + } + offset += align; + } +} diff --git a/third_party/rust/object/src/read/pe/section.rs b/third_party/rust/object/src/read/pe/section.rs new file mode 100644 index 0000000000..439d42dac1 --- /dev/null +++ b/third_party/rust/object/src/read/pe/section.rs @@ -0,0 +1,436 @@ +use core::marker::PhantomData; +use core::{cmp, iter, slice, str}; + +use crate::endian::LittleEndian as LE; +use crate::pe; +use crate::pe::ImageSectionHeader; +use crate::read::{ + self, CompressedData, CompressedFileRange, ObjectSection, ObjectSegment, ReadError, ReadRef, + Relocation, Result, SectionFlags, SectionIndex, SectionKind, SegmentFlags, +}; + +use super::{ImageNtHeaders, PeFile, SectionTable}; + +/// An iterator over the loadable sections of a `PeFile32`. +pub type PeSegmentIterator32<'data, 'file, R = &'data [u8]> = + PeSegmentIterator<'data, 'file, pe::ImageNtHeaders32, R>; +/// An iterator over the loadable sections of a `PeFile64`. +pub type PeSegmentIterator64<'data, 'file, R = &'data [u8]> = + PeSegmentIterator<'data, 'file, pe::ImageNtHeaders64, R>; + +/// An iterator over the loadable sections of a `PeFile`. +#[derive(Debug)] +pub struct PeSegmentIterator<'data, 'file, Pe, R = &'data [u8]> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + pub(super) file: &'file PeFile<'data, Pe, R>, + pub(super) iter: slice::Iter<'data, pe::ImageSectionHeader>, +} + +impl<'data, 'file, Pe, R> Iterator for PeSegmentIterator<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + type Item = PeSegment<'data, 'file, Pe, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|section| PeSegment { + file: self.file, + section, + }) + } +} + +/// A loadable section of a `PeFile32`. +pub type PeSegment32<'data, 'file, R = &'data [u8]> = + PeSegment<'data, 'file, pe::ImageNtHeaders32, R>; +/// A loadable section of a `PeFile64`. +pub type PeSegment64<'data, 'file, R = &'data [u8]> = + PeSegment<'data, 'file, pe::ImageNtHeaders64, R>; + +/// A loadable section of a `PeFile`. +#[derive(Debug)] +pub struct PeSegment<'data, 'file, Pe, R = &'data [u8]> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + file: &'file PeFile<'data, Pe, R>, + section: &'data pe::ImageSectionHeader, +} + +impl<'data, 'file, Pe, R> read::private::Sealed for PeSegment<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Pe, R> ObjectSegment<'data> for PeSegment<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address.get(LE)).wrapping_add(self.file.common.image_base) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size.get(LE)) + } + + #[inline] + fn align(&self) -> u64 { + self.file.section_alignment() + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + let (offset, size) = self.section.pe_file_range(); + (u64::from(offset), u64::from(size)) + } + + fn data(&self) -> Result<&'data [u8]> { + self.section.pe_data(self.file.data) + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.data()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn name_bytes(&self) -> Result<Option<&[u8]>> { + self.section + .name(self.file.common.symbols.strings()) + .map(Some) + } + + #[inline] + fn name(&self) -> Result<Option<&str>> { + let name = self.section.name(self.file.common.symbols.strings())?; + Ok(Some( + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 PE section name")?, + )) + } + + #[inline] + fn flags(&self) -> SegmentFlags { + let characteristics = self.section.characteristics.get(LE); + SegmentFlags::Coff { characteristics } + } +} + +/// An iterator over the sections of a `PeFile32`. +pub type PeSectionIterator32<'data, 'file, R = &'data [u8]> = + PeSectionIterator<'data, 'file, pe::ImageNtHeaders32, R>; +/// An iterator over the sections of a `PeFile64`. +pub type PeSectionIterator64<'data, 'file, R = &'data [u8]> = + PeSectionIterator<'data, 'file, pe::ImageNtHeaders64, R>; + +/// An iterator over the sections of a `PeFile`. +#[derive(Debug)] +pub struct PeSectionIterator<'data, 'file, Pe, R = &'data [u8]> +where + 'data: 'file, + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + pub(super) file: &'file PeFile<'data, Pe, R>, + pub(super) iter: iter::Enumerate<slice::Iter<'data, pe::ImageSectionHeader>>, +} + +impl<'data, 'file, Pe, R> Iterator for PeSectionIterator<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + type Item = PeSection<'data, 'file, Pe, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| PeSection { + file: self.file, + index: SectionIndex(index + 1), + section, + }) + } +} + +/// A section of a `PeFile32`. +pub type PeSection32<'data, 'file, R = &'data [u8]> = + PeSection<'data, 'file, pe::ImageNtHeaders32, R>; +/// A section of a `PeFile64`. +pub type PeSection64<'data, 'file, R = &'data [u8]> = + PeSection<'data, 'file, pe::ImageNtHeaders64, R>; + +/// A section of a `PeFile`. +#[derive(Debug)] +pub struct PeSection<'data, 'file, Pe, R = &'data [u8]> +where + 'data: 'file, + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + pub(super) file: &'file PeFile<'data, Pe, R>, + pub(super) index: SectionIndex, + pub(super) section: &'data pe::ImageSectionHeader, +} + +impl<'data, 'file, Pe, R> read::private::Sealed for PeSection<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Pe, R> ObjectSection<'data> for PeSection<'data, 'file, Pe, R> +where + Pe: ImageNtHeaders, + R: ReadRef<'data>, +{ + type RelocationIterator = PeRelocationIterator<'data, 'file, R>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address.get(LE)).wrapping_add(self.file.common.image_base) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size.get(LE)) + } + + #[inline] + fn align(&self) -> u64 { + self.file.section_alignment() + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + let (offset, size) = self.section.pe_file_range(); + if size == 0 { + None + } else { + Some((u64::from(offset), u64::from(size))) + } + } + + fn data(&self) -> Result<&'data [u8]> { + self.section.pe_data(self.file.data) + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.data()?, + self.address(), + address, + size, + )) + } + + #[inline] + fn compressed_file_range(&self) -> Result<CompressedFileRange> { + Ok(CompressedFileRange::none(self.file_range())) + } + + #[inline] + fn compressed_data(&self) -> Result<CompressedData<'data>> { + self.data().map(CompressedData::none) + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + self.section.name(self.file.common.symbols.strings()) + } + + #[inline] + fn name(&self) -> Result<&str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 PE section name") + } + + #[inline] + fn segment_name_bytes(&self) -> Result<Option<&[u8]>> { + Ok(None) + } + + #[inline] + fn segment_name(&self) -> Result<Option<&str>> { + Ok(None) + } + + #[inline] + fn kind(&self) -> SectionKind { + self.section.kind() + } + + fn relocations(&self) -> PeRelocationIterator<'data, 'file, R> { + PeRelocationIterator(PhantomData) + } + + fn flags(&self) -> SectionFlags { + SectionFlags::Coff { + characteristics: self.section.characteristics.get(LE), + } + } +} + +impl<'data> SectionTable<'data> { + /// Return the file offset of the given virtual address, and the size up + /// to the end of the section containing it. + /// + /// Returns `None` if no section contains the address. + pub fn pe_file_range_at(&self, va: u32) -> Option<(u32, u32)> { + self.iter().find_map(|section| section.pe_file_range_at(va)) + } + + /// Return the data starting at the given virtual address, up to the end of the + /// section containing it. + /// + /// Ignores sections with invalid data. + /// + /// Returns `None` if no section contains the address. + pub fn pe_data_at<R: ReadRef<'data>>(&self, data: R, va: u32) -> Option<&'data [u8]> { + self.iter().find_map(|section| section.pe_data_at(data, va)) + } + + /// Return the data of the section that contains the given virtual address in a PE file. + /// + /// Also returns the virtual address of that section. + /// + /// Ignores sections with invalid data. + pub fn pe_data_containing<R: ReadRef<'data>>( + &self, + data: R, + va: u32, + ) -> Option<(&'data [u8], u32)> { + self.iter() + .find_map(|section| section.pe_data_containing(data, va)) + } + + /// Return the section that contains a given virtual address. + pub fn section_containing(&self, va: u32) -> Option<&'data ImageSectionHeader> { + self.iter().find(|section| section.contains_rva(va)) + } +} + +impl pe::ImageSectionHeader { + /// Return the offset and size of the section in a PE file. + /// + /// The size of the range will be the minimum of the file size and virtual size. + pub fn pe_file_range(&self) -> (u32, u32) { + // Pointer and size will be zero for uninitialized data; we don't need to validate this. + let offset = self.pointer_to_raw_data.get(LE); + let size = cmp::min(self.virtual_size.get(LE), self.size_of_raw_data.get(LE)); + (offset, size) + } + + /// Return the file offset of the given virtual address, and the remaining size up + /// to the end of the section. + /// + /// Returns `None` if the section does not contain the address. + pub fn pe_file_range_at(&self, va: u32) -> Option<(u32, u32)> { + let section_va = self.virtual_address.get(LE); + let offset = va.checked_sub(section_va)?; + let (section_offset, section_size) = self.pe_file_range(); + // Address must be within section (and not at its end). + if offset < section_size { + Some((section_offset.checked_add(offset)?, section_size - offset)) + } else { + None + } + } + + /// Return the virtual address and size of the section. + pub fn pe_address_range(&self) -> (u32, u32) { + (self.virtual_address.get(LE), self.virtual_size.get(LE)) + } + + /// Return the section data in a PE file. + /// + /// The length of the data will be the minimum of the file size and virtual size. + pub fn pe_data<'data, R: ReadRef<'data>>(&self, data: R) -> Result<&'data [u8]> { + let (offset, size) = self.pe_file_range(); + data.read_bytes_at(offset.into(), size.into()) + .read_error("Invalid PE section offset or size") + } + + /// Return the data starting at the given virtual address, up to the end of the + /// section. + /// + /// Ignores sections with invalid data. + /// + /// Returns `None` if the section does not contain the address. + pub fn pe_data_at<'data, R: ReadRef<'data>>(&self, data: R, va: u32) -> Option<&'data [u8]> { + let (offset, size) = self.pe_file_range_at(va)?; + data.read_bytes_at(offset.into(), size.into()).ok() + } + + /// Tests whether a given RVA is part of this section + pub fn contains_rva(&self, va: u32) -> bool { + let section_va = self.virtual_address.get(LE); + match va.checked_sub(section_va) { + None => false, + Some(offset) => { + // Address must be within section (and not at its end). + offset < self.virtual_size.get(LE) + } + } + } + + /// Return the section data if it contains the given virtual address. + /// + /// Also returns the virtual address of that section. + /// + /// Ignores sections with invalid data. + pub fn pe_data_containing<'data, R: ReadRef<'data>>( + &self, + data: R, + va: u32, + ) -> Option<(&'data [u8], u32)> { + let section_va = self.virtual_address.get(LE); + let offset = va.checked_sub(section_va)?; + let (section_offset, section_size) = self.pe_file_range(); + // Address must be within section (and not at its end). + if offset < section_size { + let section_data = data + .read_bytes_at(section_offset.into(), section_size.into()) + .ok()?; + Some((section_data, section_va)) + } else { + None + } + } +} + +/// An iterator over the relocations in an `PeSection`. +#[derive(Debug)] +pub struct PeRelocationIterator<'data, 'file, R = &'data [u8]>( + PhantomData<(&'data (), &'file (), R)>, +); + +impl<'data, 'file, R> Iterator for PeRelocationIterator<'data, 'file, R> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + None + } +} diff --git a/third_party/rust/object/src/read/read_cache.rs b/third_party/rust/object/src/read/read_cache.rs new file mode 100644 index 0000000000..19a98a44da --- /dev/null +++ b/third_party/rust/object/src/read/read_cache.rs @@ -0,0 +1,185 @@ +use core::ops::Range; +use std::boxed::Box; +use std::cell::RefCell; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::convert::TryInto; +use std::io::{Read, Seek, SeekFrom}; +use std::mem; +use std::vec::Vec; + +use crate::read::ReadRef; + +/// An implementation of `ReadRef` for data in a stream that implements +/// `Read + Seek`. +/// +/// Contains a cache of read-only blocks of data, allowing references to +/// them to be returned. Entries in the cache are never removed. +/// Entries are keyed on the offset and size of the read. +/// Currently overlapping reads are considered separate reads. +#[derive(Debug)] +pub struct ReadCache<R: Read + Seek> { + cache: RefCell<ReadCacheInternal<R>>, +} + +#[derive(Debug)] +struct ReadCacheInternal<R: Read + Seek> { + read: R, + bufs: HashMap<(u64, u64), Box<[u8]>>, + strings: HashMap<(u64, u8), Box<[u8]>>, +} + +impl<R: Read + Seek> ReadCache<R> { + /// Create an empty `ReadCache` for the given stream. + pub fn new(read: R) -> Self { + ReadCache { + cache: RefCell::new(ReadCacheInternal { + read, + bufs: HashMap::new(), + strings: HashMap::new(), + }), + } + } + + /// Return an implementation of `ReadRef` that restricts reads + /// to the given range of the stream. + pub fn range(&self, offset: u64, size: u64) -> ReadCacheRange<'_, R> { + ReadCacheRange { + r: self, + offset, + size, + } + } + + /// Free buffers used by the cache. + pub fn clear(&mut self) { + self.cache.borrow_mut().bufs.clear(); + } + + /// Unwrap this `ReadCache<R>`, returning the underlying reader. + pub fn into_inner(self) -> R { + self.cache.into_inner().read + } +} + +impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> { + fn len(self) -> Result<u64, ()> { + let cache = &mut *self.cache.borrow_mut(); + cache.read.seek(SeekFrom::End(0)).map_err(|_| ()) + } + + fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> { + if size == 0 { + return Ok(&[]); + } + let cache = &mut *self.cache.borrow_mut(); + let buf = match cache.bufs.entry((offset, size)) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(entry) => { + let size = size.try_into().map_err(|_| ())?; + cache + .read + .seek(SeekFrom::Start(offset as u64)) + .map_err(|_| ())?; + let mut bytes = vec![0; size].into_boxed_slice(); + cache.read.read_exact(&mut bytes).map_err(|_| ())?; + entry.insert(bytes) + } + }; + // Extend the lifetime to that of self. + // This is OK because we never mutate or remove entries. + Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) }) + } + + fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8], ()> { + let cache = &mut *self.cache.borrow_mut(); + let buf = match cache.strings.entry((range.start, delimiter)) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(entry) => { + cache + .read + .seek(SeekFrom::Start(range.start)) + .map_err(|_| ())?; + + let max_check: usize = (range.end - range.start).try_into().map_err(|_| ())?; + // Strings should be relatively small. + // TODO: make this configurable? + let max_check = max_check.min(4096); + + let mut bytes = Vec::new(); + let mut checked = 0; + loop { + bytes.resize((checked + 256).min(max_check), 0); + let read = cache.read.read(&mut bytes[checked..]).map_err(|_| ())?; + if read == 0 { + return Err(()); + } + if let Some(len) = memchr::memchr(delimiter, &bytes[checked..][..read]) { + bytes.truncate(checked + len); + break entry.insert(bytes.into_boxed_slice()); + } + checked += read; + if checked >= max_check { + return Err(()); + } + } + } + }; + // Extend the lifetime to that of self. + // This is OK because we never mutate or remove entries. + Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) }) + } +} + +/// An implementation of `ReadRef` for a range of data in a stream that +/// implements `Read + Seek`. +/// +/// Shares an underlying `ReadCache` with a lifetime of `'a`. +#[derive(Debug)] +pub struct ReadCacheRange<'a, R: Read + Seek> { + r: &'a ReadCache<R>, + offset: u64, + size: u64, +} + +impl<'a, R: Read + Seek> Clone for ReadCacheRange<'a, R> { + fn clone(&self) -> Self { + Self { + r: self.r, + offset: self.offset, + size: self.size, + } + } +} + +impl<'a, R: Read + Seek> Copy for ReadCacheRange<'a, R> {} + +impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> { + fn len(self) -> Result<u64, ()> { + Ok(self.size) + } + + fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8], ()> { + if size == 0 { + return Ok(&[]); + } + let end = offset.checked_add(size).ok_or(())?; + if end > self.size { + return Err(()); + } + let r_offset = self.offset.checked_add(offset).ok_or(())?; + self.r.read_bytes_at(r_offset, size) + } + + fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8], ()> { + let r_start = self.offset.checked_add(range.start).ok_or(())?; + let r_end = self.offset.checked_add(range.end).ok_or(())?; + let bytes = self.r.read_bytes_at_until(r_start..r_end, delimiter)?; + let size = bytes.len().try_into().map_err(|_| ())?; + let end = range.start.checked_add(size).ok_or(())?; + if end > self.size { + return Err(()); + } + Ok(bytes) + } +} diff --git a/third_party/rust/object/src/read/read_ref.rs b/third_party/rust/object/src/read/read_ref.rs new file mode 100644 index 0000000000..2f547a4e2c --- /dev/null +++ b/third_party/rust/object/src/read/read_ref.rs @@ -0,0 +1,137 @@ +#![allow(clippy::len_without_is_empty)] + +use core::convert::TryInto; +use core::ops::Range; +use core::{mem, result}; + +use crate::pod::{from_bytes, slice_from_bytes, Pod}; + +type Result<T> = result::Result<T, ()>; + +/// A trait for reading references to `Pod` types from a block of data. +/// +/// This allows parsers to handle both of these cases: +/// - the block of data exists in memory, and it is desirable +/// to use references to this block instead of copying it, +/// - the block of data exists in storage, and it is desirable +/// to read on demand to minimize I/O and memory usage. +/// +/// The methods accept `self` by value because `Self` is expected to behave +/// similar to a reference: it may be a reference with a lifetime of `'a`, +/// or it may be a wrapper of a reference. +/// +/// The `Clone` and `Copy` bounds are for convenience, and since `Self` is +/// expected to be similar to a reference, these are easily satisfied. +/// +/// Object file parsers typically use offsets to locate the structures +/// in the block, and will most commonly use the `*_at` methods to +/// read a structure at a known offset. +/// +/// Occasionally file parsers will need to treat the block as a stream, +/// and so convenience methods are provided that update an offset with +/// the size that was read. +// +// An alternative would be for methods to accept `&mut self` and use a +// `seek` method instead of the `offset` parameters, but this is less +// convenient for implementers. +pub trait ReadRef<'a>: Clone + Copy { + /// The total size of the block of data. + fn len(self) -> Result<u64>; + + /// Get a reference to a `u8` slice at the given offset. + /// + /// Returns an error if offset or size are out of bounds. + fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8]>; + + /// Get a reference to a delimited `u8` slice which starts at range.start. + /// + /// Does not include the delimiter. + /// + /// Returns an error if the range is out of bounds or the delimiter is + /// not found in the range. + fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8]>; + + /// Get a reference to a `u8` slice at the given offset, and update the offset. + /// + /// Returns an error if offset or size are out of bounds. + fn read_bytes(self, offset: &mut u64, size: u64) -> Result<&'a [u8]> { + let bytes = self.read_bytes_at(*offset, size)?; + *offset = offset.wrapping_add(size); + Ok(bytes) + } + + /// Get a reference to a `Pod` type at the given offset, and update the offset. + /// + /// Returns an error if offset or size are out of bounds. + /// + /// The default implementation uses `read_bytes`, and returns an error if + /// `read_bytes` does not return bytes with the correct alignment for `T`. + /// Implementors may want to provide their own implementation that ensures + /// the alignment can be satisified. Alternatively, only use this method with + /// types that do not need alignment (see the `unaligned` feature of this crate). + fn read<T: Pod>(self, offset: &mut u64) -> Result<&'a T> { + let size = mem::size_of::<T>().try_into().map_err(|_| ())?; + let bytes = self.read_bytes(offset, size)?; + let (t, _) = from_bytes(bytes)?; + Ok(t) + } + + /// Get a reference to a `Pod` type at the given offset. + /// + /// Returns an error if offset or size are out of bounds. + /// + /// Also see the `read` method for information regarding alignment of `T`. + fn read_at<T: Pod>(self, mut offset: u64) -> Result<&'a T> { + self.read(&mut offset) + } + + /// Get a reference to a slice of a `Pod` type at the given offset, and update the offset. + /// + /// Returns an error if offset or size are out of bounds. + /// + /// Also see the `read` method for information regarding alignment of `T`. + fn read_slice<T: Pod>(self, offset: &mut u64, count: usize) -> Result<&'a [T]> { + let size = count + .checked_mul(mem::size_of::<T>()) + .ok_or(())? + .try_into() + .map_err(|_| ())?; + let bytes = self.read_bytes(offset, size)?; + let (t, _) = slice_from_bytes(bytes, count)?; + Ok(t) + } + + /// Get a reference to a slice of a `Pod` type at the given offset. + /// + /// Returns an error if offset or size are out of bounds. + /// + /// Also see the `read` method for information regarding alignment of `T`. + fn read_slice_at<T: Pod>(self, mut offset: u64, count: usize) -> Result<&'a [T]> { + self.read_slice(&mut offset, count) + } +} + +impl<'a> ReadRef<'a> for &'a [u8] { + fn len(self) -> Result<u64> { + self.len().try_into().map_err(|_| ()) + } + + fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8]> { + let offset: usize = offset.try_into().map_err(|_| ())?; + let size: usize = size.try_into().map_err(|_| ())?; + self.get(offset..).ok_or(())?.get(..size).ok_or(()) + } + + fn read_bytes_at_until(self, range: Range<u64>, delimiter: u8) -> Result<&'a [u8]> { + let start: usize = range.start.try_into().map_err(|_| ())?; + let end: usize = range.end.try_into().map_err(|_| ())?; + let bytes = self.get(start..end).ok_or(())?; + match memchr::memchr(delimiter, bytes) { + Some(len) => { + // This will never fail. + bytes.get(..len).ok_or(()) + } + None => Err(()), + } + } +} diff --git a/third_party/rust/object/src/read/traits.rs b/third_party/rust/object/src/read/traits.rs new file mode 100644 index 0000000000..f1a473e0ad --- /dev/null +++ b/third_party/rust/object/src/read/traits.rs @@ -0,0 +1,469 @@ +use alloc::borrow::Cow; +use alloc::vec::Vec; + +use crate::read::{ + self, Architecture, CodeView, ComdatKind, CompressedData, CompressedFileRange, Export, + FileFlags, Import, ObjectKind, ObjectMap, Relocation, Result, SectionFlags, SectionIndex, + SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap, SymbolMapName, + SymbolScope, SymbolSection, +}; +use crate::Endianness; + +/// An object file. +pub trait Object<'data: 'file, 'file>: read::private::Sealed { + /// A segment in the object file. + type Segment: ObjectSegment<'data>; + + /// An iterator over the segments in the object file. + type SegmentIterator: Iterator<Item = Self::Segment>; + + /// A section in the object file. + type Section: ObjectSection<'data>; + + /// An iterator over the sections in the object file. + type SectionIterator: Iterator<Item = Self::Section>; + + /// A COMDAT section group in the object file. + type Comdat: ObjectComdat<'data>; + + /// An iterator over the COMDAT section groups in the object file. + type ComdatIterator: Iterator<Item = Self::Comdat>; + + /// A symbol in the object file. + type Symbol: ObjectSymbol<'data>; + + /// An iterator over symbols in the object file. + type SymbolIterator: Iterator<Item = Self::Symbol>; + + /// A symbol table in the object file. + type SymbolTable: ObjectSymbolTable< + 'data, + Symbol = Self::Symbol, + SymbolIterator = Self::SymbolIterator, + >; + + /// An iterator over dynamic relocations in the file. + /// + /// The first field in the item tuple is the address + /// that the relocation applies to. + type DynamicRelocationIterator: Iterator<Item = (u64, Relocation)>; + + /// Get the architecture type of the file. + fn architecture(&self) -> Architecture; + + /// Get the endianness of the file. + #[inline] + fn endianness(&self) -> Endianness { + if self.is_little_endian() { + Endianness::Little + } else { + Endianness::Big + } + } + + /// Return true if the file is little endian, false if it is big endian. + fn is_little_endian(&self) -> bool; + + /// Return true if the file can contain 64-bit addresses. + fn is_64(&self) -> bool; + + /// Return the kind of this object. + fn kind(&self) -> ObjectKind; + + /// Get an iterator over the segments in the file. + fn segments(&'file self) -> Self::SegmentIterator; + + /// Get the section named `section_name`, if such a section exists. + /// + /// If `section_name` starts with a '.' then it is treated as a system section name, + /// and is compared using the conventions specific to the object file format. This + /// includes: + /// - if ".debug_str_offsets" is requested for a Mach-O object file, then the actual + /// section name that is searched for is "__debug_str_offs". + /// - if ".debug_info" is requested for an ELF object file, then + /// ".zdebug_info" may be returned (and similarly for other debug sections). + /// + /// For some object files, multiple segments may contain sections with the same + /// name. In this case, the first matching section will be used. + /// + /// This method skips over sections with invalid names. + fn section_by_name(&'file self, section_name: &str) -> Option<Self::Section> { + self.section_by_name_bytes(section_name.as_bytes()) + } + + /// Like [`Self::section_by_name`], but allows names that are not UTF-8. + fn section_by_name_bytes(&'file self, section_name: &[u8]) -> Option<Self::Section>; + + /// Get the section at the given index. + /// + /// The meaning of the index depends on the object file. + /// + /// For some object files, this requires iterating through all sections. + /// + /// Returns an error if the index is invalid. + fn section_by_index(&'file self, index: SectionIndex) -> Result<Self::Section>; + + /// Get an iterator over the sections in the file. + fn sections(&'file self) -> Self::SectionIterator; + + /// Get an iterator over the COMDAT section groups in the file. + fn comdats(&'file self) -> Self::ComdatIterator; + + /// Get the symbol table, if any. + fn symbol_table(&'file self) -> Option<Self::SymbolTable>; + + /// Get the debugging symbol at the given index. + /// + /// The meaning of the index depends on the object file. + /// + /// Returns an error if the index is invalid. + fn symbol_by_index(&'file self, index: SymbolIndex) -> Result<Self::Symbol>; + + /// Get an iterator over the debugging symbols in the file. + /// + /// This may skip over symbols that are malformed or unsupported. + /// + /// For Mach-O files, this does not include STAB entries. + fn symbols(&'file self) -> Self::SymbolIterator; + + /// Get the dynamic linking symbol table, if any. + /// + /// Only ELF has a separate dynamic linking symbol table. + fn dynamic_symbol_table(&'file self) -> Option<Self::SymbolTable>; + + /// Get an iterator over the dynamic linking symbols in the file. + /// + /// This may skip over symbols that are malformed or unsupported. + /// + /// Only ELF has separate dynamic linking symbols. + /// Other file formats will return an empty iterator. + fn dynamic_symbols(&'file self) -> Self::SymbolIterator; + + /// Get the dynamic relocations for this file. + /// + /// Symbol indices in these relocations refer to the dynamic symbol table. + /// + /// Only ELF has dynamic relocations. + fn dynamic_relocations(&'file self) -> Option<Self::DynamicRelocationIterator>; + + /// Construct a map from addresses to symbol names. + /// + /// The map will only contain defined text and data symbols. + /// The dynamic symbol table will only be used if there are no debugging symbols. + fn symbol_map(&'file self) -> SymbolMap<SymbolMapName<'data>> { + let mut symbols = Vec::new(); + if let Some(table) = self.symbol_table().or_else(|| self.dynamic_symbol_table()) { + for symbol in table.symbols() { + if !symbol.is_definition() { + continue; + } + if let Ok(name) = symbol.name() { + symbols.push(SymbolMapName::new(symbol.address(), name)); + } + } + } + SymbolMap::new(symbols) + } + + /// Construct a map from addresses to symbol names and object file names. + /// + /// This is derived from Mach-O STAB entries. + fn object_map(&'file self) -> ObjectMap<'data> { + ObjectMap::default() + } + + /// Get the imported symbols. + fn imports(&self) -> Result<Vec<Import<'data>>>; + + /// Get the exported symbols that expose both a name and an address. + /// + /// Some file formats may provide other kinds of symbols, that can be retrieved using + /// the lower-level API. + fn exports(&self) -> Result<Vec<Export<'data>>>; + + /// Return true if the file contains debug information sections, false if not. + fn has_debug_symbols(&self) -> bool; + + /// The UUID from a Mach-O `LC_UUID` load command. + #[inline] + fn mach_uuid(&self) -> Result<Option<[u8; 16]>> { + Ok(None) + } + + /// The build ID from an ELF `NT_GNU_BUILD_ID` note. + #[inline] + fn build_id(&self) -> Result<Option<&'data [u8]>> { + Ok(None) + } + + /// The filename and CRC from a `.gnu_debuglink` section. + #[inline] + fn gnu_debuglink(&self) -> Result<Option<(&'data [u8], u32)>> { + Ok(None) + } + + /// The filename and build ID from a `.gnu_debugaltlink` section. + #[inline] + fn gnu_debugaltlink(&self) -> Result<Option<(&'data [u8], &'data [u8])>> { + Ok(None) + } + + /// The filename and GUID from the PE CodeView section + #[inline] + fn pdb_info(&self) -> Result<Option<CodeView>> { + Ok(None) + } + + /// Get the base address used for relative virtual addresses. + /// + /// Currently this is only non-zero for PE. + fn relative_address_base(&'file self) -> u64; + + /// Get the virtual address of the entry point of the binary + fn entry(&'file self) -> u64; + + /// File flags that are specific to each file format. + fn flags(&self) -> FileFlags; +} + +/// A loadable segment defined in an object file. +/// +/// For ELF, this is a program header with type `PT_LOAD`. +/// For Mach-O, this is a load command with type `LC_SEGMENT` or `LC_SEGMENT_64`. +pub trait ObjectSegment<'data>: read::private::Sealed { + /// Returns the virtual address of the segment. + fn address(&self) -> u64; + + /// Returns the size of the segment in memory. + fn size(&self) -> u64; + + /// Returns the alignment of the segment in memory. + fn align(&self) -> u64; + + /// Returns the offset and size of the segment in the file. + fn file_range(&self) -> (u64, u64); + + /// Returns a reference to the file contents of the segment. + /// + /// The length of this data may be different from the size of the + /// segment in memory. + fn data(&self) -> Result<&'data [u8]>; + + /// Return the segment data in the given range. + /// + /// Returns `Ok(None)` if the segment does not contain the given range. + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>>; + + /// Returns the name of the segment. + fn name_bytes(&self) -> Result<Option<&[u8]>>; + + /// Returns the name of the segment. + /// + /// Returns an error if the name is not UTF-8. + fn name(&self) -> Result<Option<&str>>; + + /// Return the flags of segment. + fn flags(&self) -> SegmentFlags; +} + +/// A section defined in an object file. +pub trait ObjectSection<'data>: read::private::Sealed { + /// An iterator over the relocations for a section. + /// + /// The first field in the item tuple is the section offset + /// that the relocation applies to. + type RelocationIterator: Iterator<Item = (u64, Relocation)>; + + /// Returns the section index. + fn index(&self) -> SectionIndex; + + /// Returns the address of the section. + fn address(&self) -> u64; + + /// Returns the size of the section in memory. + fn size(&self) -> u64; + + /// Returns the alignment of the section in memory. + fn align(&self) -> u64; + + /// Returns offset and size of on-disk segment (if any). + fn file_range(&self) -> Option<(u64, u64)>; + + /// Returns the raw contents of the section. + /// + /// The length of this data may be different from the size of the + /// section in memory. + /// + /// This does not do any decompression. + fn data(&self) -> Result<&'data [u8]>; + + /// Return the raw contents of the section data in the given range. + /// + /// This does not do any decompression. + /// + /// Returns `Ok(None)` if the section does not contain the given range. + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>>; + + /// Returns the potentially compressed file range of the section, + /// along with information about the compression. + fn compressed_file_range(&self) -> Result<CompressedFileRange>; + + /// Returns the potentially compressed contents of the section, + /// along with information about the compression. + fn compressed_data(&self) -> Result<CompressedData<'data>>; + + /// Returns the uncompressed contents of the section. + /// + /// The length of this data may be different from the size of the + /// section in memory. + /// + /// If no compression is detected, then returns the data unchanged. + /// Returns `Err` if decompression fails. + fn uncompressed_data(&self) -> Result<Cow<'data, [u8]>> { + self.compressed_data()?.decompress() + } + + /// Returns the name of the section. + fn name_bytes(&self) -> Result<&[u8]>; + + /// Returns the name of the section. + /// + /// Returns an error if the name is not UTF-8. + fn name(&self) -> Result<&str>; + + /// Returns the name of the segment for this section. + fn segment_name_bytes(&self) -> Result<Option<&[u8]>>; + + /// Returns the name of the segment for this section. + /// + /// Returns an error if the name is not UTF-8. + fn segment_name(&self) -> Result<Option<&str>>; + + /// Return the kind of this section. + fn kind(&self) -> SectionKind; + + /// Get the relocations for this section. + fn relocations(&self) -> Self::RelocationIterator; + + /// Section flags that are specific to each file format. + fn flags(&self) -> SectionFlags; +} + +/// A COMDAT section group defined in an object file. +pub trait ObjectComdat<'data>: read::private::Sealed { + /// An iterator over the sections in the object file. + type SectionIterator: Iterator<Item = SectionIndex>; + + /// Returns the COMDAT selection kind. + fn kind(&self) -> ComdatKind; + + /// Returns the index of the symbol used for the name of COMDAT section group. + fn symbol(&self) -> SymbolIndex; + + /// Returns the name of the COMDAT section group. + fn name_bytes(&self) -> Result<&[u8]>; + + /// Returns the name of the COMDAT section group. + /// + /// Returns an error if the name is not UTF-8. + fn name(&self) -> Result<&str>; + + /// Get the sections in this section group. + fn sections(&self) -> Self::SectionIterator; +} + +/// A symbol table. +pub trait ObjectSymbolTable<'data>: read::private::Sealed { + /// A symbol table entry. + type Symbol: ObjectSymbol<'data>; + + /// An iterator over the symbols in a symbol table. + type SymbolIterator: Iterator<Item = Self::Symbol>; + + /// Get an iterator over the symbols in the table. + /// + /// This may skip over symbols that are malformed or unsupported. + fn symbols(&self) -> Self::SymbolIterator; + + /// Get the symbol at the given index. + /// + /// The meaning of the index depends on the object file. + /// + /// Returns an error if the index is invalid. + fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol>; +} + +/// A symbol table entry. +pub trait ObjectSymbol<'data>: read::private::Sealed { + /// The index of the symbol. + fn index(&self) -> SymbolIndex; + + /// The name of the symbol. + fn name_bytes(&self) -> Result<&'data [u8]>; + + /// The name of the symbol. + /// + /// Returns an error if the name is not UTF-8. + fn name(&self) -> Result<&'data str>; + + /// The address of the symbol. May be zero if the address is unknown. + fn address(&self) -> u64; + + /// The size of the symbol. May be zero if the size is unknown. + fn size(&self) -> u64; + + /// Return the kind of this symbol. + fn kind(&self) -> SymbolKind; + + /// Returns the section where the symbol is defined. + fn section(&self) -> SymbolSection; + + /// Returns the section index for the section containing this symbol. + /// + /// May return `None` if the symbol is not defined in a section. + fn section_index(&self) -> Option<SectionIndex> { + self.section().index() + } + + /// Return true if the symbol is undefined. + fn is_undefined(&self) -> bool; + + /// Return true if the symbol is a definition of a function or data object + /// that has a known address. + fn is_definition(&self) -> bool; + + /// Return true if the symbol is common data. + /// + /// Note: does not check for `SymbolSection::Section` with `SectionKind::Common`. + fn is_common(&self) -> bool; + + /// Return true if the symbol is weak. + fn is_weak(&self) -> bool; + + /// Returns the symbol scope. + fn scope(&self) -> SymbolScope; + + /// Return true if the symbol visible outside of the compilation unit. + /// + /// This treats `SymbolScope::Unknown` as global. + fn is_global(&self) -> bool; + + /// Return true if the symbol is only visible within the compilation unit. + fn is_local(&self) -> bool; + + /// Symbol flags that are specific to each file format. + fn flags(&self) -> SymbolFlags<SectionIndex>; +} + +/// An iterator for files that don't have dynamic relocations. +#[derive(Debug)] +pub struct NoDynamicRelocationIterator; + +impl Iterator for NoDynamicRelocationIterator { + type Item = (u64, Relocation); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} diff --git a/third_party/rust/object/src/read/util.rs b/third_party/rust/object/src/read/util.rs new file mode 100644 index 0000000000..842bd6ca16 --- /dev/null +++ b/third_party/rust/object/src/read/util.rs @@ -0,0 +1,383 @@ +use alloc::string::String; +use core::convert::TryInto; +use core::fmt; +use core::marker::PhantomData; + +use crate::pod::{from_bytes, slice_from_bytes, Pod}; +use crate::ReadRef; + +/// A newtype for byte slices. +/// +/// It has these important features: +/// - no methods that can panic, such as `Index` +/// - convenience methods for `Pod` types +/// - a useful `Debug` implementation +#[derive(Default, Clone, Copy, PartialEq, Eq)] +pub struct Bytes<'data>(pub &'data [u8]); + +impl<'data> fmt::Debug for Bytes<'data> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + debug_list_bytes(self.0, fmt) + } +} + +impl<'data> Bytes<'data> { + /// Return the length of the byte slice. + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Return true if the byte slice is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Skip over the given number of bytes at the start of the byte slice. + /// + /// Modifies the byte slice to start after the bytes. + /// + /// Returns an error if there are too few bytes. + #[inline] + pub fn skip(&mut self, offset: usize) -> Result<(), ()> { + match self.0.get(offset..) { + Some(tail) => { + self.0 = tail; + Ok(()) + } + None => { + self.0 = &[]; + Err(()) + } + } + } + + /// Return a reference to the given number of bytes at the start of the byte slice. + /// + /// Modifies the byte slice to start after the bytes. + /// + /// Returns an error if there are too few bytes. + #[inline] + pub fn read_bytes(&mut self, count: usize) -> Result<Bytes<'data>, ()> { + match (self.0.get(..count), self.0.get(count..)) { + (Some(head), Some(tail)) => { + self.0 = tail; + Ok(Bytes(head)) + } + _ => { + self.0 = &[]; + Err(()) + } + } + } + + /// Return a reference to the given number of bytes at the given offset of the byte slice. + /// + /// Returns an error if the offset is invalid or there are too few bytes. + #[inline] + pub fn read_bytes_at(mut self, offset: usize, count: usize) -> Result<Bytes<'data>, ()> { + self.skip(offset)?; + self.read_bytes(count) + } + + /// Return a reference to a `Pod` struct at the start of the byte slice. + /// + /// Modifies the byte slice to start after the bytes. + /// + /// Returns an error if there are too few bytes or the slice is incorrectly aligned. + #[inline] + pub fn read<T: Pod>(&mut self) -> Result<&'data T, ()> { + match from_bytes(self.0) { + Ok((value, tail)) => { + self.0 = tail; + Ok(value) + } + Err(()) => { + self.0 = &[]; + Err(()) + } + } + } + + /// Return a reference to a `Pod` struct at the given offset of the byte slice. + /// + /// Returns an error if there are too few bytes or the offset is incorrectly aligned. + #[inline] + pub fn read_at<T: Pod>(mut self, offset: usize) -> Result<&'data T, ()> { + self.skip(offset)?; + self.read() + } + + /// Return a reference to a slice of `Pod` structs at the start of the byte slice. + /// + /// Modifies the byte slice to start after the bytes. + /// + /// Returns an error if there are too few bytes or the offset is incorrectly aligned. + #[inline] + pub fn read_slice<T: Pod>(&mut self, count: usize) -> Result<&'data [T], ()> { + match slice_from_bytes(self.0, count) { + Ok((value, tail)) => { + self.0 = tail; + Ok(value) + } + Err(()) => { + self.0 = &[]; + Err(()) + } + } + } + + /// Return a reference to a slice of `Pod` structs at the given offset of the byte slice. + /// + /// Returns an error if there are too few bytes or the offset is incorrectly aligned. + #[inline] + pub fn read_slice_at<T: Pod>(mut self, offset: usize, count: usize) -> Result<&'data [T], ()> { + self.skip(offset)?; + self.read_slice(count) + } + + /// Read a null terminated string. + /// + /// Does not assume any encoding. + /// Reads past the null byte, but doesn't return it. + #[inline] + pub fn read_string(&mut self) -> Result<&'data [u8], ()> { + match memchr::memchr(b'\0', self.0) { + Some(null) => { + // These will never fail. + let bytes = self.read_bytes(null)?; + self.skip(1)?; + Ok(bytes.0) + } + None => { + self.0 = &[]; + Err(()) + } + } + } + + /// Read a null terminated string at an offset. + /// + /// Does not assume any encoding. Does not return the null byte. + #[inline] + pub fn read_string_at(mut self, offset: usize) -> Result<&'data [u8], ()> { + self.skip(offset)?; + self.read_string() + } +} + +// Only for Debug impl of `Bytes`. +fn debug_list_bytes(bytes: &[u8], fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = fmt.debug_list(); + list.entries(bytes.iter().take(8).copied().map(DebugByte)); + if bytes.len() > 8 { + list.entry(&DebugLen(bytes.len())); + } + list.finish() +} + +struct DebugByte(u8); + +impl fmt::Debug for DebugByte { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "0x{:02x}", self.0) + } +} + +struct DebugLen(usize); + +impl fmt::Debug for DebugLen { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "...; {}", self.0) + } +} + +/// A newtype for byte strings. +/// +/// For byte slices that are strings of an unknown encoding. +/// +/// Provides a `Debug` implementation that interprets the bytes as UTF-8. +#[derive(Default, Clone, Copy, PartialEq, Eq)] +pub(crate) struct ByteString<'data>(pub &'data [u8]); + +impl<'data> fmt::Debug for ByteString<'data> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "\"{}\"", String::from_utf8_lossy(self.0)) + } +} + +#[allow(dead_code)] +#[inline] +pub(crate) fn align(offset: usize, size: usize) -> usize { + (offset + (size - 1)) & !(size - 1) +} + +#[allow(dead_code)] +pub(crate) fn data_range( + data: &[u8], + data_address: u64, + range_address: u64, + size: u64, +) -> Option<&[u8]> { + let offset = range_address.checked_sub(data_address)?; + data.get(offset.try_into().ok()?..)? + .get(..size.try_into().ok()?) +} + +/// A table of zero-terminated strings. +/// +/// This is used for most file formats. +#[derive(Debug, Clone, Copy)] +pub struct StringTable<'data, R = &'data [u8]> +where + R: ReadRef<'data>, +{ + data: Option<R>, + start: u64, + end: u64, + marker: PhantomData<&'data ()>, +} + +impl<'data, R: ReadRef<'data>> StringTable<'data, R> { + /// Interpret the given data as a string table. + pub fn new(data: R, start: u64, end: u64) -> Self { + StringTable { + data: Some(data), + start, + end, + marker: PhantomData, + } + } + + /// Return the string at the given offset. + pub fn get(&self, offset: u32) -> Result<&'data [u8], ()> { + match self.data { + Some(data) => { + let r_start = self.start.checked_add(offset.into()).ok_or(())?; + data.read_bytes_at_until(r_start..self.end, 0) + } + None => Err(()), + } + } +} + +impl<'data, R: ReadRef<'data>> Default for StringTable<'data, R> { + fn default() -> Self { + StringTable { + data: None, + start: 0, + end: 0, + marker: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::pod::bytes_of; + + #[test] + fn bytes() { + let x = u32::to_be(0x0123_4567); + let data = Bytes(bytes_of(&x)); + + let mut bytes = data; + assert_eq!(bytes.skip(0), Ok(())); + assert_eq!(bytes, data); + + let mut bytes = data; + assert_eq!(bytes.skip(4), Ok(())); + assert_eq!(bytes, Bytes(&[])); + + let mut bytes = data; + assert_eq!(bytes.skip(5), Err(())); + assert_eq!(bytes, Bytes(&[])); + + let mut bytes = data; + assert_eq!(bytes.read_bytes(0), Ok(Bytes(&[]))); + assert_eq!(bytes, data); + + let mut bytes = data; + assert_eq!(bytes.read_bytes(4), Ok(data)); + assert_eq!(bytes, Bytes(&[])); + + let mut bytes = data; + assert_eq!(bytes.read_bytes(5), Err(())); + assert_eq!(bytes, Bytes(&[])); + + assert_eq!(data.read_bytes_at(0, 0), Ok(Bytes(&[]))); + assert_eq!(data.read_bytes_at(4, 0), Ok(Bytes(&[]))); + assert_eq!(data.read_bytes_at(0, 4), Ok(data)); + assert_eq!(data.read_bytes_at(1, 4), Err(())); + + let mut bytes = data; + assert_eq!(bytes.read::<u16>(), Ok(&u16::to_be(0x0123))); + assert_eq!(bytes, Bytes(&[0x45, 0x67])); + assert_eq!(data.read_at::<u16>(2), Ok(&u16::to_be(0x4567))); + assert_eq!(data.read_at::<u16>(3), Err(())); + assert_eq!(data.read_at::<u16>(4), Err(())); + + let mut bytes = data; + assert_eq!(bytes.read::<u32>(), Ok(&x)); + assert_eq!(bytes, Bytes(&[])); + + let mut bytes = data; + assert_eq!(bytes.read::<u64>(), Err(())); + assert_eq!(bytes, Bytes(&[])); + + let mut bytes = data; + assert_eq!(bytes.read_slice::<u8>(0), Ok(&[][..])); + assert_eq!(bytes, data); + + let mut bytes = data; + assert_eq!(bytes.read_slice::<u8>(4), Ok(data.0)); + assert_eq!(bytes, Bytes(&[])); + + let mut bytes = data; + assert_eq!(bytes.read_slice::<u8>(5), Err(())); + assert_eq!(bytes, Bytes(&[])); + + assert_eq!(data.read_slice_at::<u8>(0, 0), Ok(&[][..])); + assert_eq!(data.read_slice_at::<u8>(4, 0), Ok(&[][..])); + assert_eq!(data.read_slice_at::<u8>(0, 4), Ok(data.0)); + assert_eq!(data.read_slice_at::<u8>(1, 4), Err(())); + + let data = Bytes(&[0x01, 0x02, 0x00, 0x04]); + + let mut bytes = data; + assert_eq!(bytes.read_string(), Ok(&data.0[..2])); + assert_eq!(bytes.0, &data.0[3..]); + + let mut bytes = data; + bytes.skip(3).unwrap(); + assert_eq!(bytes.read_string(), Err(())); + assert_eq!(bytes.0, &[]); + + assert_eq!(data.read_string_at(0), Ok(&data.0[..2])); + assert_eq!(data.read_string_at(1), Ok(&data.0[1..2])); + assert_eq!(data.read_string_at(2), Ok(&[][..])); + assert_eq!(data.read_string_at(3), Err(())); + } + + #[test] + fn bytes_debug() { + assert_eq!(format!("{:?}", Bytes(&[])), "[]"); + assert_eq!(format!("{:?}", Bytes(&[0x01])), "[0x01]"); + assert_eq!( + format!( + "{:?}", + Bytes(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]) + ), + "[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]" + ); + assert_eq!( + format!( + "{:?}", + Bytes(&[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09]) + ), + "[0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, ...; 9]" + ); + } +} diff --git a/third_party/rust/object/src/read/wasm.rs b/third_party/rust/object/src/read/wasm.rs new file mode 100644 index 0000000000..0113f59714 --- /dev/null +++ b/third_party/rust/object/src/read/wasm.rs @@ -0,0 +1,908 @@ +//! Support for reading Wasm files. +//! +//! Provides `WasmFile` and related types which implement the `Object` trait. +//! +//! Currently implements the minimum required to access DWARF debugging information. +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::marker::PhantomData; +use core::{slice, str}; +use wasmparser as wp; + +use crate::read::{ + self, Architecture, ComdatKind, CompressedData, CompressedFileRange, Error, Export, FileFlags, + Import, NoDynamicRelocationIterator, Object, ObjectComdat, ObjectKind, ObjectSection, + ObjectSegment, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Relocation, Result, + SectionFlags, SectionIndex, SectionKind, SegmentFlags, SymbolFlags, SymbolIndex, SymbolKind, + SymbolScope, SymbolSection, +}; + +const SECTION_CUSTOM: usize = 0; +const SECTION_TYPE: usize = 1; +const SECTION_IMPORT: usize = 2; +const SECTION_FUNCTION: usize = 3; +const SECTION_TABLE: usize = 4; +const SECTION_MEMORY: usize = 5; +const SECTION_GLOBAL: usize = 6; +const SECTION_EXPORT: usize = 7; +const SECTION_START: usize = 8; +const SECTION_ELEMENT: usize = 9; +const SECTION_CODE: usize = 10; +const SECTION_DATA: usize = 11; +const SECTION_DATA_COUNT: usize = 12; +// Update this constant when adding new section id: +const MAX_SECTION_ID: usize = SECTION_DATA_COUNT; + +/// A WebAssembly object file. +#[derive(Debug)] +pub struct WasmFile<'data, R = &'data [u8]> { + // All sections, including custom sections. + sections: Vec<wp::Section<'data>>, + // Indices into `sections` of sections with a non-zero id. + id_sections: Box<[Option<usize>; MAX_SECTION_ID + 1]>, + // Whether the file has DWARF information. + has_debug_symbols: bool, + // Symbols collected from imports, exports, code and name sections. + symbols: Vec<WasmSymbolInternal<'data>>, + // Address of the function body for the entry point. + entry: u64, + marker: PhantomData<R>, +} + +#[derive(Clone)] +enum LocalFunctionKind { + Unknown, + Exported { symbol_ids: Vec<u32> }, + Local { symbol_id: u32 }, +} + +impl<T> ReadError<T> for wasmparser::Result<T> { + fn read_error(self, error: &'static str) -> Result<T> { + self.map_err(|_| Error(error)) + } +} + +impl<'data, R: ReadRef<'data>> WasmFile<'data, R> { + /// Parse the raw wasm data. + pub fn parse(data: R) -> Result<Self> { + let len = data.len().read_error("Unknown Wasm file size")?; + let data = data.read_bytes_at(0, len).read_error("Wasm read failed")?; + let module = wp::ModuleReader::new(data).read_error("Invalid Wasm header")?; + + let mut file = WasmFile { + sections: Vec::new(), + id_sections: Default::default(), + has_debug_symbols: false, + symbols: Vec::new(), + entry: 0, + marker: PhantomData, + }; + + let mut main_file_symbol = Some(WasmSymbolInternal { + name: "", + address: 0, + size: 0, + kind: SymbolKind::File, + section: SymbolSection::None, + scope: SymbolScope::Compilation, + }); + + let mut imported_funcs_count = 0; + let mut local_func_kinds = Vec::new(); + let mut entry_func_id = None; + + for section in module { + let section = section.read_error("Invalid Wasm section header")?; + + match section.code { + wp::SectionCode::Import => { + let mut last_module_name = None; + + for import in section + .get_import_section_reader() + .read_error("Couldn't read header of the import section")? + { + let import = import.read_error("Couldn't read an import item")?; + let module_name = import.module; + + if last_module_name != Some(module_name) { + file.symbols.push(WasmSymbolInternal { + name: module_name, + address: 0, + size: 0, + kind: SymbolKind::File, + section: SymbolSection::None, + scope: SymbolScope::Dynamic, + }); + last_module_name = Some(module_name); + } + + let kind = match import.ty { + wp::ImportSectionEntryType::Function(_) => { + imported_funcs_count += 1; + SymbolKind::Text + } + wp::ImportSectionEntryType::Table(_) + | wp::ImportSectionEntryType::Memory(_) + | wp::ImportSectionEntryType::Global(_) => SymbolKind::Data, + }; + + file.symbols.push(WasmSymbolInternal { + name: import.field, + address: 0, + size: 0, + kind, + section: SymbolSection::Undefined, + scope: SymbolScope::Dynamic, + }); + } + } + wp::SectionCode::Function => { + local_func_kinds = vec![ + LocalFunctionKind::Unknown; + section + .get_function_section_reader() + .read_error("Couldn't read header of the function section")? + .get_count() as usize + ]; + } + wp::SectionCode::Export => { + if let Some(main_file_symbol) = main_file_symbol.take() { + file.symbols.push(main_file_symbol); + } + + for export in section + .get_export_section_reader() + .read_error("Couldn't read header of the export section")? + { + let export = export.read_error("Couldn't read an export item")?; + + let (kind, section_idx) = match export.kind { + wp::ExternalKind::Function => { + if let Some(local_func_id) = + export.index.checked_sub(imported_funcs_count) + { + let local_func_kind = + &mut local_func_kinds[local_func_id as usize]; + if let LocalFunctionKind::Unknown = local_func_kind { + *local_func_kind = LocalFunctionKind::Exported { + symbol_ids: Vec::new(), + }; + } + let symbol_ids = match local_func_kind { + LocalFunctionKind::Exported { symbol_ids } => symbol_ids, + _ => unreachable!(), + }; + symbol_ids.push(file.symbols.len() as u32); + } + (SymbolKind::Text, SECTION_CODE) + } + wp::ExternalKind::Table + | wp::ExternalKind::Memory + | wp::ExternalKind::Global => (SymbolKind::Data, SECTION_DATA), + }; + + file.symbols.push(WasmSymbolInternal { + name: export.field, + address: 0, + size: 0, + kind, + section: SymbolSection::Section(SectionIndex(section_idx)), + scope: SymbolScope::Dynamic, + }); + } + } + wp::SectionCode::Start => { + entry_func_id = Some( + section + .get_start_section_content() + .read_error("Couldn't read contents of the start section")?, + ); + } + wp::SectionCode::Code => { + if let Some(main_file_symbol) = main_file_symbol.take() { + file.symbols.push(main_file_symbol); + } + + for (i, (body, local_func_kind)) in section + .get_code_section_reader() + .read_error("Couldn't read header of the code section")? + .into_iter() + .zip(&mut local_func_kinds) + .enumerate() + { + let body = body.read_error("Couldn't read a function body")?; + let range = body.range(); + + let address = range.start as u64 - section.range().start as u64; + let size = (range.end - range.start) as u64; + + if entry_func_id == Some(i as u32) { + file.entry = address; + } + + match local_func_kind { + LocalFunctionKind::Unknown => { + *local_func_kind = LocalFunctionKind::Local { + symbol_id: file.symbols.len() as u32, + }; + file.symbols.push(WasmSymbolInternal { + name: "", + address, + size, + kind: SymbolKind::Text, + section: SymbolSection::Section(SectionIndex(SECTION_CODE)), + scope: SymbolScope::Compilation, + }); + } + LocalFunctionKind::Exported { symbol_ids } => { + for symbol_id in core::mem::take(symbol_ids) { + let export_symbol = &mut file.symbols[symbol_id as usize]; + export_symbol.address = address; + export_symbol.size = size; + } + } + _ => unreachable!(), + } + } + } + wp::SectionCode::Custom { + kind: wp::CustomSectionKind::Name, + .. + } => { + for name in section + .get_name_section_reader() + .read_error("Couldn't read header of the name section")? + { + // TODO: Right now, ill-formed name subsections + // are silently ignored in order to maintain + // compatibility with extended name sections, which + // are not yet supported by the version of + // `wasmparser` currently used. + // A better fix would be to update `wasmparser` to + // the newest version, but this requires + // a major rewrite of this file. + if let Ok(wp::Name::Function(name)) = name { + let mut name_map = name.get_map().read_error( + "Couldn't read header of the function name subsection", + )?; + for _ in 0..name_map.get_count() { + let naming = name_map + .read() + .read_error("Couldn't read a function name")?; + if let Some(local_index) = + naming.index.checked_sub(imported_funcs_count) + { + if let LocalFunctionKind::Local { symbol_id } = + local_func_kinds[local_index as usize] + { + file.symbols[symbol_id as usize].name = naming.name; + } + } + } + } + } + } + wp::SectionCode::Custom { name, .. } if name.starts_with(".debug_") => { + file.has_debug_symbols = true; + } + _ => {} + } + + let id = section_code_to_id(section.code); + file.id_sections[id] = Some(file.sections.len()); + + file.sections.push(section); + } + + Ok(file) + } +} + +impl<'data, R> read::private::Sealed for WasmFile<'data, R> {} + +impl<'data, 'file, R> Object<'data, 'file> for WasmFile<'data, R> +where + 'data: 'file, + R: 'file, +{ + type Segment = WasmSegment<'data, 'file, R>; + type SegmentIterator = WasmSegmentIterator<'data, 'file, R>; + type Section = WasmSection<'data, 'file, R>; + type SectionIterator = WasmSectionIterator<'data, 'file, R>; + type Comdat = WasmComdat<'data, 'file, R>; + type ComdatIterator = WasmComdatIterator<'data, 'file, R>; + type Symbol = WasmSymbol<'data, 'file>; + type SymbolIterator = WasmSymbolIterator<'data, 'file>; + type SymbolTable = WasmSymbolTable<'data, 'file>; + type DynamicRelocationIterator = NoDynamicRelocationIterator; + + #[inline] + fn architecture(&self) -> Architecture { + Architecture::Wasm32 + } + + #[inline] + fn is_little_endian(&self) -> bool { + true + } + + #[inline] + fn is_64(&self) -> bool { + false + } + + fn kind(&self) -> ObjectKind { + // TODO: check for `linking` custom section + ObjectKind::Unknown + } + + fn segments(&'file self) -> Self::SegmentIterator { + WasmSegmentIterator { file: self } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option<WasmSection<'data, 'file, R>> { + self.sections() + .find(|section| section.name_bytes() == Ok(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Result<WasmSection<'data, 'file, R>> { + // TODO: Missing sections should return an empty section. + let id_section = self + .id_sections + .get(index.0) + .and_then(|x| *x) + .read_error("Invalid Wasm section index")?; + let section = self.sections.get(id_section).unwrap(); + Ok(WasmSection { + section, + marker: PhantomData, + }) + } + + fn sections(&'file self) -> Self::SectionIterator { + WasmSectionIterator { + sections: self.sections.iter(), + marker: PhantomData, + } + } + + fn comdats(&'file self) -> Self::ComdatIterator { + WasmComdatIterator { file: self } + } + + #[inline] + fn symbol_by_index(&'file self, index: SymbolIndex) -> Result<WasmSymbol<'data, 'file>> { + let symbol = self + .symbols + .get(index.0) + .read_error("Invalid Wasm symbol index")?; + Ok(WasmSymbol { index, symbol }) + } + + fn symbols(&'file self) -> Self::SymbolIterator { + WasmSymbolIterator { + symbols: self.symbols.iter().enumerate(), + } + } + + fn symbol_table(&'file self) -> Option<WasmSymbolTable<'data, 'file>> { + Some(WasmSymbolTable { + symbols: &self.symbols, + }) + } + + fn dynamic_symbols(&'file self) -> Self::SymbolIterator { + WasmSymbolIterator { + symbols: [].iter().enumerate(), + } + } + + #[inline] + fn dynamic_symbol_table(&'file self) -> Option<WasmSymbolTable<'data, 'file>> { + None + } + + #[inline] + fn dynamic_relocations(&self) -> Option<NoDynamicRelocationIterator> { + None + } + + fn imports(&self) -> Result<Vec<Import<'data>>> { + // TODO: return entries in the import section + Ok(Vec::new()) + } + + fn exports(&self) -> Result<Vec<Export<'data>>> { + // TODO: return entries in the export section + Ok(Vec::new()) + } + + fn has_debug_symbols(&self) -> bool { + self.has_debug_symbols + } + + fn relative_address_base(&self) -> u64 { + 0 + } + + #[inline] + fn entry(&'file self) -> u64 { + self.entry + } + + #[inline] + fn flags(&self) -> FileFlags { + FileFlags::None + } +} + +/// An iterator over the segments of a `WasmFile`. +#[derive(Debug)] +pub struct WasmSegmentIterator<'data, 'file, R = &'data [u8]> { + #[allow(unused)] + file: &'file WasmFile<'data, R>, +} + +impl<'data, 'file, R> Iterator for WasmSegmentIterator<'data, 'file, R> { + type Item = WasmSegment<'data, 'file, R>; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A segment of a `WasmFile`. +#[derive(Debug)] +pub struct WasmSegment<'data, 'file, R = &'data [u8]> { + #[allow(unused)] + file: &'file WasmFile<'data, R>, +} + +impl<'data, 'file, R> read::private::Sealed for WasmSegment<'data, 'file, R> {} + +impl<'data, 'file, R> ObjectSegment<'data> for WasmSegment<'data, 'file, R> { + #[inline] + fn address(&self) -> u64 { + unreachable!() + } + + #[inline] + fn size(&self) -> u64 { + unreachable!() + } + + #[inline] + fn align(&self) -> u64 { + unreachable!() + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + unreachable!() + } + + fn data(&self) -> Result<&'data [u8]> { + unreachable!() + } + + fn data_range(&self, _address: u64, _size: u64) -> Result<Option<&'data [u8]>> { + unreachable!() + } + + #[inline] + fn name_bytes(&self) -> Result<Option<&[u8]>> { + unreachable!() + } + + #[inline] + fn name(&self) -> Result<Option<&str>> { + unreachable!() + } + + #[inline] + fn flags(&self) -> SegmentFlags { + unreachable!() + } +} + +/// An iterator over the sections of a `WasmFile`. +#[derive(Debug)] +pub struct WasmSectionIterator<'data, 'file, R = &'data [u8]> { + sections: slice::Iter<'file, wp::Section<'data>>, + marker: PhantomData<R>, +} + +impl<'data, 'file, R> Iterator for WasmSectionIterator<'data, 'file, R> { + type Item = WasmSection<'data, 'file, R>; + + fn next(&mut self) -> Option<Self::Item> { + let section = self.sections.next()?; + Some(WasmSection { + section, + marker: PhantomData, + }) + } +} + +/// A section of a `WasmFile`. +#[derive(Debug)] +pub struct WasmSection<'data, 'file, R = &'data [u8]> { + section: &'file wp::Section<'data>, + marker: PhantomData<R>, +} + +impl<'data, 'file, R> read::private::Sealed for WasmSection<'data, 'file, R> {} + +impl<'data, 'file, R> ObjectSection<'data> for WasmSection<'data, 'file, R> { + type RelocationIterator = WasmRelocationIterator<'data, 'file, R>; + + #[inline] + fn index(&self) -> SectionIndex { + // Note that we treat all custom sections as index 0. + // This is ok because they are never looked up by index. + SectionIndex(section_code_to_id(self.section.code)) + } + + #[inline] + fn address(&self) -> u64 { + 0 + } + + #[inline] + fn size(&self) -> u64 { + let range = self.section.range(); + (range.end - range.start) as u64 + } + + #[inline] + fn align(&self) -> u64 { + 1 + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + let range = self.section.range(); + Some((range.start as _, range.end as _)) + } + + #[inline] + fn data(&self) -> Result<&'data [u8]> { + let mut reader = self.section.get_binary_reader(); + // TODO: raise a feature request upstream to be able + // to get remaining slice from a BinaryReader directly. + Ok(reader.read_bytes(reader.bytes_remaining()).unwrap()) + } + + fn data_range(&self, _address: u64, _size: u64) -> Result<Option<&'data [u8]>> { + unimplemented!() + } + + #[inline] + fn compressed_file_range(&self) -> Result<CompressedFileRange> { + Ok(CompressedFileRange::none(self.file_range())) + } + + #[inline] + fn compressed_data(&self) -> Result<CompressedData<'data>> { + self.data().map(CompressedData::none) + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + self.name().map(str::as_bytes) + } + + #[inline] + fn name(&self) -> Result<&str> { + Ok(match self.section.code { + wp::SectionCode::Custom { name, .. } => name, + wp::SectionCode::Type => "<type>", + wp::SectionCode::Import => "<import>", + wp::SectionCode::Function => "<function>", + wp::SectionCode::Table => "<table>", + wp::SectionCode::Memory => "<memory>", + wp::SectionCode::Global => "<global>", + wp::SectionCode::Export => "<export>", + wp::SectionCode::Start => "<start>", + wp::SectionCode::Element => "<element>", + wp::SectionCode::Code => "<code>", + wp::SectionCode::Data => "<data>", + wp::SectionCode::DataCount => "<data_count>", + }) + } + + #[inline] + fn segment_name_bytes(&self) -> Result<Option<&[u8]>> { + Ok(None) + } + + #[inline] + fn segment_name(&self) -> Result<Option<&str>> { + Ok(None) + } + + #[inline] + fn kind(&self) -> SectionKind { + match self.section.code { + wp::SectionCode::Custom { kind, .. } => match kind { + wp::CustomSectionKind::Reloc | wp::CustomSectionKind::Linking => { + SectionKind::Linker + } + _ => SectionKind::Other, + }, + wp::SectionCode::Type => SectionKind::Metadata, + wp::SectionCode::Import => SectionKind::Linker, + wp::SectionCode::Function => SectionKind::Metadata, + wp::SectionCode::Table => SectionKind::UninitializedData, + wp::SectionCode::Memory => SectionKind::UninitializedData, + wp::SectionCode::Global => SectionKind::Data, + wp::SectionCode::Export => SectionKind::Linker, + wp::SectionCode::Start => SectionKind::Linker, + wp::SectionCode::Element => SectionKind::Data, + wp::SectionCode::Code => SectionKind::Text, + wp::SectionCode::Data => SectionKind::Data, + wp::SectionCode::DataCount => SectionKind::UninitializedData, + } + } + + #[inline] + fn relocations(&self) -> WasmRelocationIterator<'data, 'file, R> { + WasmRelocationIterator(PhantomData) + } + + #[inline] + fn flags(&self) -> SectionFlags { + SectionFlags::None + } +} + +/// An iterator over the COMDAT section groups of a `WasmFile`. +#[derive(Debug)] +pub struct WasmComdatIterator<'data, 'file, R = &'data [u8]> { + #[allow(unused)] + file: &'file WasmFile<'data, R>, +} + +impl<'data, 'file, R> Iterator for WasmComdatIterator<'data, 'file, R> { + type Item = WasmComdat<'data, 'file, R>; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A COMDAT section group of a `WasmFile`. +#[derive(Debug)] +pub struct WasmComdat<'data, 'file, R = &'data [u8]> { + #[allow(unused)] + file: &'file WasmFile<'data, R>, +} + +impl<'data, 'file, R> read::private::Sealed for WasmComdat<'data, 'file, R> {} + +impl<'data, 'file, R> ObjectComdat<'data> for WasmComdat<'data, 'file, R> { + type SectionIterator = WasmComdatSectionIterator<'data, 'file, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + unreachable!(); + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + unreachable!(); + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + unreachable!(); + } + + #[inline] + fn name(&self) -> Result<&str> { + unreachable!(); + } + + #[inline] + fn sections(&self) -> Self::SectionIterator { + unreachable!(); + } +} + +/// An iterator over the sections in a COMDAT section group of a `WasmFile`. +#[derive(Debug)] +pub struct WasmComdatSectionIterator<'data, 'file, R = &'data [u8]> +where + 'data: 'file, +{ + #[allow(unused)] + file: &'file WasmFile<'data, R>, +} + +impl<'data, 'file, R> Iterator for WasmComdatSectionIterator<'data, 'file, R> { + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A symbol table of a `WasmFile`. +#[derive(Debug)] +pub struct WasmSymbolTable<'data, 'file> { + symbols: &'file [WasmSymbolInternal<'data>], +} + +impl<'data, 'file> read::private::Sealed for WasmSymbolTable<'data, 'file> {} + +impl<'data, 'file> ObjectSymbolTable<'data> for WasmSymbolTable<'data, 'file> { + type Symbol = WasmSymbol<'data, 'file>; + type SymbolIterator = WasmSymbolIterator<'data, 'file>; + + fn symbols(&self) -> Self::SymbolIterator { + WasmSymbolIterator { + symbols: self.symbols.iter().enumerate(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> { + let symbol = self + .symbols + .get(index.0) + .read_error("Invalid Wasm symbol index")?; + Ok(WasmSymbol { index, symbol }) + } +} + +/// An iterator over the symbols of a `WasmFile`. +#[derive(Debug)] +pub struct WasmSymbolIterator<'data, 'file> { + symbols: core::iter::Enumerate<slice::Iter<'file, WasmSymbolInternal<'data>>>, +} + +impl<'data, 'file> Iterator for WasmSymbolIterator<'data, 'file> { + type Item = WasmSymbol<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + let (index, symbol) = self.symbols.next()?; + Some(WasmSymbol { + index: SymbolIndex(index), + symbol, + }) + } +} + +/// A symbol of a `WasmFile`. +#[derive(Clone, Copy, Debug)] +pub struct WasmSymbol<'data, 'file> { + index: SymbolIndex, + symbol: &'file WasmSymbolInternal<'data>, +} + +#[derive(Clone, Debug)] +struct WasmSymbolInternal<'data> { + name: &'data str, + address: u64, + size: u64, + kind: SymbolKind, + section: SymbolSection, + scope: SymbolScope, +} + +impl<'data, 'file> read::private::Sealed for WasmSymbol<'data, 'file> {} + +impl<'data, 'file> ObjectSymbol<'data> for WasmSymbol<'data, 'file> { + #[inline] + fn index(&self) -> SymbolIndex { + self.index + } + + #[inline] + fn name_bytes(&self) -> read::Result<&'data [u8]> { + Ok(self.symbol.name.as_bytes()) + } + + #[inline] + fn name(&self) -> read::Result<&'data str> { + Ok(self.symbol.name) + } + + #[inline] + fn address(&self) -> u64 { + self.symbol.address + } + + #[inline] + fn size(&self) -> u64 { + self.symbol.size + } + + #[inline] + fn kind(&self) -> SymbolKind { + self.symbol.kind + } + + #[inline] + fn section(&self) -> SymbolSection { + self.symbol.section + } + + #[inline] + fn is_undefined(&self) -> bool { + self.symbol.section == SymbolSection::Undefined + } + + #[inline] + fn is_definition(&self) -> bool { + self.symbol.kind == SymbolKind::Text && self.symbol.section != SymbolSection::Undefined + } + + #[inline] + fn is_common(&self) -> bool { + self.symbol.section == SymbolSection::Common + } + + #[inline] + fn is_weak(&self) -> bool { + false + } + + #[inline] + fn scope(&self) -> SymbolScope { + self.symbol.scope + } + + #[inline] + fn is_global(&self) -> bool { + self.symbol.scope != SymbolScope::Compilation + } + + #[inline] + fn is_local(&self) -> bool { + self.symbol.scope == SymbolScope::Compilation + } + + #[inline] + fn flags(&self) -> SymbolFlags<SectionIndex> { + SymbolFlags::None + } +} + +/// An iterator over the relocations in a `WasmSection`. +#[derive(Debug)] +pub struct WasmRelocationIterator<'data, 'file, R = &'data [u8]>( + PhantomData<(&'data (), &'file (), R)>, +); + +impl<'data, 'file, R> Iterator for WasmRelocationIterator<'data, 'file, R> { + type Item = (u64, Relocation); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +fn section_code_to_id(code: wp::SectionCode) -> usize { + match code { + wp::SectionCode::Custom { .. } => SECTION_CUSTOM, + wp::SectionCode::Type => SECTION_TYPE, + wp::SectionCode::Import => SECTION_IMPORT, + wp::SectionCode::Function => SECTION_FUNCTION, + wp::SectionCode::Table => SECTION_TABLE, + wp::SectionCode::Memory => SECTION_MEMORY, + wp::SectionCode::Global => SECTION_GLOBAL, + wp::SectionCode::Export => SECTION_EXPORT, + wp::SectionCode::Start => SECTION_START, + wp::SectionCode::Element => SECTION_ELEMENT, + wp::SectionCode::Code => SECTION_CODE, + wp::SectionCode::Data => SECTION_DATA, + wp::SectionCode::DataCount => SECTION_DATA_COUNT, + } +} diff --git a/third_party/rust/object/src/read/xcoff/comdat.rs b/third_party/rust/object/src/read/xcoff/comdat.rs new file mode 100644 index 0000000000..eeed2f54d6 --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/comdat.rs @@ -0,0 +1,130 @@ +//! XCOFF doesn't support the COMDAT section. + +use core::fmt::Debug; + +use crate::xcoff; + +use crate::read::{self, ComdatKind, ObjectComdat, ReadRef, Result, SectionIndex, SymbolIndex}; + +use super::{FileHeader, XcoffFile}; + +/// An iterator over the COMDAT section groups of a `XcoffFile32`. +pub type XcoffComdatIterator32<'data, 'file, R = &'data [u8]> = + XcoffComdatIterator<'data, 'file, xcoff::FileHeader32, R>; +/// An iterator over the COMDAT section groups of a `XcoffFile64`. +pub type XcoffComdatIterator64<'data, 'file, R = &'data [u8]> = + XcoffComdatIterator<'data, 'file, xcoff::FileHeader64, R>; + +/// An iterator over the COMDAT section groups of a `XcoffFile`. +#[derive(Debug)] +pub struct XcoffComdatIterator<'data, 'file, Xcoff, R = &'data [u8]> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, +} + +impl<'data, 'file, Xcoff, R> Iterator for XcoffComdatIterator<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type Item = XcoffComdat<'data, 'file, Xcoff, R>; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A COMDAT section group of a `XcoffFile32`. +pub type XcoffComdat32<'data, 'file, R = &'data [u8]> = + XcoffComdat<'data, 'file, xcoff::FileHeader32, R>; + +/// A COMDAT section group of a `XcoffFile64`. +pub type XcoffComdat64<'data, 'file, R = &'data [u8]> = + XcoffComdat<'data, 'file, xcoff::FileHeader64, R>; + +/// A COMDAT section group of a `XcoffFile`. +#[derive(Debug)] +pub struct XcoffComdat<'data, 'file, Xcoff, R = &'data [u8]> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file XcoffFile<'data, Xcoff, R>, +} + +impl<'data, 'file, Xcoff, R> read::private::Sealed for XcoffComdat<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Xcoff, R> ObjectComdat<'data> for XcoffComdat<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type SectionIterator = XcoffComdatSectionIterator<'data, 'file, Xcoff, R>; + + #[inline] + fn kind(&self) -> ComdatKind { + unreachable!(); + } + + #[inline] + fn symbol(&self) -> SymbolIndex { + unreachable!(); + } + + #[inline] + fn name_bytes(&self) -> Result<&[u8]> { + unreachable!(); + } + + #[inline] + fn name(&self) -> Result<&str> { + unreachable!(); + } + + #[inline] + fn sections(&self) -> Self::SectionIterator { + unreachable!(); + } +} + +/// An iterator over the sections in a COMDAT section group of a `XcoffFile32`. +pub type XcoffComdatSectionIterator32<'data, 'file, R = &'data [u8]> = + XcoffComdatSectionIterator<'data, 'file, xcoff::FileHeader32, R>; +/// An iterator over the sections in a COMDAT section group of a `XcoffFile64`. +pub type XcoffComdatSectionIterator64<'data, 'file, R = &'data [u8]> = + XcoffComdatSectionIterator<'data, 'file, xcoff::FileHeader64, R>; + +/// An iterator over the sections in a COMDAT section group of a `XcoffFile`. +#[derive(Debug)] +pub struct XcoffComdatSectionIterator<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + file: &'file XcoffFile<'data, Xcoff, R>, +} + +impl<'data, 'file, Xcoff, R> Iterator for XcoffComdatSectionIterator<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type Item = SectionIndex; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} diff --git a/third_party/rust/object/src/read/xcoff/file.rs b/third_party/rust/object/src/read/xcoff/file.rs new file mode 100644 index 0000000000..bac9e70756 --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/file.rs @@ -0,0 +1,629 @@ +use core::fmt::Debug; +use core::mem; + +use alloc::vec::Vec; + +use crate::read::{self, Error, NoDynamicRelocationIterator, Object, ReadError, ReadRef, Result}; + +use crate::{ + xcoff, Architecture, BigEndian as BE, FileFlags, ObjectKind, ObjectSection, Pod, SectionIndex, + SymbolIndex, +}; + +use super::{ + CsectAux, FileAux, SectionHeader, SectionTable, Symbol, SymbolTable, XcoffComdat, + XcoffComdatIterator, XcoffSection, XcoffSectionIterator, XcoffSegment, XcoffSegmentIterator, + XcoffSymbol, XcoffSymbolIterator, XcoffSymbolTable, +}; + +/// A 32-bit XCOFF object file. +pub type XcoffFile32<'data, R = &'data [u8]> = XcoffFile<'data, xcoff::FileHeader32, R>; +/// A 64-bit XCOFF object file. +pub type XcoffFile64<'data, R = &'data [u8]> = XcoffFile<'data, xcoff::FileHeader64, R>; + +/// A partially parsed XCOFF file. +/// +/// Most of the functionality of this type is provided by the `Object` trait implementation. +#[derive(Debug)] +pub struct XcoffFile<'data, Xcoff, R = &'data [u8]> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + pub(super) data: R, + pub(super) header: &'data Xcoff, + pub(super) aux_header: Option<&'data Xcoff::AuxHeader>, + pub(super) sections: SectionTable<'data, Xcoff>, + pub(super) symbols: SymbolTable<'data, Xcoff, R>, +} + +impl<'data, Xcoff, R> XcoffFile<'data, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + /// Parse the raw XCOFF file data. + pub fn parse(data: R) -> Result<Self> { + let mut offset = 0; + let header = Xcoff::parse(data, &mut offset)?; + let aux_header = header.aux_header(data, &mut offset)?; + let sections = header.sections(data, &mut offset)?; + let symbols = header.symbols(data)?; + + Ok(XcoffFile { + data, + header, + aux_header, + sections, + symbols, + }) + } + + /// Returns the raw data. + pub fn data(&self) -> R { + self.data + } + + /// Returns the raw XCOFF file header. + pub fn raw_header(&self) -> &'data Xcoff { + self.header + } +} + +impl<'data, Xcoff, R> read::private::Sealed for XcoffFile<'data, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Xcoff, R> Object<'data, 'file> for XcoffFile<'data, Xcoff, R> +where + 'data: 'file, + Xcoff: FileHeader, + R: 'file + ReadRef<'data>, +{ + type Segment = XcoffSegment<'data, 'file, Xcoff, R>; + type SegmentIterator = XcoffSegmentIterator<'data, 'file, Xcoff, R>; + type Section = XcoffSection<'data, 'file, Xcoff, R>; + type SectionIterator = XcoffSectionIterator<'data, 'file, Xcoff, R>; + type Comdat = XcoffComdat<'data, 'file, Xcoff, R>; + type ComdatIterator = XcoffComdatIterator<'data, 'file, Xcoff, R>; + type Symbol = XcoffSymbol<'data, 'file, Xcoff, R>; + type SymbolIterator = XcoffSymbolIterator<'data, 'file, Xcoff, R>; + type SymbolTable = XcoffSymbolTable<'data, 'file, Xcoff, R>; + type DynamicRelocationIterator = NoDynamicRelocationIterator; + + fn architecture(&self) -> crate::Architecture { + if self.is_64() { + Architecture::PowerPc64 + } else { + Architecture::PowerPc + } + } + + fn is_little_endian(&self) -> bool { + false + } + + fn is_64(&self) -> bool { + self.header.is_type_64() + } + + fn kind(&self) -> ObjectKind { + let flags = self.header.f_flags(); + if flags & xcoff::F_EXEC != 0 { + ObjectKind::Executable + } else if flags & xcoff::F_SHROBJ != 0 { + ObjectKind::Dynamic + } else if flags & xcoff::F_RELFLG == 0 { + ObjectKind::Relocatable + } else { + ObjectKind::Unknown + } + } + + fn segments(&'file self) -> XcoffSegmentIterator<'data, 'file, Xcoff, R> { + XcoffSegmentIterator { file: self } + } + + fn section_by_name_bytes( + &'file self, + section_name: &[u8], + ) -> Option<XcoffSection<'data, 'file, Xcoff, R>> { + self.sections() + .find(|section| section.name_bytes() == Ok(section_name)) + } + + fn section_by_index( + &'file self, + index: SectionIndex, + ) -> Result<XcoffSection<'data, 'file, Xcoff, R>> { + let section = self.sections.section(index)?; + Ok(XcoffSection { + file: self, + section, + index, + }) + } + + fn sections(&'file self) -> XcoffSectionIterator<'data, 'file, Xcoff, R> { + XcoffSectionIterator { + file: self, + iter: self.sections.iter().enumerate(), + } + } + + fn comdats(&'file self) -> XcoffComdatIterator<'data, 'file, Xcoff, R> { + XcoffComdatIterator { file: self } + } + + fn symbol_table(&'file self) -> Option<XcoffSymbolTable<'data, 'file, Xcoff, R>> { + if self.symbols.is_empty() { + return None; + } + Some(XcoffSymbolTable { + symbols: &self.symbols, + file: self, + }) + } + + fn symbol_by_index( + &'file self, + index: SymbolIndex, + ) -> Result<XcoffSymbol<'data, 'file, Xcoff, R>> { + let symbol = self.symbols.symbol(index.0)?; + Ok(XcoffSymbol { + symbols: &self.symbols, + index, + symbol, + file: self, + }) + } + + fn symbols(&'file self) -> XcoffSymbolIterator<'data, 'file, Xcoff, R> { + XcoffSymbolIterator { + symbols: &self.symbols, + index: 0, + file: self, + } + } + + fn dynamic_symbol_table(&'file self) -> Option<XcoffSymbolTable<'data, 'file, Xcoff, R>> { + None + } + + fn dynamic_symbols(&'file self) -> XcoffSymbolIterator<'data, 'file, Xcoff, R> { + // TODO: return the symbols in the STYP_LOADER section. + XcoffSymbolIterator { + file: self, + symbols: &self.symbols, + // Hack: don't return any. + index: self.symbols.len(), + } + } + + fn dynamic_relocations(&'file self) -> Option<Self::DynamicRelocationIterator> { + // TODO: return the relocations in the STYP_LOADER section. + None + } + + fn imports(&self) -> Result<alloc::vec::Vec<crate::Import<'data>>> { + // TODO: return the imports in the STYP_LOADER section. + Ok(Vec::new()) + } + + fn exports(&self) -> Result<alloc::vec::Vec<crate::Export<'data>>> { + // TODO: return the exports in the STYP_LOADER section. + Ok(Vec::new()) + } + + fn has_debug_symbols(&self) -> bool { + self.section_by_name(".debug").is_some() || self.section_by_name(".dwinfo").is_some() + } + + fn relative_address_base(&'file self) -> u64 { + 0 + } + + fn entry(&'file self) -> u64 { + if let Some(aux_header) = self.aux_header { + aux_header.o_entry().into() + } else { + 0 + } + } + + fn flags(&self) -> FileFlags { + FileFlags::Xcoff { + f_flags: self.header.f_flags(), + } + } +} + +/// A trait for generic access to `FileHeader32` and `FileHeader64`. +#[allow(missing_docs)] +pub trait FileHeader: Debug + Pod { + type Word: Into<u64>; + type AuxHeader: AuxHeader<Word = Self::Word>; + type SectionHeader: SectionHeader<Word = Self::Word>; + type Symbol: Symbol<Word = Self::Word>; + type FileAux: FileAux; + type CsectAux: CsectAux; + + /// Return true if this type is a 64-bit header. + fn is_type_64(&self) -> bool; + + fn f_magic(&self) -> u16; + fn f_nscns(&self) -> u16; + fn f_timdat(&self) -> u32; + fn f_symptr(&self) -> Self::Word; + fn f_nsyms(&self) -> u32; + fn f_opthdr(&self) -> u16; + fn f_flags(&self) -> u16; + + // Provided methods. + + /// Read the file header. + /// + /// Also checks that the magic field in the file header is a supported format. + fn parse<'data, R: ReadRef<'data>>(data: R, offset: &mut u64) -> Result<&'data Self> { + let header = data + .read::<Self>(offset) + .read_error("Invalid XCOFF header size or alignment")?; + if !header.is_supported() { + return Err(Error("Unsupported XCOFF header")); + } + Ok(header) + } + + fn is_supported(&self) -> bool { + (self.is_type_64() && self.f_magic() == xcoff::MAGIC_64) + || (!self.is_type_64() && self.f_magic() == xcoff::MAGIC_32) + } + + /// Read the auxiliary file header. + fn aux_header<'data, R: ReadRef<'data>>( + &self, + data: R, + offset: &mut u64, + ) -> Result<Option<&'data Self::AuxHeader>> { + let aux_header_size = self.f_opthdr(); + if self.f_flags() & xcoff::F_EXEC == 0 { + // No auxiliary header is required for an object file that is not an executable. + // TODO: Some AIX programs generate auxiliary headers for 32-bit object files + // that end after the data_start field. + *offset += u64::from(aux_header_size); + return Ok(None); + } + // Executables, however, must have auxiliary headers that include the + // full structure definitions. + if aux_header_size != mem::size_of::<Self::AuxHeader>() as u16 { + *offset += u64::from(aux_header_size); + return Ok(None); + } + let aux_header = data + .read::<Self::AuxHeader>(offset) + .read_error("Invalid XCOFF auxiliary header size")?; + Ok(Some(aux_header)) + } + + /// Read the section table. + #[inline] + fn sections<'data, R: ReadRef<'data>>( + &self, + data: R, + offset: &mut u64, + ) -> Result<SectionTable<'data, Self>> { + SectionTable::parse(self, data, offset) + } + + /// Return the symbol table. + #[inline] + fn symbols<'data, R: ReadRef<'data>>(&self, data: R) -> Result<SymbolTable<'data, Self, R>> { + SymbolTable::parse(*self, data) + } +} + +impl FileHeader for xcoff::FileHeader32 { + type Word = u32; + type AuxHeader = xcoff::AuxHeader32; + type SectionHeader = xcoff::SectionHeader32; + type Symbol = xcoff::Symbol32; + type FileAux = xcoff::FileAux32; + type CsectAux = xcoff::CsectAux32; + + fn is_type_64(&self) -> bool { + false + } + + fn f_magic(&self) -> u16 { + self.f_magic.get(BE) + } + + fn f_nscns(&self) -> u16 { + self.f_nscns.get(BE) + } + + fn f_timdat(&self) -> u32 { + self.f_timdat.get(BE) + } + + fn f_symptr(&self) -> Self::Word { + self.f_symptr.get(BE) + } + + fn f_nsyms(&self) -> u32 { + self.f_nsyms.get(BE) + } + + fn f_opthdr(&self) -> u16 { + self.f_opthdr.get(BE) + } + + fn f_flags(&self) -> u16 { + self.f_flags.get(BE) + } +} + +impl FileHeader for xcoff::FileHeader64 { + type Word = u64; + type AuxHeader = xcoff::AuxHeader64; + type SectionHeader = xcoff::SectionHeader64; + type Symbol = xcoff::Symbol64; + type FileAux = xcoff::FileAux64; + type CsectAux = xcoff::CsectAux64; + + fn is_type_64(&self) -> bool { + true + } + + fn f_magic(&self) -> u16 { + self.f_magic.get(BE) + } + + fn f_nscns(&self) -> u16 { + self.f_nscns.get(BE) + } + + fn f_timdat(&self) -> u32 { + self.f_timdat.get(BE) + } + + fn f_symptr(&self) -> Self::Word { + self.f_symptr.get(BE) + } + + fn f_nsyms(&self) -> u32 { + self.f_nsyms.get(BE) + } + + fn f_opthdr(&self) -> u16 { + self.f_opthdr.get(BE) + } + + fn f_flags(&self) -> u16 { + self.f_flags.get(BE) + } +} + +#[allow(missing_docs)] +pub trait AuxHeader: Debug + Pod { + type Word: Into<u64>; + + fn o_vstamp(&self) -> u16; + fn o_tsize(&self) -> Self::Word; + fn o_dsize(&self) -> Self::Word; + fn o_bsize(&self) -> Self::Word; + fn o_entry(&self) -> Self::Word; + fn o_text_start(&self) -> Self::Word; + fn o_data_start(&self) -> Self::Word; + fn o_toc(&self) -> Self::Word; + fn o_snentry(&self) -> u16; + fn o_sntext(&self) -> u16; + fn o_sndata(&self) -> u16; + fn o_sntoc(&self) -> u16; + fn o_snloader(&self) -> u16; + fn o_snbss(&self) -> u16; + fn o_sntdata(&self) -> u16; + fn o_sntbss(&self) -> u16; + fn o_algntext(&self) -> u16; + fn o_algndata(&self) -> u16; + fn o_maxstack(&self) -> Self::Word; + fn o_maxdata(&self) -> Self::Word; + fn o_textpsize(&self) -> u8; + fn o_datapsize(&self) -> u8; + fn o_stackpsize(&self) -> u8; +} + +impl AuxHeader for xcoff::AuxHeader32 { + type Word = u32; + + fn o_vstamp(&self) -> u16 { + self.o_vstamp.get(BE) + } + + fn o_tsize(&self) -> Self::Word { + self.o_tsize.get(BE) + } + + fn o_dsize(&self) -> Self::Word { + self.o_dsize.get(BE) + } + + fn o_bsize(&self) -> Self::Word { + self.o_bsize.get(BE) + } + + fn o_entry(&self) -> Self::Word { + self.o_entry.get(BE) + } + + fn o_text_start(&self) -> Self::Word { + self.o_text_start.get(BE) + } + + fn o_data_start(&self) -> Self::Word { + self.o_data_start.get(BE) + } + + fn o_toc(&self) -> Self::Word { + self.o_toc.get(BE) + } + + fn o_snentry(&self) -> u16 { + self.o_snentry.get(BE) + } + + fn o_sntext(&self) -> u16 { + self.o_sntext.get(BE) + } + + fn o_sndata(&self) -> u16 { + self.o_sndata.get(BE) + } + + fn o_sntoc(&self) -> u16 { + self.o_sntoc.get(BE) + } + + fn o_snloader(&self) -> u16 { + self.o_snloader.get(BE) + } + + fn o_snbss(&self) -> u16 { + self.o_snbss.get(BE) + } + + fn o_sntdata(&self) -> u16 { + self.o_sntdata.get(BE) + } + + fn o_sntbss(&self) -> u16 { + self.o_sntbss.get(BE) + } + + fn o_algntext(&self) -> u16 { + self.o_algntext.get(BE) + } + + fn o_algndata(&self) -> u16 { + self.o_algndata.get(BE) + } + + fn o_maxstack(&self) -> Self::Word { + self.o_maxstack.get(BE) + } + + fn o_maxdata(&self) -> Self::Word { + self.o_maxdata.get(BE) + } + + fn o_textpsize(&self) -> u8 { + self.o_textpsize + } + + fn o_datapsize(&self) -> u8 { + self.o_datapsize + } + + fn o_stackpsize(&self) -> u8 { + self.o_stackpsize + } +} + +impl AuxHeader for xcoff::AuxHeader64 { + type Word = u64; + + fn o_vstamp(&self) -> u16 { + self.o_vstamp.get(BE) + } + + fn o_tsize(&self) -> Self::Word { + self.o_tsize.get(BE) + } + + fn o_dsize(&self) -> Self::Word { + self.o_dsize.get(BE) + } + + fn o_bsize(&self) -> Self::Word { + self.o_bsize.get(BE) + } + + fn o_entry(&self) -> Self::Word { + self.o_entry.get(BE) + } + + fn o_text_start(&self) -> Self::Word { + self.o_text_start.get(BE) + } + + fn o_data_start(&self) -> Self::Word { + self.o_data_start.get(BE) + } + + fn o_toc(&self) -> Self::Word { + self.o_toc.get(BE) + } + + fn o_snentry(&self) -> u16 { + self.o_snentry.get(BE) + } + + fn o_sntext(&self) -> u16 { + self.o_sntext.get(BE) + } + + fn o_sndata(&self) -> u16 { + self.o_sndata.get(BE) + } + + fn o_sntoc(&self) -> u16 { + self.o_sntoc.get(BE) + } + + fn o_snloader(&self) -> u16 { + self.o_snloader.get(BE) + } + + fn o_snbss(&self) -> u16 { + self.o_snbss.get(BE) + } + + fn o_sntdata(&self) -> u16 { + self.o_sntdata.get(BE) + } + + fn o_sntbss(&self) -> u16 { + self.o_sntbss.get(BE) + } + + fn o_algntext(&self) -> u16 { + self.o_algntext.get(BE) + } + + fn o_algndata(&self) -> u16 { + self.o_algndata.get(BE) + } + + fn o_maxstack(&self) -> Self::Word { + self.o_maxstack.get(BE) + } + + fn o_maxdata(&self) -> Self::Word { + self.o_maxdata.get(BE) + } + + fn o_textpsize(&self) -> u8 { + self.o_textpsize + } + + fn o_datapsize(&self) -> u8 { + self.o_datapsize + } + + fn o_stackpsize(&self) -> u8 { + self.o_stackpsize + } +} diff --git a/third_party/rust/object/src/read/xcoff/mod.rs b/third_party/rust/object/src/read/xcoff/mod.rs new file mode 100644 index 0000000000..136e31073b --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/mod.rs @@ -0,0 +1,21 @@ +//! Support for reading AIX XCOFF files. +//! +//! Provides `XcoffFile` and related types which implement the `Object` trait. + +mod file; +pub use file::*; + +mod section; +pub use section::*; + +mod symbol; +pub use symbol::*; + +mod relocation; +pub use relocation::*; + +mod comdat; +pub use comdat::*; + +mod segment; +pub use segment::*; diff --git a/third_party/rust/object/src/read/xcoff/relocation.rs b/third_party/rust/object/src/read/xcoff/relocation.rs new file mode 100644 index 0000000000..8107a2e826 --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/relocation.rs @@ -0,0 +1,128 @@ +use alloc::fmt; +use core::fmt::Debug; +use core::slice; + +use crate::pod::Pod; +use crate::{xcoff, BigEndian as BE, Relocation}; + +use crate::read::{ReadRef, RelocationEncoding, RelocationKind, RelocationTarget, SymbolIndex}; + +use super::{FileHeader, SectionHeader, XcoffFile}; + +/// An iterator over the relocations in a `XcoffSection32`. +pub type XcoffRelocationIterator32<'data, 'file, R = &'data [u8]> = + XcoffRelocationIterator<'data, 'file, xcoff::FileHeader32, R>; +/// An iterator over the relocations in a `XcoffSection64`. +pub type XcoffRelocationIterator64<'data, 'file, R = &'data [u8]> = + XcoffRelocationIterator<'data, 'file, xcoff::FileHeader64, R>; + +/// An iterator over the relocations in a `XcoffSection`. +pub struct XcoffRelocationIterator<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) relocations: + slice::Iter<'data, <<Xcoff as FileHeader>::SectionHeader as SectionHeader>::Rel>, +} + +impl<'data, 'file, Xcoff, R> Iterator for XcoffRelocationIterator<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + self.relocations.next().map(|relocation| { + let encoding = RelocationEncoding::Generic; + let (kind, addend) = match relocation.r_rtype() { + xcoff::R_POS + | xcoff::R_RL + | xcoff::R_RLA + | xcoff::R_BA + | xcoff::R_RBA + | xcoff::R_TLS => (RelocationKind::Absolute, 0), + xcoff::R_REL | xcoff::R_BR | xcoff::R_RBR => (RelocationKind::Relative, -4), + xcoff::R_TOC | xcoff::R_TOCL | xcoff::R_TOCU => (RelocationKind::Got, 0), + r_type => (RelocationKind::Xcoff(r_type), 0), + }; + let size = (relocation.r_rsize() & 0x3F) + 1; + let target = RelocationTarget::Symbol(SymbolIndex(relocation.r_symndx() as usize)); + ( + relocation.r_vaddr().into(), + Relocation { + kind, + encoding, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file, Xcoff, R> fmt::Debug for XcoffRelocationIterator<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("XcoffRelocationIterator").finish() + } +} + +/// A trait for generic access to `Rel32` and `Rel64`. +#[allow(missing_docs)] +pub trait Rel: Debug + Pod { + type Word: Into<u64>; + fn r_vaddr(&self) -> Self::Word; + fn r_symndx(&self) -> u32; + fn r_rsize(&self) -> u8; + fn r_rtype(&self) -> u8; +} + +impl Rel for xcoff::Rel32 { + type Word = u32; + + fn r_vaddr(&self) -> Self::Word { + self.r_vaddr.get(BE) + } + + fn r_symndx(&self) -> u32 { + self.r_symndx.get(BE) + } + + fn r_rsize(&self) -> u8 { + self.r_rsize + } + + fn r_rtype(&self) -> u8 { + self.r_rtype + } +} + +impl Rel for xcoff::Rel64 { + type Word = u64; + + fn r_vaddr(&self) -> Self::Word { + self.r_vaddr.get(BE) + } + + fn r_symndx(&self) -> u32 { + self.r_symndx.get(BE) + } + + fn r_rsize(&self) -> u8 { + self.r_rsize + } + + fn r_rtype(&self) -> u8 { + self.r_rtype + } +} diff --git a/third_party/rust/object/src/read/xcoff/section.rs b/third_party/rust/object/src/read/xcoff/section.rs new file mode 100644 index 0000000000..0944e10c83 --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/section.rs @@ -0,0 +1,426 @@ +use core::fmt::Debug; +use core::{iter, result, slice, str}; + +use crate::{ + xcoff, BigEndian as BE, CompressedData, CompressedFileRange, Pod, SectionFlags, SectionKind, +}; + +use crate::read::{self, Error, ObjectSection, ReadError, ReadRef, Result, SectionIndex}; + +use super::{AuxHeader, FileHeader, Rel, XcoffFile, XcoffRelocationIterator}; + +/// An iterator over the sections of an `XcoffFile32`. +pub type XcoffSectionIterator32<'data, 'file, R = &'data [u8]> = + XcoffSectionIterator<'data, 'file, xcoff::FileHeader32, R>; +/// An iterator over the sections of an `XcoffFile64`. +pub type XcoffSectionIterator64<'data, 'file, R = &'data [u8]> = + XcoffSectionIterator<'data, 'file, xcoff::FileHeader64, R>; + +/// An iterator over the sections of an `XcoffFile`. +#[derive(Debug)] +pub struct XcoffSectionIterator<'data, 'file, Xcoff, R = &'data [u8]> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) iter: iter::Enumerate<slice::Iter<'data, Xcoff::SectionHeader>>, +} + +impl<'data, 'file, Xcoff, R> Iterator for XcoffSectionIterator<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type Item = XcoffSection<'data, 'file, Xcoff, R>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| XcoffSection { + index: SectionIndex(index), + file: self.file, + section, + }) + } +} + +/// A section of an `XcoffFile32`. +pub type XcoffSection32<'data, 'file, R = &'data [u8]> = + XcoffSection<'data, 'file, xcoff::FileHeader32, R>; +/// A section of an `XcoffFile64`. +pub type XcoffSection64<'data, 'file, R = &'data [u8]> = + XcoffSection<'data, 'file, xcoff::FileHeader64, R>; + +/// A section of an `XcoffFile`. +#[derive(Debug)] +pub struct XcoffSection<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) section: &'data Xcoff::SectionHeader, + pub(super) index: SectionIndex, +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> XcoffSection<'data, 'file, Xcoff, R> { + fn bytes(&self) -> Result<&'data [u8]> { + self.section + .data(self.file.data) + .read_error("Invalid XCOFF section offset or size") + } +} + +impl<'data, 'file, Xcoff, R> read::private::Sealed for XcoffSection<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Xcoff, R> ObjectSection<'data> for XcoffSection<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type RelocationIterator = XcoffRelocationIterator<'data, 'file, Xcoff, R>; + + fn index(&self) -> SectionIndex { + self.index + } + + fn address(&self) -> u64 { + self.section.s_paddr().into() + } + + fn size(&self) -> u64 { + self.section.s_size().into() + } + + fn align(&self) -> u64 { + // The default section alignment is 4. + if let Some(aux_header) = self.file.aux_header { + match self.kind() { + SectionKind::Text => aux_header.o_algntext().into(), + SectionKind::Data => aux_header.o_algndata().into(), + _ => 4, + } + } else { + 4 + } + } + + fn file_range(&self) -> Option<(u64, u64)> { + self.section.file_range() + } + + fn data(&self) -> Result<&'data [u8]> { + self.bytes() + } + + fn data_range(&self, address: u64, size: u64) -> Result<Option<&'data [u8]>> { + Ok(read::util::data_range( + self.bytes()?, + self.address(), + address, + size, + )) + } + + fn compressed_file_range(&self) -> Result<CompressedFileRange> { + Ok(CompressedFileRange::none(self.file_range())) + } + + fn compressed_data(&self) -> Result<CompressedData<'data>> { + self.data().map(CompressedData::none) + } + + fn name_bytes(&self) -> read::Result<&[u8]> { + Ok(self.section.name()) + } + + fn name(&self) -> read::Result<&str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 XCOFF section name") + } + + fn segment_name_bytes(&self) -> Result<Option<&[u8]>> { + Ok(None) + } + + fn segment_name(&self) -> Result<Option<&str>> { + Ok(None) + } + + fn kind(&self) -> SectionKind { + let section_type = self.section.s_flags() as u16; + if section_type & xcoff::STYP_TEXT != 0 { + SectionKind::Text + } else if section_type & xcoff::STYP_DATA != 0 { + SectionKind::Data + } else if section_type & xcoff::STYP_TDATA != 0 { + SectionKind::Tls + } else if section_type & xcoff::STYP_BSS != 0 { + SectionKind::UninitializedData + } else if section_type & xcoff::STYP_TBSS != 0 { + SectionKind::UninitializedTls + } else if section_type & (xcoff::STYP_DEBUG | xcoff::STYP_DWARF) != 0 { + SectionKind::Debug + } else if section_type & (xcoff::STYP_LOADER | xcoff::STYP_OVRFLO) != 0 { + SectionKind::Metadata + } else if section_type + & (xcoff::STYP_INFO | xcoff::STYP_EXCEPT | xcoff::STYP_PAD | xcoff::STYP_TYPCHK) + != 0 + { + SectionKind::Other + } else { + SectionKind::Unknown + } + } + + fn relocations(&self) -> Self::RelocationIterator { + let rel = self.section.relocations(self.file.data).unwrap_or(&[]); + XcoffRelocationIterator { + file: self.file, + relocations: rel.iter(), + } + } + + fn flags(&self) -> SectionFlags { + SectionFlags::Xcoff { + s_flags: self.section.s_flags(), + } + } + + fn uncompressed_data(&self) -> Result<alloc::borrow::Cow<'data, [u8]>> { + self.compressed_data()?.decompress() + } +} + +/// The table of section headers in an XCOFF file. +#[derive(Debug, Clone, Copy)] +pub struct SectionTable<'data, Xcoff: FileHeader> { + sections: &'data [Xcoff::SectionHeader], +} + +impl<'data, Xcoff> Default for SectionTable<'data, Xcoff> +where + Xcoff: FileHeader, +{ + fn default() -> Self { + Self { sections: &[] } + } +} + +impl<'data, Xcoff> SectionTable<'data, Xcoff> +where + Xcoff: FileHeader, +{ + /// Parse the section table. + /// + /// `data` must be the entire file data. + /// `offset` must be after the optional file header. + pub fn parse<R: ReadRef<'data>>(header: &Xcoff, data: R, offset: &mut u64) -> Result<Self> { + let section_num = header.f_nscns(); + if section_num == 0 { + return Ok(SectionTable::default()); + } + let sections = data + .read_slice(offset, section_num as usize) + .read_error("Invalid XCOFF section headers")?; + Ok(SectionTable { sections }) + } + + /// Iterate over the section headers. + #[inline] + pub fn iter(&self) -> slice::Iter<'data, Xcoff::SectionHeader> { + self.sections.iter() + } + + /// Return true if the section table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.sections.is_empty() + } + + /// The number of section headers. + #[inline] + pub fn len(&self) -> usize { + self.sections.len() + } + + /// Return the section header at the given index. + pub fn section(&self, index: SectionIndex) -> read::Result<&'data Xcoff::SectionHeader> { + self.sections + .get(index.0) + .read_error("Invalid XCOFF section index") + } +} + +/// A trait for generic access to `SectionHeader32` and `SectionHeader64`. +#[allow(missing_docs)] +pub trait SectionHeader: Debug + Pod { + type Word: Into<u64>; + type HalfWord: Into<u32>; + type Xcoff: FileHeader<SectionHeader = Self, Word = Self::Word>; + type Rel: Rel<Word = Self::Word>; + + fn s_name(&self) -> &[u8; 8]; + fn s_paddr(&self) -> Self::Word; + fn s_vaddr(&self) -> Self::Word; + fn s_size(&self) -> Self::Word; + fn s_scnptr(&self) -> Self::Word; + fn s_relptr(&self) -> Self::Word; + fn s_lnnoptr(&self) -> Self::Word; + fn s_nreloc(&self) -> Self::HalfWord; + fn s_nlnno(&self) -> Self::HalfWord; + fn s_flags(&self) -> u32; + + /// Return the section name. + fn name(&self) -> &[u8] { + let sectname = &self.s_name()[..]; + match memchr::memchr(b'\0', sectname) { + Some(end) => §name[..end], + None => sectname, + } + } + + /// Return the offset and size of the section in the file. + fn file_range(&self) -> Option<(u64, u64)> { + Some((self.s_scnptr().into(), self.s_size().into())) + } + + /// Return the section data. + /// + /// Returns `Ok(&[])` if the section has no data. + /// Returns `Err` for invalid values. + fn data<'data, R: ReadRef<'data>>(&self, data: R) -> result::Result<&'data [u8], ()> { + if let Some((offset, size)) = self.file_range() { + data.read_bytes_at(offset, size) + } else { + Ok(&[]) + } + } + + /// Read the relocations. + fn relocations<'data, R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [Self::Rel]>; +} + +impl SectionHeader for xcoff::SectionHeader32 { + type Word = u32; + type HalfWord = u16; + type Xcoff = xcoff::FileHeader32; + type Rel = xcoff::Rel32; + + fn s_name(&self) -> &[u8; 8] { + &self.s_name + } + + fn s_paddr(&self) -> Self::Word { + self.s_paddr.get(BE) + } + + fn s_vaddr(&self) -> Self::Word { + self.s_vaddr.get(BE) + } + + fn s_size(&self) -> Self::Word { + self.s_size.get(BE) + } + + fn s_scnptr(&self) -> Self::Word { + self.s_scnptr.get(BE) + } + + fn s_relptr(&self) -> Self::Word { + self.s_relptr.get(BE) + } + + fn s_lnnoptr(&self) -> Self::Word { + self.s_lnnoptr.get(BE) + } + + fn s_nreloc(&self) -> Self::HalfWord { + self.s_nreloc.get(BE) + } + + fn s_nlnno(&self) -> Self::HalfWord { + self.s_nlnno.get(BE) + } + + fn s_flags(&self) -> u32 { + self.s_flags.get(BE) + } + + /// Read the relocations in a XCOFF32 file. + /// + /// `data` must be the entire file data. + fn relocations<'data, R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [Self::Rel]> { + let reloc_num = self.s_nreloc() as usize; + // TODO: If more than 65,534 relocation entries are required, the field value will be 65535, + // and an STYP_OVRFLO section header will contain the actual count of relocation entries in + // the s_paddr field. + if reloc_num == 65535 { + return Err(Error("Overflow section is not supported yet.")); + } + data.read_slice_at(self.s_relptr().into(), reloc_num) + .read_error("Invalid XCOFF relocation offset or number") + } +} + +impl SectionHeader for xcoff::SectionHeader64 { + type Word = u64; + type HalfWord = u32; + type Xcoff = xcoff::FileHeader64; + type Rel = xcoff::Rel64; + + fn s_name(&self) -> &[u8; 8] { + &self.s_name + } + + fn s_paddr(&self) -> Self::Word { + self.s_paddr.get(BE) + } + + fn s_vaddr(&self) -> Self::Word { + self.s_vaddr.get(BE) + } + + fn s_size(&self) -> Self::Word { + self.s_size.get(BE) + } + + fn s_scnptr(&self) -> Self::Word { + self.s_scnptr.get(BE) + } + + fn s_relptr(&self) -> Self::Word { + self.s_relptr.get(BE) + } + + fn s_lnnoptr(&self) -> Self::Word { + self.s_lnnoptr.get(BE) + } + + fn s_nreloc(&self) -> Self::HalfWord { + self.s_nreloc.get(BE) + } + + fn s_nlnno(&self) -> Self::HalfWord { + self.s_nlnno.get(BE) + } + + fn s_flags(&self) -> u32 { + self.s_flags.get(BE) + } + + /// Read the relocations in a XCOFF64 file. + /// + /// `data` must be the entire file data. + fn relocations<'data, R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [Self::Rel]> { + data.read_slice_at(self.s_relptr(), self.s_nreloc() as usize) + .read_error("Invalid XCOFF relocation offset or number") + } +} diff --git a/third_party/rust/object/src/read/xcoff/segment.rs b/third_party/rust/object/src/read/xcoff/segment.rs new file mode 100644 index 0000000000..49969438de --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/segment.rs @@ -0,0 +1,115 @@ +//! TODO: Support the segment for XCOFF when auxiliary file header and loader section is ready. + +use core::fmt::Debug; +use core::str; + +use crate::read::{self, ObjectSegment, ReadRef, Result}; +use crate::xcoff; + +use super::{FileHeader, XcoffFile}; + +/// An iterator over the segments of an `XcoffFile32`. +pub type XcoffSegmentIterator32<'data, 'file, R = &'data [u8]> = + XcoffSegmentIterator<'data, 'file, xcoff::FileHeader32, R>; +/// An iterator over the segments of an `XcoffFile64`. +pub type XcoffSegmentIterator64<'data, 'file, R = &'data [u8]> = + XcoffSegmentIterator<'data, 'file, xcoff::FileHeader64, R>; + +/// An iterator over the segments of an `XcoffFile`. +#[derive(Debug)] +pub struct XcoffSegmentIterator<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, +} + +impl<'data, 'file, Xcoff, R> Iterator for XcoffSegmentIterator<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + type Item = XcoffSegment<'data, 'file, Xcoff, R>; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A segment of an `XcoffFile32`. +pub type XcoffSegment32<'data, 'file, R = &'data [u8]> = + XcoffSegment<'data, 'file, xcoff::FileHeader32, R>; +/// A segment of an `XcoffFile64`. +pub type XcoffSegment64<'data, 'file, R = &'data [u8]> = + XcoffSegment<'data, 'file, xcoff::FileHeader64, R>; + +/// A loadable section of an `XcoffFile`. +#[derive(Debug)] +pub struct XcoffSegment<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + #[allow(unused)] + pub(super) file: &'file XcoffFile<'data, Xcoff, R>, +} + +impl<'data, 'file, Xcoff, R> XcoffSegment<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Xcoff, R> read::private::Sealed for XcoffSegment<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ +} + +impl<'data, 'file, Xcoff, R> ObjectSegment<'data> for XcoffSegment<'data, 'file, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + fn address(&self) -> u64 { + unreachable!(); + } + + fn size(&self) -> u64 { + unreachable!(); + } + + fn align(&self) -> u64 { + unreachable!(); + } + + fn file_range(&self) -> (u64, u64) { + unreachable!(); + } + + fn data(&self) -> Result<&'data [u8]> { + unreachable!(); + } + + fn data_range(&self, _address: u64, _size: u64) -> Result<Option<&'data [u8]>> { + unreachable!(); + } + + fn name_bytes(&self) -> Result<Option<&[u8]>> { + unreachable!(); + } + + fn name(&self) -> Result<Option<&str>> { + unreachable!(); + } + + fn flags(&self) -> crate::SegmentFlags { + unreachable!(); + } +} diff --git a/third_party/rust/object/src/read/xcoff/symbol.rs b/third_party/rust/object/src/read/xcoff/symbol.rs new file mode 100644 index 0000000000..6738ad1714 --- /dev/null +++ b/third_party/rust/object/src/read/xcoff/symbol.rs @@ -0,0 +1,634 @@ +use alloc::fmt; +use core::convert::TryInto; +use core::fmt::Debug; +use core::marker::PhantomData; +use core::str; + +use crate::endian::{BigEndian as BE, U32Bytes}; +use crate::pod::Pod; +use crate::read::util::StringTable; +use crate::{bytes_of, xcoff, Object, ObjectSection, SectionKind}; + +use crate::read::{ + self, Bytes, Error, ObjectSymbol, ObjectSymbolTable, ReadError, ReadRef, Result, SectionIndex, + SymbolFlags, SymbolIndex, SymbolKind, SymbolScope, SymbolSection, +}; + +use super::{FileHeader, XcoffFile}; + +/// A table of symbol entries in an XCOFF file. +/// +/// Also includes the string table used for the symbol names. +#[derive(Debug)] +pub struct SymbolTable<'data, Xcoff, R = &'data [u8]> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + symbols: &'data [xcoff::SymbolBytes], + strings: StringTable<'data, R>, + header: PhantomData<Xcoff>, +} + +impl<'data, Xcoff, R> Default for SymbolTable<'data, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + fn default() -> Self { + Self { + symbols: &[], + strings: StringTable::default(), + header: PhantomData, + } + } +} + +impl<'data, Xcoff, R> SymbolTable<'data, Xcoff, R> +where + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + /// Parse the symbol table. + pub fn parse(header: Xcoff, data: R) -> Result<Self> { + let mut offset = header.f_symptr().into(); + let (symbols, strings) = if offset != 0 { + let symbols = data + .read_slice(&mut offset, header.f_nsyms() as usize) + .read_error("Invalid XCOFF symbol table offset or size")?; + + // Parse the string table. + // Note: don't update data when reading length; the length includes itself. + let length = data + .read_at::<U32Bytes<_>>(offset) + .read_error("Missing XCOFF string table")? + .get(BE); + let str_end = offset + .checked_add(length as u64) + .read_error("Invalid XCOFF string table length")?; + let strings = StringTable::new(data, offset, str_end); + + (symbols, strings) + } else { + (&[][..], StringTable::default()) + }; + + Ok(SymbolTable { + symbols, + strings, + header: PhantomData, + }) + } + + /// Return the symbol entry at the given index and offset. + pub fn get<T: Pod>(&self, index: usize, offset: usize) -> Result<&'data T> { + let entry = index + .checked_add(offset) + .and_then(|x| self.symbols.get(x)) + .read_error("Invalid XCOFF symbol index")?; + let bytes = bytes_of(entry); + Bytes(bytes).read().read_error("Invalid XCOFF symbol data") + } + + /// Return the symbol at the given index. + pub fn symbol(&self, index: usize) -> Result<&'data Xcoff::Symbol> { + self.get::<Xcoff::Symbol>(index, 0) + } + + /// Return the file auxiliary symbol. + pub fn aux_file(&self, index: usize) -> Result<&'data Xcoff::FileAux> { + debug_assert!(self.symbol(index)?.has_aux_file()); + let aux_file = self.get::<Xcoff::FileAux>(index, 1)?; + if let Some(aux_type) = aux_file.x_auxtype() { + if aux_type != xcoff::AUX_FILE { + return Err(Error("Invalid index for file auxiliary symbol.")); + } + } + Ok(aux_file) + } + + /// Return the csect auxiliary symbol. + pub fn aux_csect(&self, index: usize, offset: usize) -> Result<&'data Xcoff::CsectAux> { + debug_assert!(self.symbol(index)?.has_aux_csect()); + let aux_csect = self.get::<Xcoff::CsectAux>(index, offset)?; + if let Some(aux_type) = aux_csect.x_auxtype() { + if aux_type != xcoff::AUX_CSECT { + return Err(Error("Invalid index/offset for csect auxiliary symbol.")); + } + } + Ok(aux_csect) + } + + /// Return true if the symbol table is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } + + /// The number of symbol table entries. + /// + /// This includes auxiliary symbol table entries. + #[inline] + pub fn len(&self) -> usize { + self.symbols.len() + } +} + +/// A symbol table of an `XcoffFile32`. +pub type XcoffSymbolTable32<'data, 'file, R = &'data [u8]> = + XcoffSymbolTable<'data, 'file, xcoff::FileHeader32, R>; +/// A symbol table of an `XcoffFile64`. +pub type XcoffSymbolTable64<'data, 'file, R = &'data [u8]> = + XcoffSymbolTable<'data, 'file, xcoff::FileHeader64, R>; + +/// A symbol table of an `XcoffFile`. +#[derive(Debug, Clone, Copy)] +pub struct XcoffSymbolTable<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) symbols: &'file SymbolTable<'data, Xcoff, R>, +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> read::private::Sealed + for XcoffSymbolTable<'data, 'file, Xcoff, R> +{ +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbolTable<'data> + for XcoffSymbolTable<'data, 'file, Xcoff, R> +{ + type Symbol = XcoffSymbol<'data, 'file, Xcoff, R>; + type SymbolIterator = XcoffSymbolIterator<'data, 'file, Xcoff, R>; + + fn symbols(&self) -> Self::SymbolIterator { + XcoffSymbolIterator { + file: self.file, + symbols: self.symbols, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> read::Result<Self::Symbol> { + let symbol = self.symbols.symbol(index.0)?; + Ok(XcoffSymbol { + file: self.file, + symbols: self.symbols, + index, + symbol, + }) + } +} + +/// An iterator over the symbols of an `XcoffFile32`. +pub type XcoffSymbolIterator32<'data, 'file, R = &'data [u8]> = + XcoffSymbolIterator<'data, 'file, xcoff::FileHeader32, R>; +/// An iterator over the symbols of an `XcoffFile64`. +pub type XcoffSymbolIterator64<'data, 'file, R = &'data [u8]> = + XcoffSymbolIterator<'data, 'file, xcoff::FileHeader64, R>; + +/// An iterator over the symbols of an `XcoffFile`. +pub struct XcoffSymbolIterator<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) symbols: &'file SymbolTable<'data, Xcoff, R>, + pub(super) index: usize, +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> fmt::Debug + for XcoffSymbolIterator<'data, 'file, Xcoff, R> +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("XcoffSymbolIterator").finish() + } +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> Iterator + for XcoffSymbolIterator<'data, 'file, Xcoff, R> +{ + type Item = XcoffSymbol<'data, 'file, Xcoff, R>; + + fn next(&mut self) -> Option<Self::Item> { + let index = self.index; + let symbol = self.symbols.symbol(index).ok()?; + // TODO: skip over the auxiliary symbols for now. + self.index += 1 + symbol.n_numaux() as usize; + Some(XcoffSymbol { + file: self.file, + symbols: self.symbols, + index: SymbolIndex(index), + symbol, + }) + } +} + +/// A symbol of an `XcoffFile32`. +pub type XcoffSymbol32<'data, 'file, R = &'data [u8]> = + XcoffSymbol<'data, 'file, xcoff::FileHeader32, R>; +/// A symbol of an `XcoffFile64`. +pub type XcoffSymbol64<'data, 'file, R = &'data [u8]> = + XcoffSymbol<'data, 'file, xcoff::FileHeader64, R>; + +/// A symbol of an `XcoffFile`. +#[derive(Debug, Clone, Copy)] +pub struct XcoffSymbol<'data, 'file, Xcoff, R = &'data [u8]> +where + 'data: 'file, + Xcoff: FileHeader, + R: ReadRef<'data>, +{ + pub(crate) file: &'file XcoffFile<'data, Xcoff, R>, + pub(super) symbols: &'file SymbolTable<'data, Xcoff, R>, + pub(super) index: SymbolIndex, + pub(super) symbol: &'data Xcoff::Symbol, +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> read::private::Sealed + for XcoffSymbol<'data, 'file, Xcoff, R> +{ +} + +impl<'data, 'file, Xcoff: FileHeader, R: ReadRef<'data>> ObjectSymbol<'data> + for XcoffSymbol<'data, 'file, Xcoff, R> +{ + #[inline] + fn index(&self) -> SymbolIndex { + self.index + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + self.symbol.name(self.symbols.strings) + } + + fn name(&self) -> Result<&'data str> { + let name = self.name_bytes()?; + str::from_utf8(name) + .ok() + .read_error("Non UTF-8 XCOFF symbol name") + } + + #[inline] + fn address(&self) -> u64 { + match self.symbol.n_sclass() { + // Relocatable address. + xcoff::C_EXT + | xcoff::C_WEAKEXT + | xcoff::C_HIDEXT + | xcoff::C_FCN + | xcoff::C_BLOCK + | xcoff::C_STAT => self.symbol.n_value().into(), + _ => 0, + } + } + + #[inline] + fn size(&self) -> u64 { + if self.symbol.has_aux_csect() { + // XCOFF32 must have the csect auxiliary entry as the last auxiliary entry. + // XCOFF64 doesn't require this, but conventionally does. + if let Ok(aux_csect) = self + .file + .symbols + .aux_csect(self.index.0, self.symbol.n_numaux() as usize) + { + let sym_type = aux_csect.sym_type() & 0x07; + if sym_type == xcoff::XTY_SD || sym_type == xcoff::XTY_CM { + aux_csect.x_scnlen() + } else { + 0 + } + } else { + 0 + } + } else { + 0 + } + } + + fn kind(&self) -> SymbolKind { + match self.symbol.n_sclass() { + xcoff::C_FILE => SymbolKind::File, + xcoff::C_NULL => SymbolKind::Null, + _ => self + .file + .section_by_index(SectionIndex((self.symbol.n_scnum() - 1) as usize)) + .map(|section| match section.kind() { + SectionKind::Data | SectionKind::UninitializedData => SymbolKind::Data, + SectionKind::UninitializedTls | SectionKind::Tls => SymbolKind::Tls, + SectionKind::Text => SymbolKind::Text, + _ => SymbolKind::Unknown, + }) + .unwrap_or(SymbolKind::Unknown), + } + } + + fn section(&self) -> SymbolSection { + match self.symbol.n_scnum() { + xcoff::N_ABS => SymbolSection::Absolute, + xcoff::N_UNDEF => SymbolSection::Undefined, + xcoff::N_DEBUG => SymbolSection::None, + index if index > 0 => SymbolSection::Section(SectionIndex(index as usize)), + _ => SymbolSection::Unknown, + } + } + + #[inline] + fn is_undefined(&self) -> bool { + self.symbol.is_undefined() + } + + /// Return true if the symbol is a definition of a function or data object. + #[inline] + fn is_definition(&self) -> bool { + if self.symbol.has_aux_csect() { + if let Ok(aux_csect) = self + .symbols + .aux_csect(self.index.0, self.symbol.n_numaux() as usize) + { + let smclas = aux_csect.x_smclas(); + self.symbol.n_scnum() != xcoff::N_UNDEF + && (smclas == xcoff::XMC_PR + || smclas == xcoff::XMC_RW + || smclas == xcoff::XMC_RO) + } else { + false + } + } else { + false + } + } + + #[inline] + fn is_common(&self) -> bool { + self.symbol.n_sclass() == xcoff::C_EXT && self.symbol.n_scnum() == xcoff::N_UNDEF + } + + #[inline] + fn is_weak(&self) -> bool { + self.symbol.n_sclass() == xcoff::C_WEAKEXT + } + + fn scope(&self) -> SymbolScope { + if self.symbol.n_scnum() == xcoff::N_UNDEF { + SymbolScope::Unknown + } else { + match self.symbol.n_sclass() { + xcoff::C_EXT | xcoff::C_WEAKEXT | xcoff::C_HIDEXT => { + let visibility = self.symbol.n_type() & xcoff::SYM_V_MASK; + if visibility == xcoff::SYM_V_HIDDEN { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + } + } + _ => SymbolScope::Compilation, + } + } + } + + #[inline] + fn is_global(&self) -> bool { + match self.symbol.n_sclass() { + xcoff::C_EXT | xcoff::C_WEAKEXT => true, + _ => false, + } + } + + #[inline] + fn is_local(&self) -> bool { + !self.is_global() + } + + #[inline] + fn flags(&self) -> SymbolFlags<SectionIndex> { + SymbolFlags::None + } +} + +/// A trait for generic access to `Symbol32` and `Symbol64`. +#[allow(missing_docs)] +pub trait Symbol: Debug + Pod { + type Word: Into<u64>; + + fn n_value(&self) -> Self::Word; + fn n_scnum(&self) -> i16; + fn n_type(&self) -> u16; + fn n_sclass(&self) -> u8; + fn n_numaux(&self) -> u8; + + fn name<'data, R: ReadRef<'data>>( + &'data self, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]>; + + /// Return true if the symbol is undefined. + #[inline] + fn is_undefined(&self) -> bool { + let n_sclass = self.n_sclass(); + (n_sclass == xcoff::C_EXT || n_sclass == xcoff::C_WEAKEXT) + && self.n_scnum() == xcoff::N_UNDEF + } + + /// Return true if the symbol has file auxiliary entry. + fn has_aux_file(&self) -> bool { + self.n_numaux() > 0 && self.n_sclass() == xcoff::C_FILE + } + + /// Return true if the symbol has csect auxiliary entry. + /// + /// A csect auxiliary entry is required for each symbol table entry that has + /// a storage class value of C_EXT, C_WEAKEXT, or C_HIDEXT. + fn has_aux_csect(&self) -> bool { + let sclass = self.n_sclass(); + self.n_numaux() > 0 + && (sclass == xcoff::C_EXT || sclass == xcoff::C_WEAKEXT || sclass == xcoff::C_HIDEXT) + } +} + +impl Symbol for xcoff::Symbol64 { + type Word = u64; + + fn n_value(&self) -> Self::Word { + self.n_value.get(BE) + } + + fn n_scnum(&self) -> i16 { + self.n_scnum.get(BE) + } + + fn n_type(&self) -> u16 { + self.n_type.get(BE) + } + + fn n_sclass(&self) -> u8 { + self.n_sclass + } + + fn n_numaux(&self) -> u8 { + self.n_numaux + } + + /// Parse the symbol name for XCOFF64. + fn name<'data, R: ReadRef<'data>>( + &'data self, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + strings + .get(self.n_offset.get(BE)) + .read_error("Invalid XCOFF symbol name offset") + } +} + +impl Symbol for xcoff::Symbol32 { + type Word = u32; + + fn n_value(&self) -> Self::Word { + self.n_value.get(BE) + } + + fn n_scnum(&self) -> i16 { + self.n_scnum.get(BE) + } + + fn n_type(&self) -> u16 { + self.n_type.get(BE) + } + + fn n_sclass(&self) -> u8 { + self.n_sclass + } + + fn n_numaux(&self) -> u8 { + self.n_numaux + } + + /// Parse the symbol name for XCOFF32. + fn name<'data, R: ReadRef<'data>>( + &'data self, + strings: StringTable<'data, R>, + ) -> Result<&'data [u8]> { + if self.n_name[0] == 0 { + // If the name starts with 0 then the last 4 bytes are a string table offset. + let offset = u32::from_be_bytes(self.n_name[4..8].try_into().unwrap()); + strings + .get(offset) + .read_error("Invalid XCOFF symbol name offset") + } else { + // The name is inline and padded with nulls. + Ok(match memchr::memchr(b'\0', &self.n_name) { + Some(end) => &self.n_name[..end], + None => &self.n_name, + }) + } + } +} + +/// A trait for generic access to `FileAux32` and `FileAux64`. +#[allow(missing_docs)] +pub trait FileAux: Debug + Pod { + fn x_fname(&self) -> &[u8; 8]; + fn x_ftype(&self) -> u8; + fn x_auxtype(&self) -> Option<u8>; +} + +impl FileAux for xcoff::FileAux64 { + fn x_fname(&self) -> &[u8; 8] { + &self.x_fname + } + + fn x_ftype(&self) -> u8 { + self.x_ftype + } + + fn x_auxtype(&self) -> Option<u8> { + Some(self.x_auxtype) + } +} + +impl FileAux for xcoff::FileAux32 { + fn x_fname(&self) -> &[u8; 8] { + &self.x_fname + } + + fn x_ftype(&self) -> u8 { + self.x_ftype + } + + fn x_auxtype(&self) -> Option<u8> { + None + } +} + +/// A trait for generic access to `CsectAux32` and `CsectAux64`. +#[allow(missing_docs)] +pub trait CsectAux: Debug + Pod { + fn x_scnlen(&self) -> u64; + fn x_parmhash(&self) -> u32; + fn x_snhash(&self) -> u16; + fn x_smtyp(&self) -> u8; + fn x_smclas(&self) -> u8; + fn x_auxtype(&self) -> Option<u8>; + + fn sym_type(&self) -> u8 { + self.x_smtyp() & 0x07 + } +} + +impl CsectAux for xcoff::CsectAux64 { + fn x_scnlen(&self) -> u64 { + self.x_scnlen_lo.get(BE) as u64 | ((self.x_scnlen_hi.get(BE) as u64) << 32) + } + + fn x_parmhash(&self) -> u32 { + self.x_parmhash.get(BE) + } + + fn x_snhash(&self) -> u16 { + self.x_snhash.get(BE) + } + + fn x_smtyp(&self) -> u8 { + self.x_smtyp + } + + fn x_smclas(&self) -> u8 { + self.x_smclas + } + + fn x_auxtype(&self) -> Option<u8> { + Some(self.x_auxtype) + } +} + +impl CsectAux for xcoff::CsectAux32 { + fn x_scnlen(&self) -> u64 { + self.x_scnlen.get(BE) as u64 + } + + fn x_parmhash(&self) -> u32 { + self.x_parmhash.get(BE) + } + + fn x_snhash(&self) -> u16 { + self.x_snhash.get(BE) + } + + fn x_smtyp(&self) -> u8 { + self.x_smtyp + } + + fn x_smclas(&self) -> u8 { + self.x_smclas + } + + fn x_auxtype(&self) -> Option<u8> { + None + } +} |