diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/object/src | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/object/src')
-rw-r--r-- | third_party/rust/object/src/common.rs | 187 | ||||
-rw-r--r-- | third_party/rust/object/src/lib.rs | 45 | ||||
-rw-r--r-- | third_party/rust/object/src/read/any.rs | 523 | ||||
-rw-r--r-- | third_party/rust/object/src/read/coff.rs | 527 | ||||
-rw-r--r-- | third_party/rust/object/src/read/elf.rs | 709 | ||||
-rw-r--r-- | third_party/rust/object/src/read/macho.rs | 615 | ||||
-rw-r--r-- | third_party/rust/object/src/read/mod.rs | 250 | ||||
-rw-r--r-- | third_party/rust/object/src/read/pe.rs | 407 | ||||
-rw-r--r-- | third_party/rust/object/src/read/traits.rs | 220 | ||||
-rw-r--r-- | third_party/rust/object/src/read/wasm.rs | 315 | ||||
-rw-r--r-- | third_party/rust/object/src/write/coff.rs | 486 | ||||
-rw-r--r-- | third_party/rust/object/src/write/elf.rs | 736 | ||||
-rw-r--r-- | third_party/rust/object/src/write/macho.rs | 558 | ||||
-rw-r--r-- | third_party/rust/object/src/write/mod.rs | 674 | ||||
-rw-r--r-- | third_party/rust/object/src/write/string.rs | 140 | ||||
-rw-r--r-- | third_party/rust/object/src/write/util.rs | 14 |
16 files changed, 6406 insertions, 0 deletions
diff --git a/third_party/rust/object/src/common.rs b/third_party/rust/object/src/common.rs new file mode 100644 index 0000000000..6cc3f61488 --- /dev/null +++ b/third_party/rust/object/src/common.rs @@ -0,0 +1,187 @@ +/// The kind of a section. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SectionKind { + /// The section kind is unknown. + Unknown, + /// An executable code section. + /// + /// Example ELF sections: `.text` + /// + /// Example Mach-O sections: `__TEXT/__text` + Text, + /// A data section. + /// + /// Example ELF sections: `.data` + /// + /// Example Mach-O sections: `__DATA/__data` + Data, + /// A read only data section. + /// + /// Example ELF sections: `.rodata` + /// + /// Example Mach-O sections: `__TEXT/__const`, `__DATA/__const` + ReadOnlyData, + /// A loadable string section. + /// + /// Example ELF sections: `.rodata.str` + /// + /// Example Mach-O sections: `__TEXT/__cstring` + ReadOnlyString, + /// An uninitialized data section. + /// + /// Example ELF sections: `.bss` + /// + /// Example Mach-O sections: `__DATA/__bss` + UninitializedData, + /// A TLS data section. + /// + /// Example ELF sections: `.tdata` + /// + /// Example Mach-O sections: `__DATA/__thread_data` + Tls, + /// An uninitialized TLS data section. + /// + /// Example ELF sections: `.tbss` + /// + /// Example Mach-O sections: `__DATA/__thread_bss` + UninitializedTls, + /// A TLS variables section. + /// + /// This contains TLS variable structures, rather than the variable initializers. + /// + /// Example Mach-O sections: `__DATA/__thread_vars` + TlsVariables, + /// A non-loadable string section. + /// + /// Example ELF sections: `.comment`, `.debug_str` + OtherString, + /// Some other non-loadable section. + /// + /// Example ELF sections: `.debug_info` + Other, + /// Debug information. + /// + /// Example Mach-O sections: `__DWARF/__debug_info` + Debug, + /// Information for the linker. 
+ /// + /// Example COFF sections: `.drectve` + Linker, + /// Metadata such as symbols or relocations. + /// + /// Example ELF sections: `.symtab`, `.strtab` + Metadata, +} + +/// The kind of a symbol. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolKind { + /// The symbol kind is unknown. + Unknown, + /// The symbol is a null placeholder. + Null, + /// The symbol is for executable code. + Text, + /// The symbol is for a data object. + Data, + /// The symbol is for a section. + Section, + /// The symbol is the name of a file. It precedes symbols within that file. + File, + /// The symbol is for a code label. + Label, + /// The symbol is for an uninitialized common block. + Common, + /// The symbol is for a thread local storage entity. + Tls, +} + +/// A symbol scope. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SymbolScope { + /// Unknown scope. + Unknown, + /// Symbol is visible to the compilation unit. + Compilation, + /// Symbol is visible to the static linkage unit. + Linkage, + /// Symbol is visible to dynamically linked objects. + Dynamic, +} + +/// The operation used to calculate the result of the relocation. +/// +/// The relocation descriptions use the following definitions. Note that +/// these definitions probably don't match any ELF ABI. +/// +/// * A - The value of the addend. +/// * G - The address of the symbol's entry within the global offset table. +/// * L - The address of the symbol's entry within the procedure linkage table. +/// * P - The address of the place of the relocation. +/// * S - The address of the symbol. +/// * GotBase - The address of the global offset table. +/// * Image - The base address of the image. +/// * Section - The address of the section containing the symbol. +/// +/// 'XxxRelative' means 'Xxx + A - P'. 'XxxOffset' means 'S + A - Xxx'. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelocationKind { + /// S + A + Absolute, + /// S + A - P + Relative, + /// G + A - GotBase + Got, + /// G + A - P + GotRelative, + /// GotBase + A - P + GotBaseRelative, + /// S + A - GotBase + GotBaseOffset, + /// L + A - P + PltRelative, + /// S + A - Image + ImageOffset, + /// S + A - Section + SectionOffset, + /// The index of the section containing the symbol. + SectionIndex, + /// Some other ELF relocation. The value is dependent on the architecture. + Elf(u32), + /// Some other Mach-O relocation. The value is dependent on the architecture. + MachO { + /// The relocation type. + value: u8, + /// Whether the relocation is relative to the place. + relative: bool, + }, + /// Some other COFF relocation. The value is dependent on the architecture. + Coff(u16), +} + +/// Information about how the result of the relocation operation is encoded in the place. +/// +/// This is usually architecture specific, such as specifying an addressing mode or +/// a specific instruction. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RelocationEncoding { + /// Generic encoding. + Generic, + + /// x86 sign extension at runtime. + /// + /// Used with `RelocationKind::Absolute`. + X86Signed, + /// x86 rip-relative addressing. + /// + /// The `RelocationKind` must be PC relative. + X86RipRelative, + /// x86 rip-relative addressing in movq instruction. + /// + /// The `RelocationKind` must be PC relative. + X86RipRelativeMovq, + /// x86 branch instruction. + /// + /// The `RelocationKind` must be PC relative. + X86Branch, +} diff --git a/third_party/rust/object/src/lib.rs b/third_party/rust/object/src/lib.rs new file mode 100644 index 0000000000..9719575ea8 --- /dev/null +++ b/third_party/rust/object/src/lib.rs @@ -0,0 +1,45 @@ +//! # `object` +//! +//! The `object` crate provides a unified interface to working with object files +//! across platforms. +//! +//! See the [`File` struct](./read/struct.File.html) for details. 
+ +#![deny(missing_docs)] +#![deny(missing_debug_implementations)] +#![no_std] +#![cfg_attr(not(feature = "std"), feature(alloc))] + +#[cfg(feature = "std")] +#[macro_use] +extern crate std; + +#[cfg(all(not(feature = "std"), feature = "compression"))] +#[macro_use] +extern crate alloc; +#[cfg(all(not(feature = "std"), not(feature = "compression")))] +extern crate alloc; +#[cfg(not(feature = "std"))] +extern crate core as std; + +#[cfg(feature = "std")] +mod alloc { + pub use std::borrow; + pub use std::fmt; + pub use std::vec; +} + +// Re-export since these are used in public signatures. +pub use target_lexicon; +pub use uuid; + +mod common; +pub use common::*; + +#[cfg(feature = "read")] +pub mod read; +#[cfg(feature = "read")] +pub use read::*; + +#[cfg(feature = "write")] +pub mod write; diff --git a/third_party/rust/object/src/read/any.rs b/third_party/rust/object/src/read/any.rs new file mode 100644 index 0000000000..74f03f735f --- /dev/null +++ b/third_party/rust/object/src/read/any.rs @@ -0,0 +1,523 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use target_lexicon::{Architecture, BinaryFormat}; +use uuid::Uuid; + +#[cfg(feature = "wasm")] +use crate::read::wasm; +use crate::read::{coff, elf, macho, pe}; +use crate::read::{ + Object, ObjectSection, ObjectSegment, Relocation, SectionIndex, SectionKind, Symbol, + SymbolIndex, SymbolMap, +}; + +/// Evaluate an expression on the contents of a file format enum. +/// +/// This is a hack to avoid virtual calls. +macro_rules! with_inner { + ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { + match $inner { + $enum::Coff(ref $var) => $body, + $enum::Elf(ref $var) => $body, + $enum::MachO(ref $var) => $body, + $enum::Pe(ref $var) => $body, + #[cfg(feature = "wasm")] + $enum::Wasm(ref $var) => $body, + } + }; +} + +macro_rules! 
with_inner_mut { + ($inner:expr, $enum:ident, | $var:ident | $body:expr) => { + match $inner { + $enum::Coff(ref mut $var) => $body, + $enum::Elf(ref mut $var) => $body, + $enum::MachO(ref mut $var) => $body, + $enum::Pe(ref mut $var) => $body, + #[cfg(feature = "wasm")] + $enum::Wasm(ref mut $var) => $body, + } + }; +} + +/// Like `with_inner!`, but wraps the result in another enum. +macro_rules! map_inner { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + $from::Coff(ref $var) => $to::Coff($body), + $from::Elf(ref $var) => $to::Elf($body), + $from::MachO(ref $var) => $to::MachO($body), + $from::Pe(ref $var) => $to::Pe($body), + #[cfg(feature = "wasm")] + $from::Wasm(ref $var) => $to::Wasm($body), + } + }; +} + +/// Like `map_inner!`, but the result is a Result or Option. +macro_rules! map_inner_option { + ($inner:expr, $from:ident, $to:ident, | $var:ident | $body:expr) => { + match $inner { + $from::Coff(ref $var) => $body.map($to::Coff), + $from::Elf(ref $var) => $body.map($to::Elf), + $from::MachO(ref $var) => $body.map($to::MachO), + $from::Pe(ref $var) => $body.map($to::Pe), + #[cfg(feature = "wasm")] + $from::Wasm(ref $var) => $body.map($to::Wasm), + } + }; +} + +/// Call `next` for a file format iterator. +macro_rules! next_inner { + ($inner:expr, $from:ident, $to:ident) => { + match $inner { + $from::Coff(ref mut iter) => iter.next().map($to::Coff), + $from::Elf(ref mut iter) => iter.next().map($to::Elf), + $from::MachO(ref mut iter) => iter.next().map($to::MachO), + $from::Pe(ref mut iter) => iter.next().map($to::Pe), + #[cfg(feature = "wasm")] + $from::Wasm(ref mut iter) => iter.next().map($to::Wasm), + } + }; +} + +/// An object file. +/// +/// Most functionality is provided by the `Object` trait implementation. 
+#[derive(Debug)] +pub struct File<'data> { + inner: FileInternal<'data>, +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug)] +enum FileInternal<'data> { + Coff(coff::CoffFile<'data>), + Elf(elf::ElfFile<'data>), + MachO(macho::MachOFile<'data>), + Pe(pe::PeFile<'data>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmFile), +} + +impl<'data> File<'data> { + /// Parse the raw file data. + pub fn parse(data: &'data [u8]) -> Result<Self, &'static str> { + if data.len() < 16 { + return Err("File too short"); + } + + let inner = match [data[0], data[1], data[2], data[3]] { + // ELF + [0x7f, b'E', b'L', b'F'] => FileInternal::Elf(elf::ElfFile::parse(data)?), + // 32-bit Mach-O + [0xfe, 0xed, 0xfa, 0xce] + | [0xce, 0xfa, 0xed, 0xfe] + // 64-bit Mach-O + | [0xfe, 0xed, 0xfa, 0xcf] + | [0xcf, 0xfa, 0xed, 0xfe] => FileInternal::MachO(macho::MachOFile::parse(data)?), + // WASM + #[cfg(feature = "wasm")] + [0x00, b'a', b's', b'm'] => FileInternal::Wasm(wasm::WasmFile::parse(data)?), + // MS-DOS, assume stub for Windows PE + [b'M', b'Z', _, _] => FileInternal::Pe(pe::PeFile::parse(data)?), + // TODO: more COFF machines + // COFF x86 + [0x4c, 0x01, _, _] + // COFF x86-64 + | [0x64, 0x86, _, _] => FileInternal::Coff(coff::CoffFile::parse(data)?), + _ => return Err("Unknown file magic"), + }; + Ok(File { inner }) + } + + /// Return the file format. 
+ pub fn format(&self) -> BinaryFormat { + match self.inner { + FileInternal::Elf(_) => BinaryFormat::Elf, + FileInternal::MachO(_) => BinaryFormat::Macho, + FileInternal::Coff(_) | FileInternal::Pe(_) => BinaryFormat::Coff, + #[cfg(feature = "wasm")] + FileInternal::Wasm(_) => BinaryFormat::Wasm, + } + } +} + +impl<'data, 'file> Object<'data, 'file> for File<'data> +where + 'data: 'file, +{ + type Segment = Segment<'data, 'file>; + type SegmentIterator = SegmentIterator<'data, 'file>; + type Section = Section<'data, 'file>; + type SectionIterator = SectionIterator<'data, 'file>; + type SymbolIterator = SymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + with_inner!(self.inner, FileInternal, |x| x.architecture()) + } + + fn is_little_endian(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.is_little_endian()) + } + + fn is_64(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.is_64()) + } + + fn segments(&'file self) -> SegmentIterator<'data, 'file> { + SegmentIterator { + inner: map_inner!(self.inner, FileInternal, SegmentIteratorInternal, |x| x + .segments()), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option<Section<'data, 'file>> { + map_inner_option!(self.inner, FileInternal, SectionInternal, |x| x + .section_by_name(section_name)) + .map(|inner| Section { inner }) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<Section<'data, 'file>> { + map_inner_option!(self.inner, FileInternal, SectionInternal, |x| x + .section_by_index(index)) + .map(|inner| Section { inner }) + } + + fn section_data_by_name(&self, section_name: &str) -> Option<Cow<'data, [u8]>> { + with_inner!(self.inner, FileInternal, |x| x + .section_data_by_name(section_name)) + } + + fn sections(&'file self) -> SectionIterator<'data, 'file> { + SectionIterator { + inner: map_inner!(self.inner, FileInternal, SectionIteratorInternal, |x| x + .sections()), + } + } + + fn symbol_by_index(&self, index: 
SymbolIndex) -> Option<Symbol<'data>> { + with_inner!(self.inner, FileInternal, |x| x.symbol_by_index(index)) + } + + fn symbols(&'file self) -> SymbolIterator<'data, 'file> { + SymbolIterator { + inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| x + .symbols()), + } + } + + fn dynamic_symbols(&'file self) -> SymbolIterator<'data, 'file> { + SymbolIterator { + inner: map_inner!(self.inner, FileInternal, SymbolIteratorInternal, |x| x + .dynamic_symbols()), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + with_inner!(self.inner, FileInternal, |x| x.symbol_map()) + } + + fn has_debug_symbols(&self) -> bool { + with_inner!(self.inner, FileInternal, |x| x.has_debug_symbols()) + } + + #[inline] + fn mach_uuid(&self) -> Option<Uuid> { + with_inner!(self.inner, FileInternal, |x| x.mach_uuid()) + } + + #[inline] + fn build_id(&self) -> Option<&'data [u8]> { + with_inner!(self.inner, FileInternal, |x| x.build_id()) + } + + #[inline] + fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { + with_inner!(self.inner, FileInternal, |x| x.gnu_debuglink()) + } + + fn entry(&self) -> u64 { + with_inner!(self.inner, FileInternal, |x| x.entry()) + } +} + +/// An iterator over the segments of a `File`. 
+#[derive(Debug)] +pub struct SegmentIterator<'data, 'file> +where + 'data: 'file, +{ + inner: SegmentIteratorInternal<'data, 'file>, +} + +#[derive(Debug)] +enum SegmentIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSegmentIterator<'data, 'file>), + Elf(elf::ElfSegmentIterator<'data, 'file>), + MachO(macho::MachOSegmentIterator<'data, 'file>), + Pe(pe::PeSegmentIterator<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSegmentIterator<'file>), +} + +impl<'data, 'file> Iterator for SegmentIterator<'data, 'file> { + type Item = Segment<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + next_inner!(self.inner, SegmentIteratorInternal, SegmentInternal) + .map(|inner| Segment { inner }) + } +} + +/// A segment of a `File`. +pub struct Segment<'data, 'file> +where + 'data: 'file, +{ + inner: SegmentInternal<'data, 'file>, +} + +#[derive(Debug)] +enum SegmentInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSegment<'data, 'file>), + Elf(elf::ElfSegment<'data, 'file>), + MachO(macho::MachOSegment<'data, 'file>), + Pe(pe::PeSegment<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSegment<'file>), +} + +impl<'data, 'file> fmt::Debug for Segment<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("Segment") + .field("name", &self.name().unwrap_or("<unnamed>")) + .field("address", &self.address()) + .field("size", &self.data().len()) + .finish() + } +} + +impl<'data, 'file> ObjectSegment<'data> for Segment<'data, 'file> { + fn address(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.size()) + } + + fn align(&self) -> u64 { + with_inner!(self.inner, SegmentInternal, |x| x.align()) + } + + fn file_range(&self) -> (u64, u64) { + with_inner!(self.inner, SegmentInternal, |x| x.file_range()) + } + + fn data(&self) 
-> &'data [u8] { + with_inner!(self.inner, SegmentInternal, |x| x.data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + with_inner!(self.inner, SegmentInternal, |x| x.data_range(address, size)) + } + + fn name(&self) -> Option<&str> { + with_inner!(self.inner, SegmentInternal, |x| x.name()) + } +} + +/// An iterator of the sections of a `File`. +#[derive(Debug)] +pub struct SectionIterator<'data, 'file> +where + 'data: 'file, +{ + inner: SectionIteratorInternal<'data, 'file>, +} + +// we wrap our enums in a struct so that they are kept private. +#[derive(Debug)] +enum SectionIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSectionIterator<'data, 'file>), + Elf(elf::ElfSectionIterator<'data, 'file>), + MachO(macho::MachOSectionIterator<'data, 'file>), + Pe(pe::PeSectionIterator<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSectionIterator<'file>), +} + +impl<'data, 'file> Iterator for SectionIterator<'data, 'file> { + type Item = Section<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + next_inner!(self.inner, SectionIteratorInternal, SectionInternal) + .map(|inner| Section { inner }) + } +} + +/// A Section of a File +pub struct Section<'data, 'file> +where + 'data: 'file, +{ + inner: SectionInternal<'data, 'file>, +} + +enum SectionInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSection<'data, 'file>), + Elf(elf::ElfSection<'data, 'file>), + MachO(macho::MachOSection<'data, 'file>), + Pe(pe::PeSection<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSection<'file>), +} + +impl<'data, 'file> fmt::Debug for Section<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + let mut s = f.debug_struct("Section"); + if let Some(segment) = self.segment_name() { + s.field("segment", &segment); + } + s.field("name", &self.name().unwrap_or("<invalid name>")) + .field("address", 
&self.address()) + .field("size", &self.data().len()) + .field("kind", &self.kind()) + .finish() + } +} + +impl<'data, 'file> ObjectSection<'data> for Section<'data, 'file> { + type RelocationIterator = RelocationIterator<'data, 'file>; + + fn index(&self) -> SectionIndex { + with_inner!(self.inner, SectionInternal, |x| x.index()) + } + + fn address(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.address()) + } + + fn size(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.size()) + } + + fn align(&self) -> u64 { + with_inner!(self.inner, SectionInternal, |x| x.align()) + } + + fn file_range(&self) -> Option<(u64, u64)> { + with_inner!(self.inner, SectionInternal, |x| x.file_range()) + } + + fn data(&self) -> Cow<'data, [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.data_range(address, size)) + } + + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + with_inner!(self.inner, SectionInternal, |x| x.uncompressed_data()) + } + + fn name(&self) -> Option<&str> { + with_inner!(self.inner, SectionInternal, |x| x.name()) + } + + fn segment_name(&self) -> Option<&str> { + with_inner!(self.inner, SectionInternal, |x| x.segment_name()) + } + + fn kind(&self) -> SectionKind { + with_inner!(self.inner, SectionInternal, |x| x.kind()) + } + + fn relocations(&self) -> RelocationIterator<'data, 'file> { + RelocationIterator { + inner: map_inner!( + self.inner, + SectionInternal, + RelocationIteratorInternal, + |x| x.relocations() + ), + } + } +} + +/// An iterator over symbol table entries. 
+#[derive(Debug)] +pub struct SymbolIterator<'data, 'file> +where + 'data: 'file, +{ + inner: SymbolIteratorInternal<'data, 'file>, +} + +#[derive(Debug)] +enum SymbolIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffSymbolIterator<'data, 'file>), + Elf(elf::ElfSymbolIterator<'data, 'file>), + MachO(macho::MachOSymbolIterator<'data, 'file>), + Pe(pe::PeSymbolIterator<'data, 'file>), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmSymbolIterator<'file>), +} + +impl<'data, 'file> Iterator for SymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option<Self::Item> { + with_inner_mut!(self.inner, SymbolIteratorInternal, |x| x.next()) + } +} + +/// An iterator over relocation entries +#[derive(Debug)] +pub struct RelocationIterator<'data, 'file> +where + 'data: 'file, +{ + inner: RelocationIteratorInternal<'data, 'file>, +} + +#[derive(Debug)] +enum RelocationIteratorInternal<'data, 'file> +where + 'data: 'file, +{ + Coff(coff::CoffRelocationIterator<'data, 'file>), + Elf(elf::ElfRelocationIterator<'data, 'file>), + MachO(macho::MachORelocationIterator<'data, 'file>), + Pe(pe::PeRelocationIterator), + #[cfg(feature = "wasm")] + Wasm(wasm::WasmRelocationIterator), +} + +impl<'data, 'file> Iterator for RelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + with_inner_mut!(self.inner, RelocationIteratorInternal, |x| x.next()) + } +} diff --git a/third_party/rust/object/src/read/coff.rs b/third_party/rust/object/src/read/coff.rs new file mode 100644 index 0000000000..72f985e458 --- /dev/null +++ b/third_party/rust/object/src/read/coff.rs @@ -0,0 +1,527 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use crate::alloc::vec::Vec; +use goblin::pe; +use std::{iter, slice}; +use target_lexicon::Architecture; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + 
RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// A COFF object file. +#[derive(Debug)] +pub struct CoffFile<'data> { + coff: pe::Coff<'data>, + data: &'data [u8], +} + +/// An iterator over the loadable sections of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + iter: slice::Iter<'file, pe::section_table::SectionTable>, +} + +/// A loadable section of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSegment<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + section: &'file pe::section_table::SectionTable, +} + +/// An iterator over the sections of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + iter: iter::Enumerate<slice::Iter<'file, pe::section_table::SectionTable>>, +} + +/// A section of a `CoffFile`. +#[derive(Debug)] +pub struct CoffSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + index: SectionIndex, + section: &'file pe::section_table::SectionTable, +} + +/// An iterator over the symbols of a `CoffFile`. +pub struct CoffSymbolIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file CoffFile<'data>, + symbols: pe::symbol::SymbolIterator<'data>, +} + +/// An iterator over the relocations in an `CoffSection`. +pub struct CoffRelocationIterator<'data, 'file> { + file: &'file CoffFile<'data>, + relocations: pe::relocation::Relocations<'data>, +} + +impl<'data> CoffFile<'data> { + /// Get the COFF headers of the file. + // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn coff(&self) -> &pe::Coff<'data> { + &self.coff + } + + /// Parse the raw COFF file data. 
+ pub fn parse(data: &'data [u8]) -> Result<Self, &'static str> { + let coff = pe::Coff::parse(data).map_err(|_| "Could not parse COFF header")?; + Ok(CoffFile { coff, data }) + } +} + +impl<'data, 'file> Object<'data, 'file> for CoffFile<'data> +where + 'data: 'file, +{ + type Segment = CoffSegment<'data, 'file>; + type SegmentIterator = CoffSegmentIterator<'data, 'file>; + type Section = CoffSection<'data, 'file>; + type SectionIterator = CoffSectionIterator<'data, 'file>; + type SymbolIterator = CoffSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.coff.header.machine { + pe::header::COFF_MACHINE_X86 => Architecture::I386, + pe::header::COFF_MACHINE_X86_64 => Architecture::X86_64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + true + } + + #[inline] + fn is_64(&self) -> bool { + false + } + + fn segments(&'file self) -> CoffSegmentIterator<'data, 'file> { + CoffSegmentIterator { + file: self, + iter: self.coff.sections.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option<CoffSection<'data, 'file>> { + self.sections() + .find(|section| section.name() == Some(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<CoffSection<'data, 'file>> { + self.sections().find(|section| section.index() == index) + } + + fn sections(&'file self) -> CoffSectionIterator<'data, 'file> { + CoffSectionIterator { + file: self, + iter: self.coff.sections.iter().enumerate(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option<Symbol<'data>> { + self.coff + .symbols + .get(index.0) + .map(|(name, symbol)| parse_symbol(index.0, name, &symbol, &self.coff)) + } + + fn symbols(&'file self) -> CoffSymbolIterator<'data, 'file> { + CoffSymbolIterator { + file: self, + symbols: self.coff.symbols.iter(), + } + } + + fn dynamic_symbols(&'file self) -> CoffSymbolIterator<'data, 'file> { + CoffSymbolIterator { + file: self, + symbols: 
goblin::pe::symbol::SymbolIterator::default(), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + // TODO: untested + let mut symbols: Vec<_> = self + .symbols() + .map(|(_, s)| s) + .filter(SymbolMap::filter) + .collect(); + symbols.sort_by_key(|x| x.address); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + for section in &self.coff.sections { + if let Ok(name) = section.name() { + if name == ".debug_info" { + return true; + } + } + } + false + } + + fn entry(&self) -> u64 { + 0 + } +} + +impl<'data, 'file> Iterator for CoffSegmentIterator<'data, 'file> { + type Item = CoffSegment<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|section| CoffSegment { + file: self.file, + section, + }) + } +} + +fn section_alignment(characteristics: u32) -> u64 { + match characteristics & pe::section_table::IMAGE_SCN_ALIGN_MASK { + pe::section_table::IMAGE_SCN_ALIGN_2BYTES => 2, + pe::section_table::IMAGE_SCN_ALIGN_4BYTES => 4, + pe::section_table::IMAGE_SCN_ALIGN_8BYTES => 8, + pe::section_table::IMAGE_SCN_ALIGN_16BYTES => 16, + pe::section_table::IMAGE_SCN_ALIGN_32BYTES => 32, + pe::section_table::IMAGE_SCN_ALIGN_64BYTES => 64, + pe::section_table::IMAGE_SCN_ALIGN_128BYTES => 128, + pe::section_table::IMAGE_SCN_ALIGN_256BYTES => 256, + pe::section_table::IMAGE_SCN_ALIGN_512BYTES => 512, + pe::section_table::IMAGE_SCN_ALIGN_1024BYTES => 1024, + pe::section_table::IMAGE_SCN_ALIGN_2048BYTES => 2048, + pe::section_table::IMAGE_SCN_ALIGN_4096BYTES => 4096, + pe::section_table::IMAGE_SCN_ALIGN_8192BYTES => 8192, + _ => 1, + } +} + +impl<'data, 'file> ObjectSegment<'data> for CoffSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size) + } + + #[inline] + fn align(&self) -> u64 { + section_alignment(self.section.characteristics) + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + ( 
+ self.section.pointer_to_raw_data as u64, + self.section.size_of_raw_data as u64, + ) + } + + fn data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = self.section.size_of_raw_data as usize; + &self.file.data[offset..][..size] + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + self.section.name().ok() + } +} + +impl<'data, 'file> Iterator for CoffSectionIterator<'data, 'file> { + type Item = CoffSection<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| CoffSection { + file: self.file, + index: SectionIndex(index), + section, + }) + } +} + +impl<'data, 'file> CoffSection<'data, 'file> { + fn raw_data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = self.section.size_of_raw_data as usize; + &self.file.data[offset..][..size] + } +} + +impl<'data, 'file> ObjectSection<'data> for CoffSection<'data, 'file> { + type RelocationIterator = CoffRelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.size_of_raw_data) + } + + #[inline] + fn align(&self) -> u64 { + section_alignment(self.section.characteristics) + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + Some(( + self.section.pointer_to_raw_data as u64, + self.section.size_of_raw_data as u64, + )) + } + + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.raw_data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.raw_data(), self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + self.data() + } + + fn name(&self) -> 
Option<&str> { + self.section.name().ok() + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + None + } + + #[inline] + fn kind(&self) -> SectionKind { + if self.section.characteristics + & (pe::section_table::IMAGE_SCN_CNT_CODE | pe::section_table::IMAGE_SCN_MEM_EXECUTE) + != 0 + { + SectionKind::Text + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_CNT_INITIALIZED_DATA + != 0 + { + if self.section.characteristics & pe::section_table::IMAGE_SCN_MEM_DISCARDABLE != 0 { + SectionKind::Other + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_MEM_WRITE != 0 { + SectionKind::Data + } else { + SectionKind::ReadOnlyData + } + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_CNT_UNINITIALIZED_DATA + != 0 + { + SectionKind::UninitializedData + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_LNK_INFO != 0 { + SectionKind::Linker + } else { + SectionKind::Unknown + } + } + + fn relocations(&self) -> CoffRelocationIterator<'data, 'file> { + CoffRelocationIterator { + file: self.file, + relocations: self.section.relocations(self.file.data).unwrap_or_default(), + } + } +} + +impl<'data, 'file> fmt::Debug for CoffSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CoffSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for CoffSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option<Self::Item> { + self.symbols.next().map(|(index, name, symbol)| { + ( + SymbolIndex(index), + parse_symbol(index, name, &symbol, &self.file.coff), + ) + }) + } +} + +fn parse_symbol<'data>( + index: usize, + name: Option<&'data str>, + symbol: &pe::symbol::Symbol, + coff: &pe::Coff<'data>, +) -> Symbol<'data> { + let name = if symbol.is_file() { + coff.symbols + .aux_file(index + 1, symbol.number_of_aux_symbols as usize) + } else { + name.or_else(|| { + 
symbol.name_offset().and_then(|offset| { + coff.strings + .get(offset as usize) + .map(Result::ok) + .unwrap_or_default() + }) + }) + }; + let size = if symbol.is_function_definition() && symbol.number_of_aux_symbols > 0 { + coff.symbols + .aux_function_definition(index + 1) + .map(|aux| u64::from(aux.total_size)) + .unwrap_or(0) + } else { + 0 + }; + let kind = if symbol.is_section_definition() { + SymbolKind::Section + } else { + match symbol.storage_class { + pe::symbol::IMAGE_SYM_CLASS_SECTION => SymbolKind::Section, + pe::symbol::IMAGE_SYM_CLASS_FILE => SymbolKind::File, + pe::symbol::IMAGE_SYM_CLASS_LABEL => SymbolKind::Label, + pe::symbol::IMAGE_SYM_CLASS_EXTERNAL + | pe::symbol::IMAGE_SYM_CLASS_WEAK_EXTERNAL + | pe::symbol::IMAGE_SYM_CLASS_STATIC => { + if symbol.derived_type() == pe::symbol::IMAGE_SYM_DTYPE_FUNCTION { + SymbolKind::Text + } else { + SymbolKind::Data + } + } + _ => SymbolKind::Unknown, + } + }; + let section_index = if symbol.section_number <= 0 { + None + } else { + Some(SectionIndex(symbol.section_number as usize - 1)) + }; + let undefined = symbol.section_number == pe::symbol::IMAGE_SYM_UNDEFINED; + let weak = symbol.storage_class == pe::symbol::IMAGE_SYM_CLASS_WEAK_EXTERNAL; + let scope = match symbol.storage_class { + _ if undefined => SymbolScope::Unknown, + pe::symbol::IMAGE_SYM_CLASS_EXTERNAL + | pe::symbol::IMAGE_SYM_CLASS_EXTERNAL_DEF + | pe::symbol::IMAGE_SYM_CLASS_WEAK_EXTERNAL => { + // TODO: determine if symbol is exported + SymbolScope::Linkage + } + _ => SymbolScope::Compilation, + }; + Symbol { + name, + address: u64::from(symbol.value), + size, + kind, + section_index, + undefined, + weak, + scope, + } +} + +impl<'data, 'file> Iterator for CoffRelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + self.relocations.next().map(|relocation| { + let (kind, size, addend) = match self.file.coff.header.machine { + pe::header::COFF_MACHINE_X86 => match relocation.typ { 
+ pe::relocation::IMAGE_REL_I386_DIR16 => (RelocationKind::Absolute, 16, 0), + pe::relocation::IMAGE_REL_I386_REL16 => (RelocationKind::Relative, 16, 0), + pe::relocation::IMAGE_REL_I386_DIR32 => (RelocationKind::Absolute, 32, 0), + pe::relocation::IMAGE_REL_I386_DIR32NB => (RelocationKind::ImageOffset, 32, 0), + pe::relocation::IMAGE_REL_I386_SECTION => (RelocationKind::SectionIndex, 16, 0), + pe::relocation::IMAGE_REL_I386_SECREL => (RelocationKind::SectionOffset, 32, 0), + pe::relocation::IMAGE_REL_I386_SECREL7 => (RelocationKind::SectionOffset, 7, 0), + pe::relocation::IMAGE_REL_I386_REL32 => (RelocationKind::Relative, 32, -4), + _ => (RelocationKind::Coff(relocation.typ), 0, 0), + }, + pe::header::COFF_MACHINE_X86_64 => match relocation.typ { + pe::relocation::IMAGE_REL_AMD64_ADDR64 => (RelocationKind::Absolute, 64, 0), + pe::relocation::IMAGE_REL_AMD64_ADDR32 => (RelocationKind::Absolute, 32, 0), + pe::relocation::IMAGE_REL_AMD64_ADDR32NB => { + (RelocationKind::ImageOffset, 32, 0) + } + pe::relocation::IMAGE_REL_AMD64_REL32 => (RelocationKind::Relative, 32, -4), + pe::relocation::IMAGE_REL_AMD64_REL32_1 => (RelocationKind::Relative, 32, -5), + pe::relocation::IMAGE_REL_AMD64_REL32_2 => (RelocationKind::Relative, 32, -6), + pe::relocation::IMAGE_REL_AMD64_REL32_3 => (RelocationKind::Relative, 32, -7), + pe::relocation::IMAGE_REL_AMD64_REL32_4 => (RelocationKind::Relative, 32, -8), + pe::relocation::IMAGE_REL_AMD64_REL32_5 => (RelocationKind::Relative, 32, -9), + pe::relocation::IMAGE_REL_AMD64_SECTION => { + (RelocationKind::SectionIndex, 16, 0) + } + pe::relocation::IMAGE_REL_AMD64_SECREL => { + (RelocationKind::SectionOffset, 32, 0) + } + pe::relocation::IMAGE_REL_AMD64_SECREL7 => { + (RelocationKind::SectionOffset, 7, 0) + } + _ => (RelocationKind::Coff(relocation.typ), 0, 0), + }, + _ => (RelocationKind::Coff(relocation.typ), 0, 0), + }; + let target = + RelocationTarget::Symbol(SymbolIndex(relocation.symbol_table_index as usize)); + ( + 
u64::from(relocation.virtual_address), + Relocation { + kind, + encoding: RelocationEncoding::Generic, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file> fmt::Debug for CoffRelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CoffRelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/elf.rs b/third_party/rust/object/src/read/elf.rs new file mode 100644 index 0000000000..6af690756a --- /dev/null +++ b/third_party/rust/object/src/read/elf.rs @@ -0,0 +1,709 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use crate::alloc::vec::Vec; +#[cfg(feature = "compression")] +use flate2::{Decompress, FlushDecompress}; +#[cfg(feature = "compression")] +use goblin::container; +use goblin::{elf, strtab}; +#[cfg(feature = "compression")] +use scroll::ctx::TryFromCtx; +use scroll::{self, Pread}; +use std::{iter, slice}; +use target_lexicon::{Aarch64Architecture, Architecture, ArmArchitecture}; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// An ELF object file. +#[derive(Debug)] +pub struct ElfFile<'data> { + elf: elf::Elf<'data>, + data: &'data [u8], +} + +impl<'data> ElfFile<'data> { + /// Get the ELF headers of the file. + // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn elf(&self) -> &elf::Elf<'data> { + &self.elf + } + + /// Parse the raw ELF file data. 
+ pub fn parse(data: &'data [u8]) -> Result<Self, &'static str> { + let elf = elf::Elf::parse(data).map_err(|_| "Could not parse ELF header")?; + Ok(ElfFile { elf, data }) + } + + fn raw_section_by_name<'file>( + &'file self, + section_name: &str, + ) -> Option<ElfSection<'data, 'file>> { + for (index, section) in self.elf.section_headers.iter().enumerate() { + if let Some(Ok(name)) = self.elf.shdr_strtab.get(section.sh_name) { + if name == section_name { + return Some(ElfSection { + file: self, + index: SectionIndex(index), + section, + }); + } + } + } + None + } + + #[cfg(feature = "compression")] + fn zdebug_section_by_name<'file>( + &'file self, + section_name: &str, + ) -> Option<ElfSection<'data, 'file>> { + if !section_name.starts_with(".debug_") { + return None; + } + self.raw_section_by_name(&format!(".zdebug_{}", &section_name[7..])) + } + + #[cfg(not(feature = "compression"))] + fn zdebug_section_by_name<'file>( + &'file self, + _section_name: &str, + ) -> Option<ElfSection<'data, 'file>> { + None + } +} + +impl<'data, 'file> Object<'data, 'file> for ElfFile<'data> +where + 'data: 'file, +{ + type Segment = ElfSegment<'data, 'file>; + type SegmentIterator = ElfSegmentIterator<'data, 'file>; + type Section = ElfSection<'data, 'file>; + type SectionIterator = ElfSectionIterator<'data, 'file>; + type SymbolIterator = ElfSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.elf.header.e_machine { + elf::header::EM_ARM => Architecture::Arm(ArmArchitecture::Arm), + elf::header::EM_AARCH64 => Architecture::Aarch64(Aarch64Architecture::Aarch64), + elf::header::EM_386 => Architecture::I386, + elf::header::EM_X86_64 => Architecture::X86_64, + elf::header::EM_MIPS => Architecture::Mips, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.elf.little_endian + } + + #[inline] + fn is_64(&self) -> bool { + self.elf.is_64 + } + + fn segments(&'file self) -> ElfSegmentIterator<'data, 'file> { + 
ElfSegmentIterator { + file: self, + iter: self.elf.program_headers.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option<ElfSection<'data, 'file>> { + self.raw_section_by_name(section_name) + .or_else(|| self.zdebug_section_by_name(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<ElfSection<'data, 'file>> { + self.elf + .section_headers + .get(index.0) + .map(|section| ElfSection { + file: self, + index, + section, + }) + } + + fn sections(&'file self) -> ElfSectionIterator<'data, 'file> { + ElfSectionIterator { + file: self, + iter: self.elf.section_headers.iter().enumerate(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option<Symbol<'data>> { + self.elf + .syms + .get(index.0) + .map(|symbol| parse_symbol(index.0, &symbol, &self.elf.strtab)) + } + + fn symbols(&'file self) -> ElfSymbolIterator<'data, 'file> { + ElfSymbolIterator { + strtab: &self.elf.strtab, + symbols: self.elf.syms.iter().enumerate(), + } + } + + fn dynamic_symbols(&'file self) -> ElfSymbolIterator<'data, 'file> { + ElfSymbolIterator { + strtab: &self.elf.dynstrtab, + symbols: self.elf.dynsyms.iter().enumerate(), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + let mut symbols: Vec<_> = self + .symbols() + .map(|(_, s)| s) + .filter(SymbolMap::filter) + .collect(); + symbols.sort_by_key(|x| x.address); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + for header in &self.elf.section_headers { + if let Some(Ok(name)) = self.elf.shdr_strtab.get(header.sh_name) { + if name == ".debug_info" || name == ".zdebug_info" { + return true; + } + } + } + false + } + + fn build_id(&self) -> Option<&'data [u8]> { + if let Some(mut notes) = self.elf.iter_note_headers(self.data) { + while let Some(Ok(note)) = notes.next() { + if note.n_type == elf::note::NT_GNU_BUILD_ID { + return Some(note.desc); + } + } + } + if let Some(mut notes) = self + .elf + .iter_note_sections(self.data, 
Some(".note.gnu.build-id")) + { + while let Some(Ok(note)) = notes.next() { + if note.n_type == elf::note::NT_GNU_BUILD_ID { + return Some(note.desc); + } + } + } + None + } + + fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { + if let Some(Cow::Borrowed(data)) = self.section_data_by_name(".gnu_debuglink") { + if let Some(filename_len) = data.iter().position(|x| *x == 0) { + let filename = &data[..filename_len]; + // Round to 4 byte alignment after null terminator. + let offset = (filename_len + 1 + 3) & !3; + if offset + 4 <= data.len() { + let endian = if self.is_little_endian() { + scroll::LE + } else { + scroll::BE + }; + let crc: u32 = data.pread_with(offset, endian).unwrap(); + return Some((filename, crc)); + } + } + } + None + } + + fn entry(&self) -> u64 { + self.elf.entry + } +} + +/// An iterator over the segments of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + iter: slice::Iter<'file, elf::ProgramHeader>, +} + +impl<'data, 'file> Iterator for ElfSegmentIterator<'data, 'file> { + type Item = ElfSegment<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + while let Some(segment) = self.iter.next() { + if segment.p_type == elf::program_header::PT_LOAD { + return Some(ElfSegment { + file: self.file, + segment, + }); + } + } + None + } +} + +/// A segment of an `ElfFile`. 
+#[derive(Debug)] +pub struct ElfSegment<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + segment: &'file elf::ProgramHeader, +} + +impl<'data, 'file> ObjectSegment<'data> for ElfSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + self.segment.p_vaddr + } + + #[inline] + fn size(&self) -> u64 { + self.segment.p_memsz + } + + #[inline] + fn align(&self) -> u64 { + self.segment.p_align + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + (self.segment.p_offset, self.segment.p_filesz) + } + + fn data(&self) -> &'data [u8] { + &self.file.data[self.segment.p_offset as usize..][..self.segment.p_filesz as usize] + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + None + } +} + +/// An iterator over the sections of an `ElfFile`. +#[derive(Debug)] +pub struct ElfSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + iter: iter::Enumerate<slice::Iter<'file, elf::SectionHeader>>, +} + +impl<'data, 'file> Iterator for ElfSectionIterator<'data, 'file> { + type Item = ElfSection<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| ElfSection { + index: SectionIndex(index), + file: self.file, + section, + }) + } +} + +/// A section of an `ElfFile`. 
+#[derive(Debug)] +pub struct ElfSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file ElfFile<'data>, + index: SectionIndex, + section: &'file elf::SectionHeader, +} + +impl<'data, 'file> ElfSection<'data, 'file> { + fn raw_offset(&self) -> Option<(u64, u64)> { + if self.section.sh_type == elf::section_header::SHT_NOBITS { + None + } else { + Some((self.section.sh_offset, self.section.sh_size)) + } + } + + fn raw_data(&self) -> &'data [u8] { + if let Some((offset, size)) = self.raw_offset() { + &self.file.data[offset as usize..][..size as usize] + } else { + &[] + } + } + + #[cfg(feature = "compression")] + fn maybe_decompress_data(&self) -> Option<Cow<'data, [u8]>> { + if (self.section.sh_flags & u64::from(elf::section_header::SHF_COMPRESSED)) == 0 { + return None; + } + + let container = match self.file.elf.header.container() { + Ok(c) => c, + Err(_) => return None, + }; + let endianness = match self.file.elf.header.endianness() { + Ok(e) => e, + Err(_) => return None, + }; + let ctx = container::Ctx::new(container, endianness); + let data = self.raw_data(); + let (compression_type, uncompressed_size, compressed_data) = + match elf::compression_header::CompressionHeader::try_from_ctx(&data, ctx) { + Ok((chdr, size)) => (chdr.ch_type, chdr.ch_size, &data[size..]), + Err(_) => return None, + }; + if compression_type != elf::compression_header::ELFCOMPRESS_ZLIB { + return None; + } + + let mut decompressed = Vec::with_capacity(uncompressed_size as usize); + let mut decompress = Decompress::new(true); + if decompress + .decompress_vec(compressed_data, &mut decompressed, FlushDecompress::Finish) + .is_err() + { + return None; + } + Some(Cow::Owned(decompressed)) + } + + /// Try GNU-style "ZLIB" header decompression. 
+ #[cfg(feature = "compression")] + fn maybe_decompress_data_gnu(&self) -> Option<Cow<'data, [u8]>> { + let name = match self.name() { + Some(name) => name, + None => return None, + }; + if !name.starts_with(".zdebug_") { + return None; + } + let data = self.raw_data(); + // Assume ZLIB-style uncompressed data is no more than 4GB to avoid accidentally + // huge allocations. This also reduces the chance of accidentally matching on a + // .debug_str that happens to start with "ZLIB". + if data.len() < 12 || &data[..8] != b"ZLIB\0\0\0\0" { + return None; + } + let uncompressed_size: u32 = data.pread_with(8, scroll::BE).unwrap(); + let mut decompressed = Vec::with_capacity(uncompressed_size as usize); + let mut decompress = Decompress::new(true); + if decompress + .decompress_vec(&data[12..], &mut decompressed, FlushDecompress::Finish) + .is_err() + { + return None; + } + Some(Cow::Owned(decompressed)) + } +} + +impl<'data, 'file> ObjectSection<'data> for ElfSection<'data, 'file> { + type RelocationIterator = ElfRelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + self.section.sh_addr + } + + #[inline] + fn size(&self) -> u64 { + self.section.sh_size + } + + #[inline] + fn align(&self) -> u64 { + self.section.sh_addralign + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + self.raw_offset() + } + + #[inline] + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.raw_data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.raw_data(), self.address(), address, size) + } + + #[cfg(feature = "compression")] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + self.maybe_decompress_data() + .or_else(|| self.maybe_decompress_data_gnu()) + .unwrap_or_else(|| self.data()) + } + + #[cfg(not(feature = "compression"))] + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + self.data() + } + + fn name(&self) 
-> Option<&str> { + self.file + .elf + .shdr_strtab + .get(self.section.sh_name) + .and_then(Result::ok) + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + None + } + + fn kind(&self) -> SectionKind { + match self.section.sh_type { + elf::section_header::SHT_PROGBITS => { + if self.section.sh_flags & u64::from(elf::section_header::SHF_ALLOC) != 0 { + if self.section.sh_flags & u64::from(elf::section_header::SHF_EXECINSTR) != 0 { + SectionKind::Text + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_TLS) != 0 { + SectionKind::Tls + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_WRITE) != 0 + { + SectionKind::Data + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_STRINGS) + != 0 + { + SectionKind::ReadOnlyString + } else { + SectionKind::ReadOnlyData + } + } else if self.section.sh_flags & u64::from(elf::section_header::SHF_STRINGS) != 0 { + SectionKind::OtherString + } else { + SectionKind::Other + } + } + elf::section_header::SHT_NOBITS => { + if self.section.sh_flags & u64::from(elf::section_header::SHF_TLS) != 0 { + SectionKind::UninitializedTls + } else { + SectionKind::UninitializedData + } + } + elf::section_header::SHT_NULL + | elf::section_header::SHT_SYMTAB + | elf::section_header::SHT_STRTAB + | elf::section_header::SHT_RELA + | elf::section_header::SHT_HASH + | elf::section_header::SHT_DYNAMIC + | elf::section_header::SHT_REL + | elf::section_header::SHT_DYNSYM => SectionKind::Metadata, + _ => { + // TODO: maybe add more specialised kinds based on sh_type (e.g. Unwind) + SectionKind::Unknown + } + } + } + + fn relocations(&self) -> ElfRelocationIterator<'data, 'file> { + ElfRelocationIterator { + section_index: self.index, + file: self.file, + sections: self.file.elf.shdr_relocs.iter(), + relocations: None, + } + } +} + +/// An iterator over the symbols of an `ElfFile`. 
+pub struct ElfSymbolIterator<'data, 'file> +where + 'data: 'file, +{ + strtab: &'file strtab::Strtab<'data>, + symbols: iter::Enumerate<elf::sym::SymIterator<'data>>, +} + +impl<'data, 'file> fmt::Debug for ElfSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for ElfSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option<Self::Item> { + self.symbols.next().map(|(index, symbol)| { + ( + SymbolIndex(index), + parse_symbol(index, &symbol, self.strtab), + ) + }) + } +} + +fn parse_symbol<'data>( + index: usize, + symbol: &elf::sym::Sym, + strtab: &strtab::Strtab<'data>, +) -> Symbol<'data> { + let name = strtab.get(symbol.st_name).and_then(Result::ok); + let kind = match elf::sym::st_type(symbol.st_info) { + elf::sym::STT_NOTYPE if index == 0 => SymbolKind::Null, + elf::sym::STT_OBJECT => SymbolKind::Data, + elf::sym::STT_FUNC => SymbolKind::Text, + elf::sym::STT_SECTION => SymbolKind::Section, + elf::sym::STT_FILE => SymbolKind::File, + elf::sym::STT_COMMON => SymbolKind::Common, + elf::sym::STT_TLS => SymbolKind::Tls, + _ => SymbolKind::Unknown, + }; + let undefined = symbol.st_shndx == elf::section_header::SHN_UNDEF as usize; + let section_index = + if undefined || symbol.st_shndx >= elf::section_header::SHN_LORESERVE as usize { + None + } else { + Some(SectionIndex(symbol.st_shndx)) + }; + let weak = symbol.st_bind() == elf::sym::STB_WEAK; + let scope = match symbol.st_bind() { + _ if undefined => SymbolScope::Unknown, + elf::sym::STB_LOCAL => SymbolScope::Compilation, + elf::sym::STB_GLOBAL | elf::sym::STB_WEAK => { + if symbol.st_visibility() == elf::sym::STV_HIDDEN { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + } + } + _ => SymbolScope::Unknown, + }; + Symbol { + name, + address: symbol.st_value, + size: symbol.st_size, + kind, + section_index, + undefined, + weak, + 
scope, + } +} + +/// An iterator over the relocations in an `ElfSection`. +pub struct ElfRelocationIterator<'data, 'file> +where + 'data: 'file, +{ + /// The index of the section that the relocations apply to. + section_index: SectionIndex, + file: &'file ElfFile<'data>, + sections: slice::Iter<'file, (elf::ShdrIdx, elf::RelocSection<'data>)>, + relocations: Option<elf::reloc::RelocIterator<'data>>, +} + +impl<'data, 'file> Iterator for ElfRelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + loop { + if let Some(ref mut relocations) = self.relocations { + if let Some(reloc) = relocations.next() { + let mut encoding = RelocationEncoding::Generic; + let (kind, size) = match self.file.elf.header.e_machine { + elf::header::EM_ARM => match reloc.r_type { + elf::reloc::R_ARM_ABS32 => (RelocationKind::Absolute, 32), + _ => (RelocationKind::Elf(reloc.r_type), 0), + }, + elf::header::EM_AARCH64 => match reloc.r_type { + elf::reloc::R_AARCH64_ABS64 => (RelocationKind::Absolute, 64), + elf::reloc::R_AARCH64_ABS32 => (RelocationKind::Absolute, 32), + elf::reloc::R_AARCH64_ABS16 => (RelocationKind::Absolute, 16), + elf::reloc::R_AARCH64_PREL64 => (RelocationKind::Relative, 64), + elf::reloc::R_AARCH64_PREL32 => (RelocationKind::Relative, 32), + elf::reloc::R_AARCH64_PREL16 => (RelocationKind::Relative, 16), + _ => (RelocationKind::Elf(reloc.r_type), 0), + }, + elf::header::EM_386 => match reloc.r_type { + elf::reloc::R_386_32 => (RelocationKind::Absolute, 32), + elf::reloc::R_386_PC32 => (RelocationKind::Relative, 32), + elf::reloc::R_386_GOT32 => (RelocationKind::Got, 32), + elf::reloc::R_386_PLT32 => (RelocationKind::PltRelative, 32), + elf::reloc::R_386_GOTOFF => (RelocationKind::GotBaseOffset, 32), + elf::reloc::R_386_GOTPC => (RelocationKind::GotBaseRelative, 32), + elf::reloc::R_386_16 => (RelocationKind::Absolute, 16), + elf::reloc::R_386_PC16 => (RelocationKind::Relative, 16), + elf::reloc::R_386_8 => 
(RelocationKind::Absolute, 8), + elf::reloc::R_386_PC8 => (RelocationKind::Relative, 8), + _ => (RelocationKind::Elf(reloc.r_type), 0), + }, + elf::header::EM_X86_64 => match reloc.r_type { + elf::reloc::R_X86_64_64 => (RelocationKind::Absolute, 64), + elf::reloc::R_X86_64_PC32 => (RelocationKind::Relative, 32), + elf::reloc::R_X86_64_GOT32 => (RelocationKind::Got, 32), + elf::reloc::R_X86_64_PLT32 => (RelocationKind::PltRelative, 32), + elf::reloc::R_X86_64_GOTPCREL => (RelocationKind::GotRelative, 32), + elf::reloc::R_X86_64_32 => (RelocationKind::Absolute, 32), + elf::reloc::R_X86_64_32S => { + encoding = RelocationEncoding::X86Signed; + (RelocationKind::Absolute, 32) + } + elf::reloc::R_X86_64_16 => (RelocationKind::Absolute, 16), + elf::reloc::R_X86_64_PC16 => (RelocationKind::Relative, 16), + elf::reloc::R_X86_64_8 => (RelocationKind::Absolute, 8), + elf::reloc::R_X86_64_PC8 => (RelocationKind::Relative, 8), + _ => (RelocationKind::Elf(reloc.r_type), 0), + }, + _ => (RelocationKind::Elf(reloc.r_type), 0), + }; + let target = RelocationTarget::Symbol(SymbolIndex(reloc.r_sym as usize)); + return Some(( + reloc.r_offset, + Relocation { + kind, + encoding, + size, + target, + addend: reloc.r_addend.unwrap_or(0), + implicit_addend: reloc.r_addend.is_none(), + }, + )); + } + } + match self.sections.next() { + None => return None, + Some((index, relocs)) => { + let section = &self.file.elf.section_headers[*index]; + if section.sh_info as usize == self.section_index.0 { + self.relocations = Some(relocs.into_iter()); + } + // TODO: do we need to return section.sh_link? 
+ } + } + } + } +} + +impl<'data, 'file> fmt::Debug for ElfRelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElfRelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/macho.rs b/third_party/rust/object/src/read/macho.rs new file mode 100644 index 0000000000..ce54938a10 --- /dev/null +++ b/third_party/rust/object/src/read/macho.rs @@ -0,0 +1,615 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::vec::Vec; +use goblin::container; +use goblin::mach; +use goblin::mach::load_command::CommandVariant; +use std::{fmt, iter, ops, slice}; +use target_lexicon::{Aarch64Architecture, Architecture, ArmArchitecture}; +use uuid::Uuid; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, RelocationEncoding, RelocationKind, + RelocationTarget, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolKind, SymbolMap, + SymbolScope, +}; + +/// A Mach-O object file. +#[derive(Debug)] +pub struct MachOFile<'data> { + macho: mach::MachO<'data>, + data: &'data [u8], + ctx: container::Ctx, + sections: Vec<MachOSectionInternal<'data>>, +} + +impl<'data> MachOFile<'data> { + /// Get the Mach-O headers of the file. + // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn macho(&self) -> &mach::MachO<'data> { + &self.macho + } + + /// Parse the raw Mach-O file data. + pub fn parse(data: &'data [u8]) -> Result<Self, &'static str> { + let (_magic, ctx) = + mach::parse_magic_and_ctx(data, 0).map_err(|_| "Could not parse Mach-O magic")?; + let ctx = ctx.ok_or("Invalid Mach-O magic")?; + let macho = mach::MachO::parse(data, 0).map_err(|_| "Could not parse Mach-O header")?; + // Build a list of sections to make some operations more efficient. 
+ let mut sections = Vec::new(); + 'segments: for segment in &macho.segments { + for section_result in segment { + if let Ok((section, data)) = section_result { + sections.push(MachOSectionInternal::parse(section, data)); + } else { + break 'segments; + } + } + } + Ok(MachOFile { + macho, + data, + ctx, + sections, + }) + } + + /// Return the section at the given index. + #[inline] + fn section_internal(&self, index: SectionIndex) -> Option<&MachOSectionInternal<'data>> { + index + .0 + .checked_sub(1) + .and_then(|index| self.sections.get(index)) + } +} + +impl<'data, 'file> Object<'data, 'file> for MachOFile<'data> +where + 'data: 'file, +{ + type Segment = MachOSegment<'data, 'file>; + type SegmentIterator = MachOSegmentIterator<'data, 'file>; + type Section = MachOSection<'data, 'file>; + type SectionIterator = MachOSectionIterator<'data, 'file>; + type SymbolIterator = MachOSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.macho.header.cputype { + mach::cputype::CPU_TYPE_ARM => Architecture::Arm(ArmArchitecture::Arm), + mach::cputype::CPU_TYPE_ARM64 => Architecture::Aarch64(Aarch64Architecture::Aarch64), + mach::cputype::CPU_TYPE_X86 => Architecture::I386, + mach::cputype::CPU_TYPE_X86_64 => Architecture::X86_64, + mach::cputype::CPU_TYPE_MIPS => Architecture::Mips, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + self.macho.little_endian + } + + #[inline] + fn is_64(&self) -> bool { + self.macho.is_64 + } + + fn segments(&'file self) -> MachOSegmentIterator<'data, 'file> { + MachOSegmentIterator { + segments: self.macho.segments.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option<MachOSection<'data, 'file>> { + // Translate the "." prefix to the "__" prefix used by OSX/Mach-O, eg + // ".debug_info" to "__debug_info". 
+ let system_section = section_name.starts_with('.'); + let cmp_section_name = |section: &MachOSection| { + section + .name() + .map(|name| { + section_name == name + || (system_section + && name.starts_with("__") + && &section_name[1..] == &name[2..]) + }) + .unwrap_or(false) + }; + + self.sections().find(cmp_section_name) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<MachOSection<'data, 'file>> { + self.section_internal(index) + .map(|_| MachOSection { file: self, index }) + } + + fn sections(&'file self) -> MachOSectionIterator<'data, 'file> { + MachOSectionIterator { + file: self, + iter: 0..self.sections.len(), + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Option<Symbol<'data>> { + self.macho + .symbols + .as_ref() + .and_then(|symbols| symbols.get(index.0).ok()) + .and_then(|(name, nlist)| parse_symbol(self, name, &nlist)) + } + + fn symbols(&'file self) -> MachOSymbolIterator<'data, 'file> { + let symbols = match self.macho.symbols { + Some(ref symbols) => symbols.into_iter(), + None => mach::symbols::SymbolIterator::default(), + } + .enumerate(); + + MachOSymbolIterator { + file: self, + symbols, + } + } + + fn dynamic_symbols(&'file self) -> MachOSymbolIterator<'data, 'file> { + // The LC_DYSYMTAB command contains indices into the same symbol + // table as the LC_SYMTAB command, so return all of them. + self.symbols() + } + + fn symbol_map(&self) -> SymbolMap<'data> { + let mut symbols: Vec<_> = self.symbols().map(|(_, s)| s).collect(); + + // Add symbols for the end of each section. + for section in self.sections() { + symbols.push(Symbol { + name: None, + address: section.address() + section.size(), + size: 0, + kind: SymbolKind::Section, + section_index: None, + undefined: false, + weak: false, + scope: SymbolScope::Compilation, + }); + } + + // Calculate symbol sizes by sorting and finding the next symbol. 
+ symbols.sort_by(|a, b| { + a.address.cmp(&b.address).then_with(|| { + // Place the end of section symbols last. + (a.kind == SymbolKind::Section).cmp(&(b.kind == SymbolKind::Section)) + }) + }); + + for i in 0..symbols.len() { + let (before, after) = symbols.split_at_mut(i + 1); + let symbol = &mut before[i]; + if symbol.kind != SymbolKind::Section { + if let Some(next) = after + .iter() + .skip_while(|x| x.kind != SymbolKind::Section && x.address == symbol.address) + .next() + { + symbol.size = next.address - symbol.address; + } + } + } + + symbols.retain(SymbolMap::filter); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + self.section_data_by_name(".debug_info").is_some() + } + + fn mach_uuid(&self) -> Option<Uuid> { + // Return the UUID from the `LC_UUID` load command, if one is present. + self.macho + .load_commands + .iter() + .filter_map(|lc| { + match lc.command { + CommandVariant::Uuid(ref cmd) => { + //TODO: Uuid should have a `from_array` method that can't fail. + Uuid::from_slice(&cmd.uuid).ok() + } + _ => None, + } + }) + .nth(0) + } + + fn entry(&self) -> u64 { + self.macho.entry + } +} + +/// An iterator over the segments of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + segments: slice::Iter<'file, mach::segment::Segment<'data>>, +} + +impl<'data, 'file> Iterator for MachOSegmentIterator<'data, 'file> { + type Item = MachOSegment<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.segments.next().map(|segment| MachOSegment { segment }) + } +} + +/// A segment of a `MachOFile`. 
+#[derive(Debug)] +pub struct MachOSegment<'data, 'file> +where + 'data: 'file, +{ + segment: &'file mach::segment::Segment<'data>, +} + +impl<'data, 'file> ObjectSegment<'data> for MachOSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + self.segment.vmaddr + } + + #[inline] + fn size(&self) -> u64 { + self.segment.vmsize + } + + #[inline] + fn align(&self) -> u64 { + // Page size. + 0x1000 + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + (self.segment.fileoff, self.segment.filesize) + } + + #[inline] + fn data(&self) -> &'data [u8] { + self.segment.data + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + self.segment.name().ok() + } +} + +/// An iterator over the sections of a `MachOFile`. +pub struct MachOSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + iter: ops::Range<usize>, +} + +impl<'data, 'file> fmt::Debug for MachOSectionIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's painful to do much better than this + f.debug_struct("MachOSectionIterator").finish() + } +} + +impl<'data, 'file> Iterator for MachOSectionIterator<'data, 'file> { + type Item = MachOSection<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|index| MachOSection { + file: self.file, + index: SectionIndex(index + 1), + }) + } +} + +/// A section of a `MachOFile`. +#[derive(Debug)] +pub struct MachOSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + index: SectionIndex, +} + +impl<'data, 'file> MachOSection<'data, 'file> { + #[inline] + fn internal(&self) -> &'file MachOSectionInternal<'data> { + // We ensure the index is always valid. 
+ &self.file.section_internal(self.index).unwrap() + } +} + +impl<'data, 'file> ObjectSection<'data> for MachOSection<'data, 'file> { + type RelocationIterator = MachORelocationIterator<'data, 'file>; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + self.internal().section.addr + } + + #[inline] + fn size(&self) -> u64 { + self.internal().section.size + } + + #[inline] + fn align(&self) -> u64 { + 1 << self.internal().section.align + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + let internal = &self.internal().section; + Some((internal.offset as u64, internal.size)) + } + + #[inline] + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.internal().data) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.internal().data, self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + // TODO: does MachO support compression? 
+ self.data() + } + + #[inline] + fn name(&self) -> Option<&str> { + self.internal().section.name().ok() + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + self.internal().section.segname().ok() + } + + fn kind(&self) -> SectionKind { + self.internal().kind + } + + fn relocations(&self) -> MachORelocationIterator<'data, 'file> { + MachORelocationIterator { + file: self.file, + relocations: self + .internal() + .section + .iter_relocations(self.file.data, self.file.ctx), + } + } +} + +#[derive(Debug)] +struct MachOSectionInternal<'data> { + section: mach::segment::Section, + data: mach::segment::SectionData<'data>, + kind: SectionKind, +} + +impl<'data> MachOSectionInternal<'data> { + fn parse(section: mach::segment::Section, data: mach::segment::SectionData<'data>) -> Self { + let kind = if let (Ok(segname), Ok(name)) = (section.segname(), section.name()) { + match (segname, name) { + ("__TEXT", "__text") => SectionKind::Text, + ("__TEXT", "__const") => SectionKind::ReadOnlyData, + ("__TEXT", "__cstring") => SectionKind::ReadOnlyString, + ("__TEXT", "__eh_frame") => SectionKind::ReadOnlyData, + ("__TEXT", "__gcc_except_tab") => SectionKind::ReadOnlyData, + ("__DATA", "__data") => SectionKind::Data, + ("__DATA", "__const") => SectionKind::ReadOnlyData, + ("__DATA", "__bss") => SectionKind::UninitializedData, + ("__DATA", "__thread_data") => SectionKind::Tls, + ("__DATA", "__thread_bss") => SectionKind::UninitializedTls, + ("__DATA", "__thread_vars") => SectionKind::TlsVariables, + ("__DWARF", _) => SectionKind::Debug, + _ => SectionKind::Unknown, + } + } else { + SectionKind::Unknown + }; + MachOSectionInternal { + section, + data, + kind, + } + } +} + +/// An iterator over the symbols of a `MachOFile`. 
+pub struct MachOSymbolIterator<'data, 'file> { + file: &'file MachOFile<'data>, + symbols: iter::Enumerate<mach::symbols::SymbolIterator<'data>>, +} + +impl<'data, 'file> fmt::Debug for MachOSymbolIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachOSymbolIterator").finish() + } +} + +impl<'data, 'file> Iterator for MachOSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option<Self::Item> { + while let Some((index, Ok((name, nlist)))) = self.symbols.next() { + if let Some(symbol) = parse_symbol(self.file, name, &nlist) { + return Some((SymbolIndex(index), symbol)); + } + } + None + } +} + +fn parse_symbol<'data>( + file: &MachOFile<'data>, + name: &'data str, + nlist: &mach::symbols::Nlist, +) -> Option<Symbol<'data>> { + if nlist.n_type & mach::symbols::N_STAB != 0 { + return None; + } + let n_type = nlist.n_type & mach::symbols::NLIST_TYPE_MASK; + let section_index = if n_type == mach::symbols::N_SECT { + if nlist.n_sect == 0 { + None + } else { + Some(SectionIndex(nlist.n_sect)) + } + } else { + // TODO: better handling for other n_type values + None + }; + let kind = section_index + .and_then(|index| file.section_internal(index)) + .map(|section| match section.kind { + SectionKind::Text => SymbolKind::Text, + SectionKind::Data + | SectionKind::ReadOnlyData + | SectionKind::ReadOnlyString + | SectionKind::UninitializedData => SymbolKind::Data, + SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => { + SymbolKind::Tls + } + _ => SymbolKind::Unknown, + }) + .unwrap_or(SymbolKind::Unknown); + let undefined = nlist.is_undefined(); + let weak = nlist.is_weak(); + let scope = if undefined { + SymbolScope::Unknown + } else if nlist.n_type & mach::symbols::N_EXT == 0 { + SymbolScope::Compilation + } else if nlist.n_type & mach::symbols::N_PEXT != 0 { + SymbolScope::Linkage + } else { + SymbolScope::Dynamic + }; + Some(Symbol { + name: 
Some(name), + address: nlist.n_value, + // Only calculated for symbol maps + size: 0, + kind, + section_index, + undefined, + weak, + scope, + }) +} + +/// An iterator over the relocations in an `MachOSection`. +pub struct MachORelocationIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file MachOFile<'data>, + relocations: mach::segment::RelocationIterator<'data>, +} + +impl<'data, 'file> Iterator for MachORelocationIterator<'data, 'file> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + self.relocations.next()?.ok().map(|reloc| { + let mut encoding = RelocationEncoding::Generic; + let kind = match self.file.macho.header.cputype { + mach::cputype::CPU_TYPE_ARM => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::ARM_RELOC_VANILLA, 0) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + mach::cputype::CPU_TYPE_ARM64 => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::ARM64_RELOC_UNSIGNED, 0) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + mach::cputype::CPU_TYPE_X86 => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::GENERIC_RELOC_VANILLA, 0) => RelocationKind::Absolute, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + mach::cputype::CPU_TYPE_X86_64 => match (reloc.r_type(), reloc.r_pcrel()) { + (mach::relocation::X86_64_RELOC_UNSIGNED, 0) => RelocationKind::Absolute, + (mach::relocation::X86_64_RELOC_SIGNED, 1) => { + encoding = RelocationEncoding::X86RipRelative; + RelocationKind::Relative + } + (mach::relocation::X86_64_RELOC_BRANCH, 1) => { + encoding = RelocationEncoding::X86Branch; + RelocationKind::Relative + } + (mach::relocation::X86_64_RELOC_GOT, 1) => RelocationKind::GotRelative, + (mach::relocation::X86_64_RELOC_GOT_LOAD, 1) => { + encoding = 
RelocationEncoding::X86RipRelativeMovq; + RelocationKind::GotRelative + } + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }, + _ => RelocationKind::MachO { + value: reloc.r_type(), + relative: reloc.is_pic(), + }, + }; + let size = 8 << reloc.r_length(); + let target = if reloc.is_extern() { + RelocationTarget::Symbol(SymbolIndex(reloc.r_symbolnum())) + } else { + RelocationTarget::Section(SectionIndex(reloc.r_symbolnum())) + }; + let addend = if reloc.r_pcrel() != 0 { -4 } else { 0 }; + ( + reloc.r_address as u64, + Relocation { + kind, + encoding, + size, + target, + addend, + implicit_addend: true, + }, + ) + }) + } +} + +impl<'data, 'file> fmt::Debug for MachORelocationIterator<'data, 'file> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MachORelocationIterator").finish() + } +} diff --git a/third_party/rust/object/src/read/mod.rs b/third_party/rust/object/src/read/mod.rs new file mode 100644 index 0000000000..c120db9a83 --- /dev/null +++ b/third_party/rust/object/src/read/mod.rs @@ -0,0 +1,250 @@ +//! Interface for reading object files. + +use crate::alloc::vec::Vec; +use crate::common::{RelocationEncoding, RelocationKind, SectionKind, SymbolKind, SymbolScope}; + +mod any; +pub use any::*; + +mod coff; +pub use coff::*; + +mod elf; +pub use elf::*; + +mod macho; +pub use macho::*; + +mod pe; +pub use pe::*; + +mod traits; +pub use traits::*; + +#[cfg(feature = "wasm")] +mod wasm; +#[cfg(feature = "wasm")] +pub use wasm::*; + +/// The native object file for the target platform. +#[cfg(target_os = "linux")] +pub type NativeFile<'data> = ElfFile<'data>; + +/// The native object file for the target platform. +#[cfg(target_os = "macos")] +pub type NativeFile<'data> = MachOFile<'data>; + +/// The native object file for the target platform. +#[cfg(target_os = "windows")] +pub type NativeFile<'data> = PeFile<'data>; + +/// The native object file for the target platform. 
+#[cfg(all(feature = "wasm", target_arch = "wasm32"))] +pub type NativeFile<'data> = WasmFile<'data>; + +/// The index used to identify a section of a file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SectionIndex(pub usize); + +/// The index used to identify a symbol of a file. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SymbolIndex(pub usize); + +/// A symbol table entry. +#[derive(Debug)] +pub struct Symbol<'data> { + name: Option<&'data str>, + address: u64, + size: u64, + kind: SymbolKind, + section_index: Option<SectionIndex>, + undefined: bool, + weak: bool, + scope: SymbolScope, +} + +impl<'data> Symbol<'data> { + /// Return the kind of this symbol. + #[inline] + pub fn kind(&self) -> SymbolKind { + self.kind + } + + /// Returns the section index for the section containing this symbol. + /// + /// May return `None` if the section is unknown or the symbol is undefined. + #[inline] + pub fn section_index(&self) -> Option<SectionIndex> { + self.section_index + } + + /// Return true if the symbol is undefined. + #[inline] + pub fn is_undefined(&self) -> bool { + self.undefined + } + + /// Return true if the symbol is weak. + #[inline] + pub fn is_weak(&self) -> bool { + self.weak + } + + /// Return true if the symbol visible outside of the compilation unit. + /// + /// This treats `SymbolScope::Unknown` as global. + #[inline] + pub fn is_global(&self) -> bool { + !self.is_local() + } + + /// Return true if the symbol is only visible within the compilation unit. + #[inline] + pub fn is_local(&self) -> bool { + self.scope == SymbolScope::Compilation + } + + /// Returns the symbol scope. + #[inline] + pub fn scope(&self) -> SymbolScope { + self.scope + } + + /// The name of the symbol. + #[inline] + pub fn name(&self) -> Option<&'data str> { + self.name + } + + /// The address of the symbol. May be zero if the address is unknown. + #[inline] + pub fn address(&self) -> u64 { + self.address + } + + /// The size of the symbol. 
May be zero if the size is unknown. + #[inline] + pub fn size(&self) -> u64 { + self.size + } +} + +/// A map from addresses to symbols. +#[derive(Debug)] +pub struct SymbolMap<'data> { + symbols: Vec<Symbol<'data>>, +} + +impl<'data> SymbolMap<'data> { + /// Get the symbol containing the given address. + pub fn get(&self, address: u64) -> Option<&Symbol<'data>> { + self.symbols + .binary_search_by(|symbol| { + if address < symbol.address { + std::cmp::Ordering::Greater + } else if address < symbol.address + symbol.size { + std::cmp::Ordering::Equal + } else { + std::cmp::Ordering::Less + } + }) + .ok() + .and_then(|index| self.symbols.get(index)) + } + + /// Get all symbols in the map. + pub fn symbols(&self) -> &[Symbol<'data>] { + &self.symbols + } + + /// Return true for symbols that should be included in the map. + fn filter(symbol: &Symbol<'_>) -> bool { + match symbol.kind() { + SymbolKind::Unknown | SymbolKind::Text | SymbolKind::Data => {} + SymbolKind::Null + | SymbolKind::Section + | SymbolKind::File + | SymbolKind::Label + | SymbolKind::Common + | SymbolKind::Tls => { + return false; + } + } + !symbol.is_undefined() && symbol.size() > 0 + } +} + +/// The target referenced by a relocation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RelocationTarget { + /// The target is a symbol. + Symbol(SymbolIndex), + /// The target is a section. + Section(SectionIndex), +} + +/// A relocation entry. +#[derive(Debug)] +pub struct Relocation { + kind: RelocationKind, + encoding: RelocationEncoding, + size: u8, + target: RelocationTarget, + addend: i64, + implicit_addend: bool, +} + +impl Relocation { + /// The operation used to calculate the result of the relocation. + #[inline] + pub fn kind(&self) -> RelocationKind { + self.kind + } + + /// Information about how the result of the relocation operation is encoded in the place. 
+ #[inline] + pub fn encoding(&self) -> RelocationEncoding { + self.encoding + } + + /// The size in bits of the place of the relocation. + /// + /// If 0, then the size is determined by the relocation kind. + #[inline] + pub fn size(&self) -> u8 { + self.size + } + + /// The target of the relocation. + #[inline] + pub fn target(&self) -> RelocationTarget { + self.target + } + + /// The addend to use in the relocation calculation. + pub fn addend(&self) -> i64 { + self.addend + } + + /// Set the addend to use in the relocation calculation. + pub fn set_addend(&mut self, addend: i64) { + self.addend = addend + } + + /// Returns true if there is an implicit addend stored in the data at the offset + /// to be relocated. + pub fn has_implicit_addend(&self) -> bool { + self.implicit_addend + } +} + +fn data_range(data: &[u8], data_address: u64, range_address: u64, size: u64) -> Option<&[u8]> { + if range_address >= data_address { + let start_offset = (range_address - data_address) as usize; + let end_offset = start_offset + size as usize; + if end_offset <= data.len() { + return Some(&data[start_offset..end_offset]); + } + } + None +} diff --git a/third_party/rust/object/src/read/pe.rs b/third_party/rust/object/src/read/pe.rs new file mode 100644 index 0000000000..49d3fe83ce --- /dev/null +++ b/third_party/rust/object/src/read/pe.rs @@ -0,0 +1,407 @@ +use crate::alloc::borrow::Cow; +use crate::alloc::vec::Vec; +use goblin::pe; +use std::{cmp, iter, slice}; +use target_lexicon::Architecture; + +use crate::read::{ + self, Object, ObjectSection, ObjectSegment, Relocation, SectionIndex, SectionKind, Symbol, + SymbolIndex, SymbolKind, SymbolMap, SymbolScope, +}; + +/// A PE object file. +#[derive(Debug)] +pub struct PeFile<'data> { + pe: pe::PE<'data>, + data: &'data [u8], +} + +impl<'data> PeFile<'data> { + /// Get the PE headers of the file. 
+ // TODO: this is temporary to allow access to features this crate doesn't provide yet + #[inline] + pub fn pe(&self) -> &pe::PE<'data> { + &self.pe + } + + /// Parse the raw PE file data. + pub fn parse(data: &'data [u8]) -> Result<Self, &'static str> { + let pe = pe::PE::parse(data).map_err(|_| "Could not parse PE header")?; + Ok(PeFile { pe, data }) + } + + fn section_alignment(&self) -> u64 { + u64::from( + self.pe + .header + .optional_header + .map(|h| h.windows_fields.section_alignment) + .unwrap_or(0x1000), + ) + } +} + +impl<'data, 'file> Object<'data, 'file> for PeFile<'data> +where + 'data: 'file, +{ + type Segment = PeSegment<'data, 'file>; + type SegmentIterator = PeSegmentIterator<'data, 'file>; + type Section = PeSection<'data, 'file>; + type SectionIterator = PeSectionIterator<'data, 'file>; + type SymbolIterator = PeSymbolIterator<'data, 'file>; + + fn architecture(&self) -> Architecture { + match self.pe.header.coff_header.machine { + // TODO: Arm/Arm64 + pe::header::COFF_MACHINE_X86 => Architecture::I386, + pe::header::COFF_MACHINE_X86_64 => Architecture::X86_64, + _ => Architecture::Unknown, + } + } + + #[inline] + fn is_little_endian(&self) -> bool { + // TODO: always little endian? The COFF header has some bits in the + // characteristics flags, but these are obsolete. 
+ true + } + + #[inline] + fn is_64(&self) -> bool { + self.pe.is_64 + } + + fn segments(&'file self) -> PeSegmentIterator<'data, 'file> { + PeSegmentIterator { + file: self, + iter: self.pe.sections.iter(), + } + } + + fn section_by_name(&'file self, section_name: &str) -> Option<PeSection<'data, 'file>> { + self.sections() + .find(|section| section.name() == Some(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<PeSection<'data, 'file>> { + self.sections().find(|section| section.index() == index) + } + + fn sections(&'file self) -> PeSectionIterator<'data, 'file> { + PeSectionIterator { + file: self, + iter: self.pe.sections.iter().enumerate(), + } + } + + fn symbol_by_index(&self, _index: SymbolIndex) -> Option<Symbol<'data>> { + // TODO: return COFF symbols for object files + None + } + + fn symbols(&'file self) -> PeSymbolIterator<'data, 'file> { + // TODO: return COFF symbols for object files + PeSymbolIterator { + index: 0, + exports: [].iter(), + imports: [].iter(), + } + } + + fn dynamic_symbols(&'file self) -> PeSymbolIterator<'data, 'file> { + PeSymbolIterator { + index: 0, + exports: self.pe.exports.iter(), + imports: self.pe.imports.iter(), + } + } + + fn symbol_map(&self) -> SymbolMap<'data> { + // TODO: untested + let mut symbols: Vec<_> = self + .symbols() + .map(|(_, s)| s) + .filter(SymbolMap::filter) + .collect(); + symbols.sort_by_key(|x| x.address); + SymbolMap { symbols } + } + + fn has_debug_symbols(&self) -> bool { + // TODO: check if CodeView-in-PE still works + for section in &self.pe.sections { + if let Ok(name) = section.name() { + if name == ".debug_info" { + return true; + } + } + } + false + } + + fn entry(&self) -> u64 { + self.pe.entry as u64 + } +} + +/// An iterator over the loadable sections of a `PeFile`. 
+#[derive(Debug)] +pub struct PeSegmentIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + iter: slice::Iter<'file, pe::section_table::SectionTable>, +} + +impl<'data, 'file> Iterator for PeSegmentIterator<'data, 'file> { + type Item = PeSegment<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|section| PeSegment { + file: self.file, + section, + }) + } +} + +/// A loadable section of a `PeFile`. +#[derive(Debug)] +pub struct PeSegment<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + section: &'file pe::section_table::SectionTable, +} + +impl<'data, 'file> ObjectSegment<'data> for PeSegment<'data, 'file> { + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size) + } + + #[inline] + fn align(&self) -> u64 { + self.file.section_alignment() + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + ( + self.section.pointer_to_raw_data as u64, + self.section.size_of_raw_data as u64, + ) + } + + fn data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = cmp::min(self.section.virtual_size, self.section.size_of_raw_data) as usize; + &self.file.data[offset..][..size] + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.data(), self.address(), address, size) + } + + #[inline] + fn name(&self) -> Option<&str> { + self.section.name().ok() + } +} + +/// An iterator over the sections of a `PeFile`. 
+#[derive(Debug)] +pub struct PeSectionIterator<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + iter: iter::Enumerate<slice::Iter<'file, pe::section_table::SectionTable>>, +} + +impl<'data, 'file> Iterator for PeSectionIterator<'data, 'file> { + type Item = PeSection<'data, 'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(index, section)| PeSection { + file: self.file, + index: SectionIndex(index), + section, + }) + } +} + +/// A section of a `PeFile`. +#[derive(Debug)] +pub struct PeSection<'data, 'file> +where + 'data: 'file, +{ + file: &'file PeFile<'data>, + index: SectionIndex, + section: &'file pe::section_table::SectionTable, +} + +impl<'data, 'file> PeSection<'data, 'file> { + fn raw_data(&self) -> &'data [u8] { + let offset = self.section.pointer_to_raw_data as usize; + let size = cmp::min(self.section.virtual_size, self.section.size_of_raw_data) as usize; + &self.file.data[offset..][..size] + } +} + +impl<'data, 'file> ObjectSection<'data> for PeSection<'data, 'file> { + type RelocationIterator = PeRelocationIterator; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + u64::from(self.section.virtual_address) + } + + #[inline] + fn size(&self) -> u64 { + u64::from(self.section.virtual_size) + } + + #[inline] + fn align(&self) -> u64 { + self.file.section_alignment() + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + Some(( + self.section.pointer_to_raw_data as u64, + self.section.size_of_raw_data as u64, + )) + } + + fn data(&self) -> Cow<'data, [u8]> { + Cow::from(self.raw_data()) + } + + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]> { + read::data_range(self.raw_data(), self.address(), address, size) + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'data, [u8]> { + // TODO: does PE support compression? 
+ self.data() + } + + fn name(&self) -> Option<&str> { + self.section.name().ok() + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + None + } + + #[inline] + fn kind(&self) -> SectionKind { + if self.section.characteristics + & (pe::section_table::IMAGE_SCN_CNT_CODE | pe::section_table::IMAGE_SCN_MEM_EXECUTE) + != 0 + { + SectionKind::Text + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_CNT_INITIALIZED_DATA + != 0 + { + SectionKind::Data + } else if self.section.characteristics & pe::section_table::IMAGE_SCN_CNT_UNINITIALIZED_DATA + != 0 + { + SectionKind::UninitializedData + } else { + SectionKind::Unknown + } + } + + fn relocations(&self) -> PeRelocationIterator { + PeRelocationIterator + } +} + +/// An iterator over the symbols of a `PeFile`. +#[derive(Debug)] +pub struct PeSymbolIterator<'data, 'file> +where + 'data: 'file, +{ + index: usize, + exports: slice::Iter<'file, pe::export::Export<'data>>, + imports: slice::Iter<'file, pe::import::Import<'data>>, +} + +impl<'data, 'file> Iterator for PeSymbolIterator<'data, 'file> { + type Item = (SymbolIndex, Symbol<'data>); + + fn next(&mut self) -> Option<Self::Item> { + if let Some(export) = self.exports.next() { + let index = SymbolIndex(self.index); + self.index += 1; + return Some(( + index, + Symbol { + name: export.name, + address: export.rva as u64, + size: 0, + kind: SymbolKind::Unknown, + // TODO: can we find a section? 
+ section_index: None, + undefined: false, + weak: false, + scope: SymbolScope::Dynamic, + }, + )); + } + if let Some(import) = self.imports.next() { + let index = SymbolIndex(self.index); + self.index += 1; + let name = match import.name { + Cow::Borrowed(name) => Some(name), + _ => None, + }; + return Some(( + index, + Symbol { + name, + address: 0, + size: 0, + kind: SymbolKind::Unknown, + section_index: None, + undefined: true, + weak: false, + scope: SymbolScope::Dynamic, + }, + )); + } + None + } +} + +/// An iterator over the relocations in an `PeSection`. +#[derive(Debug)] +pub struct PeRelocationIterator; + +impl Iterator for PeRelocationIterator { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + None + } +} diff --git a/third_party/rust/object/src/read/traits.rs b/third_party/rust/object/src/read/traits.rs new file mode 100644 index 0000000000..70b11e798a --- /dev/null +++ b/third_party/rust/object/src/read/traits.rs @@ -0,0 +1,220 @@ +use crate::alloc::borrow::Cow; +use crate::{Relocation, SectionIndex, SectionKind, Symbol, SymbolIndex, SymbolMap}; +use target_lexicon::{Architecture, Endianness}; +use uuid::Uuid; + +/// An object file. +pub trait Object<'data, 'file> { + /// A segment in the object file. + type Segment: ObjectSegment<'data>; + + /// An iterator over the segments in the object file. + type SegmentIterator: Iterator<Item = Self::Segment>; + + /// A section in the object file. + type Section: ObjectSection<'data>; + + /// An iterator over the sections in the object file. + type SectionIterator: Iterator<Item = Self::Section>; + + /// An iterator over the symbols in the object file. + type SymbolIterator: Iterator<Item = (SymbolIndex, Symbol<'data>)>; + + /// Get the architecture type of the file. + fn architecture(&self) -> Architecture; + + /// Get the endianness of the file. 
+ #[inline] + fn endianness(&self) -> Endianness { + if self.is_little_endian() { + Endianness::Little + } else { + Endianness::Big + } + } + + /// Return true if the file is little endian, false if it is big endian. + fn is_little_endian(&self) -> bool; + + /// Return true if the file can contain 64-bit addresses. + fn is_64(&self) -> bool; + + /// Get an iterator over the segments in the file. + fn segments(&'file self) -> Self::SegmentIterator; + + /// Get the entry point address of the binary + fn entry(&'file self) -> u64; + + /// Get the section named `section_name`, if such a section exists. + /// + /// If `section_name` starts with a '.' then it is treated as a system section name, + /// and is compared using the conventions specific to the object file format. This + /// includes: + /// - if ".text" is requested for a Mach-O object file, then the actual + /// section name that is searched for is "__text". + /// - if ".debug_info" is requested for an ELF object file, then + /// ".zdebug_info" may be returned (and similarly for other debug sections). + /// + /// For some object files, multiple segments may contain sections with the same + /// name. In this case, the first matching section will be used. + fn section_by_name(&'file self, section_name: &str) -> Option<Self::Section>; + + /// Get the section at the given index. + /// + /// The meaning of the index depends on the object file. + /// + /// For some object files, this requires iterating through all sections. + fn section_by_index(&'file self, index: SectionIndex) -> Option<Self::Section>; + + /// Get the contents of the section named `section_name`, if such + /// a section exists. + /// + /// The `section_name` is interpreted according to `Self::section_by_name`. + /// + /// This may decompress section data. 
+ fn section_data_by_name(&'file self, section_name: &str) -> Option<Cow<'data, [u8]>> { + self.section_by_name(section_name) + .map(|section| section.uncompressed_data()) + } + + /// Get an iterator over the sections in the file. + fn sections(&'file self) -> Self::SectionIterator; + + /// Get the debugging symbol at the given index. + /// + /// This is similar to `self.symbols().nth(index)`, except that + /// the index will take into account malformed or unsupported symbols. + fn symbol_by_index(&self, index: SymbolIndex) -> Option<Symbol<'data>>; + + /// Get an iterator over the debugging symbols in the file. + /// + /// This may skip over symbols that are malformed or unsupported. + fn symbols(&'file self) -> Self::SymbolIterator; + + /// Get the data for the given symbol. + fn symbol_data(&'file self, symbol: &Symbol<'data>) -> Option<&'data [u8]> { + if symbol.is_undefined() { + return None; + } + let address = symbol.address(); + let size = symbol.size(); + if let Some(index) = symbol.section_index() { + self.section_by_index(index) + .and_then(|section| section.data_range(address, size)) + } else { + self.segments() + .find_map(|segment| segment.data_range(address, size)) + } + } + + /// Get an iterator over the dynamic linking symbols in the file. + /// + /// This may skip over symbols that are malformed or unsupported. + fn dynamic_symbols(&'file self) -> Self::SymbolIterator; + + /// Construct a map from addresses to symbols. + fn symbol_map(&self) -> SymbolMap<'data>; + + /// Return true if the file contains debug information sections, false if not. + fn has_debug_symbols(&self) -> bool; + + /// The UUID from a Mach-O `LC_UUID` load command. + #[inline] + fn mach_uuid(&self) -> Option<Uuid> { + None + } + + /// The build ID from an ELF `NT_GNU_BUILD_ID` note. + #[inline] + fn build_id(&self) -> Option<&'data [u8]> { + None + } + + /// The filename and CRC from a `.gnu_debuglink` section. 
+ #[inline] + fn gnu_debuglink(&self) -> Option<(&'data [u8], u32)> { + None + } +} + +/// A loadable segment defined in an object file. +/// +/// For ELF, this is a program header with type `PT_LOAD`. +/// For Mach-O, this is a load command with type `LC_SEGMENT` or `LC_SEGMENT_64`. +pub trait ObjectSegment<'data> { + /// Returns the virtual address of the segment. + fn address(&self) -> u64; + + /// Returns the size of the segment in memory. + fn size(&self) -> u64; + + /// Returns the alignment of the segment in memory. + fn align(&self) -> u64; + + /// Returns the offset and size of the segment in the file. + fn file_range(&self) -> (u64, u64); + + /// Returns a reference to the file contents of the segment. + /// The length of this data may be different from the size of the + /// segment in memory. + fn data(&self) -> &'data [u8]; + + /// Return the segment data in the given range. + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]>; + + /// Returns the name of the segment. + fn name(&self) -> Option<&str>; +} + +/// A section defined in an object file. +pub trait ObjectSection<'data> { + /// An iterator over the relocations for a section. + /// + /// The first field in the item tuple is the section offset + /// that the relocation applies to. + type RelocationIterator: Iterator<Item = (u64, Relocation)>; + + /// Returns the section index. + fn index(&self) -> SectionIndex; + + /// Returns the address of the section. + fn address(&self) -> u64; + + /// Returns the size of the section in memory. + fn size(&self) -> u64; + + /// Returns the alignment of the section in memory. + fn align(&self) -> u64; + + /// Returns offset and size of on-disk segment (if any) + fn file_range(&self) -> Option<(u64, u64)>; + + /// Returns the raw contents of the section. + /// The length of this data may be different from the size of the + /// section in memory. + /// + /// This does not do any decompression. 
+ fn data(&self) -> Cow<'data, [u8]>; + + /// Return the raw contents of the section data in the given range. + /// + /// This does not do any decompression. + fn data_range(&self, address: u64, size: u64) -> Option<&'data [u8]>; + + /// Returns the uncompressed contents of the section. + /// The length of this data may be different from the size of the + /// section in memory. + fn uncompressed_data(&self) -> Cow<'data, [u8]>; + + /// Returns the name of the section. + fn name(&self) -> Option<&str>; + + /// Returns the name of the segment for this section. + fn segment_name(&self) -> Option<&str>; + + /// Return the kind of this section. + fn kind(&self) -> SectionKind; + + /// Get the relocations for this section. + fn relocations(&self) -> Self::RelocationIterator; +} diff --git a/third_party/rust/object/src/read/wasm.rs b/third_party/rust/object/src/read/wasm.rs new file mode 100644 index 0000000000..e13343e5b1 --- /dev/null +++ b/third_party/rust/object/src/read/wasm.rs @@ -0,0 +1,315 @@ +use crate::alloc::vec::Vec; +use parity_wasm::elements::{self, Deserialize}; +use std::borrow::Cow; +use std::{iter, slice}; +use target_lexicon::Architecture; + +use crate::read::{ + Object, ObjectSection, ObjectSegment, Relocation, SectionIndex, SectionKind, Symbol, + SymbolIndex, SymbolMap, +}; + +/// A WebAssembly object file. +#[derive(Debug)] +pub struct WasmFile { + module: elements::Module, +} + +impl<'data> WasmFile { + /// Parse the raw wasm data. 
+ pub fn parse(mut data: &'data [u8]) -> Result<Self, &'static str> { + let module = + elements::Module::deserialize(&mut data).map_err(|_| "failed to parse wasm")?; + Ok(WasmFile { module }) + } +} + +fn serialize_to_cow<'a, S>(s: S) -> Option<Cow<'a, [u8]>> +where + S: elements::Serialize, +{ + let mut buf = Vec::new(); + s.serialize(&mut buf).ok()?; + Some(Cow::from(buf)) +} + +impl<'file> Object<'static, 'file> for WasmFile { + type Segment = WasmSegment<'file>; + type SegmentIterator = WasmSegmentIterator<'file>; + type Section = WasmSection<'file>; + type SectionIterator = WasmSectionIterator<'file>; + type SymbolIterator = WasmSymbolIterator<'file>; + + #[inline] + fn architecture(&self) -> Architecture { + Architecture::Wasm32 + } + + #[inline] + fn is_little_endian(&self) -> bool { + true + } + + #[inline] + fn is_64(&self) -> bool { + false + } + + fn segments(&'file self) -> Self::SegmentIterator { + WasmSegmentIterator { file: self } + } + + fn entry(&'file self) -> u64 { + self.module + .start_section() + .map_or(u64::max_value(), u64::from) + } + + fn section_by_name(&'file self, section_name: &str) -> Option<WasmSection<'file>> { + self.sections() + .find(|section| section.name() == Some(section_name)) + } + + fn section_by_index(&'file self, index: SectionIndex) -> Option<WasmSection<'file>> { + self.sections().find(|section| section.index() == index) + } + + fn sections(&'file self) -> Self::SectionIterator { + WasmSectionIterator { + sections: self.module.sections().iter().enumerate(), + } + } + + fn symbol_by_index(&self, _index: SymbolIndex) -> Option<Symbol<'static>> { + unimplemented!() + } + + fn symbols(&'file self) -> Self::SymbolIterator { + WasmSymbolIterator { file: self } + } + + fn dynamic_symbols(&'file self) -> Self::SymbolIterator { + WasmSymbolIterator { file: self } + } + + fn symbol_map(&self) -> SymbolMap<'static> { + SymbolMap { + symbols: Vec::new(), + } + } + + fn has_debug_symbols(&self) -> bool { + // We ignore the "name" 
section, and use this to mean whether the wasm + // has DWARF. + self.module.sections().iter().any(|s| match *s { + elements::Section::Custom(ref c) => c.name().starts_with(".debug_"), + _ => false, + }) + } +} + +/// An iterator over the segments of an `WasmFile`. +#[derive(Debug)] +pub struct WasmSegmentIterator<'file> { + file: &'file WasmFile, +} + +impl<'file> Iterator for WasmSegmentIterator<'file> { + type Item = WasmSegment<'file>; + + fn next(&mut self) -> Option<Self::Item> { + None + } +} + +/// A segment of an `WasmFile`. +#[derive(Debug)] +pub struct WasmSegment<'file> { + file: &'file WasmFile, +} + +impl<'file> ObjectSegment<'static> for WasmSegment<'file> { + #[inline] + fn address(&self) -> u64 { + unreachable!() + } + + #[inline] + fn size(&self) -> u64 { + unreachable!() + } + + #[inline] + fn align(&self) -> u64 { + unreachable!() + } + + #[inline] + fn file_range(&self) -> (u64, u64) { + unreachable!() + } + + fn data(&self) -> &'static [u8] { + unreachable!() + } + + fn data_range(&self, _address: u64, _size: u64) -> Option<&'static [u8]> { + unreachable!() + } + + #[inline] + fn name(&self) -> Option<&str> { + unreachable!() + } +} + +/// An iterator over the sections of an `WasmFile`. +#[derive(Debug)] +pub struct WasmSectionIterator<'file> { + sections: iter::Enumerate<slice::Iter<'file, elements::Section>>, +} + +impl<'file> Iterator for WasmSectionIterator<'file> { + type Item = WasmSection<'file>; + + fn next(&mut self) -> Option<Self::Item> { + self.sections.next().map(|(index, section)| WasmSection { + index: SectionIndex(index), + section, + }) + } +} + +/// A section of an `WasmFile`. 
+#[derive(Debug)] +pub struct WasmSection<'file> { + index: SectionIndex, + section: &'file elements::Section, +} + +impl<'file> ObjectSection<'static> for WasmSection<'file> { + type RelocationIterator = WasmRelocationIterator; + + #[inline] + fn index(&self) -> SectionIndex { + self.index + } + + #[inline] + fn address(&self) -> u64 { + 1 + } + + #[inline] + fn size(&self) -> u64 { + serialize_to_cow(self.section.clone()).map_or(0, |b| b.len() as u64) + } + + #[inline] + fn align(&self) -> u64 { + 1 + } + + #[inline] + fn file_range(&self) -> Option<(u64, u64)> { + None + } + + fn data(&self) -> Cow<'static, [u8]> { + match *self.section { + elements::Section::Custom(ref section) => Some(section.payload().to_vec().into()), + elements::Section::Start(section) => { + serialize_to_cow(elements::VarUint32::from(section)) + } + _ => serialize_to_cow(self.section.clone()), + } + .unwrap_or_else(|| Cow::from(&[][..])) + } + + fn data_range(&self, _address: u64, _size: u64) -> Option<&'static [u8]> { + unimplemented!() + } + + #[inline] + fn uncompressed_data(&self) -> Cow<'static, [u8]> { + // TODO: does wasm support compression? + self.data() + } + + fn name(&self) -> Option<&str> { + match *self.section { + elements::Section::Unparsed { .. 
} => None, + elements::Section::Custom(ref c) => Some(c.name()), + elements::Section::Type(_) => Some("Type"), + elements::Section::Import(_) => Some("Import"), + elements::Section::Function(_) => Some("Function"), + elements::Section::Table(_) => Some("Table"), + elements::Section::Memory(_) => Some("Memory"), + elements::Section::Global(_) => Some("Global"), + elements::Section::Export(_) => Some("Export"), + elements::Section::Start(_) => Some("Start"), + elements::Section::Element(_) => Some("Element"), + elements::Section::Code(_) => Some("Code"), + elements::Section::DataCount(_) => Some("DataCount"), + elements::Section::Data(_) => Some("Data"), + elements::Section::Name(_) => Some("Name"), + elements::Section::Reloc(_) => Some("Reloc"), + } + } + + #[inline] + fn segment_name(&self) -> Option<&str> { + None + } + + fn kind(&self) -> SectionKind { + match *self.section { + elements::Section::Unparsed { .. } => SectionKind::Unknown, + elements::Section::Custom(_) => SectionKind::Unknown, + elements::Section::Type(_) => SectionKind::Other, + elements::Section::Import(_) => SectionKind::Other, + elements::Section::Function(_) => SectionKind::Other, + elements::Section::Table(_) => SectionKind::Other, + elements::Section::Memory(_) => SectionKind::Other, + elements::Section::Global(_) => SectionKind::Other, + elements::Section::Export(_) => SectionKind::Other, + elements::Section::Start(_) => SectionKind::Other, + elements::Section::Element(_) => SectionKind::Other, + elements::Section::Code(_) => SectionKind::Text, + elements::Section::DataCount(_) => SectionKind::Other, + elements::Section::Data(_) => SectionKind::Data, + elements::Section::Name(_) => SectionKind::Other, + elements::Section::Reloc(_) => SectionKind::Other, + } + } + + fn relocations(&self) -> WasmRelocationIterator { + WasmRelocationIterator + } +} + +/// An iterator over the symbols of an `WasmFile`. 
+#[derive(Debug)] +pub struct WasmSymbolIterator<'file> { + file: &'file WasmFile, +} + +impl<'file> Iterator for WasmSymbolIterator<'file> { + type Item = (SymbolIndex, Symbol<'static>); + + fn next(&mut self) -> Option<Self::Item> { + unimplemented!() + } +} + +/// An iterator over the relocations in an `WasmSection`. +#[derive(Debug)] +pub struct WasmRelocationIterator; + +impl Iterator for WasmRelocationIterator { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option<Self::Item> { + None + } +} diff --git a/third_party/rust/object/src/write/coff.rs b/third_party/rust/object/src/write/coff.rs new file mode 100644 index 0000000000..5077bc7f0a --- /dev/null +++ b/third_party/rust/object/src/write/coff.rs @@ -0,0 +1,486 @@ +use crc32fast; +use scroll::ctx::SizeWith; +use scroll::IOwrite; +use std::iter; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod coff { + pub use goblin::pe::characteristic::*; + pub use goblin::pe::header::*; + pub use goblin::pe::relocation::*; + pub use goblin::pe::section_table::*; + pub use goblin::pe::symbol::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + offset: usize, + str_id: Option<StringId>, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, + aux_count: u8, +} + +impl Object { + pub(crate) fn coff_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rdata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + StandardSection::Tls 
=> (&[], &b".tls$"[..], SectionKind::Tls), + StandardSection::UninitializedTls => { + // Unsupported section. + (&[], &[], SectionKind::UninitializedTls) + } + StandardSection::TlsVariables => { + // Unsupported section. + (&[], &[], SectionKind::TlsVariables) + } + } + } + + pub(crate) fn coff_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + let mut name = section.to_vec(); + name.push(b'$'); + name.extend(value); + name + } + + pub(crate) fn coff_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + if relocation.kind == RelocationKind::GotRelative { + // Use a stub symbol for the relocation instead. + // This isn't really a GOT, but it's a similar purpose. + // TODO: need to handle DLL imports differently? + relocation.kind = RelocationKind::Relative; + relocation.symbol = self.coff_add_stub_symbol(relocation.symbol); + } else if relocation.kind == RelocationKind::PltRelative { + // Windows doesn't need a separate relocation type for + // references to functions in import libraries. + // For convenience, treat this the same as Relative. 
+ relocation.kind = RelocationKind::Relative; + } + + let constant = match self.architecture { + Architecture::I386 => match relocation.kind { + RelocationKind::Relative => { + // IMAGE_REL_I386_REL32 + relocation.addend + 4 + } + _ => relocation.addend, + }, + Architecture::X86_64 => match relocation.kind { + RelocationKind::Relative => { + // IMAGE_REL_AMD64_REL32 through to IMAGE_REL_AMD64_REL32_5 + // The relocation type itself encodes an addend in -9..=-4, so the addend + // stays in the relocation and the returned implicit addend is 0. + if relocation.addend <= -4 && relocation.addend >= -9 { + 0 + } else { + relocation.addend + 4 + } + } + _ => relocation.addend, + }, + _ => unimplemented!(), + }; + relocation.addend -= constant; + constant + } + + fn coff_add_stub_symbol(&mut self, symbol_id: SymbolId) -> SymbolId { + if let Some(stub_id) = self.stub_symbols.get(&symbol_id) { + return *stub_id; + } + let stub_size = self.architecture.pointer_width().unwrap().bytes(); + + let mut name = b".rdata$.refptr.".to_vec(); + name.extend(&self.symbols[symbol_id.0].name); + let section_id = self.add_section(Vec::new(), name, SectionKind::ReadOnlyData); + let section = self.section_mut(section_id); + section.set_data(vec![0; stub_size as usize], u64::from(stub_size)); + section.relocations = vec![Relocation { + offset: 0, + size: stub_size * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: symbol_id, + addend: 0, + }]; + + let mut name = b".refptr.".to_vec(); + name.extend(&self.symbol(symbol_id).name); + let stub_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: u64::from(stub_size), + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: Some(section_id), + }); + self.stub_symbols.insert(symbol_id, stub_id); + + stub_id + } + + pub(crate) fn coff_write(&self) -> Result<Vec<u8>, String> { + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + let mut strtab = StringTable::default(); + + // COFF header. + let ctx = scroll::LE; + offset += coff::CoffHeader::size_with(&ctx); + + // Section headers. 
+ offset += self.sections.len() * coff::SectionTable::size_with(&ctx); + + // Calculate size of section data and add section strings to strtab. + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + for (index, section) in self.sections.iter().enumerate() { + if section.name.len() > 8 { + section_offsets[index].str_id = Some(strtab.add(&section.name)); + } + + let len = section.data.len(); + if len != 0 { + // TODO: not sure what alignment is required here, but this seems to match LLVM + offset = align(offset, 4); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + + // Calculate size of relocations. + let count = section.relocations.len(); + if count != 0 { + section_offsets[index].reloc_offset = offset; + offset += count * coff::Relocation::size_with(&ctx); + } + } + + // Calculate size of symbols and add symbol strings to strtab. + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut symtab_count = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + match symbol.kind { + SymbolKind::File => { + // Name goes in auxiliary symbol records. + let aux_count = + (symbol.name.len() + coff::COFF_SYMBOL_SIZE - 1) / coff::COFF_SYMBOL_SIZE; + symbol_offsets[index].aux_count = aux_count as u8; + symtab_count += aux_count; + // Don't add name to strtab. + continue; + } + SymbolKind::Section => { + symbol_offsets[index].aux_count = 1; + symtab_count += 1; + } + _ => {} + } + if symbol.name.len() > 8 { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_offset = offset; + let symtab_len = symtab_count * coff::COFF_SYMBOL_SIZE; + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // First 4 bytes of strtab are the length. 
+ strtab.write(4, &mut strtab_data); + let strtab_len = strtab_data.len() + 4; + offset += strtab_len; + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. + let header = coff::CoffHeader { + machine: match self.architecture { + Architecture::I386 => coff::COFF_MACHINE_X86, + Architecture::X86_64 => coff::COFF_MACHINE_X86_64, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }, + number_of_sections: self.sections.len() as u16, + time_date_stamp: 0, + pointer_to_symbol_table: symtab_offset as u32, + number_of_symbol_table: symtab_count as u32, + size_of_optional_header: 0, + characteristics: 0, + }; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + // TODO: IMAGE_SCN_LNK_COMDAT + let characteristics = match section.kind { + SectionKind::Text => { + coff::IMAGE_SCN_CNT_CODE + | coff::IMAGE_SCN_MEM_EXECUTE + | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Data => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::UninitializedData => { + coff::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::ReadOnlyData | SectionKind::ReadOnlyString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Debug | SectionKind::Other | SectionKind::OtherString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_DISCARDABLE + } + SectionKind::Linker => coff::IMAGE_SCN_LNK_INFO | coff::IMAGE_SCN_LNK_REMOVE, + SectionKind::Tls + | SectionKind::UninitializedTls + | SectionKind::TlsVariables + | SectionKind::Unknown + | SectionKind::Metadata => { + return Err(format!("unimplemented section {:?}", section.kind)) + } + }; + let align = match section.align { + 1 => coff::IMAGE_SCN_ALIGN_1BYTES, + 2 => 
coff::IMAGE_SCN_ALIGN_2BYTES, + 4 => coff::IMAGE_SCN_ALIGN_4BYTES, + 8 => coff::IMAGE_SCN_ALIGN_8BYTES, + 16 => coff::IMAGE_SCN_ALIGN_16BYTES, + 32 => coff::IMAGE_SCN_ALIGN_32BYTES, + 64 => coff::IMAGE_SCN_ALIGN_64BYTES, + 128 => coff::IMAGE_SCN_ALIGN_128BYTES, + 256 => coff::IMAGE_SCN_ALIGN_256BYTES, + 512 => coff::IMAGE_SCN_ALIGN_512BYTES, + 1024 => coff::IMAGE_SCN_ALIGN_1024BYTES, + 2048 => coff::IMAGE_SCN_ALIGN_2048BYTES, + 4096 => coff::IMAGE_SCN_ALIGN_4096BYTES, + 8192 => coff::IMAGE_SCN_ALIGN_8192BYTES, + _ => return Err(format!("unimplemented section align {}", section.align)), + }; + let mut coff_section = coff::SectionTable { + name: [0; 8], + real_name: None, + virtual_size: if section.data.is_empty() { + section.size as u32 + } else { + 0 + }, + virtual_address: 0, + size_of_raw_data: section.data.len() as u32, + pointer_to_raw_data: if section.data.is_empty() { + 0 + } else { + section_offsets[index].offset as u32 + }, + pointer_to_relocations: section_offsets[index].reloc_offset as u32, + pointer_to_linenumbers: 0, + number_of_relocations: section.relocations.len() as u16, + number_of_linenumbers: 0, + characteristics: characteristics | align, + }; + if section.name.len() <= 8 { + coff_section.name[..section.name.len()].copy_from_slice(&section.name); + } else { + let str_offset = strtab.get_offset(section_offsets[index].str_id.unwrap()); + coff_section.set_name_offset(str_offset).unwrap(); + } + buffer.iowrite_with(coff_section, ctx).unwrap(); + } + + // Write section data and relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, 4); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(&section.data); + } + + if !section.relocations.is_empty() { + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in &section.relocations { + //assert!(reloc.implicit_addend); + let typ = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 16, 0) => coff::IMAGE_REL_I386_DIR16, + (RelocationKind::Relative, 16, 0) => coff::IMAGE_REL_I386_REL16, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_I386_DIR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_I386_DIR32NB, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_I386_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_I386_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_I386_SECREL7, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_I386_REL32, + (RelocationKind::Coff(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_AMD64_ADDR64, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32NB, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_AMD64_REL32, + (RelocationKind::Relative, 32, -5) => coff::IMAGE_REL_AMD64_REL32_1, + (RelocationKind::Relative, 32, -6) => coff::IMAGE_REL_AMD64_REL32_2, + (RelocationKind::Relative, 32, -7) => coff::IMAGE_REL_AMD64_REL32_3, + (RelocationKind::Relative, 32, -8) => coff::IMAGE_REL_AMD64_REL32_4, + (RelocationKind::Relative, 32, -9) => coff::IMAGE_REL_AMD64_REL32_5, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_AMD64_SECREL, + 
(RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_AMD64_SECREL7, + (RelocationKind::Coff(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + buffer + .iowrite_with( + coff::Relocation { + virtual_address: reloc.offset as u32, + symbol_table_index: symbol_offsets[reloc.symbol.0].index as u32, + typ, + }, + ctx, + ) + .unwrap(); + } + } + } + + // Write symbols. + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + let mut name = &symbol.name[..]; + let mut section_number = symbol.section.map(|x| x.0 + 1).unwrap_or(0) as i16; + let typ = if symbol.kind == SymbolKind::Text { + coff::IMAGE_SYM_DTYPE_FUNCTION << coff::IMAGE_SYM_DTYPE_SHIFT + } else { + coff::IMAGE_SYM_TYPE_NULL + }; + let storage_class = match symbol.kind { + SymbolKind::File => { + // Name goes in auxiliary symbol records. + name = b".file"; + section_number = coff::IMAGE_SYM_DEBUG; + coff::IMAGE_SYM_CLASS_FILE + } + SymbolKind::Section => coff::IMAGE_SYM_CLASS_STATIC, + SymbolKind::Label => coff::IMAGE_SYM_CLASS_LABEL, + SymbolKind::Text | SymbolKind::Data => { + match symbol.scope { + _ if symbol.is_undefined() => coff::IMAGE_SYM_CLASS_EXTERNAL, + // TODO: does this need aux symbol records too? 
+ _ if symbol.weak => coff::IMAGE_SYM_CLASS_WEAK_EXTERNAL, + SymbolScope::Unknown => { + return Err(format!("unimplemented symbol scope {:?}", symbol)) + } + SymbolScope::Compilation => coff::IMAGE_SYM_CLASS_STATIC, + SymbolScope::Linkage | SymbolScope::Dynamic => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + } + } + _ => return Err(format!("unimplemented symbol {:?}", symbol.kind)), + }; + let number_of_aux_symbols = symbol_offsets[index].aux_count; + let mut coff_symbol = coff::Symbol { + name: [0; 8], + value: symbol.value as u32, + section_number, + typ, + storage_class, + number_of_aux_symbols, + }; + if name.len() <= 8 { + coff_symbol.name[..name.len()].copy_from_slice(name); + } else { + let str_offset = strtab.get_offset(symbol_offsets[index].str_id.unwrap()); + coff_symbol.set_name_offset(str_offset as u32); + } + buffer.iowrite_with(coff_symbol, ctx).unwrap(); + + match symbol.kind { + SymbolKind::File => { + let aux_len = number_of_aux_symbols as usize * coff::COFF_SYMBOL_SIZE; + debug_assert!(aux_len >= symbol.name.len()); + buffer.extend(&symbol.name); + buffer.extend(iter::repeat(0).take(aux_len - symbol.name.len())); + } + SymbolKind::Section => { + debug_assert_eq!(number_of_aux_symbols, 1); + let section = &self.sections[symbol.section.unwrap().0]; + buffer + .iowrite_with( + coff::AuxSectionDefinition { + length: section.data.len() as u32, + number_of_relocations: section.relocations.len() as u16, + number_of_line_numbers: 0, + checksum: checksum(&section.data), + number: section_number as u16, + // TODO: COMDAT + selection: 0, + unused: [0; 3], + }, + ctx, + ) + .unwrap(); + } + _ => { + debug_assert_eq!(number_of_aux_symbols, 0); + } + } + } + + // Write strtab section. 
+ debug_assert_eq!(strtab_offset, buffer.len()); + buffer.iowrite_with(strtab_len as u32, ctx).unwrap(); + buffer.extend(&strtab_data); + + Ok(buffer) + } +} + +// JamCRC +fn checksum(data: &[u8]) -> u32 { + let mut hasher = crc32fast::Hasher::new_with_initial(0xffff_ffff); + hasher.update(data); + !hasher.finalize() +} diff --git a/third_party/rust/object/src/write/elf.rs b/third_party/rust/object/src/write/elf.rs new file mode 100644 index 0000000000..e784794dc8 --- /dev/null +++ b/third_party/rust/object/src/write/elf.rs @@ -0,0 +1,736 @@ +use scroll::ctx::SizeWith; +use scroll::IOwrite; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod elf { + pub use goblin::elf::header::*; + pub use goblin::elf::program_header::*; + pub use goblin::elf::reloc::*; + pub use goblin::elf::section_header::*; + pub use goblin::elf::sym::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + str_id: Option<StringId>, + reloc_index: usize, + reloc_offset: usize, + reloc_len: usize, + reloc_str_id: Option<StringId>, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, +} + +impl Object { + pub(crate) fn elf_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rodata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + StandardSection::Tls => (&[], &b".tdata"[..], SectionKind::Tls), + StandardSection::UninitializedTls => { + (&[], &b".tbss"[..], SectionKind::UninitializedTls) + } + 
StandardSection::TlsVariables => { + // Unsupported section. + (&[], &[], SectionKind::TlsVariables) + } + } + } + + pub(crate) fn elf_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + let mut name = section.to_vec(); + name.push(b'.'); + name.extend(value); + name + } + + fn elf_has_relocation_addend(&self) -> Result<bool, String> { + Ok(match self.architecture { + Architecture::Arm(_) => false, + Architecture::Aarch64(_) => false, + Architecture::I386 => false, + Architecture::X86_64 => true, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }) + } + + pub(crate) fn elf_fixup_relocation( + &mut self, + mut relocation: &mut Relocation, + ) -> Result<i64, String> { + // Return true if we should use a section symbol to avoid preemption. + fn want_section_symbol(relocation: &Relocation, symbol: &Symbol) -> bool { + if symbol.scope != SymbolScope::Dynamic { + // Only dynamic symbols can be preemptible. + return false; + } + match symbol.kind { + SymbolKind::Text | SymbolKind::Data => {} + _ => return false, + } + match relocation.kind { + // Anything using GOT or PLT is preemptible. + // We also require that `Other` relocations must already be correct. + RelocationKind::Got + | RelocationKind::GotRelative + | RelocationKind::GotBaseRelative + | RelocationKind::PltRelative + | RelocationKind::Elf(_) => return false, + // Absolute relocations are preemptible for non-local data. + // TODO: not sure if this rule is exactly correct + // This rule was added to handle global data references in debuginfo. + // Maybe this should be a new relocation kind so that the caller can decide. + RelocationKind::Absolute => { + if symbol.kind == SymbolKind::Data { + return false; + } + } + _ => {} + } + true + } + + // Use section symbols for relocations where required to avoid preemption. 
+ // Otherwise, the linker will fail with: + // relocation R_X86_64_PC32 against symbol `SomeSymbolName' can not be used when + // making a shared object; recompile with -fPIC + let symbol = &self.symbols[relocation.symbol.0]; + if want_section_symbol(relocation, symbol) { + if let Some(section) = symbol.section { + relocation.addend += symbol.value as i64; + relocation.symbol = self.section_symbol(section); + } + } + + // Determine whether the addend is stored in the relocation or the data. + if self.elf_has_relocation_addend()? { + Ok(0) + } else { + let constant = relocation.addend; + relocation.addend = 0; + Ok(constant) + } + } + + pub(crate) fn elf_write(&self) -> Result<Vec<u8>, String> { + let (container, pointer_align) = match self.architecture.pointer_width().unwrap() { + PointerWidth::U16 | PointerWidth::U32 => (goblin::container::Container::Little, 4), + PointerWidth::U64 => (goblin::container::Container::Big, 8), + }; + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => goblin::container::Endian::Little, + Endianness::Big => goblin::container::Endian::Big, + }; + let ctx = goblin::container::Ctx::new(container, endian); + let is_rela = self.elf_has_relocation_addend()?; + let reloc_ctx = (is_rela, ctx); + + // Calculate offsets of everything. + let mut offset = 0; + + // ELF header. + let e_ehsize = elf::Header::size_with(&ctx); + offset += e_ehsize; + + // Create reloc section header names. + let reloc_names: Vec<_> = self + .sections + .iter() + .map(|section| { + let mut reloc_name = Vec::new(); + if !section.relocations.is_empty() { + reloc_name.extend_from_slice(if is_rela { + &b".rela"[..] + } else { + &b".rel"[..] + }); + reloc_name.extend_from_slice(&section.name); + } + reloc_name + }) + .collect(); + + // Calculate size of section data. + let mut shstrtab = StringTable::default(); + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + // Null section. 
+ let mut e_shnum = 1; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].str_id = Some(shstrtab.add(&section.name)); + section_offsets[index].index = e_shnum; + e_shnum += 1; + + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + + if !section.relocations.is_empty() { + section_offsets[index].reloc_str_id = Some(shstrtab.add(&reloc_names[index])); + section_offsets[index].reloc_index = e_shnum; + e_shnum += 1; + } + } + + // Calculate index of symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + // Null symbol. + let mut symtab_count = 1; + // Local symbols must come before global. + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + } + } + let symtab_count_local = symtab_count; + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.kind != SymbolKind::Section { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_str_id = shstrtab.add(&b".symtab"[..]); + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = symtab_count * elf::Sym::size_with(&ctx); + offset += symtab_len; + let symtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of symtab_shndx. 
+ let mut need_symtab_shndx = false; + for symbol in &self.symbols { + let index = symbol + .section + .map(|s| section_offsets[s.0].index) + .unwrap_or(0); + if index >= elf::SHN_LORESERVE as usize { + need_symtab_shndx = true; + break; + } + } + let symtab_shndx_offset = offset; + let mut symtab_shndx_str_id = None; + let mut symtab_shndx_len = 0; + if need_symtab_shndx { + symtab_shndx_str_id = Some(shstrtab.add(&b".symtab_shndx"[..])); + symtab_shndx_len = symtab_count * 4; + offset += symtab_shndx_len; + e_shnum += 1; + } + + // Calculate size of strtab. + let strtab_str_id = shstrtab.add(&b".strtab"[..]); + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // Null name. + strtab_data.push(0); + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + let strtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of relocations. + for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, pointer_align); + section_offsets[index].reloc_offset = offset; + let len = count * elf::Reloc::size_with(&reloc_ctx); + section_offsets[index].reloc_len = len; + offset += len; + } + } + + // Calculate size of shstrtab. + let shstrtab_str_id = shstrtab.add(&b".shstrtab"[..]); + let shstrtab_offset = offset; + let mut shstrtab_data = Vec::new(); + // Null section name. + shstrtab_data.push(0); + shstrtab.write(1, &mut shstrtab_data); + offset += shstrtab_data.len(); + let shstrtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of section headers. + offset = align(offset, pointer_align); + let e_shoff = offset; + let e_shentsize = elf::SectionHeader::size_with(&ctx); + offset += e_shnum * e_shentsize; + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. 
+ let e_machine = match self.architecture { + Architecture::Arm(_) => elf::EM_ARM, + Architecture::Aarch64(_) => elf::EM_AARCH64, + Architecture::I386 => elf::EM_386, + Architecture::X86_64 => elf::EM_X86_64, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let mut header = elf::Header { + e_ident: [0; 16], + e_type: elf::ET_REL, + e_machine, + e_version: elf::EV_CURRENT.into(), + e_entry: 0, + e_phoff: 0, + e_shoff: e_shoff as u64, + e_flags: 0, + e_ehsize: e_ehsize as u16, + e_phentsize: 0, + e_phnum: 0, + e_shentsize: e_shentsize as u16, + e_shnum: if e_shnum >= elf::SHN_LORESERVE as usize { + 0 + } else { + e_shnum as u16 + }, + e_shstrndx: if shstrtab_index >= elf::SHN_LORESERVE as usize { + elf::SHN_XINDEX as u16 + } else { + shstrtab_index as u16 + }, + }; + header.e_ident[0..4].copy_from_slice(elf::ELFMAG); + header.e_ident[elf::EI_CLASS] = if container.is_big() { + elf::ELFCLASS64 + } else { + elf::ELFCLASS32 + }; + header.e_ident[elf::EI_DATA] = if endian.is_little() { + elf::ELFDATA2LSB + } else { + elf::ELFDATA2MSB + }; + header.e_ident[elf::EI_VERSION] = elf::EV_CURRENT; + header.e_ident[elf::EI_OSABI] = elf::ELFOSABI_NONE; + header.e_ident[elf::EI_ABIVERSION] = 0; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write section data. + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(&section.data); + } + } + + // Write symbols. 
+ write_align(&mut buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + buffer + .iowrite_with( + elf::Sym { + st_name: 0, + st_info: 0, + st_other: 0, + st_shndx: 0, + st_value: 0, + st_size: 0, + }, + ctx, + ) + .unwrap(); + let mut symtab_shndx = Vec::new(); + if need_symtab_shndx { + symtab_shndx.iowrite_with(0, ctx.le).unwrap(); + } + let mut write_symbol = |index: usize, symbol: &Symbol| { + let st_type = match symbol.kind { + SymbolKind::Unknown | SymbolKind::Null => elf::STT_NOTYPE, + SymbolKind::Text => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_FUNC + } + } + SymbolKind::Data => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_OBJECT + } + } + SymbolKind::Section => elf::STT_SECTION, + SymbolKind::File => elf::STT_FILE, + SymbolKind::Common => elf::STT_COMMON, + SymbolKind::Tls => elf::STT_TLS, + SymbolKind::Label => elf::STT_NOTYPE, + }; + let st_bind = if symbol.is_undefined() { + elf::STB_GLOBAL + } else if symbol.is_local() { + elf::STB_LOCAL + } else if symbol.weak { + elf::STB_WEAK + } else { + elf::STB_GLOBAL + }; + let st_other = if symbol.scope == SymbolScope::Linkage { + elf::STV_HIDDEN + } else { + elf::STV_DEFAULT + }; + let st_shndx = match symbol.kind { + SymbolKind::File => { + if need_symtab_shndx { + symtab_shndx.iowrite_with(0, ctx.le).unwrap(); + } + elf::SHN_ABS as usize + } + _ => { + let index = symbol + .section + .map(|s| section_offsets[s.0].index) + .unwrap_or(elf::SHN_UNDEF as usize); + if need_symtab_shndx { + symtab_shndx.iowrite_with(index as u32, ctx.le).unwrap(); + } + if index >= elf::SHN_LORESERVE as usize { + elf::SHN_XINDEX as usize + } else { + index + } + } + }; + let st_name = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::Sym { + st_name, + st_info: (st_bind << 4) + st_type, + st_other, + st_shndx, + st_value: symbol.value, + st_size: symbol.size, + }, + ctx, + ) + .unwrap(); 
+ }; + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + write_symbol(index, symbol); + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + write_symbol(index, symbol); + } + } + if need_symtab_shndx { + debug_assert_eq!(symtab_shndx_offset, buffer.len()); + debug_assert_eq!(symtab_shndx_len, symtab_shndx.len()); + buffer.extend(&symtab_shndx); + } + + // Write strtab section. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.extend(&strtab_data); + + // Write relocations. + for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(&mut buffer, pointer_align); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_type = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size) { + (RelocationKind::Absolute, 32) => elf::R_386_32, + (RelocationKind::Relative, 32) => elf::R_386_PC32, + (RelocationKind::Got, 32) => elf::R_386_GOT32, + (RelocationKind::PltRelative, 32) => elf::R_386_PLT32, + (RelocationKind::GotBaseOffset, 32) => elf::R_386_GOTOFF, + (RelocationKind::GotBaseRelative, 32) => elf::R_386_GOTPC, + (RelocationKind::Absolute, 16) => elf::R_386_16, + (RelocationKind::Relative, 16) => elf::R_386_PC16, + (RelocationKind::Absolute, 8) => elf::R_386_8, + (RelocationKind::Relative, 8) => elf::R_386_PC8, + (RelocationKind::Elf(x), _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { + elf::R_X86_64_64 + } + (RelocationKind::Relative, _, 32) => elf::R_X86_64_PC32, + (RelocationKind::Got, _, 32) => elf::R_X86_64_GOT32, + (RelocationKind::PltRelative, _, 32) => elf::R_X86_64_PLT32, + (RelocationKind::GotRelative, _, 32) => elf::R_X86_64_GOTPCREL, + (RelocationKind::Absolute, 
RelocationEncoding::Generic, 32) => { + elf::R_X86_64_32 + } + (RelocationKind::Absolute, RelocationEncoding::X86Signed, 32) => { + elf::R_X86_64_32S + } + (RelocationKind::Absolute, _, 16) => elf::R_X86_64_16, + (RelocationKind::Relative, _, 16) => elf::R_X86_64_PC16, + (RelocationKind::Absolute, _, 8) => elf::R_X86_64_8, + (RelocationKind::Relative, _, 8) => elf::R_X86_64_PC8, + (RelocationKind::Elf(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let r_sym = symbol_offsets[reloc.symbol.0].index; + buffer + .iowrite_with( + elf::Reloc { + r_offset: reloc.offset, + r_addend: Some(reloc.addend), + r_sym, + r_type, + }, + reloc_ctx, + ) + .unwrap(); + } + } + } + + // Write shstrtab section. + debug_assert_eq!(shstrtab_offset, buffer.len()); + buffer.extend(&shstrtab_data); + + // Write section headers. + write_align(&mut buffer, pointer_align); + debug_assert_eq!(e_shoff, buffer.len()); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: 0, + sh_type: 0, + sh_flags: 0, + sh_addr: 0, + sh_offset: 0, + sh_size: if e_shnum >= elf::SHN_LORESERVE as usize { + e_shnum as u64 + } else { + 0 + }, + sh_link: if shstrtab_index >= elf::SHN_LORESERVE as usize { + shstrtab_index as u32 + } else { + 0 + }, + // TODO: e_phnum overflow + sh_info: 0, + sh_addralign: 0, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + for (index, section) in self.sections.iter().enumerate() { + let sh_type = match section.kind { + SectionKind::UninitializedData | SectionKind::UninitializedTls => elf::SHT_NOBITS, + _ => elf::SHT_PROGBITS, + }; + let sh_flags = match section.kind { + SectionKind::Text => elf::SHF_ALLOC | elf::SHF_EXECINSTR, + SectionKind::Data => elf::SHF_ALLOC | elf::SHF_WRITE, + SectionKind::Tls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::UninitializedData => elf::SHF_ALLOC | elf::SHF_WRITE, + 
SectionKind::UninitializedTls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::ReadOnlyData => elf::SHF_ALLOC, + SectionKind::ReadOnlyString => elf::SHF_ALLOC | elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::OtherString => elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::Other + | SectionKind::Debug + | SectionKind::Unknown + | SectionKind::Metadata + | SectionKind::Linker => 0, + SectionKind::TlsVariables => { + return Err(format!("unimplemented section {:?}", section.kind)) + } + }; + // TODO: not sure if this is correct, maybe user should determine this + let sh_entsize = match section.kind { + SectionKind::ReadOnlyString | SectionKind::OtherString => 1, + _ => 0, + }; + let sh_name = section_offsets[index] + .str_id + .map(|id| shstrtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name, + sh_type, + sh_flags: sh_flags.into(), + sh_addr: 0, + sh_offset: section_offsets[index].offset as u64, + sh_size: section.size, + sh_link: 0, + sh_info: 0, + sh_addralign: section.align, + sh_entsize, + }, + ctx, + ) + .unwrap(); + + if !section.relocations.is_empty() { + let sh_name = section_offsets[index] + .reloc_str_id + .map(|id| shstrtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name, + sh_type: if is_rela { elf::SHT_RELA } else { elf::SHT_REL }, + sh_flags: elf::SHF_INFO_LINK.into(), + sh_addr: 0, + sh_offset: section_offsets[index].reloc_offset as u64, + sh_size: section_offsets[index].reloc_len as u64, + sh_link: symtab_index as u32, + sh_info: section_offsets[index].index as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Reloc::size_with(&reloc_ctx) as u64, + }, + ctx, + ) + .unwrap(); + } + } + + // Write symtab section header. 
+ buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(symtab_str_id), + sh_type: elf::SHT_SYMTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: symtab_offset as u64, + sh_size: symtab_len as u64, + sh_link: strtab_index as u32, + sh_info: symtab_count_local as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Sym::size_with(&ctx) as u64, + }, + ctx, + ) + .unwrap(); + + // Write symtab_shndx section header. + if need_symtab_shndx { + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(symtab_shndx_str_id.unwrap()), + sh_type: elf::SHT_SYMTAB_SHNDX, + sh_flags: 0, + sh_addr: 0, + sh_offset: symtab_shndx_offset as u64, + sh_size: symtab_shndx_len as u64, + sh_link: strtab_index as u32, + sh_info: symtab_count_local as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Sym::size_with(&ctx) as u64, + }, + ctx, + ) + .unwrap(); + } + + // Write strtab section header. + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(strtab_str_id), + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: strtab_offset as u64, + sh_size: strtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + + // Write shstrtab section header. 
+ buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(shstrtab_str_id), + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: shstrtab_offset as u64, + sh_size: shstrtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + + Ok(buffer) + } +} diff --git a/third_party/rust/object/src/write/macho.rs b/third_party/rust/object/src/write/macho.rs new file mode 100644 index 0000000000..356a44b34d --- /dev/null +++ b/third_party/rust/object/src/write/macho.rs @@ -0,0 +1,558 @@ +use scroll::ctx::SizeWith; +use scroll::{IOwrite, Pwrite}; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod mach { + pub use goblin::mach::constants::cputype::*; + pub use goblin::mach::constants::*; + pub use goblin::mach::header::*; + pub use goblin::mach::load_command::*; + pub use goblin::mach::relocation::*; + pub use goblin::mach::segment::*; + pub use goblin::mach::symbols::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + address: u64, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, +} + +impl Object { + pub(crate) fn macho_segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match segment { + StandardSegment::Text => &b"__TEXT"[..], + StandardSegment::Data => &b"__DATA"[..], + StandardSegment::Debug => &b"__DWARF"[..], + } + } + + pub(crate) fn macho_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&b"__TEXT"[..], &b"__text"[..], SectionKind::Text), + StandardSection::Data => (&b"__DATA"[..], &b"__data"[..], SectionKind::Data), + StandardSection::ReadOnlyData => { + (&b"__TEXT"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + 
StandardSection::ReadOnlyDataWithRel => { + (&b"__DATA"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + StandardSection::ReadOnlyString => ( + &b"__TEXT"[..], + &b"__cstring"[..], + SectionKind::ReadOnlyString, + ), + StandardSection::UninitializedData => ( + &b"__DATA"[..], + &b"__bss"[..], + SectionKind::UninitializedData, + ), + StandardSection::Tls => (&b"__DATA"[..], &b"__thread_data"[..], SectionKind::Tls), + StandardSection::UninitializedTls => ( + &b"__DATA"[..], + &b"__thread_bss"[..], + SectionKind::UninitializedTls, + ), + StandardSection::TlsVariables => ( + &b"__DATA"[..], + &b"__thread_vars"[..], + SectionKind::TlsVariables, + ), + } + } + + fn macho_tlv_bootstrap(&mut self) -> SymbolId { + match self.tlv_bootstrap { + Some(id) => id, + None => { + let id = self.add_symbol(Symbol { + name: b"_tlv_bootstrap".to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Dynamic, + weak: false, + section: None, + }); + self.tlv_bootstrap = Some(id); + id + } + } + } + + /// Create the `__thread_vars` entry for a TLS variable. + /// + /// The symbol given by `symbol_id` will be updated to point to this entry. + /// + /// A new `SymbolId` will be returned. The caller must update this symbol + /// to point to the initializer. + /// + /// If `symbol_id` is not for a TLS variable, then it is returned unchanged. + pub(crate) fn macho_add_thread_var(&mut self, symbol_id: SymbolId) -> SymbolId { + let symbol = self.symbol_mut(symbol_id); + if symbol.kind != SymbolKind::Tls { + return symbol_id; + } + + // Create the initializer symbol. + let mut name = symbol.name.clone(); + name.extend(b"$tlv$init"); + let init_symbol_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Tls, + scope: SymbolScope::Compilation, + weak: false, + section: None, + }); + + // Add the tlv entry. 
+ // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer - used when mapped by the runtime + // - pointer to symbol initializer + let section = self.section_id(StandardSection::TlsVariables); + let pointer_width = self.architecture.pointer_width().unwrap().bytes(); + let size = u64::from(pointer_width) * 3; + let data = vec![0; size as usize]; + let offset = self.append_section_data(section, &data, u64::from(pointer_width)); + + let tlv_bootstrap = self.macho_tlv_bootstrap(); + self.add_relocation( + section, + Relocation { + offset: offset, + size: pointer_width * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: tlv_bootstrap, + addend: 0, + }, + ) + .unwrap(); + self.add_relocation( + section, + Relocation { + offset: offset + u64::from(pointer_width) * 2, + size: pointer_width * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: init_symbol_id, + addend: 0, + }, + ) + .unwrap(); + + // Update the symbol to point to the tlv. 
+ let symbol = self.symbol_mut(symbol_id); + symbol.value = offset; + symbol.size = size; + symbol.section = Some(section); + + init_symbol_id + } + + pub(crate) fn macho_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + let constant = match relocation.kind { + RelocationKind::Relative + | RelocationKind::GotRelative + | RelocationKind::PltRelative => relocation.addend + 4, + _ => relocation.addend, + }; + relocation.addend -= constant; + constant + } + + pub(crate) fn macho_write(&self) -> Result<Vec<u8>, String> { + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => goblin::container::Endian::Little, + Endianness::Big => goblin::container::Endian::Big, + }; + let (container, pointer_align) = match self.architecture.pointer_width().unwrap() { + PointerWidth::U16 | PointerWidth::U32 => (goblin::container::Container::Little, 4), + PointerWidth::U64 => (goblin::container::Container::Big, 8), + }; + let ctx = goblin::container::Ctx::new(container, endian); + + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + + // Calculate size of Mach-O header. + offset += mach::Header::size_with(&ctx); + + // Calculate size of commands. + let mut ncmds = 0; + let command_offset = offset; + + // Calculate size of segment command and section headers. + let segment_command_offset = offset; + let segment_command_len = + mach::Segment::size_with(&ctx) + self.sections.len() * mach::Section::size_with(&ctx); + offset += segment_command_len; + ncmds += 1; + + // Calculate size of symtab command. + let symtab_command_offset = offset; + let symtab_command_len = mach::SymtabCommand::size_with(&ctx.le); + offset += symtab_command_len; + ncmds += 1; + + let sizeofcmds = offset - command_offset; + + // Calculate size of section data. 
+ let segment_data_offset = offset; + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + let mut address = 0; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].index = 1 + index; + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + address = align_u64(address, section.align); + section_offsets[index].address = address; + address += section.size; + } + let segment_data_size = offset - segment_data_offset; + + // Count symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut nsyms = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_undefined() { + match symbol.kind { + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => {} + SymbolKind::File | SymbolKind::Section => continue, + _ => return Err(format!("unimplemented symbol {:?}", symbol)), + } + } + symbol_offsets[index].index = nsyms; + nsyms += 1; + if !symbol.name.is_empty() { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = nsyms * mach::Nlist::size_with(&ctx); + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // Null name. + strtab_data.push(0); + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + + // Calculate size of relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, 4); + section_offsets[index].reloc_offset = offset; + let len = count * mach::RelocationInfo::size_with(&ctx.le); + offset += len; + } + } + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. + let (cputype, cpusubtype) = match self.architecture { + Architecture::Arm(_) => (mach::CPU_TYPE_ARM, mach::CPU_SUBTYPE_ARM_ALL), + Architecture::Aarch64(_) => (mach::CPU_TYPE_ARM64, mach::CPU_SUBTYPE_ARM64_ALL), + Architecture::I386 => (mach::CPU_TYPE_I386, mach::CPU_SUBTYPE_I386_ALL), + Architecture::X86_64 => (mach::CPU_TYPE_X86_64, mach::CPU_SUBTYPE_X86_64_ALL), + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + + let header = mach::Header { + magic: if ctx.is_big() { + mach::MH_MAGIC_64 + } else { + mach::MH_MAGIC + }, + cputype, + cpusubtype, + filetype: mach::MH_OBJECT, + ncmds, + sizeofcmds: sizeofcmds as u32, + flags: if self.subsection_via_symbols { + mach::MH_SUBSECTIONS_VIA_SYMBOLS + } else { + 0 + }, + reserved: 0, + }; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write segment command. 
+ debug_assert_eq!(segment_command_offset, buffer.len()); + let mut segment_command = mach::Segment::new(ctx, &[]); + segment_command.cmd = if ctx.is_big() { + mach::LC_SEGMENT_64 + } else { + mach::LC_SEGMENT + }; + segment_command.cmdsize = segment_command_len as u32; + segment_command.segname = [0; 16]; + segment_command.vmaddr = 0; + segment_command.vmsize = address; + segment_command.fileoff = segment_data_offset as u64; + segment_command.filesize = segment_data_size as u64; + segment_command.maxprot = mach::VM_PROT_READ | mach::VM_PROT_WRITE | mach::VM_PROT_EXECUTE; + segment_command.initprot = mach::VM_PROT_READ | mach::VM_PROT_WRITE | mach::VM_PROT_EXECUTE; + segment_command.nsects = self.sections.len() as u32; + segment_command.flags = 0; + buffer.iowrite_with(segment_command, ctx).unwrap(); + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + let mut sectname = [0; 16]; + sectname.pwrite(&*section.name, 0).unwrap(); + let mut segname = [0; 16]; + segname.pwrite(&*section.segment, 0).unwrap(); + let flags = match section.kind { + SectionKind::Text => { + mach::S_ATTR_PURE_INSTRUCTIONS | mach::S_ATTR_SOME_INSTRUCTIONS + } + SectionKind::Data => 0, + SectionKind::ReadOnlyData => 0, + SectionKind::ReadOnlyString => mach::S_CSTRING_LITERALS, + SectionKind::UninitializedData => mach::S_ZEROFILL, + SectionKind::Tls => mach::S_THREAD_LOCAL_REGULAR, + SectionKind::UninitializedTls => mach::S_THREAD_LOCAL_ZEROFILL, + SectionKind::TlsVariables => mach::S_THREAD_LOCAL_VARIABLES, + SectionKind::Debug => mach::S_ATTR_DEBUG, + SectionKind::OtherString => mach::S_CSTRING_LITERALS, + SectionKind::Other + | SectionKind::Unknown + | SectionKind::Linker + | SectionKind::Metadata => 0, + }; + buffer + .iowrite_with( + mach::Section { + sectname, + segname, + addr: section_offsets[index].address, + size: section.size, + offset: section_offsets[index].offset as u32, + align: section.align.trailing_zeros(), + reloff: 
section_offsets[index].reloc_offset as u32, + nreloc: section.relocations.len() as u32, + flags, + }, + ctx, + ) + .unwrap(); + } + + // Write symtab command. + debug_assert_eq!(symtab_command_offset, buffer.len()); + buffer + .iowrite_with( + mach::SymtabCommand { + cmd: mach::LC_SYMTAB, + cmdsize: symtab_command_len as u32, + symoff: symtab_offset as u32, + nsyms: nsyms as u32, + stroff: strtab_offset as u32, + strsize: strtab_data.len() as u32, + }, + ctx.le, + ) + .unwrap(); + + // Write section data. + debug_assert_eq!(segment_data_offset, buffer.len()); + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + } + + // Write symtab. + write_align(&mut buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_undefined() { + match symbol.kind { + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => {} + SymbolKind::File | SymbolKind::Section => continue, + _ => return Err(format!("unimplemented symbol {:?}", symbol)), + } + } + // TODO: N_STAB + // TODO: N_ABS + let mut n_type = if symbol.is_undefined() { + mach::N_UNDF | mach::N_EXT + } else { + mach::N_SECT + }; + match symbol.scope { + SymbolScope::Unknown | SymbolScope::Compilation => {} + SymbolScope::Linkage => { + n_type |= mach::N_EXT | mach::N_PEXT; + } + SymbolScope::Dynamic => { + n_type |= mach::N_EXT; + } + } + + let mut n_desc = 0; + if symbol.weak { + if symbol.is_undefined() { + n_desc |= mach::N_WEAK_REF; + } else { + n_desc |= mach::N_WEAK_DEF; + } + } + + let n_value = match symbol.section { + Some(section) => section_offsets[section.0].address + symbol.value, + None => symbol.value, + }; + + let n_strx = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + + buffer + 
.iowrite_with( + mach::Nlist { + n_strx, + n_type, + n_sect: symbol.section.map(|x| x.0 + 1).unwrap_or(0), + n_desc, + n_value, + }, + ctx, + ) + .unwrap(); + } + + // Write strtab. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.extend(&strtab_data); + + // Write relocations. + for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(&mut buffer, 4); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_extern; + let r_symbolnum; + let symbol = &self.symbols[reloc.symbol.0]; + if symbol.kind == SymbolKind::Section { + r_symbolnum = section_offsets[symbol.section.unwrap().0].index as u32; + r_extern = 0; + } else { + r_symbolnum = symbol_offsets[reloc.symbol.0].index as u32; + r_extern = 1; + } + let r_length = match reloc.size { + 8 => 0, + 16 => 1, + 32 => 2, + 64 => 3, + _ => return Err(format!("unimplemented reloc size {:?}", reloc)), + }; + let (r_pcrel, r_type) = match self.architecture { + Architecture::I386 => match reloc.kind { + RelocationKind::Absolute => (0, mach::GENERIC_RELOC_VANILLA), + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.addend) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { + (0, mach::X86_64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::Generic, -4) => { + (1, mach::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86RipRelative, -4) => { + (1, mach::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86Branch, -4) => { + (1, mach::X86_64_RELOC_BRANCH) + } + (RelocationKind::PltRelative, RelocationEncoding::X86Branch, -4) => { + (1, mach::X86_64_RELOC_BRANCH) + } + (RelocationKind::GotRelative, RelocationEncoding::Generic, -4) => { + (1, mach::X86_64_RELOC_GOT) + } + ( + RelocationKind::GotRelative, + 
RelocationEncoding::X86RipRelativeMovq, + -4, + ) => (1, mach::X86_64_RELOC_GOT_LOAD), + (RelocationKind::MachO { value, relative }, _, _) => { + (u32::from(relative), value) + } + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let r_info = r_symbolnum + | r_pcrel << 24 + | r_length << 25 + | r_extern << 27 + | u32::from(r_type) << 28; + buffer + .iowrite_with( + mach::RelocationInfo { + r_address: reloc.offset as i32, + r_info, + }, + ctx.le, + ) + .unwrap(); + } + } + } + + Ok(buffer) + } +} diff --git a/third_party/rust/object/src/write/mod.rs b/third_party/rust/object/src/write/mod.rs new file mode 100644 index 0000000000..79ffc16599 --- /dev/null +++ b/third_party/rust/object/src/write/mod.rs @@ -0,0 +1,674 @@ +//! Interface for writing object files. + +#![allow(clippy::collapsible_if)] +#![allow(clippy::cognitive_complexity)] +#![allow(clippy::module_inception)] + +use scroll::Pwrite; +use std::collections::HashMap; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::target_lexicon::{Architecture, BinaryFormat, Endianness, PointerWidth}; +use crate::{RelocationEncoding, RelocationKind, SectionKind, SymbolKind, SymbolScope}; + +mod coff; +mod elf; +mod macho; +mod string; +mod util; + +/// A writable object file. +#[derive(Debug)] +pub struct Object { + format: BinaryFormat, + architecture: Architecture, + sections: Vec<Section>, + standard_sections: HashMap<StandardSection, SectionId>, + symbols: Vec<Symbol>, + symbol_map: HashMap<Vec<u8>, SymbolId>, + stub_symbols: HashMap<SymbolId, SymbolId>, + subsection_via_symbols: bool, + /// The symbol name mangling scheme. + pub mangling: Mangling, + /// Mach-O "_tlv_bootstrap" symbol. + tlv_bootstrap: Option<SymbolId>, +} + +impl Object { + /// Create an empty object file. 
+ pub fn new(format: BinaryFormat, architecture: Architecture) -> Object { + Object { + format, + architecture, + sections: Vec::new(), + standard_sections: HashMap::new(), + symbols: Vec::new(), + symbol_map: HashMap::new(), + stub_symbols: HashMap::new(), + subsection_via_symbols: false, + mangling: Mangling::default(format, architecture), + tlv_bootstrap: None, + } + } + + /// Return the file format. + #[inline] + pub fn format(&self) -> BinaryFormat { + self.format + } + + /// Return the architecture. + #[inline] + pub fn architecture(&self) -> Architecture { + self.architecture + } + + /// Return the current mangling setting. + #[inline] + pub fn mangling(&self) -> Mangling { + self.mangling + } + + /// Return the current mangling setting. + #[inline] + pub fn set_mangling(&mut self, mangling: Mangling) { + self.mangling = mangling; + } + + /// Return the name for a standard segment. + /// + /// This will vary based on the file format. + pub fn segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match self.format { + BinaryFormat::Elf | BinaryFormat::Coff => &[], + BinaryFormat::Macho => self.macho_segment_name(segment), + _ => unimplemented!(), + } + } + + /// Get the section with the given `SectionId`. + #[inline] + pub fn section(&self, section: SectionId) -> &Section { + &self.sections[section.0] + } + + /// Mutably get the section with the given `SectionId`. + #[inline] + pub fn section_mut(&mut self, section: SectionId) -> &mut Section { + &mut self.sections[section.0] + } + + /// Append data to an existing section. Returns the section offset of the data. + pub fn append_section_data(&mut self, section: SectionId, data: &[u8], align: u64) -> u64 { + self.sections[section.0].append_data(data, align) + } + + /// Append zero-initialized data to an existing section. Returns the section offset of the data. 
+ pub fn append_section_bss(&mut self, section: SectionId, size: u64, align: u64) -> u64 { + self.sections[section.0].append_bss(size, align) + } + + /// Return the `SectionId` of a standard section. + /// + /// If the section doesn't already exist then it is created. + pub fn section_id(&mut self, section: StandardSection) -> SectionId { + self.standard_sections + .get(§ion) + .cloned() + .unwrap_or_else(|| { + let (segment, name, kind) = self.section_info(section); + self.add_section(segment.to_vec(), name.to_vec(), kind) + }) + } + + /// Add a new section and return its `SectionId`. + /// + /// This also creates a section symbol. + pub fn add_section(&mut self, segment: Vec<u8>, name: Vec<u8>, kind: SectionKind) -> SectionId { + let id = SectionId(self.sections.len()); + self.sections.push(Section { + segment, + name, + kind, + size: 0, + align: 1, + data: Vec::new(), + relocations: Vec::new(), + symbol: None, + }); + + // Add to self.standard_sections if required. This may match multiple standard sections. + let section = &self.sections[id.0]; + for standard_section in StandardSection::all() { + if !self.standard_sections.contains_key(standard_section) { + let (segment, name, kind) = self.section_info(*standard_section); + if segment == &*section.segment && name == &*section.name && kind == section.kind { + self.standard_sections.insert(*standard_section, id); + } + } + } + + id + } + + fn section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match self.format { + BinaryFormat::Elf => self.elf_section_info(section), + BinaryFormat::Coff => self.coff_section_info(section), + BinaryFormat::Macho => self.macho_section_info(section), + _ => unimplemented!(), + } + } + + /// Add a subsection. Returns the `SectionId` and section offset of the data. 
+ pub fn add_subsection( + &mut self, + section: StandardSection, + name: &[u8], + data: &[u8], + align: u64, + ) -> (SectionId, u64) { + let section_id = if self.has_subsection_via_symbols() { + self.subsection_via_symbols = true; + self.section_id(section) + } else { + let (segment, name, kind) = self.subsection_info(section, name); + self.add_section(segment.to_vec(), name, kind) + }; + let offset = self.append_section_data(section_id, data, align); + (section_id, offset) + } + + fn has_subsection_via_symbols(&self) -> bool { + match self.format { + BinaryFormat::Elf | BinaryFormat::Coff => false, + BinaryFormat::Macho => true, + _ => unimplemented!(), + } + } + + fn subsection_info( + &self, + section: StandardSection, + value: &[u8], + ) -> (&'static [u8], Vec<u8>, SectionKind) { + let (segment, section, kind) = self.section_info(section); + let name = self.subsection_name(section, value); + (segment, name, kind) + } + + fn subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + debug_assert!(!self.has_subsection_via_symbols()); + match self.format { + BinaryFormat::Elf => self.elf_subsection_name(section, value), + BinaryFormat::Coff => self.coff_subsection_name(section, value), + _ => unimplemented!(), + } + } + + /// Get the `SymbolId` of the symbol with the given name. + pub fn symbol_id(&self, name: &[u8]) -> Option<SymbolId> { + self.symbol_map.get(name).cloned() + } + + /// Get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol(&self, symbol: SymbolId) -> &Symbol { + &self.symbols[symbol.0] + } + + /// Mutably get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol_mut(&mut self, symbol: SymbolId) -> &mut Symbol { + &mut self.symbols[symbol.0] + } + + /// Add a new symbol and return its `SymbolId`. + pub fn add_symbol(&mut self, mut symbol: Symbol) -> SymbolId { + // Defined symbols must have a scope. 
+ debug_assert!(symbol.is_undefined() || symbol.scope != SymbolScope::Unknown); + if symbol.kind == SymbolKind::Section { + return self.section_symbol(symbol.section.unwrap()); + } + if !symbol.name.is_empty() + && (symbol.kind == SymbolKind::Text + || symbol.kind == SymbolKind::Data + || symbol.kind == SymbolKind::Tls) + { + let unmangled_name = symbol.name.clone(); + if let Some(prefix) = self.mangling.global_prefix() { + symbol.name.insert(0, prefix); + } + let symbol_id = self.add_raw_symbol(symbol); + self.symbol_map.insert(unmangled_name, symbol_id); + symbol_id + } else { + self.add_raw_symbol(symbol) + } + } + + fn add_raw_symbol(&mut self, symbol: Symbol) -> SymbolId { + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(symbol); + symbol_id + } + + /// Return true if the file format supports `StandardSection::UninitializedTls`. + pub fn has_uninitialized_tls(&self) -> bool { + self.format != BinaryFormat::Coff + } + + /// Add a new file symbol and return its `SymbolId`. + pub fn add_file_symbol(&mut self, name: Vec<u8>) -> SymbolId { + self.add_raw_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::File, + scope: SymbolScope::Compilation, + weak: false, + section: None, + }) + } + + /// Get the symbol for a section. + pub fn section_symbol(&mut self, section_id: SectionId) -> SymbolId { + let section = &mut self.sections[section_id.0]; + if let Some(symbol) = section.symbol { + return symbol; + } + let name = if self.format == BinaryFormat::Coff { + section.name.clone() + } else { + Vec::new() + }; + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Section, + scope: SymbolScope::Compilation, + weak: false, + section: Some(section_id), + }); + section.symbol = Some(symbol_id); + symbol_id + } + + /// Append data to an existing section, and update a symbol to refer to it. 
+ /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the added data via the `__thread_vars` entry. + /// + /// Returns the section offset of the data. + pub fn add_symbol_data( + &mut self, + symbol_id: SymbolId, + section: SectionId, + data: &[u8], + align: u64, + ) -> u64 { + let offset = self.append_section_data(section, data, align); + self.set_symbol_data(symbol_id, section, offset, data.len() as u64); + offset + } + + /// Append zero-initialized data to an existing section, and update a symbol to refer to it. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the added data via the `__thread_vars` entry. + /// + /// Returns the section offset of the data. + pub fn add_symbol_bss( + &mut self, + symbol_id: SymbolId, + section: SectionId, + size: u64, + align: u64, + ) -> u64 { + let offset = self.append_section_bss(section, size, align); + self.set_symbol_data(symbol_id, section, offset, size); + offset + } + + /// Update a symbol to refer to the given data within a section. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the data via the `__thread_vars` entry. + pub fn set_symbol_data( + &mut self, + mut symbol_id: SymbolId, + section: SectionId, + offset: u64, + size: u64, + ) { + // Defined symbols must have a scope. + debug_assert!(self.symbol(symbol_id).scope != SymbolScope::Unknown); + if self.format == BinaryFormat::Macho { + symbol_id = self.macho_add_thread_var(symbol_id); + } + let symbol = self.symbol_mut(symbol_id); + symbol.value = offset; + symbol.size = size; + symbol.section = Some(section); + } + + /// Convert a symbol to a section symbol and offset. + /// + /// Returns an error if the symbol is not defined. 
+ pub fn symbol_section_and_offset( + &mut self, + symbol_id: SymbolId, + ) -> Result<(SymbolId, u64), ()> { + let symbol = self.symbol(symbol_id); + if symbol.kind == SymbolKind::Section { + return Ok((symbol_id, 0)); + } + let symbol_offset = symbol.value; + let section = symbol.section.ok_or(())?; + let section_symbol = self.section_symbol(section); + Ok((section_symbol, symbol_offset)) + } + + /// Add a relocation to a section. + /// + /// Relocations must only be added after the referenced symbols have been added + /// and defined (if applicable). + pub fn add_relocation( + &mut self, + section: SectionId, + mut relocation: Relocation, + ) -> Result<(), String> { + let addend = match self.format { + BinaryFormat::Elf => self.elf_fixup_relocation(&mut relocation)?, + BinaryFormat::Coff => self.coff_fixup_relocation(&mut relocation), + BinaryFormat::Macho => self.macho_fixup_relocation(&mut relocation), + _ => unimplemented!(), + }; + if addend != 0 { + self.write_relocation_addend(section, &relocation, addend)?; + } + self.sections[section.0].relocations.push(relocation); + Ok(()) + } + + fn write_relocation_addend( + &mut self, + section: SectionId, + relocation: &Relocation, + addend: i64, + ) -> Result<(), String> { + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => scroll::LE, + Endianness::Big => scroll::BE, + }; + + let data = &mut self.sections[section.0].data; + if relocation.offset + (u64::from(relocation.size) + 7) / 8 > data.len() as u64 { + return Err(format!( + "invalid relocation offset {}+{} (max {})", + relocation.offset, + relocation.size, + data.len() + )); + } + match relocation.size { + 32 => { + data.pwrite_with(addend as i32, relocation.offset as usize, endian) + .unwrap(); + } + 64 => { + data.pwrite_with(addend, relocation.offset as usize, endian) + .unwrap(); + } + _ => return Err(format!("unimplemented relocation addend {:?}", relocation)), + } + Ok(()) + } + + /// Write the object to a `Vec`. 
+ pub fn write(&self) -> Result<Vec<u8>, String> { + match self.format { + BinaryFormat::Elf => self.elf_write(), + BinaryFormat::Coff => self.coff_write(), + BinaryFormat::Macho => self.macho_write(), + _ => unimplemented!(), + } + } +} + +/// A standard segment kind. +#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum StandardSegment { + Text, + Data, + Debug, +} + +/// A standard section kind. +#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum StandardSection { + Text, + Data, + ReadOnlyData, + ReadOnlyDataWithRel, + ReadOnlyString, + UninitializedData, + Tls, + /// Zero-fill TLS initializers. Unsupported for COFF. + UninitializedTls, + /// TLS variable structures. Only supported for Mach-O. + TlsVariables, +} + +impl StandardSection { + /// Return the section kind of a standard section. + pub fn kind(self) -> SectionKind { + match self { + StandardSection::Text => SectionKind::Text, + StandardSection::Data => SectionKind::Data, + StandardSection::ReadOnlyData | StandardSection::ReadOnlyDataWithRel => { + SectionKind::ReadOnlyData + } + StandardSection::ReadOnlyString => SectionKind::ReadOnlyString, + StandardSection::UninitializedData => SectionKind::UninitializedData, + StandardSection::Tls => SectionKind::Tls, + StandardSection::UninitializedTls => SectionKind::UninitializedTls, + StandardSection::TlsVariables => SectionKind::TlsVariables, + } + } + + fn all() -> &'static [StandardSection] { + &[ + StandardSection::Text, + StandardSection::Data, + StandardSection::ReadOnlyData, + StandardSection::ReadOnlyDataWithRel, + StandardSection::ReadOnlyString, + StandardSection::UninitializedData, + StandardSection::Tls, + StandardSection::UninitializedTls, + StandardSection::TlsVariables, + ] + } +} + +/// An identifier used to reference a section. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionId(usize); + +/// A section in an object file. +#[derive(Debug)] +pub struct Section { + segment: Vec<u8>, + name: Vec<u8>, + kind: SectionKind, + size: u64, + align: u64, + data: Vec<u8>, + relocations: Vec<Relocation>, + symbol: Option<SymbolId>, +} + +impl Section { + /// Return true if this section contains uninitialized data. + #[inline] + pub fn is_bss(&self) -> bool { + self.kind == SectionKind::UninitializedData || self.kind == SectionKind::UninitializedTls + } + + /// Set the data for a section. + /// + /// Must not be called for sections that already have data, or that contain uninitialized data. + pub fn set_data(&mut self, data: Vec<u8>, align: u64) { + debug_assert!(!self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + debug_assert!(self.data.is_empty()); + self.size = data.len() as u64; + self.data = data; + self.align = align; + } + + /// Append data to a section. + /// + /// Must not be called for sections that contain uninitialized data. + pub fn append_data(&mut self, data: &[u8], align: u64) -> u64 { + debug_assert!(!self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + if self.align < align { + self.align = align; + } + let align = align as usize; + let mut offset = self.data.len(); + if offset & (align - 1) != 0 { + offset += align - (offset & (align - 1)); + self.data.resize(offset, 0); + } + self.data.extend(data); + self.size = self.data.len() as u64; + offset as u64 + } + + /// Append unitialized data to a section. + /// + /// Must not be called for sections that contain initialized data. 
+ pub fn append_bss(&mut self, size: u64, align: u64) -> u64 { + debug_assert!(self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + if self.align < align { + self.align = align; + } + let mut offset = self.size; + if offset & (align - 1) != 0 { + offset += align - (offset & (align - 1)); + self.size = offset; + } + self.size += size; + offset as u64 + } +} + +/// An identifier used to reference a symbol. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SymbolId(usize); + +/// A symbol in an object file. +#[derive(Debug)] +pub struct Symbol { + /// The name of the symbol. + pub name: Vec<u8>, + /// The value of the symbol. + /// + /// If the symbol defined in a section, then this is the section offset of the symbol. + pub value: u64, + /// The size of the symbol. + pub size: u64, + /// The kind of the symbol. + pub kind: SymbolKind, + /// The scope of the symbol. + pub scope: SymbolScope, + /// Whether the symbol has weak binding. + pub weak: bool, + /// The section containing the symbol. + /// + /// Set to `None` for undefined symbols. + pub section: Option<SectionId>, +} + +impl Symbol { + /// Return true if the symbol is undefined. + #[inline] + pub fn is_undefined(&self) -> bool { + self.section.is_none() + } + + /// Return true if the symbol scope is local. + #[inline] + pub fn is_local(&self) -> bool { + self.scope == SymbolScope::Compilation + } +} + +/// A relocation in an object file. +#[derive(Debug)] +pub struct Relocation { + /// The section offset of the place of the relocation. + pub offset: u64, + /// The size in bits of the place of relocation. + pub size: u8, + /// The operation used to calculate the result of the relocation. + pub kind: RelocationKind, + /// Information about how the result of the relocation operation is encoded in the place. + pub encoding: RelocationEncoding, + /// The symbol referred to by the relocation. + /// + /// This may be a section symbol. 
+ pub symbol: SymbolId, + /// The addend to use in the relocation calculation. + /// + /// This may be in addition to an implicit addend stored at the place of the relocation. + pub addend: i64, +} + +/// The symbol name mangling scheme. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Mangling { + /// No symbol mangling. + None, + /// Windows COFF symbol mangling. + Coff, + /// Windows COFF i386 symbol mangling. + CoffI386, + /// ELF symbol mangling. + Elf, + /// Mach-O symbol mangling. + Macho, +} + +impl Mangling { + /// Return the default symboling mangling for the given format and architecture. + pub fn default(format: BinaryFormat, architecture: Architecture) -> Self { + match (format, architecture) { + (BinaryFormat::Coff, Architecture::I386) => Mangling::CoffI386, + (BinaryFormat::Coff, _) => Mangling::Coff, + (BinaryFormat::Elf, _) => Mangling::Elf, + (BinaryFormat::Macho, _) => Mangling::Macho, + _ => Mangling::None, + } + } + + /// Return the prefix to use for global symbols. + pub fn global_prefix(self) -> Option<u8> { + match self { + Mangling::None | Mangling::Elf | Mangling::Coff => None, + Mangling::CoffI386 | Mangling::Macho => Some(b'_'), + } + } +} diff --git a/third_party/rust/object/src/write/string.rs b/third_party/rust/object/src/write/string.rs new file mode 100644 index 0000000000..6a7ac31fe6 --- /dev/null +++ b/third_party/rust/object/src/write/string.rs @@ -0,0 +1,140 @@ +use indexmap::IndexSet; + +use crate::alloc::vec::Vec; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct StringId(usize); + +#[derive(Debug, Default)] +pub(crate) struct StringTable<'a> { + strings: IndexSet<&'a [u8]>, + offsets: Vec<usize>, +} + +impl<'a> StringTable<'a> { + /// Add a string to the string table. + /// + /// Panics if the string table has already been written, or + /// if the string contains a null byte. 
+ pub fn add(&mut self, string: &'a [u8]) -> StringId { + assert!(self.offsets.is_empty()); + assert!(!string.contains(&0)); + let id = self.strings.insert_full(string).0; + StringId(id) + } + + /// Return the offset of the given string. + /// + /// Panics if the string table has not been written, or + /// if the string is not in the string table. + pub fn get_offset(&self, id: StringId) -> usize { + self.offsets[id.0] + } + + /// Append the string table to the given `Vec`, and + /// calculate the list of string offsets. + /// + /// `base` is the initial string table offset. For example, + /// this should be 1 for ELF, to account for the initial + /// null byte (which must have been written by the caller). + pub fn write(&mut self, base: usize, w: &mut Vec<u8>) { + assert!(self.offsets.is_empty()); + + let mut ids: Vec<_> = (0..self.strings.len()).collect(); + sort(&mut ids, 1, &self.strings); + + self.offsets = vec![0; ids.len()]; + let mut offset = base; + let mut previous = &[][..]; + for id in ids { + let string = self.strings.get_index(id).unwrap(); + if previous.ends_with(string) { + self.offsets[id] = offset - string.len() - 1; + } else { + self.offsets[id] = offset; + w.extend_from_slice(string); + w.push(0); + offset += string.len() + 1; + previous = string; + } + } + } +} + +// Multi-key quicksort. +// +// Ordering is such that if a string is a suffix of at least one other string, +// then it is placed immediately after one of those strings. That is: +// - comparison starts at the end of the string +// - shorter strings come later +// +// Based on the implementation in LLVM. 
fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) {
    // `ids` are indices into `strings`; `pos` is the 1-based distance from the
    // end of each string of the byte currently being compared.
    loop {
        if ids.len() <= 1 {
            return;
        }

        // Three-way partition around the byte of the first string:
        // `ids[..lower]` holds larger bytes, `ids[upper..]` smaller bytes, and
        // `ids[lower..upper]` bytes equal to the pivot.
        let pivot = byte(ids[0], pos, strings);
        let mut lower = 0;
        let mut upper = ids.len();
        let mut i = 1;
        while i < upper {
            let b = byte(ids[i], pos, strings);
            if b > pivot {
                ids.swap(lower, i);
                lower += 1;
                i += 1;
            } else if b < pivot {
                // The element swapped in from the end has not been examined
                // yet, so `i` is not advanced.
                upper -= 1;
                ids.swap(upper, i);
            } else {
                i += 1;
            }
        }

        // The outer partitions are sorted recursively at the same position.
        sort(&mut ids[..lower], pos, strings);
        sort(&mut ids[upper..], pos, strings);

        // A zero pivot means the strings in the middle partition have no byte
        // at this position, so there is nothing further to compare.
        if pivot == 0 {
            return;
        }
        // Continue with the middle partition at the next byte from the end.
        ids = &mut ids[lower..upper];
        pos += 1;
    }
}

// Return the byte `pos` positions from the end of string `id` (`pos` is
// 1-based), or 0 if the string is shorter than `pos`.
//
// Panics if `id` is not a valid index into `strings`.
// NOTE(review): `pos == 0` would index one past the end; the visible caller
// starts at 1.
fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 {
    let string = strings.get_index(id).unwrap();
    let len = string.len();
    if len >= pos {
        string[len - pos]
    } else {
        // We know the strings don't contain null bytes.
        0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // "bar" and "foo" are suffixes of strings already written ("foobar" and
    // "foo" itself), and the empty string reuses the final terminator.
    #[test]
    fn string_table() {
        let mut table = StringTable::default();
        let id0 = table.add(b"");
        let id1 = table.add(b"foo");
        let id2 = table.add(b"bar");
        let id3 = table.add(b"foobar");

        let mut data = Vec::new();
        data.push(0);
        table.write(1, &mut data);
        assert_eq!(data, b"\0foobar\0foo\0");

        assert_eq!(table.get_offset(id0), 11);
        assert_eq!(table.get_offset(id1), 8);
        assert_eq!(table.get_offset(id2), 4);
        assert_eq!(table.get_offset(id3), 1);
    }
}

// diff --git a/third_party/rust/object/src/write/util.rs b/third_party/rust/object/src/write/util.rs new file mode 100644 index 0000000000..0ac65424af --- /dev/null +++ b/third_party/rust/object/src/write/util.rs @@ -0,0 +1,14 @@
use crate::alloc::vec::Vec;

// Round `offset` up to the next multiple of `size`.
//
// The mask arithmetic is only correct when `size` is a power of two.
// NOTE(review): `size == 0` underflows in `size - 1`, and `offset + (size - 1)`
// can overflow for offsets near `usize::MAX`; neither is checked here.
pub(crate) fn align(offset: usize, size: usize) -> usize {
    (offset + (size - 1)) & !(size - 1)
}

// `u64` variant of `align`, with the same requirements on `size`.
pub(crate) fn align_u64(offset: u64, size: u64) -> u64 {
    (offset + (size - 1)) & !(size - 1)
}

// Pad `buffer` with zero bytes until its length is a multiple of `size`.
pub(crate) fn write_align(buffer: &mut Vec<u8>, size: usize) {
    let offset = align(buffer.len(), size);
    buffer.resize(offset, 0);
}