diff options
Diffstat (limited to 'third_party/rust/goblin/src')
44 files changed, 19561 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/archive/mod.rs b/third_party/rust/goblin/src/archive/mod.rs new file mode 100644 index 0000000000..0ff633c45b --- /dev/null +++ b/third_party/rust/goblin/src/archive/mod.rs @@ -0,0 +1,637 @@ +//! Implements a simple parser and extractor for a Unix Archive. +//! +//! There are two "common" formats: BSD and SysV +//! +//! This crate currently only implements the SysV version, which essentially postfixes all +//! names in the archive with a / as a sigil for the end of the name, and uses a special symbol +//! index for looking up symbols faster. + +use scroll::{Pread, Pwrite, SizeWith}; + +use crate::error::{Error, Result}; +use crate::strtab; + +use alloc::collections::btree_map::BTreeMap; +use alloc::vec::Vec; +use core::usize; + +pub const SIZEOF_MAGIC: usize = 8; +/// The magic number of a Unix Archive +pub const MAGIC: &[u8; SIZEOF_MAGIC] = b"!<arch>\x0A"; + +const SIZEOF_FILE_IDENTIFER: usize = 16; +const SIZEOF_FILE_SIZE: usize = 10; + +#[repr(C)] +#[derive(Debug, Clone, PartialEq, Pread, Pwrite, SizeWith)] +/// A Unix Archive Header - meta data for the file/byte blob/whatever that follows exactly after. +/// All data is right-padded with spaces ASCII `0x20`. The Binary layout is as follows: +/// +/// |Offset|Length|Name |Format | +/// |:-----|:-----|:--------------------------|:----------| +/// |0 |16 |File identifier |ASCII | +/// |16 |12 |File modification timestamp|Decimal | +/// |28 |6 |Owner ID |Decimal | +/// |34 |6 |Group ID |Decimal | +/// |40 |8 |File mode |Octal | +/// |48 |10 |Filesize in bytes |Decimal | +/// |58 |2 |Ending characters |`0x60 0x0A`| +/// +/// Byte alignment is according to the following: +/// > Each archive file member begins on an even byte boundary; a newline is inserted between files +/// > if necessary. Nevertheless, the size given reflects the actual size of the file exclusive +/// > of padding. +pub struct MemberHeader { + /// The identifier, or name for this file/whatever. 
+ pub identifier: [u8; 16], + /// The timestamp for when this file was last modified. Base 10 number + pub timestamp: [u8; 12], + /// The file's owner's id. Base 10 string number + pub owner_id: [u8; 6], + /// The file's group id. Base 10 string number + pub group_id: [u8; 6], + /// The file's permissions mode. Base 8 number number + pub mode: [u8; 8], + /// The size of this file. Base 10 string number + pub file_size: [u8; 10], + /// The file header's terminator, always `0x60 0x0A` + pub terminator: [u8; 2], +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Header<'a> { + pub name: &'a str, + pub size: usize, +} + +pub const SIZEOF_HEADER: usize = SIZEOF_FILE_IDENTIFER + 12 + 6 + 6 + 8 + SIZEOF_FILE_SIZE + 2; + +impl MemberHeader { + pub fn name(&self) -> Result<&str> { + Ok(self + .identifier + .pread_with::<&str>(0, ::scroll::ctx::StrCtx::Length(SIZEOF_FILE_IDENTIFER))?) + } + pub fn size(&self) -> Result<usize> { + match usize::from_str_radix( + self.file_size + .pread_with::<&str>(0, ::scroll::ctx::StrCtx::Length(self.file_size.len()))? + .trim_end(), + 10, + ) { + Ok(file_size) => Ok(file_size), + Err(err) => Err(Error::Malformed(format!( + "{:?} Bad file_size in header: {:?}", + err, self + ))), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +/// Represents a single entry in the archive +pub struct Member<'a> { + /// The entry header + pub header: Header<'a>, + /// File offset from the start of the archive to where the header begins + pub header_offset: u64, + /// File offset from the start of the archive to where the file begins + pub offset: u64, + /// BSD `ar` members store the filename separately + bsd_name: Option<&'a str>, + /// SysV `ar` members store the filename in a string table, a copy of which we hold here + sysv_name: Option<&'a str>, +} + +impl<'a> Member<'a> { + /// Tries to parse the header in `R`, as well as the offset in `R. + /// **NOTE** the Seek will be pointing at the first byte of whatever the file is, skipping padding. 
+ /// This is because just like members in the archive, the data section is 2-byte aligned. + pub fn parse(buffer: &'a [u8], offset: &mut usize) -> Result<Member<'a>> { + let header_offset = *offset; + let name = buffer.pread_with::<&str>( + *offset, + ::scroll::ctx::StrCtx::Length(SIZEOF_FILE_IDENTIFER), + )?; + let archive_header = buffer.gread::<MemberHeader>(offset)?; + let mut header = Header { + name, + size: archive_header.size()?, + }; + + // skip newline padding if we're on an uneven byte boundary + if *offset & 1 == 1 { + *offset += 1; + } + + let bsd_name = if let Some(len) = Self::bsd_filename_length(name) { + // there's a filename of length `len` right after the header + let name = buffer.pread_with::<&str>( + header_offset + SIZEOF_HEADER, + ::scroll::ctx::StrCtx::Length(len), + )?; + + // adjust the offset and size accordingly + if header.size > len { + *offset = header_offset + SIZEOF_HEADER + len; + header.size -= len; + + // the name may have trailing NULs which we don't really want to keep + Some(name.trim_end_matches('\0')) + } else { + None + } + } else { + None + }; + + Ok(Member { + header, + header_offset: header_offset as u64, + offset: *offset as u64, + bsd_name, + sysv_name: None, + }) + } + + /// The size of the Member's content, in bytes. Does **not** include newline padding, + /// nor the size of the file header. 
+ pub fn size(&self) -> usize { + self.header.size + } + + /// Parse `#1/123` as `Some(123)` + fn bsd_filename_length(name: &str) -> Option<usize> { + use core::str::FromStr; + + if let Some(name) = name.strip_prefix("#1/") { + let trimmed_name = name.trim_end_matches(' '); + if let Ok(len) = usize::from_str(trimmed_name) { + Some(len) + } else { + None + } + } else { + None + } + } + + /// The member name, accounting for SysV and BSD `ar` filename extensions + pub fn extended_name(&self) -> &'a str { + if let Some(bsd_name) = self.bsd_name { + bsd_name + } else if let Some(ref sysv_name) = self.sysv_name { + sysv_name + } else { + self.header.name.trim_end_matches(' ').trim_end_matches('/') + } + } + + /// The untrimmed raw member name, i.e., includes right-aligned space padding and `'/'` end-of-string + /// identifier + pub fn raw_name(&self) -> &'a str { + self.header.name + } +} + +#[derive(Debug, Default)] +/// The special index member signified by the name `'/'`. +/// The data element contains a list of symbol indexes and symbol names, giving their offsets +/// into the archive for a given name. +pub struct Index<'a> { + /// Big Endian number of symbol_indexes and strings + pub size: usize, + /// Big Endian u32 index into the archive for this symbol (index in array is the index into the string table) + pub symbol_indexes: Vec<u32>, + /// Set of zero-terminated strings indexed by above. Number of strings = `self.size` + pub strtab: Vec<&'a str>, +} + +/// SysV Archive Variant Symbol Lookup Table "Magic" Name +const INDEX_NAME: &str = "/ "; +/// SysV Archive Variant Extended Filename String Table Name +const NAME_INDEX_NAME: &str = "// "; +/// BSD symbol definitions +const BSD_SYMDEF_NAME: &str = "__.SYMDEF"; +const BSD_SYMDEF_SORTED_NAME: &str = "__.SYMDEF SORTED"; + +impl<'a> Index<'a> { + /// Parses the given byte buffer into an Index. 
NB: the buffer must be the start of the index + pub fn parse_sysv_index(buffer: &'a [u8]) -> Result<Self> { + let offset = &mut 0; + let sizeof_table = buffer.gread_with::<u32>(offset, scroll::BE)? as usize; + + if sizeof_table > buffer.len() / 4 { + return Err(Error::BufferTooShort(sizeof_table, "indices")); + } + + let mut indexes = Vec::with_capacity(sizeof_table); + for _ in 0..sizeof_table { + indexes.push(buffer.gread_with::<u32>(offset, scroll::BE)?); + } + let sizeof_strtab = buffer.len() - ((sizeof_table * 4) + 4); + let strtab = strtab::Strtab::parse(buffer, *offset, sizeof_strtab, 0x0)?; + Ok(Index { + size: sizeof_table, + symbol_indexes: indexes, + strtab: strtab.to_vec()?, // because i'm lazy + }) + } + + /// Parses the given byte buffer into an Index, in BSD style archives + pub fn parse_bsd_symdef(buffer: &'a [u8]) -> Result<Self> { + // `llvm-ar` is a suitable reference: + // https://github.com/llvm-mirror/llvm/blob/6ea9891f9310510c621be562d1c5cdfcf5575678/lib/Object/Archive.cpp#L842-L870 + + // BSD __.SYMDEF files look like: + // + // ┌─────────────┐ + // entries: │ # bytes │ + // ├─────────────┼─────────────┐ + // │ name offset │ .o offset │ + // ├─────────────┼─────────────┤ + // │ name offset │ .o offset │ + // ├─────────────┼─────────────┤ + // │ name offset │ .o offset │ + // ├─────────────┼─────────────┤ + // │ name offset │ .o offset │ + // ├─────────────┼─────────────┘ + // strings: │ # bytes │ + // ├─────────────┴───────────────────┐ + // │ _symbol\0 │ + // ├─────────────────────────────────┴─────────────────────┐ + // │ _longer_symbol\0 │ + // ├────────────────┬──────────────────────────────────────┘ + // │ _baz\0 │ + // ├────────────────┴───┐ + // │ _quxx\0 │ + // └────────────────────┘ + // + // All numeric values are u32s. Name offsets are relative to the start of the string table, + // and .o offsets are relative to the the start of the archive. 
+ + // Read the number of entries, which is at the start of the symdef (offset 0) + let entries_bytes = buffer.pread_with::<u32>(0, scroll::LE)? as usize; + let entries = entries_bytes / 8; + + // Set up the string table, the length of which is recorded after the entire entries table, + // (`entries_bytes + 4`), and which starts immediately after that (`entries_bytes + 8`). + let strtab_bytes = buffer.pread_with::<u32>(entries_bytes + 4, scroll::LE)? as usize; + let strtab = strtab::Strtab::parse(buffer, entries_bytes + 8, strtab_bytes, 0x0)?; + + if entries_bytes > buffer.len() { + return Err(Error::BufferTooShort(entries, "entries")); + } + + // build the index + let mut indexes = Vec::with_capacity(entries); + let mut strings = Vec::with_capacity(entries); + for i in 0..entries { + // The entries table starts after the original length value (offset 4), and each entry + // has two u32 values, making them 8 bytes long. + // + // Therefore, the `i`th entry starts at offset `(i*8)+4`. The first u32 is at that + // address, and the second u32 follows 4 bytes later. 
+ let string_offset: u32 = buffer.pread_with(i * 8 + 4, scroll::LE)?; + let archive_member: u32 = buffer.pread_with(i * 8 + 8, scroll::LE)?; + + let string = match strtab.get_at(string_offset as usize) { + Some(result) => Ok(result), + None => Err(Error::Malformed(format!( + "{} entry {} has string offset {}, which is out of bounds", + BSD_SYMDEF_NAME, i, string_offset + ))), + }?; + + indexes.push(archive_member); + strings.push(string); + } + + Ok(Index { + size: entries, + symbol_indexes: indexes, + strtab: strings, + }) + } + + // Parses Windows Second Linker Member: + // number of members (m): 4 + // member offsets: 4 * m + // number of symbols (n): 4 + // symbol member indexes: 2 * n + // followed by SysV-style string table + // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#first-linker-member + pub fn parse_windows_linker_member(buffer: &'a [u8]) -> Result<Self> { + let offset = &mut 0; + let members = buffer.gread_with::<u32>(offset, scroll::LE)? as usize; + + if members > buffer.len() / 4 { + return Err(Error::BufferTooShort(members, "members")); + } + + let mut member_offsets = Vec::with_capacity(members); + for _ in 0..members { + member_offsets.push(buffer.gread_with::<u32>(offset, scroll::LE)?); + } + + let symbols = buffer.gread_with::<u32>(offset, scroll::LE)? as usize; + + if symbols > buffer.len() / 2 { + return Err(Error::BufferTooShort(symbols, "symbols")); + } + + let mut symbol_offsets = Vec::with_capacity(symbols); + for _ in 0..symbols { + if let Some(symbol_offset) = + member_offsets.get(buffer.gread_with::<u16>(offset, scroll::LE)? 
as usize - 1) + { + symbol_offsets.push(*symbol_offset); + } else { + return Err(Error::BufferTooShort(members, "members")); + } + } + let strtab = strtab::Strtab::parse(buffer, *offset, buffer.len() - *offset, 0x0)?; + Ok(Index { + size: symbols, + symbol_indexes: symbol_offsets, + strtab: strtab.to_vec()?, + }) + } +} + +/// Member names greater than 16 bytes are indirectly referenced using a `/<idx` schema, +/// where `idx` is an offset into a newline delimited string table directly following the `//` member +/// of the archive. +#[derive(Debug, Default)] +struct NameIndex<'a> { + strtab: strtab::Strtab<'a>, +} + +impl<'a> NameIndex<'a> { + pub fn parse(buffer: &'a [u8], offset: &mut usize, size: usize) -> Result<NameIndex<'a>> { + // This is a total hack, because strtab returns "" if idx == 0, need to change + // but previous behavior might rely on this, as ELF strtab's have "" at 0th index... + let hacked_size = size + 1; + let strtab = strtab::Strtab::parse(buffer, *offset - 1, hacked_size, b'\n')?; + // precious time was lost when refactoring because strtab::parse doesn't update the mutable seek... + *offset += hacked_size - 2; + Ok(NameIndex { strtab }) + } + + pub fn get(&self, name: &str) -> Result<&'a str> { + let idx = name.trim_start_matches('/').trim_end(); + match usize::from_str_radix(idx, 10) { + Ok(idx) => { + let name = match self.strtab.get_at(idx + 1) { + Some(result) => Ok(result), + None => Err(Error::Malformed(format!( + "Name {} is out of range in archive NameIndex", + name + ))), + }?; + + if name != "" { + Ok(name.trim_end_matches('/')) + } else { + Err(Error::Malformed(format!( + "Could not find {:?} in index", + name + ))) + } + } + Err(_) => Err(Error::Malformed(format!( + "Bad name index {:?} in index", + name + ))), + } + } +} + +#[derive(Debug, PartialEq)] +/// The type of symbol index can be present in an archive. Can serve as an indication of the +/// archive format. +pub enum IndexType { + /// No symbol index present. 
+ None, + /// SystemV/GNU style symbol index, used on Windows as well. + SysV, + /// Windows specific extension of SysV symbol index, so called Second Linker Member. Has the + /// same member name as SysV symbol index but different structure. + Windows, + /// BSD style symbol index. + BSD, +} + +// TODO: add pretty printer fmt::Display with number of members, and names of members, along with +// the values of the index symbols once implemented +#[derive(Debug)] +/// An in-memory representation of a parsed Unix Archive +pub struct Archive<'a> { + // The array of members, which are indexed by the members hash and symbol index. + // These are in the same order they are found in the file. + member_array: Vec<Member<'a>>, + // file name -> member + members: BTreeMap<&'a str, usize>, + // symbol -> member + symbol_index: BTreeMap<&'a str, usize>, +} + +impl<'a> Archive<'a> { + pub fn parse(buffer: &'a [u8]) -> Result<Archive<'a>> { + let mut magic = [0u8; SIZEOF_MAGIC]; + let offset = &mut 0usize; + buffer.gread_inout(offset, &mut magic)?; + if &magic != MAGIC { + return Err(Error::BadMagic(magic.pread(0)?)); + } + let mut member_array = Vec::new(); + let mut index = Index::default(); + let mut index_type = IndexType::None; + let mut sysv_name_index = NameIndex::default(); + while *offset + 1 < buffer.len() { + // realign the cursor to a word boundary, if it's not on one already + if *offset & 1 == 1 { + *offset += 1; + } + + let member = Member::parse(buffer, offset)?; + + // advance to the next record + *offset = member.offset as usize + member.size() as usize; + + let name = member.raw_name(); + if name == INDEX_NAME { + let data: &[u8] = buffer.pread_with(member.offset as usize, member.size())?; + index = match index_type { + IndexType::None => { + index_type = IndexType::SysV; + Index::parse_sysv_index(data)? 
+ } + IndexType::SysV => { + index_type = IndexType::Windows; + // second symbol index is Microsoft's extension of SysV format + Index::parse_windows_linker_member(data)? + } + IndexType::BSD => { + return Err(Error::Malformed("SysV index occurs after BSD index".into())) + } + IndexType::Windows => { + return Err(Error::Malformed( + "More than two Windows Linker members".into(), + )) + } + } + } else if member.bsd_name == Some(BSD_SYMDEF_NAME) + || member.bsd_name == Some(BSD_SYMDEF_SORTED_NAME) + { + if index_type != IndexType::None { + return Err(Error::Malformed("BSD index occurs after SysV index".into())); + } + index_type = IndexType::BSD; + let data: &[u8] = buffer.pread_with(member.offset as usize, member.size())?; + index = Index::parse_bsd_symdef(data)?; + } else if name == NAME_INDEX_NAME { + let mut name_index_offset: usize = member.offset as usize; + sysv_name_index = NameIndex::parse(buffer, &mut name_index_offset, member.size())?; + } else { + // record this as an archive member + member_array.push(member); + } + } + + // preprocess member names + let mut members = BTreeMap::new(); + let mut member_index_by_offset: BTreeMap<u32, usize> = BTreeMap::new(); + for (i, member) in member_array.iter_mut().enumerate() { + // copy in any SysV extended names + if let Ok(sysv_name) = sysv_name_index.get(member.raw_name()) { + member.sysv_name = Some(sysv_name); + } + + // build a hashmap by extended name + let key = member.extended_name(); + members.insert(key, i); + + // build a hashmap translating archive offset into member index + member_index_by_offset.insert(member.header_offset as u32, i); + } + + // build the symbol index, translating symbol names into member indexes + let mut symbol_index: BTreeMap<&str, usize> = BTreeMap::new(); + for (member_offset, name) in index.symbol_indexes.iter().zip(index.strtab.iter()) { + let member_index = *member_index_by_offset.get(member_offset).ok_or_else(|| { + Error::Malformed(format!( + "Could not get member {:?} at 
offset: {}", + name, member_offset + )) + })?; + symbol_index.insert(&name, member_index); + } + + Ok(Archive { + member_array, + members, + symbol_index, + }) + } + + /// Get the member named `member` in this archive, if any. If there are + /// multiple files in the archive with the same name it only returns one + /// of them. + pub fn get(&self, member: &str) -> Option<&Member> { + if let Some(idx) = self.members.get(member) { + Some(&self.member_array[*idx]) + } else { + None + } + } + + /// Get the member at position `index` in this archive, if any. + pub fn get_at(&self, index: usize) -> Option<&Member> { + self.member_array.get(index) + } + + /// Return the number of archive members. + pub fn len(&self) -> usize { + self.member_array.len() + } + + /// Returns a slice of the raw bytes for the given `member` in the scrollable `buffer` + pub fn extract<'b>(&self, member: &str, buffer: &'b [u8]) -> Result<&'b [u8]> { + if let Some(member) = self.get(member) { + let bytes = buffer.pread_with(member.offset as usize, member.size())?; + Ok(bytes) + } else { + Err(Error::Malformed(format!( + "Cannot extract member {:?}", + member + ))) + } + } + + /// Gets a summary of this archive, returning a list of membername, the member, and the list of symbols the member contains + pub fn summarize(&self) -> Vec<(&str, &Member, Vec<&'a str>)> { + // build a result array, with indexes matching the member indexes + let mut result = self + .member_array + .iter() + .map(|ref member| (member.extended_name(), *member, Vec::new())) + .collect::<Vec<_>>(); + + // walk the symbol index once, adding each symbol to the appropriate result Vec + for (symbol_name, member_index) in self.symbol_index.iter() { + result[*member_index].2.push(*symbol_name); + } + + result + } + + /// Get the list of member names in this archive + /// + /// This returns members in alphabetical order, not in the order they + /// occurred in the archive. 
If there are multiple files with the same + /// name, the size of the returned array will be less than the size of + /// `len()`. + pub fn members(&self) -> Vec<&'a str> { + self.members.keys().cloned().collect() + } + + /// Returns the member's name which contains the given `symbol`, if it is in the archive + pub fn member_of_symbol(&self, symbol: &str) -> Option<&'a str> { + if let Some(idx) = self.symbol_index.get(symbol) { + Some(self.member_array[*idx].extended_name()) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_member_bsd_filename_length() { + // non-BSD names should fall through + assert_eq!(Member::bsd_filename_length(""), None); + assert_eq!(Member::bsd_filename_length("123"), None); + assert_eq!(Member::bsd_filename_length("#1"), None); + assert_eq!(Member::bsd_filename_length("#1/"), None); + assert_eq!(Member::bsd_filename_length("#2/1"), None); + assert_eq!(Member::bsd_filename_length(INDEX_NAME), None); + assert_eq!(Member::bsd_filename_length(NAME_INDEX_NAME), None); + assert_eq!(Member::bsd_filename_length("👺"), None); + + // #1/<len> should be parsed as Some(len), with or without whitespace + assert_eq!(Member::bsd_filename_length("#1/1"), Some(1)); + assert_eq!(Member::bsd_filename_length("#1/22"), Some(22)); + assert_eq!(Member::bsd_filename_length("#1/333"), Some(333)); + assert_eq!(Member::bsd_filename_length("#1/1 "), Some(1)); + assert_eq!(Member::bsd_filename_length("#1/22 "), Some(22)); + assert_eq!(Member::bsd_filename_length("#1/333 "), Some(333)); + + // #!/<len><trailing garbage> should be None + assert_eq!(Member::bsd_filename_length("#1/1A"), None); + assert_eq!(Member::bsd_filename_length("#1/1 A"), None); + } +} diff --git a/third_party/rust/goblin/src/elf/compression_header.rs b/third_party/rust/goblin/src/elf/compression_header.rs new file mode 100644 index 0000000000..1dd7aa1362 --- /dev/null +++ b/third_party/rust/goblin/src/elf/compression_header.rs @@ -0,0 +1,275 @@ 
// Expands to the trait impls shared by the 32-bit and 64-bit `CompressionHeader` structs.
// Must be invoked in a module that defines a struct named `CompressionHeader` with
// `ch_type`, `ch_size` and `ch_addralign` fields.
macro_rules! elf_compression_header {
    () => {
        use plain;
        // Declare that this is a plain type.
        unsafe impl plain::Plain for CompressionHeader {}

        // Hand-written Debug so that the size and alignment are printed in hexadecimal.
        impl ::core::fmt::Debug for CompressionHeader {
            fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
                f.debug_struct("CompressionHeader")
                    .field("ch_type", &self.ch_type)
                    .field("ch_size", &format_args!("0x{:x}", self.ch_size))
                    .field("ch_addralign", &format_args!("0x{:x}", self.ch_addralign))
                    .finish()
            }
        }
    };
}

/// ZLIB/DEFLATE algorithm.
pub const ELFCOMPRESS_ZLIB: u32 = 1;
/// Start of OS-specific.
pub const ELFCOMPRESS_LOOS: u32 = 0x6000_0000;
/// End of OS-specific.
pub const ELFCOMPRESS_HIOS: u32 = 0x6fff_ffff;
/// Start of processor-specific.
pub const ELFCOMPRESS_LOPROC: u32 = 0x7000_0000;
/// End of processor-specific.
pub const ELFCOMPRESS_HIPROC: u32 = 0x7fff_ffff;

// Expands to the size test, the conversion into the unified header, and the byte/file
// constructors for a width-specific `CompressionHeader`. Must be invoked in a module that
// defines `CompressionHeader` and `SIZEOF_CHDR`.
// NOTE(review): the `$size` argument is accepted but not referenced anywhere in the expansion.
macro_rules! elf_compression_header_std_impl {
    ($size:ty) => {
        #[cfg(test)]
        mod tests {
            use super::*;
            // Guards against `SIZEOF_CHDR` drifting from the actual struct layout.
            #[test]
            fn size_of() {
                assert_eq!(::std::mem::size_of::<CompressionHeader>(), SIZEOF_CHDR);
            }
        }

        if_alloc! {
            use crate::elf::compression_header::CompressionHeader as ElfCompressionHeader;

            use plain::Plain;

            if_std! {
                use crate::error::Result;

                use std::fs::File;
                use std::io::{Read, Seek};
                use std::io::SeekFrom::Start;
            }

            // Widening conversion into the unified header; `u64::from` makes it lossless.
            impl From<CompressionHeader> for ElfCompressionHeader {
                fn from(ch: CompressionHeader) -> Self {
                    ElfCompressionHeader {
                        ch_type: ch.ch_type,
                        ch_size: u64::from(ch.ch_size),
                        ch_addralign: u64::from(ch.ch_addralign),
                    }
                }
            }

            impl CompressionHeader {
                // Builds a header by copying raw bytes over a default-initialized value.
                // Panics (via `expect`) when `copy_from_bytes` reports an error, i.e. when
                // `bytes` is too short for the header.
                pub fn from_bytes(bytes: &[u8]) -> CompressionHeader {
                    let mut chdr = CompressionHeader::default();
                    chdr.copy_from_bytes(bytes).expect("buffer is too short for header");
                    chdr
                }

                // Seeks `fd` to `offset` and reads exactly one header's worth of bytes
                // straight into the struct's memory; I/O failures are returned as errors.
                #[cfg(feature = "std")]
                pub fn from_fd(fd: &mut File, offset: u64) -> Result<CompressionHeader> {
                    let mut chdr = CompressionHeader::default();
                    fd.seek(Start(offset))?;
                    // NOTE(review): presumably sound because the header is declared `Plain`
                    // by `elf_compression_header!` — confirm against the `plain` crate docs.
                    unsafe {
                        fd.read_exact(plain::as_mut_bytes(&mut chdr))?;
                    }
                    Ok(chdr)
                }
            }
        } // end if_alloc
    };
}

// The derive macros are only needed (and only derived, see `cfg_attr` on the structs) when
// the `alloc` feature is enabled.
#[cfg(feature = "alloc")]
use scroll::{Pread, Pwrite, SizeWith};
{ + impl From<ElfCompressionHeader> for CompressionHeader { + fn from(ch: ElfCompressionHeader) -> Self { + CompressionHeader { + ch_type: ch.ch_type, + ch_size: ch.ch_size as u32, + ch_addralign: ch.ch_addralign as u32, + } + } + } + } +} + +pub mod compression_header64 { + pub use crate::elf::compression_header::*; + + #[repr(C)] + #[derive(Copy, Clone, Eq, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// The compression header is used at the start of SHF_COMPRESSED sections + pub struct CompressionHeader { + /// Compression format + pub ch_type: u32, + pub ch_reserved: u32, + /// Uncompressed data size + pub ch_size: u64, + /// Uncompressed data alignment + pub ch_addralign: u64, + } + + elf_compression_header!(); + + pub const SIZEOF_CHDR: usize = 24; + + elf_compression_header_std_impl!(u64); + + if_alloc! { + impl From<ElfCompressionHeader> for CompressionHeader { + fn from(ch: ElfCompressionHeader) -> Self { + CompressionHeader { + ch_type: ch.ch_type, + ch_reserved: 0, + ch_size: ch.ch_size as u64, + ch_addralign: ch.ch_addralign as u64, + } + } + } + } +} + +/////////////////////////////// +// Std/analysis/Unified Structs +/////////////////////////////// + +if_alloc! 
{ + #[cfg(feature = "endian_fd")] + use crate::error; + use core::fmt; + use core::result; + use scroll::ctx; + use crate::container::{Container, Ctx}; + + #[derive(Default, PartialEq, Clone)] + /// A unified CompressionHeader - convertable to and from 32-bit and 64-bit variants + pub struct CompressionHeader { + /// Compression format + pub ch_type: u32, + /// Uncompressed data size + pub ch_size: u64, + /// Uncompressed data alignment + pub ch_addralign: u64, + } + + impl CompressionHeader { + /// Return the size of the underlying compression header, given a `container` + #[inline] + pub fn size(ctx: Ctx) -> usize { + use scroll::ctx::SizeWith; + Self::size_with(&ctx) + } + pub fn new() -> Self { + CompressionHeader { + ch_type: 0, + ch_size: 0, + ch_addralign: 2 << 8, + } + } + /// Parse a compression header from `bytes` at `offset`, using the given `ctx` + #[cfg(feature = "endian_fd")] + pub fn parse(bytes: &[u8], mut offset: usize, ctx: Ctx) -> error::Result<CompressionHeader> { + use scroll::Pread; + bytes.gread_with(&mut offset, ctx) + } + } + + impl fmt::Debug for CompressionHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("CompressionHeader") + .field("ch_type", &self.ch_type) + .field("ch_size", &format_args!("0x{:x}", self.ch_size)) + .field("ch_addralign", &format_args!("0x{:x}", self.ch_addralign)) + .finish() + } + } + + impl ctx::SizeWith<Ctx> for CompressionHeader { + fn size_with( &Ctx { container, .. 
}: &Ctx) -> usize { + match container { + Container::Little => { + compression_header32::SIZEOF_CHDR + }, + Container::Big => { + compression_header64::SIZEOF_CHDR + }, + } + } + } + + impl<'a> ctx::TryFromCtx<'a, Ctx> for CompressionHeader { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], Ctx {container, le}: Ctx) -> result::Result<(Self, usize), Self::Error> { + use scroll::Pread; + let res = match container { + Container::Little => { + (bytes.pread_with::<compression_header32::CompressionHeader>(0, le)?.into(), compression_header32::SIZEOF_CHDR) + }, + Container::Big => { + (bytes.pread_with::<compression_header64::CompressionHeader>(0, le)?.into(), compression_header64::SIZEOF_CHDR) + } + }; + Ok(res) + } + } + + impl ctx::TryIntoCtx<Ctx> for CompressionHeader { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result<usize, Self::Error> { + use scroll::Pwrite; + match container { + Container::Little => { + let chdr: compression_header32::CompressionHeader = self.into(); + Ok(bytes.pwrite_with(chdr, 0, le)?) + }, + Container::Big => { + let chdr: compression_header64::CompressionHeader = self.into(); + Ok(bytes.pwrite_with(chdr, 0, le)?) 
+ } + } + } + } + impl ctx::IntoCtx<Ctx> for CompressionHeader { + fn into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) { + use scroll::Pwrite; + match container { + Container::Little => { + let chdr: compression_header32::CompressionHeader = self.into(); + bytes.pwrite_with(chdr, 0, le).unwrap(); + }, + Container::Big => { + let chdr: compression_header64::CompressionHeader = self.into(); + bytes.pwrite_with(chdr, 0, le).unwrap(); + } + } + } + } +} // end if_alloc diff --git a/third_party/rust/goblin/src/elf/constants_header.rs b/third_party/rust/goblin/src/elf/constants_header.rs new file mode 100644 index 0000000000..9b19c84c2c --- /dev/null +++ b/third_party/rust/goblin/src/elf/constants_header.rs @@ -0,0 +1,563 @@ +// sweet emacs regexp +// pub const \([[:word:]|_]*\)[[:space:]]*\([[:digit:]]+\)[[:space:]]*/\*\(.*\) \*/ +// \\\\3 C-q C-j pub const \1: u32 = \2; + +/// TODO: use Enum with explicit discriminant and get debug printer for free? + +/// No machine +pub const EM_NONE: u16 = 0; +/// AT&T WE 32100 +pub const EM_M32: u16 = 1; +/// SUN SPARC +pub const EM_SPARC: u16 = 2; +/// Intel 80386 +pub const EM_386: u16 = 3; +/// Motorola m68k family +pub const EM_68K: u16 = 4; +/// Motorola m88k family +pub const EM_88K: u16 = 5; +/// Intel MCU +pub const EM_IAMCU: u16 = 6; +/// Intel 80860 +pub const EM_860: u16 = 7; +/// MIPS R3000 big-endian +pub const EM_MIPS: u16 = 8; +/// IBM System/370 +pub const EM_S370: u16 = 9; +/// MIPS R3000 little-endian +pub const EM_MIPS_RS3_LE: u16 = 10; +// reserved 11-14 +/// HPPA +pub const EM_PARISC: u16 = 15; +// reserved 16 +/// Fujitsu VPP500 +pub const EM_VPP500: u16 = 17; +/// Sun's "v8plus" +pub const EM_SPARC32PLUS: u16 = 18; +/// Intel 80960 +pub const EM_960: u16 = 19; +/// PowerPC +pub const EM_PPC: u16 = 20; +/// PowerPC 64-bit +pub const EM_PPC64: u16 = 21; +/// IBM S390 +pub const EM_S390: u16 = 22; +/// IBM SPU/SPC +pub const EM_SPU: u16 = 23; +// reserved 24-35 +/// NEC V800 series +pub const EM_V800: 
u16 = 36; +/// Fujitsu FR20 +pub const EM_FR20: u16 = 37; +/// TRW RH-32 +pub const EM_RH32: u16 = 38; +/// Motorola RCE +pub const EM_RCE: u16 = 39; +/// ARM +pub const EM_ARM: u16 = 40; +/// Digital Alpha +pub const EM_FAKE_ALPHA: u16 = 41; +/// Hitachi SH +pub const EM_SH: u16 = 42; +/// SPARC v9 64-bit +pub const EM_SPARCV9: u16 = 43; +/// Siemens Tricore +pub const EM_TRICORE: u16 = 44; +/// Argonaut RISC Core +pub const EM_ARC: u16 = 45; +/// Hitachi H8/300 +pub const EM_H8_300: u16 = 46; +/// Hitachi H8/300H +pub const EM_H8_300H: u16 = 47; +/// Hitachi H8S +pub const EM_H8S: u16 = 48; +/// Hitachi H8/500 +pub const EM_H8_500: u16 = 49; +/// Intel Merced +pub const EM_IA_64: u16 = 50; +/// Stanford MIPS-X +pub const EM_MIPS_X: u16 = 51; +/// Motorola Coldfire +pub const EM_COLDFIRE: u16 = 52; +/// Motorola M68HC12 +pub const EM_68HC12: u16 = 53; +/// Fujitsu MMA Multimedia Accelerator +pub const EM_MMA: u16 = 54; +/// Siemens PCP +pub const EM_PCP: u16 = 55; +/// Sony nCPU embeeded RISC +pub const EM_NCPU: u16 = 56; +/// Denso NDR1 microprocessor +pub const EM_NDR1: u16 = 57; +/// Motorola Start*Core processor +pub const EM_STARCORE: u16 = 58; +/// Toyota ME16 processor +pub const EM_ME16: u16 = 59; +/// STMicroelectronic ST100 processor +pub const EM_ST100: u16 = 60; +/// Advanced Logic Corp. 
Tinyj emb.fam +pub const EM_TINYJ: u16 = 61; +/// AMD x86-64 architecture +pub const EM_X86_64: u16 = 62; +/// Sony DSP Processor +pub const EM_PDSP: u16 = 63; +/// Digital PDP-10 +pub const EM_PDP10: u16 = 64; +/// Digital PDP-11 +pub const EM_PDP11: u16 = 65; +/// Siemens FX66 microcontroller +pub const EM_FX66: u16 = 66; +/// STMicroelectronics ST9+ 8/16 mc +pub const EM_ST9PLUS: u16 = 67; +/// STmicroelectronics ST7 8 bit mc +pub const EM_ST7: u16 = 68; +/// Motorola MC68HC16 microcontroller +pub const EM_68HC16: u16 = 69; +/// Motorola MC68HC11 microcontroller +pub const EM_68HC11: u16 = 70; +/// Motorola MC68HC08 microcontroller +pub const EM_68HC08: u16 = 71; +/// Motorola MC68HC05 microcontroller +pub const EM_68HC05: u16 = 72; +/// Silicon Graphics SVx +pub const EM_SVX: u16 = 73; +/// STMicroelectronics ST19 8 bit mc +pub const EM_ST19: u16 = 74; +/// Digital VAX +pub const EM_VAX: u16 = 75; +/// Axis Communications 32-bit emb.proc +pub const EM_CRIS: u16 = 76; +/// Infineon Technologies 32-bit emb.proc +pub const EM_JAVELIN: u16 = 77; +/// Element 14 64-bit DSP Processor +pub const EM_FIREPATH: u16 = 78; +/// LSI Logic 16-bit DSP Processor +pub const EM_ZSP: u16 = 79; +/// Donald Knuth's educational 64-bit proc +pub const EM_MMIX: u16 = 80; +/// Harvard University machine-independent object files +pub const EM_HUANY: u16 = 81; +/// SiTera Prism +pub const EM_PRISM: u16 = 82; +/// Atmel AVR 8-bit microcontroller +pub const EM_AVR: u16 = 83; +/// Fujitsu FR30 +pub const EM_FR30: u16 = 84; +/// Mitsubishi D10V +pub const EM_D10V: u16 = 85; +/// Mitsubishi D30V +pub const EM_D30V: u16 = 86; +/// NEC v850 +pub const EM_V850: u16 = 87; +/// Mitsubishi M32R +pub const EM_M32R: u16 = 88; +/// Matsushita MN10300 +pub const EM_MN10300: u16 = 89; +/// Matsushita MN10200 +pub const EM_MN10200: u16 = 90; +/// picoJava +pub const EM_PJ: u16 = 91; +/// OpenRISC 32-bit embedded processor +pub const EM_OPENRISC: u16 = 92; +/// ARC International ARCompact +pub const 
EM_ARC_COMPACT: u16 = 93; +/// Tensilica Xtensa Architecture +pub const EM_XTENSA: u16 = 94; +/// Alphamosaic VideoCore +pub const EM_VIDEOCORE: u16 = 95; +/// Thompson Multimedia General Purpose Proc +pub const EM_TMM_GPP: u16 = 96; +/// National Semi. 32000 +pub const EM_NS32K: u16 = 97; +/// Tenor Network TPC +pub const EM_TPC: u16 = 98; +/// Trebia SNP 1000 +pub const EM_SNP1K: u16 = 99; +/// STMicroelectronics ST200 +pub const EM_ST200: u16 = 100; +/// Ubicom IP2xxx +pub const EM_IP2K: u16 = 101; +/// MAX processor +pub const EM_MAX: u16 = 102; +/// National Semi. CompactRISC +pub const EM_CR: u16 = 103; +/// Fujitsu F2MC16 +pub const EM_F2MC16: u16 = 104; +/// Texas Instruments msp430 +pub const EM_MSP430: u16 = 105; +/// Analog Devices Blackfin DSP +pub const EM_BLACKFIN: u16 = 106; +/// Seiko Epson S1C33 family +pub const EM_SE_C33: u16 = 107; +/// Sharp embedded microprocessor +pub const EM_SEP: u16 = 108; +/// Arca RISC +pub const EM_ARCA: u16 = 109; +/// PKU-Unity & MPRC Peking Uni. mc series +pub const EM_UNICORE: u16 = 110; +/// eXcess configurable cpu +pub const EM_EXCESS: u16 = 111; +/// Icera Semi. Deep Execution Processor +pub const EM_DXP: u16 = 112; +/// Altera Nios II +pub const EM_ALTERA_NIOS2: u16 = 113; +/// National Semi. 
CompactRISC CRX +pub const EM_CRX: u16 = 114; +/// Motorola XGATE +pub const EM_XGATE: u16 = 115; +/// Infineon C16x/XC16x +pub const EM_C166: u16 = 116; +/// Renesas M16C +pub const EM_M16C: u16 = 117; +/// Microchip Technology dsPIC30F +pub const EM_DSPIC30F: u16 = 118; +/// Freescale Communication Engine RISC +pub const EM_CE: u16 = 119; +/// Renesas M32C +pub const EM_M32C: u16 = 120; +// reserved 121-130 +/// Altium TSK3000 +pub const EM_TSK3000: u16 = 131; +/// Freescale RS08 +pub const EM_RS08: u16 = 132; +/// Analog Devices SHARC family +pub const EM_SHARC: u16 = 133; +/// Cyan Technology eCOG2 +pub const EM_ECOG2: u16 = 134; +/// Sunplus S+core7 RISC +pub const EM_SCORE7: u16 = 135; +/// New Japan Radio (NJR) 24-bit DSP +pub const EM_DSP24: u16 = 136; +/// Broadcom VideoCore III +pub const EM_VIDEOCORE3: u16 = 137; +/// RISC for Lattice FPGA +pub const EM_LATTICEMICO32: u16 = 138; +/// Seiko Epson C17 +pub const EM_SE_C17: u16 = 139; +/// Texas Instruments TMS320C6000 DSP +pub const EM_TI_C6000: u16 = 140; +/// Texas Instruments TMS320C2000 DSP +pub const EM_TI_C2000: u16 = 141; +/// Texas Instruments TMS320C55x DSP +pub const EM_TI_C5500: u16 = 142; +/// Texas Instruments App. Specific RISC +pub const EM_TI_ARP32: u16 = 143; +/// Texas Instruments Prog. Realtime Unit +pub const EM_TI_PRU: u16 = 144; +// reserved 145-159 +/// STMicroelectronics 64bit VLIW DSP +pub const EM_MMDSP_PLUS: u16 = 160; +/// Cypress M8C +pub const EM_CYPRESS_M8C: u16 = 161; +/// Renesas R32C +pub const EM_R32C: u16 = 162; +/// NXP Semi. TriMedia +pub const EM_TRIMEDIA: u16 = 163; +/// QUALCOMM DSP6 +pub const EM_QDSP6: u16 = 164; +/// Intel 8051 and variants +pub const EM_8051: u16 = 165; +/// STMicroelectronics STxP7x +pub const EM_STXP7X: u16 = 166; +/// Andes Tech. compact code emb. RISC +pub const EM_NDS32: u16 = 167; +/// Cyan Technology eCOG1X +pub const EM_ECOG1X: u16 = 168; +/// Dallas Semi. 
MAXQ30 mc +pub const EM_MAXQ30: u16 = 169; +/// New Japan Radio (NJR) 16-bit DSP +pub const EM_XIMO16: u16 = 170; +/// M2000 Reconfigurable RISC +pub const EM_MANIK: u16 = 171; +/// Cray NV2 vector architecture +pub const EM_CRAYNV2: u16 = 172; +/// Renesas RX +pub const EM_RX: u16 = 173; +/// Imagination Tech. META +pub const EM_METAG: u16 = 174; +/// MCST Elbrus +pub const EM_MCST_ELBRUS: u16 = 175; +/// Cyan Technology eCOG16 +pub const EM_ECOG16: u16 = 176; +/// National Semi. CompactRISC CR16 +pub const EM_CR16: u16 = 177; +/// Freescale Extended Time Processing Unit +pub const EM_ETPU: u16 = 178; +/// Infineon Tech. SLE9X +pub const EM_SLE9X: u16 = 179; +/// Intel L10M +pub const EM_L10M: u16 = 180; +/// Intel K10M +pub const EM_K10M: u16 = 181; +// reserved 182 +/// ARM AARCH64 +pub const EM_AARCH64: u16 = 183; +// reserved 184 +/// Atmel 32-bit microprocessor +pub const EM_AVR32: u16 = 185; +/// STMicroelectronics STM8 +pub const EM_STM8: u16 = 186; +/// Tilera TILE64 +pub const EM_TILE64: u16 = 187; +/// Tilera TILEPro +pub const EM_TILEPRO: u16 = 188; +/// Xilinx MicroBlaze +pub const EM_MICROBLAZE: u16 = 189; +/// NVIDIA CUDA +pub const EM_CUDA: u16 = 190; +/// Tilera TILE-Gx +pub const EM_TILEGX: u16 = 191; +/// CloudShield +pub const EM_CLOUDSHIELD: u16 = 192; +/// KIPO-KAIST Core-A 1st gen. +pub const EM_COREA_1ST: u16 = 193; +/// KIPO-KAIST Core-A 2nd gen. 
+pub const EM_COREA_2ND: u16 = 194; +/// Synopsys ARCompact V2 +pub const EM_ARC_COMPACT2: u16 = 195; +/// Open8 RISC +pub const EM_OPEN8: u16 = 196; +/// Renesas RL78 +pub const EM_RL78: u16 = 197; +/// Broadcom VideoCore V +pub const EM_VIDEOCORE5: u16 = 198; +/// Renesas 78KOR +pub const EM_78KOR: u16 = 199; +/// Freescale 56800EX DSC +pub const EM_56800EX: u16 = 200; +/// Beyond BA1 +pub const EM_BA1: u16 = 201; +/// Beyond BA2 +pub const EM_BA2: u16 = 202; +/// XMOS xCORE +pub const EM_XCORE: u16 = 203; +/// Microchip 8-bit PIC(r) +pub const EM_MCHP_PIC: u16 = 204; +/// Intel Graphics Technology +pub const EM_INTELGT: u16 = 205; +// reserved 206-209 +/// KM211 KM32 +pub const EM_KM32: u16 = 210; +/// KM211 KMX32 +pub const EM_KMX32: u16 = 211; +/// KM211 KMX16 +pub const EM_EMX16: u16 = 212; +/// KM211 KMX8 +pub const EM_EMX8: u16 = 213; +/// KM211 KVARC +pub const EM_KVARC: u16 = 214; +/// Paneve CDP +pub const EM_CDP: u16 = 215; +/// Cognitive Smart Memory Processor +pub const EM_COGE: u16 = 216; +/// Bluechip CoolEngine +pub const EM_COOL: u16 = 217; +/// Nanoradio Optimized RISC +pub const EM_NORC: u16 = 218; +/// CSR Kalimba +pub const EM_CSR_KALIMBA: u16 = 219; +/// Zilog Z80 +pub const EM_Z80: u16 = 220; +/// Controls and Data Services VISIUMcore +pub const EM_VISIUM: u16 = 221; +/// FTDI Chip FT32 +pub const EM_FT32: u16 = 222; +/// Moxie processor +pub const EM_MOXIE: u16 = 223; +/// AMD GPU +pub const EM_AMDGPU: u16 = 224; +// reserved 225-242 +/// RISC-V +pub const EM_RISCV: u16 = 243; + +/// Linux BPF -- in-kernel virtual machine +pub const EM_BPF: u16 = 247; + +/// C-SKY +pub const EM_CSKY: u16 = 252; + +pub const EM_NUM: u16 = 248; + +/// Convert machine to str representation +pub fn machine_to_str (machine: u16) -> &'static str { + match machine { + EM_M32 => "M32", + EM_SPARC => "SPARC", + EM_386 => "386", + EM_68K => "68K", + EM_88K => "88K", + EM_IAMCU => "IAMCU", + EM_860 => "860", + EM_MIPS => "MIPS", + EM_S370 => "S370", + EM_MIPS_RS3_LE 
=> "MIPS_RS3_LE", + EM_PARISC => "PARISC", + EM_VPP500 => "VPP500", + EM_SPARC32PLUS => "SPARC32PLUS", + EM_960 => "960", + EM_PPC => "PPC", + EM_PPC64 => "PPC64", + EM_S390 => "S390", + EM_SPU => "SPU", + EM_V800 => "V800", + EM_FR20 => "FR20", + EM_RH32 => "RH32", + EM_RCE => "RCE", + EM_ARM => "ARM", + EM_FAKE_ALPHA => "FAKE_ALPHA", + EM_SH => "SH", + EM_SPARCV9 => "SPARCV9", + EM_TRICORE => "TRICORE", + EM_ARC => "ARC", + EM_H8_300 => "H8_300", + EM_H8_300H => "H8_300H", + EM_H8S => "H8S", + EM_H8_500 => "H8_500", + EM_IA_64 => "IA_64", + EM_MIPS_X => "MIPS_X", + EM_COLDFIRE => "COLDFIRE", + EM_68HC12 => "68HC12", + EM_MMA => "MMA", + EM_PCP => "PCP", + EM_NCPU => "NCPU", + EM_NDR1 => "NDR1", + EM_STARCORE => "STARCORE", + EM_ME16 => "ME16", + EM_ST100 => "ST100", + EM_TINYJ => "TINYJ", + EM_X86_64 => "X86_64", + EM_PDSP => "PDSP", + EM_PDP10 => "PDP10", + EM_PDP11 => "PDP11", + EM_FX66 => "FX66", + EM_ST9PLUS => "ST9PLUS", + EM_ST7 => "ST7", + EM_68HC16 => "68HC16", + EM_68HC11 => "68HC11", + EM_68HC08 => "68HC08", + EM_68HC05 => "68HC05", + EM_SVX => "SVX", + EM_ST19 => "ST19", + EM_VAX => "VAX", + EM_CRIS => "CRIS", + EM_JAVELIN => "JAVELIN", + EM_FIREPATH => "FIREPATH", + EM_ZSP => "ZSP", + EM_MMIX => "MMIX", + EM_HUANY => "HUANY", + EM_PRISM => "PRISM", + EM_AVR => "AVR", + EM_FR30 => "FR30", + EM_D10V => "D10V", + EM_D30V => "D30V", + EM_V850 => "V850", + EM_M32R => "M32R", + EM_MN10300 => "MN10300", + EM_MN10200 => "MN10200", + EM_PJ => "PJ", + EM_OPENRISC => "OPENRISC", + EM_ARC_COMPACT => "ARC_COMPACT", + EM_XTENSA => "XTENSA", + EM_VIDEOCORE => "VIDEOCORE", + EM_TMM_GPP => "TMM_GPP", + EM_NS32K => "NS32K", + EM_TPC => "TPC", + EM_SNP1K => "SNP1K", + EM_ST200 => "ST200", + EM_IP2K => "IP2K", + EM_MAX => "MAX", + EM_CR => "CR", + EM_F2MC16 => "F2MC16", + EM_MSP430 => "MSP430", + EM_BLACKFIN => "BLACKFIN", + EM_SE_C33 => "SE_C33", + EM_SEP => "SEP", + EM_ARCA => "ARCA", + EM_UNICORE => "UNICORE", + EM_EXCESS => "EXCESS", + EM_DXP => "DXP", + 
EM_ALTERA_NIOS2 => "ALTERA_NIOS2", + EM_CRX => "CRX", + EM_XGATE => "XGATE", + EM_C166 => "C166", + EM_M16C => "M16C", + EM_DSPIC30F => "DSPIC30F", + EM_CE => "CE", + EM_M32C => "M32C", + EM_TSK3000 => "TSK3000", + EM_RS08 => "RS08", + EM_SHARC => "SHARC", + EM_ECOG2 => "ECOG2", + EM_SCORE7 => "SCORE7", + EM_DSP24 => "DSP24", + EM_VIDEOCORE3 => "VIDEOCORE3", + EM_LATTICEMICO32 => "LATTICEMICO32", + EM_SE_C17 => "SE_C17", + EM_TI_C6000 => "TI_C6000", + EM_TI_C2000 => "TI_C2000", + EM_TI_C5500 => "TI_C5500", + EM_TI_ARP32 => "TI_ARP32", + EM_TI_PRU => "TI_PRU", + EM_MMDSP_PLUS => "MMDSP_PLUS", + EM_CYPRESS_M8C => "CYPRESS_M8C", + EM_R32C => "R32C", + EM_TRIMEDIA => "TRIMEDIA", + EM_QDSP6 => "QDSP6", + EM_8051 => "8051", + EM_STXP7X => "STXP7X", + EM_NDS32 => "NDS32", + EM_ECOG1X => "ECOG1X", + EM_MAXQ30 => "MAXQ30", + EM_XIMO16 => "XIMO16", + EM_MANIK => "MANIK", + EM_CRAYNV2 => "CRAYNV2", + EM_RX => "RX", + EM_METAG => "METAG", + EM_MCST_ELBRUS => "MCST_ELBRUS", + EM_ECOG16 => "ECOG16", + EM_CR16 => "CR16", + EM_ETPU => "ETPU", + EM_SLE9X => "SLE9X", + EM_L10M => "L10M", + EM_K10M => "K10M", + EM_AARCH64 => "AARCH64", + EM_AVR32 => "AVR32", + EM_STM8 => "STM8", + EM_TILE64 => "TILE64", + EM_TILEPRO => "TILEPRO", + EM_MICROBLAZE => "MICROBLAZE", + EM_CUDA => "CUDA", + EM_TILEGX => "TILEGX", + EM_CLOUDSHIELD => "CLOUDSHIELD", + EM_COREA_1ST => "COREA_1ST", + EM_COREA_2ND => "COREA_2ND", + EM_ARC_COMPACT2 => "ARC_COMPACT2", + EM_OPEN8 => "OPEN8", + EM_RL78 => "RL78", + EM_VIDEOCORE5 => "VIDEOCORE5", + EM_78KOR => "78KOR", + EM_56800EX => "56800EX", + EM_BA1 => "BA1", + EM_BA2 => "BA2", + EM_XCORE => "XCORE", + EM_MCHP_PIC => "MCHP_PIC", + EM_KM32 => "KM32", + EM_KMX32 => "KMX32", + EM_EMX16 => "EMX16", + EM_EMX8 => "EMX8", + EM_KVARC => "KVARC", + EM_CDP => "CDP", + EM_COGE => "COGE", + EM_COOL => "COOL", + EM_NORC => "NORC", + EM_CSR_KALIMBA => "CSR_KALIMBA", + EM_Z80 => "Z80", + EM_VISIUM => "VISIUM", + EM_FT32 => "FT32", + EM_MOXIE => "MOXIE", + EM_AMDGPU => 
"AMDGPU", + EM_RISCV => "RISCV", + EM_BPF => "BPF", + _val => "EM_UNKNOWN", + } +} diff --git a/third_party/rust/goblin/src/elf/constants_relocation.rs b/third_party/rust/goblin/src/elf/constants_relocation.rs new file mode 100644 index 0000000000..6339de5434 --- /dev/null +++ b/third_party/rust/goblin/src/elf/constants_relocation.rs @@ -0,0 +1,1417 @@ +// x86_64 relocations +/// No reloc. +pub const R_X86_64_NONE: u32 = 0; +/// Direct 64 bit. +pub const R_X86_64_64: u32 = 1; +/// PC relative 32 bit signed. +pub const R_X86_64_PC32: u32 = 2; +/// 32 bit GOT entry. +pub const R_X86_64_GOT32: u32 = 3; +/// 32 bit PLT address. +pub const R_X86_64_PLT32: u32 = 4; +/// Copy symbol at runtime. +pub const R_X86_64_COPY: u32 = 5; +/// Create GOT entry. +pub const R_X86_64_GLOB_DAT: u32 = 6; +/// Create PLT entry. +pub const R_X86_64_JUMP_SLOT: u32 = 7; +/// Adjust by program base. +pub const R_X86_64_RELATIVE: u32 = 8; +/// 32 bit signed PC relative offset to GOT. +pub const R_X86_64_GOTPCREL: u32 = 9; +/// Direct 32 bit zero extended. +pub const R_X86_64_32: u32 = 10; +/// Direct 32 bit sign extended. +pub const R_X86_64_32S: u32 = 11; +/// Direct 16 bit zero extended. +pub const R_X86_64_16: u32 = 12; +/// 16 bit sign extended pc relative. +pub const R_X86_64_PC16: u32 = 13; +/// Direct 8 bit sign extended. +pub const R_X86_64_8: u32 = 14; +/// 8 bit sign extended pc relative. +pub const R_X86_64_PC8: u32 = 15; +/// ID of module containing symbol. +pub const R_X86_64_DTPMOD64: u32 = 16; +/// Offset in module's TLS block. +pub const R_X86_64_DTPOFF64: u32 = 17; +/// Offset in initial TLS block. +pub const R_X86_64_TPOFF64: u32 = 18; +/// 32 bit signed PC relative offset to two GOT entries for GD symbol. +pub const R_X86_64_TLSGD: u32 = 19; +/// 32 bit signed PC relative offset to two GOT entries for LD symbol. +pub const R_X86_64_TLSLD: u32 = 20; +/// Offset in TLS block. 
+pub const R_X86_64_DTPOFF32: u32 = 21; +/// 32 bit signed PC relative offset to GOT entry for IE symbol. +pub const R_X86_64_GOTTPOFF: u32 = 22; +/// Offset in initial TLS block. +pub const R_X86_64_TPOFF32: u32 = 23; +/// PC relative 64 bit. +pub const R_X86_64_PC64: u32 = 24; +/// 64 bit offset to GOT. +pub const R_X86_64_GOTOFF64: u32 = 25; +/// 32 bit signed pc relative offset to GOT. +pub const R_X86_64_GOTPC32: u32 = 26; +/// 64-bit GOT entry offset. +pub const R_X86_64_GOT64: u32 = 27; +/// 64-bit PC relative offset to GOT entry. +pub const R_X86_64_GOTPCREL64: u32 = 28; +/// 64-bit PC relative offset to GOT. +pub const R_X86_64_GOTPC64: u32 = 29; +/// Like GOT64, says PLT entry needed. +pub const R_X86_64_GOTPLT64: u32 = 30; +/// 64-bit GOT relative offset to PLT entry. +pub const R_X86_64_PLTOFF64: u32 = 31; +/// Size of symbol plus 32-bit addend. +pub const R_X86_64_SIZE32: u32 = 32; +/// Size of symbol plus 64-bit addend. +pub const R_X86_64_SIZE64: u32 = 33; +/// GOT offset for TLS descriptor. +pub const R_X86_64_GOTPC32_TLSDESC: u32 = 34; +/// Marker for call through TLS descriptor. +pub const R_X86_64_TLSDESC_CALL: u32 = 35; +/// TLS descriptor. +pub const R_X86_64_TLSDESC: u32 = 36; +/// Adjust indirectly by program base. +pub const R_X86_64_IRELATIVE: u32 = 37; +/// 64-bit adjust by program base. +pub const R_X86_64_RELATIVE64: u32 = 38; +/// Load from 32 bit signed pc relative offset to GOT entry without REX prefix, relaxable. +pub const R_X86_64_GOTPCRELX: u32 = 41; +/// Load from 32 bit signed pc relative offset to GOT entry with REX prefix, relaxable. 
+pub const R_X86_64_REX_GOTPCRELX: u32 = 42; +pub const R_X86_64_NUM: u32 = 43; + +// Intel 80386 specific definitions + +// i386 relocs +/// No reloc +pub const R_386_NONE: u32 = 0; +/// Direct 32 bit +pub const R_386_32: u32 = 1; +/// PC relative 32 bit +pub const R_386_PC32: u32 = 2; +/// 32 bit GOT entry +pub const R_386_GOT32: u32 = 3; +/// 32 bit PLT address +pub const R_386_PLT32: u32 = 4; +/// Copy symbol at runtime +pub const R_386_COPY: u32 = 5; +/// Create GOT entry +pub const R_386_GLOB_DAT: u32 = 6; +/// Create PLT entry +pub const R_386_JMP_SLOT: u32 = 7; +/// Adjust by program base +pub const R_386_RELATIVE: u32 = 8; +/// 32 bit offset to GOT +pub const R_386_GOTOFF: u32 = 9; +/// 32 bit PC relative offset to GOT +pub const R_386_GOTPC: u32 = 10; +pub const R_386_32PLT: u32 = 11; +/// Offset in static TLS block +pub const R_386_TLS_TPOFF: u32 = 14; +/// Address of GOT entry for static TLS block offset +pub const R_386_TLS_IE: u32 = 15; +/// GOT entry for static TLS block offset +pub const R_386_TLS_GOTIE: u32 = 16; +/// Offset relative to static TLS block +pub const R_386_TLS_LE: u32 = 17; +/// Direct 32 bit for GNU version of general dynamic thread local data +pub const R_386_TLS_GD: u32 = 18; +/// Direct 32 bit for GNU version of local dynamic thread local data in LE code +pub const R_386_TLS_LDM: u32 = 19; +pub const R_386_16: u32 = 20; +pub const R_386_PC16: u32 = 21; +pub const R_386_8: u32 = 22; +pub const R_386_PC8: u32 = 23; +/// Direct 32 bit for general dynamic thread local data +pub const R_386_TLS_GD_32: u32 = 24; +/// Tag for pushl in GD TLS code +pub const R_386_TLS_GD_PUSH: u32 = 25; +/// Relocation for call to __tls_get_addr() +pub const R_386_TLS_GD_CALL: u32 = 26; +/// Tag for popl in GD TLS code +pub const R_386_TLS_GD_POP: u32 = 27; +/// Direct 32 bit for local dynamic thread local data in LE code +pub const R_386_TLS_LDM_32: u32 = 28; +/// Tag for pushl in LDM TLS code +pub const R_386_TLS_LDM_PUSH: u32 = 29; +/// Relocation for 
call to __tls_get_addr() in LDM code +pub const R_386_TLS_LDM_CALL: u32 = 30; +/// Tag for popl in LDM TLS code +pub const R_386_TLS_LDM_POP: u32 = 31; +/// Offset relative to TLS block +pub const R_386_TLS_LDO_32: u32 = 32; +/// GOT entry for negated static TLS block offset +pub const R_386_TLS_IE_32: u32 = 33; +/// Negated offset relative to static TLS block +pub const R_386_TLS_LE_32: u32 = 34; +/// ID of module containing symbol +pub const R_386_TLS_DTPMOD32: u32 = 35; +/// Offset in TLS block +pub const R_386_TLS_DTPOFF32: u32 = 36; +/// Negated offset in static TLS block +pub const R_386_TLS_TPOFF32: u32 = 37; +/// 32-bit symbol size +pub const R_386_SIZE32: u32 = 38; +/// GOT offset for TLS descriptor. +pub const R_386_TLS_GOTDESC: u32 = 39; +/// Marker of call through TLS descriptor for relaxation +pub const R_386_TLS_DESC_CALL: u32 = 40; +/// TLS descriptor containing pointer to code and to argument, returning the TLS offset for the symbol +pub const R_386_TLS_DESC: u32 = 41; +/// Adjust indirectly by program base +pub const R_386_IRELATIVE: u32 = 42; +/// Load from 32 bit GOT entry, relaxable +pub const R_386_GOT32X: u32 = 43; +/// Keep this the last entry +pub const R_386_NUM: u32 = 44; + +// AArch64 relocs +/// No relocation +pub const R_AARCH64_NONE: u32 = 0; + +// ILP32 AArch64 relocs +/// Direct 32 bit +pub const R_AARCH64_P32_ABS32: u32 = 1; +/// Copy symbol at runtime +pub const R_AARCH64_P32_COPY: u32 = 180; +/// Create GOT entry +pub const R_AARCH64_P32_GLOB_DAT: u32 = 181; +/// Create PLT entry +pub const R_AARCH64_P32_JUMP_SLOT: u32 = 182; +/// Adjust by program base +pub const R_AARCH64_P32_RELATIVE: u32 = 183; +/// Module number, 32 bit +pub const R_AARCH64_P32_TLS_DTPMOD: u32 = 184; +/// Module-relative offset, 32 bit +pub const R_AARCH64_P32_TLS_DTPREL: u32 = 185; +/// TP-relative offset, 32 bit +pub const R_AARCH64_P32_TLS_TPREL: u32 = 186; +/// TLS Descriptor +pub const R_AARCH64_P32_TLSDESC: u32 = 187; +/// STT_GNU_IFUNC relocation +pub 
const R_AARCH64_P32_IRELATIVE: u32 = 188; + +// LP64 AArch64 relocs +/// Direct 64 bit +pub const R_AARCH64_ABS64: u32 = 257; +/// Direct 32 bit +pub const R_AARCH64_ABS32: u32 = 258; +/// Direct 16-bit +pub const R_AARCH64_ABS16: u32 = 259; +/// PC-relative 64-bit +pub const R_AARCH64_PREL64: u32 = 260; +/// PC-relative 32-bit +pub const R_AARCH64_PREL32: u32 = 261; +/// PC-relative 16-bit +pub const R_AARCH64_PREL16: u32 = 262; +/// Dir. MOVZ imm. from bits 15:0 +pub const R_AARCH64_MOVW_UABS_G0: u32 = 263; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_UABS_G0_NC: u32 = 264; +/// Dir. MOVZ imm. from bits 31:16 +pub const R_AARCH64_MOVW_UABS_G1: u32 = 265; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_UABS_G1_NC: u32 = 266; +/// Dir. MOVZ imm. from bits 47:32 +pub const R_AARCH64_MOVW_UABS_G2: u32 = 267; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_UABS_G2_NC: u32 = 268; +/// Dir. MOV{K,Z} imm. from 63:48 +pub const R_AARCH64_MOVW_UABS_G3: u32 = 269; +/// Dir. MOV{N,Z} imm. from 15:0 +pub const R_AARCH64_MOVW_SABS_G0: u32 = 270; +/// Dir. MOV{N,Z} imm. from 31:16 +pub const R_AARCH64_MOVW_SABS_G1: u32 = 271; +/// Dir. MOV{N,Z} imm. from 47:32 +pub const R_AARCH64_MOVW_SABS_G2: u32 = 272; +/// PC-rel. LD imm. from bits 20:2 +pub const R_AARCH64_LD_PREL_LO19: u32 = 273; +/// PC-rel. ADR imm. from bits 20:0 +pub const R_AARCH64_ADR_PREL_LO21: u32 = 274; +/// Page-rel. ADRP imm. from 32:12 +pub const R_AARCH64_ADR_PREL_PG_HI21: u32 = 275; +/// Likewise; no overflow check +pub const R_AARCH64_ADR_PREL_PG_HI21_NC: u32 = 276; +/// Dir. ADD imm. from bits 11:0 +pub const R_AARCH64_ADD_ABS_LO12_NC: u32 = 277; +/// Likewise for LD/ST; no check. +pub const R_AARCH64_LDST8_ABS_LO12_NC: u32 = 278; +/// PC-rel. TBZ/TBNZ imm. from 15:2 +pub const R_AARCH64_TSTBR14: u32 = 279; +/// PC-rel. cond. br. imm. from 20:2. +pub const R_AARCH64_CONDBR19: u32 = 280; +/// PC-rel. B imm. 
from bits 27:2 +pub const R_AARCH64_JUMP26: u32 = 282; +/// Likewise for CALL +pub const R_AARCH64_CALL26: u32 = 283; +/// Dir. ADD imm. from bits 11:1 +pub const R_AARCH64_LDST16_ABS_LO12_NC: u32 = 284; +/// Likewise for bits 11:2 +pub const R_AARCH64_LDST32_ABS_LO12_NC: u32 = 285; +/// Likewise for bits 11:3 +pub const R_AARCH64_LDST64_ABS_LO12_NC: u32 = 286; +/// PC-rel. MOV{N,Z} imm. from 15:0 +pub const R_AARCH64_MOVW_PREL_G0: u32 = 287; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_PREL_G0_NC: u32 = 288; +/// PC-rel. MOV{N,Z} imm. from 31:16. +pub const R_AARCH64_MOVW_PREL_G1: u32 = 289; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_PREL_G1_NC: u32 = 290; +/// PC-rel. MOV{N,Z} imm. from 47:32. +pub const R_AARCH64_MOVW_PREL_G2: u32 = 291; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_PREL_G2_NC: u32 = 292; +/// PC-rel. MOV{N,Z} imm. from 63:48. +pub const R_AARCH64_MOVW_PREL_G3: u32 = 293; +/// Dir. ADD imm. from bits 11:4 +pub const R_AARCH64_LDST128_ABS_LO12_NC: u32 = 299; +/// GOT-rel. off. MOV{N,Z} imm. 15:0. +pub const R_AARCH64_MOVW_GOTOFF_G0: u32 = 300; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_GOTOFF_G0_NC: u32 = 301; +/// GOT-rel. o. MOV{N,Z} imm. 31:16 +pub const R_AARCH64_MOVW_GOTOFF_G1: u32 = 302; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_GOTOFF_G1_NC: u32 = 303; +/// GOT-rel. o. MOV{N,Z} imm. 47:32 +pub const R_AARCH64_MOVW_GOTOFF_G2: u32 = 304; +/// Likewise for MOVK; no check +pub const R_AARCH64_MOVW_GOTOFF_G2_NC: u32 = 305; +/// GOT-rel. o. MOV{N,Z} imm. 63:48 +pub const R_AARCH64_MOVW_GOTOFF_G3: u32 = 306; +/// GOT-relative 64-bit +pub const R_AARCH64_GOTREL64: u32 = 307; +/// GOT-relative 32-bit +pub const R_AARCH64_GOTREL32: u32 = 308; +/// PC-rel. GOT off. load imm. 20:2 +pub const R_AARCH64_GOT_LD_PREL19: u32 = 309; +/// GOT-rel. off. LD/ST imm. 14:3 +pub const R_AARCH64_LD64_GOTOFF_LO15: u32 = 310; +/// P-page-rel. GOT off. 
ADRP 32:12 +pub const R_AARCH64_ADR_GOT_PAGE: u32 = 311; +/// Dir. GOT off. LD/ST imm. 11:3 +pub const R_AARCH64_LD64_GOT_LO12_NC: u32 = 312; +/// GOT-page-rel. GOT off. LD/ST 14:3 +pub const R_AARCH64_LD64_GOTPAGE_LO15: u32 = 313; +/// PC-relative ADR imm. 20:0 +pub const R_AARCH64_TLSGD_ADR_PREL21: u32 = 512; +/// page-rel. ADRP imm. 32:12 +pub const R_AARCH64_TLSGD_ADR_PAGE21: u32 = 513; +/// direct ADD imm. from 11:0 +pub const R_AARCH64_TLSGD_ADD_LO12_NC: u32 = 514; +/// GOT-rel. MOV{N,Z} 31:16 +pub const R_AARCH64_TLSGD_MOVW_G1: u32 = 515; +/// GOT-rel. MOVK imm. 15:0 +pub const R_AARCH64_TLSGD_MOVW_G0_NC: u32 = 516; +/// Like 512; local dynamic model +pub const R_AARCH64_TLSLD_ADR_PREL21: u32 = 517; +/// Like 513; local dynamic model +pub const R_AARCH64_TLSLD_ADR_PAGE21: u32 = 518; +/// Like 514; local dynamic model +pub const R_AARCH64_TLSLD_ADD_LO12_NC: u32 = 519; +/// Like 515; local dynamic model +pub const R_AARCH64_TLSLD_MOVW_G1: u32 = 520; +/// Like 516; local dynamic model +pub const R_AARCH64_TLSLD_MOVW_G0_NC: u32 = 521; +/// TLS PC-rel. load imm. 20:2 +pub const R_AARCH64_TLSLD_LD_PREL19: u32 = 522; +/// TLS DTP-rel. MOV{N,Z} 47:32 +pub const R_AARCH64_TLSLD_MOVW_DTPREL_G2: u32 = 523; +/// TLS DTP-rel. MOV{N,Z} 31:16 +pub const R_AARCH64_TLSLD_MOVW_DTPREL_G1: u32 = 524; +/// Likewise; MOVK; no check +pub const R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: u32 = 525; +/// TLS DTP-rel. MOV{N,Z} 15:0 +pub const R_AARCH64_TLSLD_MOVW_DTPREL_G0: u32 = 526; +/// Likewise; MOVK; no check +pub const R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: u32 = 527; +/// DTP-rel. ADD imm. from 23:12. +pub const R_AARCH64_TLSLD_ADD_DTPREL_HI12: u32 = 528; +/// DTP-rel. ADD imm. from 11:0 +pub const R_AARCH64_TLSLD_ADD_DTPREL_LO12: u32 = 529; +/// Likewise; no ovfl. check +pub const R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: u32 = 530; +/// DTP-rel. LD/ST imm. 
11:0 +pub const R_AARCH64_TLSLD_LDST8_DTPREL_LO12: u32 = 531; +/// Likewise; no check +pub const R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC: u32 = 532; +/// DTP-rel. LD/ST imm. 11:1 +pub const R_AARCH64_TLSLD_LDST16_DTPREL_LO12: u32 = 533; +/// Likewise; no check +pub const R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC: u32 = 534; +/// DTP-rel. LD/ST imm. 11:2 +pub const R_AARCH64_TLSLD_LDST32_DTPREL_LO12: u32 = 535; +/// Likewise; no check +pub const R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC: u32 = 536; +/// DTP-rel. LD/ST imm. 11:3 +pub const R_AARCH64_TLSLD_LDST64_DTPREL_LO12: u32 = 537; +/// Likewise; no check +pub const R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC: u32 = 538; +/// GOT-rel. MOV{N,Z} 31:16 +pub const R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: u32 = 539; +/// GOT-rel. MOVK 15:0 +pub const R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: u32 = 540; +/// Page-rel. ADRP 32:12 +pub const R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: u32 = 541; +/// Direct LD off. 11:3 +pub const R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: u32 = 542; +/// PC-rel. load imm. 20:2 +pub const R_AARCH64_TLSIE_LD_GOTTPREL_PREL19: u32 = 543; +/// TLS TP-rel. MOV{N,Z} 47:32 +pub const R_AARCH64_TLSLE_MOVW_TPREL_G2: u32 = 544; +/// TLS TP-rel. MOV{N,Z} 31:16 +pub const R_AARCH64_TLSLE_MOVW_TPREL_G1: u32 = 545; +/// Likewise; MOVK; no check +pub const R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: u32 = 546; +/// TLS TP-rel. MOV{N,Z} 15:0 +pub const R_AARCH64_TLSLE_MOVW_TPREL_G0: u32 = 547; +/// Likewise; MOVK; no check +pub const R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: u32 = 548; +/// TP-rel. ADD imm. 23:12 +pub const R_AARCH64_TLSLE_ADD_TPREL_HI12: u32 = 549; +/// TP-rel. ADD imm. 11:0 +pub const R_AARCH64_TLSLE_ADD_TPREL_LO12: u32 = 550; +/// Likewise; no ovfl. check +pub const R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: u32 = 551; +/// TP-rel. LD/ST off. 11:0 +pub const R_AARCH64_TLSLE_LDST8_TPREL_LO12: u32 = 552; +/// Likewise; no ovfl. check. +pub const R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC: u32 = 553; +/// TP-rel. LD/ST off. 
11:1 +pub const R_AARCH64_TLSLE_LDST16_TPREL_LO12: u32 = 554; +/// Likewise; no check +pub const R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: u32 = 555; +/// TP-rel. LD/ST off. 11:2 +pub const R_AARCH64_TLSLE_LDST32_TPREL_LO12: u32 = 556; +/// Likewise; no check +pub const R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: u32 = 557; +/// TP-rel. LD/ST off. 11:3 +pub const R_AARCH64_TLSLE_LDST64_TPREL_LO12: u32 = 558; +/// Likewise; no check +pub const R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: u32 = 559; +/// PC-rel. load immediate 20:2 +pub const R_AARCH64_TLSDESC_LD_PREL19: u32 = 560; +/// PC-rel. ADR immediate 20:0 +pub const R_AARCH64_TLSDESC_ADR_PREL21: u32 = 561; +/// Page-rel. ADRP imm. 32:12 +pub const R_AARCH64_TLSDESC_ADR_PAGE21: u32 = 562; +/// Direct LD off. from 11:3 +pub const R_AARCH64_TLSDESC_LD64_LO12: u32 = 563; +/// Direct ADD imm. from 11:0 +pub const R_AARCH64_TLSDESC_ADD_LO12: u32 = 564; +/// GOT-rel. MOV{N,Z} imm. 31:16 +pub const R_AARCH64_TLSDESC_OFF_G1: u32 = 565; +/// GOT-rel. MOVK imm. 15:0; no ck +pub const R_AARCH64_TLSDESC_OFF_G0_NC: u32 = 566; +/// Relax LDR +pub const R_AARCH64_TLSDESC_LDR: u32 = 567; +/// Relax ADD +pub const R_AARCH64_TLSDESC_ADD: u32 = 568; +/// Relax BLR +pub const R_AARCH64_TLSDESC_CALL: u32 = 569; +/// TP-rel. LD/ST off. 11:4 +pub const R_AARCH64_TLSLE_LDST128_TPREL_LO12: u32 = 570; +/// Likewise; no check +pub const R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC: u32 = 571; +/// DTP-rel. LD/ST imm. 11:4. 
+pub const R_AARCH64_TLSLD_LDST128_DTPREL_LO12: u32 = 572; +/// Likewise; no check +pub const R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC: u32 = 573; +/// Copy symbol at runtime +pub const R_AARCH64_COPY: u32 = 1024; +/// Create GOT entry +pub const R_AARCH64_GLOB_DAT: u32 = 1025; +/// Create PLT entry +pub const R_AARCH64_JUMP_SLOT: u32 = 1026; +/// Adjust by program base +pub const R_AARCH64_RELATIVE: u32 = 1027; +/// Module number, 64 bit +pub const R_AARCH64_TLS_DTPMOD: u32 = 1028; +/// Module-relative offset, 64 bit +pub const R_AARCH64_TLS_DTPREL: u32 = 1029; +/// TP-relative offset, 64 bit +pub const R_AARCH64_TLS_TPREL: u32 = 1030; +/// TLS Descriptor +pub const R_AARCH64_TLSDESC: u32 = 1031; +/// STT_GNU_IFUNC relocation +pub const R_AARCH64_IRELATIVE: u32 = 1032; + +// ARM relocs +/// No reloc +pub const R_ARM_NONE: u32 = 0; +/// Deprecated PC relative 26 bit branch +pub const R_ARM_PC24: u32 = 1; +/// Direct 32 bit +pub const R_ARM_ABS32: u32 = 2; +/// PC relative 32 bit +pub const R_ARM_REL32: u32 = 3; +pub const R_ARM_PC13: u32 = 4; +/// Direct 16 bit +pub const R_ARM_ABS16: u32 = 5; +/// Direct 12 bit +pub const R_ARM_ABS12: u32 = 6; +/// Direct & 0x7C (LDR, STR) +pub const R_ARM_THM_ABS5: u32 = 7; +/// Direct 8 bit +pub const R_ARM_ABS8: u32 = 8; +pub const R_ARM_SBREL32: u32 = 9; +/// PC relative 24 bit (Thumb32 BL) +pub const R_ARM_THM_PC22: u32 = 10; +/// PC relative & 0x3FC(Thumb16 LDR, ADD, ADR). 
+pub const R_ARM_THM_PC8: u32 = 11; +pub const R_ARM_AMP_VCALL9: u32 = 12; +/// Obsolete static relocation +pub const R_ARM_SWI24: u32 = 13; +/// Dynamic relocation +pub const R_ARM_TLS_DESC: u32 = 13; +/// Reserved +pub const R_ARM_THM_SWI8: u32 = 14; +/// Reserved +pub const R_ARM_XPC25: u32 = 15; +/// Reserved +pub const R_ARM_THM_XPC22: u32 = 16; +/// ID of module containing symbol +pub const R_ARM_TLS_DTPMOD32: u32 = 17; +/// Offset in TLS block +pub const R_ARM_TLS_DTPOFF32: u32 = 18; +/// Offset in static TLS block +pub const R_ARM_TLS_TPOFF32: u32 = 19; +/// Copy symbol at runtime +pub const R_ARM_COPY: u32 = 20; +/// Create GOT entry +pub const R_ARM_GLOB_DAT: u32 = 21; +/// Create PLT entry +pub const R_ARM_JUMP_SLOT: u32 = 22; +/// Adjust by program base +pub const R_ARM_RELATIVE: u32 = 23; +/// 32 bit offset to GOT +pub const R_ARM_GOTOFF: u32 = 24; +/// 32 bit PC relative offset to GOT +pub const R_ARM_GOTPC: u32 = 25; +/// 32 bit GOT entry +pub const R_ARM_GOT32: u32 = 26; +/// Deprecated, 32 bit PLT address +pub const R_ARM_PLT32: u32 = 27; +/// PC relative 24 bit (BL, BLX) +pub const R_ARM_CALL: u32 = 28; +/// PC relative 24 bit (B, BL<cond>) +pub const R_ARM_JUMP24: u32 = 29; +/// PC relative 24 bit (Thumb32 B.W) +pub const R_ARM_THM_JUMP24: u32 = 30; +/// Adjust by program base +pub const R_ARM_BASE_ABS: u32 = 31; +/// Obsolete +pub const R_ARM_ALU_PCREL_7_0: u32 = 32; +/// Obsolete +pub const R_ARM_ALU_PCREL_15_8: u32 = 33; +/// Obsolete +pub const R_ARM_ALU_PCREL_23_15: u32 = 34; +/// Deprecated, prog. base relative +pub const R_ARM_LDR_SBREL_11_0: u32 = 35; +/// Deprecated, prog. base relative +pub const R_ARM_ALU_SBREL_19_12: u32 = 36; +/// Deprecated, prog. 
base relative +pub const R_ARM_ALU_SBREL_27_20: u32 = 37; +pub const R_ARM_TARGET1: u32 = 38; +/// Program base relative +pub const R_ARM_SBREL31: u32 = 39; +pub const R_ARM_V4BX: u32 = 40; +pub const R_ARM_TARGET2: u32 = 41; +/// 32 bit PC relative +pub const R_ARM_PREL31: u32 = 42; +/// Direct 16-bit (MOVW) +pub const R_ARM_MOVW_ABS_NC: u32 = 43; +/// Direct high 16-bit (MOVT) +pub const R_ARM_MOVT_ABS: u32 = 44; +/// PC relative 16-bit (MOVW) +pub const R_ARM_MOVW_PREL_NC: u32 = 45; +/// PC relative (MOVT) +pub const R_ARM_MOVT_PREL: u32 = 46; +/// Direct 16 bit (Thumb32 MOVW) +pub const R_ARM_THM_MOVW_ABS_NC: u32 = 47; +/// Direct high 16 bit (Thumb32 MOVT) +pub const R_ARM_THM_MOVT_ABS: u32 = 48; +/// PC relative 16 bit (Thumb32 MOVW) +pub const R_ARM_THM_MOVW_PREL_NC: u32 = 49; +/// PC relative high 16 bit (Thumb32 MOVT) +pub const R_ARM_THM_MOVT_PREL: u32 = 50; +/// PC relative 20 bit (Thumb32 B<cond>.W) +pub const R_ARM_THM_JUMP19: u32 = 51; +/// PC relative X & 0x7E (Thumb16 CBZ, CBNZ) +pub const R_ARM_THM_JUMP6: u32 = 52; +/// PC relative 12 bit (Thumb32 ADR.W) +pub const R_ARM_THM_ALU_PREL_11_0: u32 = 53; +/// PC relative 12 bit (Thumb32 LDR{D,SB,H,SH}) +pub const R_ARM_THM_PC12: u32 = 54; +/// Direct 32-bit +pub const R_ARM_ABS32_NOI: u32 = 55; +/// PC relative 32-bit +pub const R_ARM_REL32_NOI: u32 = 56; +/// PC relative (ADD, SUB) +pub const R_ARM_ALU_PC_G0_NC: u32 = 57; +/// PC relative (ADD, SUB) +pub const R_ARM_ALU_PC_G0: u32 = 58; +/// PC relative (ADD, SUB) +pub const R_ARM_ALU_PC_G1_NC: u32 = 59; +/// PC relative (ADD, SUB) +pub const R_ARM_ALU_PC_G1: u32 = 60; +/// PC relative (ADD, SUB) +pub const R_ARM_ALU_PC_G2: u32 = 61; +/// PC relative (LDR,STR,LDRB,STRB) +pub const R_ARM_LDR_PC_G1: u32 = 62; +/// PC relative (LDR,STR,LDRB,STRB) +pub const R_ARM_LDR_PC_G2: u32 = 63; +/// PC relative (STR{D,H},LDR{D,SB,H,SH}) +pub const R_ARM_LDRS_PC_G0: u32 = 64; +/// PC relative (STR{D,H},LDR{D,SB,H,SH}) +pub const R_ARM_LDRS_PC_G1: u32 = 65; +/// PC 
relative (STR{D,H},LDR{D,SB,H,SH}) +pub const R_ARM_LDRS_PC_G2: u32 = 66; +/// PC relative (LDC, STC) +pub const R_ARM_LDC_PC_G0: u32 = 67; +/// PC relative (LDC, STC) +pub const R_ARM_LDC_PC_G1: u32 = 68; +/// PC relative (LDC, STC) +pub const R_ARM_LDC_PC_G2: u32 = 69; +/// Program base relative (ADD,SUB) +pub const R_ARM_ALU_SB_G0_NC: u32 = 70; +/// Program base relative (ADD,SUB) +pub const R_ARM_ALU_SB_G0: u32 = 71; +/// Program base relative (ADD,SUB) +pub const R_ARM_ALU_SB_G1_NC: u32 = 72; +/// Program base relative (ADD,SUB) +pub const R_ARM_ALU_SB_G1: u32 = 73; +/// Program base relative (ADD,SUB) +pub const R_ARM_ALU_SB_G2: u32 = 74; +/// Program base relative (LDR,STR, LDRB, STRB) +pub const R_ARM_LDR_SB_G0: u32 = 75; +/// Program base relative (LDR, STR, LDRB, STRB) +pub const R_ARM_LDR_SB_G1: u32 = 76; +/// Program base relative (LDR, STR, LDRB, STRB) +pub const R_ARM_LDR_SB_G2: u32 = 77; +/// Program base relative (LDR, STR, LDRB, STRB) +pub const R_ARM_LDRS_SB_G0: u32 = 78; +/// Program base relative (LDR, STR, LDRB, STRB) +pub const R_ARM_LDRS_SB_G1: u32 = 79; +/// Program base relative (LDR, STR, LDRB, STRB) +pub const R_ARM_LDRS_SB_G2: u32 = 80; +/// Program base relative (LDC,STC) +pub const R_ARM_LDC_SB_G0: u32 = 81; +/// Program base relative (LDC,STC) +pub const R_ARM_LDC_SB_G1: u32 = 82; +/// Program base relative (LDC,STC) +pub const R_ARM_LDC_SB_G2: u32 = 83; +/// Program base relative 16 bit (MOVW) +pub const R_ARM_MOVW_BREL_NC: u32 = 84; +/// Program base relative high 16 bit (MOVT) +pub const R_ARM_MOVT_BREL: u32 = 85; +/// Program base relative 16 bit (MOVW) +pub const R_ARM_MOVW_BREL: u32 = 86; +/// Program base relative 16 bit (Thumb32 MOVW) +pub const R_ARM_THM_MOVW_BREL_NC: u32 = 87; +/// Program base relative high 16 bit (Thumb32 MOVT) +pub const R_ARM_THM_MOVT_BREL: u32 = 88; +/// Program base relative 16 bit (Thumb32 MOVW) +pub const R_ARM_THM_MOVW_BREL: u32 = 89; +pub const R_ARM_TLS_GOTDESC: u32 = 90; +pub const 
R_ARM_TLS_CALL: u32 = 91; +/// TLS relaxation +pub const R_ARM_TLS_DESCSEQ: u32 = 92; +pub const R_ARM_THM_TLS_CALL: u32 = 93; +pub const R_ARM_PLT32_ABS: u32 = 94; +/// GOT entry +pub const R_ARM_GOT_ABS: u32 = 95; +/// PC relative GOT entry +pub const R_ARM_GOT_PREL: u32 = 96; +/// GOT entry relative to GOT origin (LDR) +pub const R_ARM_GOT_BREL12: u32 = 97; +/// 12 bit, GOT entry relative to GOT origin (LDR, STR) +pub const R_ARM_GOTOFF12: u32 = 98; +pub const R_ARM_GOTRELAX: u32 = 99; +pub const R_ARM_GNU_VTENTRY: u32 = 100; +pub const R_ARM_GNU_VTINHERIT: u32 = 101; +/// PC relative & 0xFFE (Thumb16 B) +pub const R_ARM_THM_PC11: u32 = 102; +/// PC relative & 0x1FE (Thumb16 B/B<cond>) +pub const R_ARM_THM_PC9: u32 = 103; +/// PC-rel 32 bit for global dynamic thread local data +pub const R_ARM_TLS_GD32: u32 = 104; +/// PC-rel 32 bit for local dynamic thread local data +pub const R_ARM_TLS_LDM32: u32 = 105; +/// 32 bit offset relative to TLS block +pub const R_ARM_TLS_LDO32: u32 = 106; +/// PC-rel 32 bit for GOT entry of static TLS block offset +pub const R_ARM_TLS_IE32: u32 = 107; +/// 32 bit offset relative to static TLS block +pub const R_ARM_TLS_LE32: u32 = 108; +/// 12 bit relative to TLS block (LDR, STR) +pub const R_ARM_TLS_LDO12: u32 = 109; +/// 12 bit relative to static TLS block (LDR, STR) +pub const R_ARM_TLS_LE12: u32 = 110; +/// 12 bit GOT entry relative to GOT origin (LDR) +pub const R_ARM_TLS_IE12GP: u32 = 111; +/// Obsolete +pub const R_ARM_ME_TOO: u32 = 128; +pub const R_ARM_THM_TLS_DESCSEQ: u32 = 129; +pub const R_ARM_THM_TLS_DESCSEQ16: u32 = 129; +pub const R_ARM_THM_TLS_DESCSEQ32: u32 = 130; +/// GOT entry relative to GOT origin, 12 bit (Thumb32 LDR) +pub const R_ARM_THM_GOT_BREL12: u32 = 131; +pub const R_ARM_IRELATIVE: u32 = 160; +pub const R_ARM_RXPC25: u32 = 249; +pub const R_ARM_RSBREL32: u32 = 250; +pub const R_ARM_THM_RPC22: u32 = 251; +pub const R_ARM_RREL32: u32 = 252; +pub const R_ARM_RABS22: u32 = 253; +pub const R_ARM_RPC24: u32 = 
254; +pub const R_ARM_RBASE: u32 = 255; +/// Keep this the last entry +pub const R_ARM_NUM: u32 = 256; + +/////////////////// +// OpenRisc +/////////////////// +pub const R_OR1K_NONE: u32 = 0; +pub const R_OR1K_32: u32 = 1; +pub const R_OR1K_16: u32 = 2; +pub const R_OR1K_8: u32 = 3; +pub const R_OR1K_LO_16_IN_INSN: u32 = 4; +pub const R_OR1K_HI_16_IN_INSN: u32 = 5; +pub const R_OR1K_INSN_REL_26: u32 = 6; +pub const R_OR1K_GNU_VTENTRY: u32 = 7; +pub const R_OR1K_GNU_VTINHERIT: u32 = 8; +pub const R_OR1K_32_PCREL: u32 = 9; +pub const R_OR1K_16_PCREL: u32 = 10; +pub const R_OR1K_8_PCREL: u32 = 11; +pub const R_OR1K_GOTPC_HI16: u32 = 12; +pub const R_OR1K_GOTPC_LO16: u32 = 13; +pub const R_OR1K_GOT16: u32 = 14; +pub const R_OR1K_PLT26: u32 = 15; +pub const R_OR1K_GOTOFF_HI16: u32 = 16; +pub const R_OR1K_GOTOFF_LO16: u32 = 17; +pub const R_OR1K_COPY: u32 = 18; +pub const R_OR1K_GLOB_DAT: u32 = 19; +pub const R_OR1K_JMP_SLOT: u32 = 20; +pub const R_OR1K_RELATIVE: u32 = 21; +pub const R_OR1K_TLS_GD_HI16: u32 = 22; +pub const R_OR1K_TLS_GD_LO16: u32 = 23; +pub const R_OR1K_TLS_LDM_HI16: u32 = 24; +pub const R_OR1K_TLS_LDM_LO16: u32 = 25; +pub const R_OR1K_TLS_LDO_HI16: u32 = 26; +pub const R_OR1K_TLS_LDO_LO16: u32 = 27; +pub const R_OR1K_TLS_IE_HI16: u32 = 28; +pub const R_OR1K_TLS_IE_LO16: u32 = 29; +pub const R_OR1K_TLS_LE_HI16: u32 = 30; +pub const R_OR1K_TLS_LE_LO16: u32 = 31; +pub const R_OR1K_TLS_TPOFF: u32 = 32; +pub const R_OR1K_TLS_DTPOFF: u32 = 33; +pub const R_OR1K_TLS_DTPMOD: u32 = 34; +pub const R_OR1K_NUM: u32 = 35; + +///////////////////// +// MIPS +///////////////////// +/// No reloc +pub const R_MIPS_NONE: u32 = 0; +/// Direct 16 bit +pub const R_MIPS_16: u32 = 1; +/// Direct 32 bit +pub const R_MIPS_32: u32 = 2; +/// PC relative 32 bit +pub const R_MIPS_REL32: u32 = 3; +/// Direct 26 bit shifted +pub const R_MIPS_26: u32 = 4; +/// High 16 bit +pub const R_MIPS_HI16: u32 = 5; +/// Low 16 bit +pub const R_MIPS_LO16: u32 = 6; +/// GP relative 16 bit +pub 
const R_MIPS_GPREL16: u32 = 7; +/// 16 bit literal entry +pub const R_MIPS_LITERAL: u32 = 8; +/// 16 bit GOT entry +pub const R_MIPS_GOT16: u32 = 9; +/// PC relative 16 bit +pub const R_MIPS_PC16: u32 = 10; +/// 16 bit GOT entry for function +pub const R_MIPS_CALL16: u32 = 11; +/// GP relative 32 bit +pub const R_MIPS_GPREL32: u32 = 12; +pub const R_MIPS_SHIFT5: u32 = 16; +pub const R_MIPS_SHIFT6: u32 = 17; +pub const R_MIPS_64: u32 = 18; +pub const R_MIPS_GOT_DISP: u32 = 19; +pub const R_MIPS_GOT_PAGE: u32 = 20; +pub const R_MIPS_GOT_OFST: u32 = 21; +pub const R_MIPS_GOT_HI16: u32 = 22; +pub const R_MIPS_GOT_LO16: u32 = 23; +pub const R_MIPS_SUB: u32 = 24; +pub const R_MIPS_INSERT_A: u32 = 25; +pub const R_MIPS_INSERT_B: u32 = 26; +pub const R_MIPS_DELETE: u32 = 27; +pub const R_MIPS_HIGHER: u32 = 28; +pub const R_MIPS_HIGHEST: u32 = 29; +pub const R_MIPS_CALL_HI16: u32 = 30; +pub const R_MIPS_CALL_LO16: u32 = 31; +pub const R_MIPS_SCN_DISP: u32 = 32; +pub const R_MIPS_REL16: u32 = 33; +pub const R_MIPS_ADD_IMMEDIATE: u32 = 34; +pub const R_MIPS_PJUMP: u32 = 35; +pub const R_MIPS_RELGOT: u32 = 36; +pub const R_MIPS_JALR: u32 = 37; +/// Module number 32 bit +pub const R_MIPS_TLS_DTPMOD32: u32 = 38; +/// Module-relative offset 32 bit +pub const R_MIPS_TLS_DTPREL32: u32 = 39; +/// Module number 64 bit +pub const R_MIPS_TLS_DTPMOD64: u32 = 40; +/// Module-relative offset 64 bit +pub const R_MIPS_TLS_DTPREL64: u32 = 41; +/// 16 bit GOT offset for GD +pub const R_MIPS_TLS_GD: u32 = 42; +/// 16 bit GOT offset for LDM +pub const R_MIPS_TLS_LDM: u32 = 43; +/// Module-relative offset, high 16 bits +pub const R_MIPS_TLS_DTPREL_HI16: u32 = 44; +/// Module-relative offset, low 16 bits +pub const R_MIPS_TLS_DTPREL_LO16: u32 = 45; +/// 16 bit GOT offset for IE +pub const R_MIPS_TLS_GOTTPREL: u32 = 46; +/// TP-relative offset, 32 bit +pub const R_MIPS_TLS_TPREL32: u32 = 47; +/// TP-relative offset, 64 bit +pub const R_MIPS_TLS_TPREL64: u32 = 48; +/// TP-relative offset, high 16
bits +pub const R_MIPS_TLS_TPREL_HI16: u32 = 49; +/// TP-relative offset, low 16 bits +pub const R_MIPS_TLS_TPREL_LO16: u32 = 50; +pub const R_MIPS_GLOB_DAT: u32 = 51; +pub const R_MIPS_COPY: u32 = 126; +pub const R_MIPS_JUMP_SLOT: u32 = 127; +pub const R_MIPS_NUM: u32 = 128; + +/////////////////// +// RISC-V +// See https://github.com/riscv/riscv-elf-psabi-doc +/////////////////// +/// None +pub const R_RISCV_NONE: u32 = 0; +/// Runtime relocation: word32 = S + A +pub const R_RISCV_32: u32 = 1; +/// Runtime relocation: word64 = S + A +pub const R_RISCV_64: u32 = 2; +/// Runtime relocation: word32,64 = B + A +pub const R_RISCV_RELATIVE: u32 = 3; +/// Runtime relocation: must be in executable, not allowed in shared library +pub const R_RISCV_COPY: u32 = 4; +/// Runtime relocation: word32,64 = S; handled by PLT unless LD_BIND_NOW +pub const R_RISCV_JUMP_SLOT: u32 = 5; +/// TLS relocation: word32 = S->TLSINDEX +pub const R_RISCV_TLS_DTPMOD32: u32 = 6; +/// TLS relocation: word64 = S->TLSINDEX +pub const R_RISCV_TLS_DTPMOD64: u32 = 7; +/// TLS relocation: word32 = TLS + S + A - TLS_TP_OFFSET +pub const R_RISCV_TLS_DTPREL32: u32 = 8; +/// TLS relocation: word64 = TLS + S + A - TLS_TP_OFFSET +pub const R_RISCV_TLS_DTPREL64: u32 = 9; +/// TLS relocation: word32 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET +pub const R_RISCV_TLS_TPREL32: u32 = 10; +/// TLS relocation: word64 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET +pub const R_RISCV_TLS_TPREL64: u32 = 11; +/// PC-relative branch (SB-Type) +pub const R_RISCV_BRANCH: u32 = 16; +/// PC-relative jump (UJ-Type) +pub const R_RISCV_JAL: u32 = 17; +/// PC-relative call: MACRO call,tail (auipc+jalr pair) +pub const R_RISCV_CALL: u32 = 18; +/// PC-relative call (PLT): MACRO call,tail (auipc+jalr pair) PIC +pub const R_RISCV_CALL_PLT: u32 = 19; +/// PC-relative GOT reference: MACRO la +pub const R_RISCV_GOT_HI20: u32 = 20; +/// PC-relative TLS IE GOT offset: MACRO la.tls.ie +pub const R_RISCV_TLS_GOT_HI20: u32 = 21; +/// 
PC-relative TLS GD reference: MACRO la.tls.gd +pub const R_RISCV_TLS_GD_HI20: u32 = 22; +/// PC-relative reference: %pcrel_hi(symbol) (U-Type) +pub const R_RISCV_PCREL_HI20: u32 = 23; +/// PC-relative reference: %pcrel_lo(symbol) (I-Type) +pub const R_RISCV_PCREL_LO12_I: u32 = 24; +/// PC-relative reference: %pcrel_lo(symbol) (S-Type) +pub const R_RISCV_PCREL_LO12_S: u32 = 25; +/// Absolute address: %hi(symbol) (U-Type) +pub const R_RISCV_HI20: u32 = 26; +/// Absolute address: %lo(symbol) (I-Type) +pub const R_RISCV_LO12_I: u32 = 27; +/// Absolute address: %lo(symbol) (S-Type) +pub const R_RISCV_LO12_S: u32 = 28; +/// TLS LE thread offset: %tprel_hi(symbol) (U-Type) +pub const R_RISCV_TPREL_HI20: u32 = 29; +/// TLS LE thread offset: %tprel_lo(symbol) (I-Type) +pub const R_RISCV_TPREL_LO12_I: u32 = 30; +/// TLS LE thread offset: %tprel_lo(symbol) (S-Type) +pub const R_RISCV_TPREL_LO12_S: u32 = 31; +/// TLS LE thread usage: %tprel_add(symbol) +pub const R_RISCV_TPREL_ADD: u32 = 32; +/// 8-bit label addition: word8 = S + A +pub const R_RISCV_ADD8: u32 = 33; +/// 16-bit label addition: word16 = S + A +pub const R_RISCV_ADD16: u32 = 34; +/// 32-bit label addition: word32 = S + A +pub const R_RISCV_ADD32: u32 = 35; +/// 64-bit label addition: word64 = S + A +pub const R_RISCV_ADD64: u32 = 36; +/// 8-bit label subtraction: word8 = S - A +pub const R_RISCV_SUB8: u32 = 37; +/// 16-bit label subtraction: word16 = S - A +pub const R_RISCV_SUB16: u32 = 38; +/// 32-bit label subtraction: word32 = S - A +pub const R_RISCV_SUB32: u32 = 39; +/// 64-bit label subtraction: word64 = S - A +pub const R_RISCV_SUB64: u32 = 40; +/// GNU C++ vtable hierarchy +pub const R_RISCV_GNU_VTINHERIT: u32 = 41; +/// GNU C++ vtable member usage +pub const R_RISCV_GNU_VTENTRY: u32 = 42; +/// Alignment statement +pub const R_RISCV_ALIGN: u32 = 43; +/// PC-relative branch offset (CB-Type) +pub const R_RISCV_RVC_BRANCH: u32 = 44; +/// PC-relative jump offset (CJ-Type) +pub const R_RISCV_RVC_JUMP: u32 = 
45; +/// Absolute address (CI-Type) +pub const R_RISCV_RVC_LUI: u32 = 46; +/// GP-relative reference (I-Type) +pub const R_RISCV_GPREL_I: u32 = 47; +/// GP-relative reference (S-Type) +pub const R_RISCV_GPREL_S: u32 = 48; +/// TP-relative TLS LE load (I-Type) +pub const R_RISCV_TPREL_I: u32 = 49; +/// TP-relative TLS LE store (S-Type) +pub const R_RISCV_TPREL_S: u32 = 50; +/// Instruction pair can be relaxed +pub const R_RISCV_RELAX: u32 = 51; +/// Local label subtraction +pub const R_RISCV_SUB6: u32 = 52; +/// Local label assignment +pub const R_RISCV_SET6: u32 = 53; +/// Local label assignment +pub const R_RISCV_SET8: u32 = 54; +/// Local label assignment +pub const R_RISCV_SET16: u32 = 55; +/// Local label assignment +pub const R_RISCV_SET32: u32 = 56; + +#[inline] +pub fn r_to_str(typ: u32, machine: u16) -> &'static str { + use crate::elf::header::*; + match machine { + // x86 + EM_386 => { match typ { + R_386_NONE => "386_NONE", + R_386_32 => "386_32", + R_386_PC32 => "386_PC32", + R_386_GOT32 => "386_GOT32", + R_386_PLT32 => "386_PLT32", + R_386_COPY => "386_COPY", + R_386_GLOB_DAT => "386_GLOB_DAT", + R_386_JMP_SLOT => "386_JMP_SLOT", + R_386_RELATIVE => "386_RELATIVE", + R_386_GOTOFF => "386_GOTOFF", + R_386_GOTPC => "386_GOTPC", + R_386_32PLT => "386_32PLT", + R_386_TLS_TPOFF => "386_TLS_TPOFF", + R_386_TLS_IE => "386_TLS_IE", + R_386_TLS_GOTIE => "386_TLS_GOTIE", + R_386_TLS_LE => "386_TLS_LE", + R_386_TLS_GD => "386_TLS_GD", + R_386_TLS_LDM => "386_TLS_LDM", + R_386_16 => "386_16", + R_386_PC16 => "386_PC16", + R_386_8 => "386_8", + R_386_PC8 => "386_PC8", + R_386_TLS_GD_32 => "386_TLS_GD_32", + R_386_TLS_GD_PUSH => "386_TLS_GD_PUSH", + R_386_TLS_GD_CALL => "386_TLS_GD_CALL", + R_386_TLS_GD_POP => "386_TLS_GD_POP", + R_386_TLS_LDM_32 => "386_TLS_LDM_32", + R_386_TLS_LDM_PUSH => "386_TLS_LDM_PUSH", + R_386_TLS_LDM_CALL => "386_TLS_LDM_CALL", + R_386_TLS_LDM_POP => "386_TLS_LDM_POP", + R_386_TLS_LDO_32 => "386_TLS_LDO_32", + R_386_TLS_IE_32 =>
"386_TLS_IE_32", + R_386_TLS_LE_32 => "386_TLS_LE_32", + R_386_TLS_DTPMOD32 => "386_TLS_DTPMOD32", + R_386_TLS_DTPOFF32 => "386_TLS_DTPOFF32", + R_386_TLS_TPOFF32 => "386_TLS_TPOFF32", + R_386_SIZE32 => "386_SIZE32", + R_386_TLS_GOTDESC => "386_TLS_GOTDESC", + R_386_TLS_DESC_CALL => "386_TLS_DESC_CALL", + R_386_TLS_DESC => "386_TLS_DESC", + R_386_IRELATIVE => "386_IRELATIVE", + R_386_GOT32X => "386_GOT32X", + _ => "R_UNKNOWN_386", + }}, + EM_X86_64 => { match typ { + R_X86_64_64 => "X86_64_64", + R_X86_64_PC32 => "X86_64_PC32", + R_X86_64_GOT32 => "X86_64_GOT32", + R_X86_64_PLT32 => "X86_64_PLT32", + R_X86_64_COPY => "X86_64_COPY", + R_X86_64_GLOB_DAT => "X86_64_GLOB_DAT", + R_X86_64_JUMP_SLOT => "X86_64_JUMP_SLOT", + R_X86_64_RELATIVE => "X86_64_RELATIVE", + R_X86_64_GOTPCREL => "X86_64_GOTPCREL", + R_X86_64_32 => "X86_64_32", + R_X86_64_32S => "X86_64_32S", + R_X86_64_16 => "X86_64_16", + R_X86_64_PC16 => "X86_64_PC16", + R_X86_64_8 => "X86_64_8", + R_X86_64_PC8 => "X86_64_PC8", + R_X86_64_DTPMOD64 => "X86_64_DTPMOD64", + R_X86_64_DTPOFF64 => "X86_64_DTPOFF64", + R_X86_64_TPOFF64 => "X86_64_TPOFF64", + R_X86_64_TLSGD => "X86_64_TLSGD", + R_X86_64_TLSLD => "X86_64_TLSLD", + R_X86_64_DTPOFF32 => "X86_64_DTPOFF32", + R_X86_64_GOTTPOFF => "X86_64_GOTTPOFF", + R_X86_64_TPOFF32 => "X86_64_TPOFF32", + R_X86_64_PC64 => "X86_64_PC64", + R_X86_64_GOTOFF64 => "X86_64_GOTOFF64", + R_X86_64_GOTPC32 => "X86_64_GOTPC32", + R_X86_64_GOT64 => "X86_64_GOT64", + R_X86_64_GOTPCREL64 => "X86_64_GOTPCREL64", + R_X86_64_GOTPC64 => "X86_64_GOTPC64", + R_X86_64_GOTPLT64 => "X86_64_GOTPLT64", + R_X86_64_PLTOFF64 => "X86_64_PLTOFF64", + R_X86_64_SIZE32 => "X86_64_SIZE32", + R_X86_64_SIZE64 => "X86_64_SIZE64", + R_X86_64_GOTPC32_TLSDESC => "X86_64_GOTPC32_TLSDESC", + R_X86_64_TLSDESC_CALL => "X86_64_TLSDESC_CALL", + R_X86_64_TLSDESC => "X86_64_TLSDESC", + R_X86_64_IRELATIVE => "X86_64_IRELATIVE", + R_X86_64_RELATIVE64 => "X86_64_RELATIVE64", + R_X86_64_GOTPCRELX => "R_X86_64_GOTPCRELX", + 
R_X86_64_REX_GOTPCRELX => "R_X86_64_REX_GOTPCRELX", + _ => "R_UNKNOWN_X86_64", + }}, + // openrisc + EM_OPENRISC => { match typ { + R_OR1K_NONE => "OR1K_NONE", + R_OR1K_32 => "OR1K_32", + R_OR1K_16 => "OR1K_16", + R_OR1K_8 => "OR1K_8", + R_OR1K_LO_16_IN_INSN => "OR1K_LO_16_IN_INSN", + R_OR1K_HI_16_IN_INSN => "OR1K_HI_16_IN_INSN", + R_OR1K_INSN_REL_26 => "OR1K_INSN_REL_26", + R_OR1K_GNU_VTENTRY => "OR1K_GNU_VTENTRY", + R_OR1K_GNU_VTINHERIT => "OR1K_GNU_VTINHERIT", + R_OR1K_32_PCREL => "OR1K_32_PCREL", + R_OR1K_16_PCREL => "OR1K_16_PCREL", + R_OR1K_8_PCREL => "OR1K_8_PCREL", + R_OR1K_GOTPC_HI16 => "OR1K_GOTPC_HI16", + R_OR1K_GOTPC_LO16 => "OR1K_GOTPC_LO16", + R_OR1K_GOT16 => "OR1K_GOT16", + R_OR1K_PLT26 => "OR1K_PLT26", + R_OR1K_GOTOFF_HI16 => "OR1K_GOTOFF_HI16", + R_OR1K_GOTOFF_LO16 => "OR1K_GOTOFF_LO16", + R_OR1K_COPY => "OR1K_COPY", + R_OR1K_GLOB_DAT => "OR1K_GLOB_DAT", + R_OR1K_JMP_SLOT => "OR1K_JMP_SLOT", + R_OR1K_RELATIVE => "OR1K_RELATIVE", + R_OR1K_TLS_GD_HI16 => "OR1K_TLS_GD_HI16", + R_OR1K_TLS_GD_LO16 => "OR1K_TLS_GD_LO16", + R_OR1K_TLS_LDM_HI16 => "OR1K_TLS_LDM_HI16", + R_OR1K_TLS_LDM_LO16 => "OR1K_TLS_LDM_LO16", + R_OR1K_TLS_LDO_HI16 => "OR1K_TLS_LDO_HI16", + R_OR1K_TLS_LDO_LO16 => "OR1K_TLS_LDO_LO16", + R_OR1K_TLS_IE_HI16 => "OR1K_TLS_IE_HI16", + R_OR1K_TLS_IE_LO16 => "OR1K_TLS_IE_LO16", + R_OR1K_TLS_LE_HI16 => "OR1K_TLS_LE_HI16", + R_OR1K_TLS_LE_LO16 => "OR1K_TLS_LE_LO16", + R_OR1K_TLS_TPOFF => "OR1K_TLS_TPOFF", + R_OR1K_TLS_DTPOFF => "OR1K_TLS_DTPOFF", + R_OR1K_TLS_DTPMOD => "OR1K_TLS_DTPMOD", + _ => "R_UNKNOWN_OR1K", + }}, + // arm64 + EM_AARCH64 => { match typ { + R_AARCH64_P32_ABS32 => "AARCH64_P32_ABS32", + R_AARCH64_P32_COPY => "AARCH64_P32_COPY", + R_AARCH64_P32_GLOB_DAT => "AARCH64_P32_GLOB_DAT", + R_AARCH64_P32_JUMP_SLOT => "AARCH64_P32_JUMP_SLOT", + R_AARCH64_P32_RELATIVE => "AARCH64_P32_RELATIVE", + R_AARCH64_P32_TLS_DTPMOD => "AARCH64_P32_TLS_DTPMOD", + R_AARCH64_P32_TLS_DTPREL => "AARCH64_P32_TLS_DTPREL", + R_AARCH64_P32_TLS_TPREL => 
"AARCH64_P32_TLS_TPREL", + R_AARCH64_P32_TLSDESC => "AARCH64_P32_TLSDESC", + R_AARCH64_P32_IRELATIVE => "AARCH64_P32_IRELATIVE", + R_AARCH64_ABS64 => "AARCH64_ABS64", + R_AARCH64_ABS32 => "AARCH64_ABS32", + R_AARCH64_ABS16 => "AARCH64_ABS16", + R_AARCH64_PREL64 => "AARCH64_PREL64", + R_AARCH64_PREL32 => "AARCH64_PREL32", + R_AARCH64_PREL16 => "AARCH64_PREL16", + R_AARCH64_MOVW_UABS_G0 => "AARCH64_MOVW_UABS_G0", + R_AARCH64_MOVW_UABS_G0_NC => "AARCH64_MOVW_UABS_G0_NC", + R_AARCH64_MOVW_UABS_G1 => "AARCH64_MOVW_UABS_G1", + R_AARCH64_MOVW_UABS_G1_NC => "AARCH64_MOVW_UABS_G1_NC", + R_AARCH64_MOVW_UABS_G2 => "AARCH64_MOVW_UABS_G2", + R_AARCH64_MOVW_UABS_G2_NC => "AARCH64_MOVW_UABS_G2_NC", + R_AARCH64_MOVW_UABS_G3 => "AARCH64_MOVW_UABS_G3", + R_AARCH64_MOVW_SABS_G0 => "AARCH64_MOVW_SABS_G0", + R_AARCH64_MOVW_SABS_G1 => "AARCH64_MOVW_SABS_G1", + R_AARCH64_MOVW_SABS_G2 => "AARCH64_MOVW_SABS_G2", + R_AARCH64_LD_PREL_LO19 => "AARCH64_LD_PREL_LO19", + R_AARCH64_ADR_PREL_LO21 => "AARCH64_ADR_PREL_LO21", + R_AARCH64_ADR_PREL_PG_HI21 => "AARCH64_ADR_PREL_PG_HI21", + R_AARCH64_ADR_PREL_PG_HI21_NC => "AARCH64_ADR_PREL_PG_HI21_NC", + R_AARCH64_ADD_ABS_LO12_NC => "AARCH64_ADD_ABS_LO12_NC", + R_AARCH64_LDST8_ABS_LO12_NC => "AARCH64_LDST8_ABS_LO12_NC", + R_AARCH64_TSTBR14 => "AARCH64_TSTBR14", + R_AARCH64_CONDBR19 => "AARCH64_CONDBR19", + R_AARCH64_JUMP26 => "AARCH64_JUMP26", + R_AARCH64_CALL26 => "AARCH64_CALL26", + R_AARCH64_LDST16_ABS_LO12_NC => "AARCH64_LDST16_ABS_LO12_NC", + R_AARCH64_LDST32_ABS_LO12_NC => "AARCH64_LDST32_ABS_LO12_NC", + R_AARCH64_LDST64_ABS_LO12_NC => "AARCH64_LDST64_ABS_LO12_NC", + R_AARCH64_MOVW_PREL_G0 => "AARCH64_MOVW_PREL_G0", + R_AARCH64_MOVW_PREL_G0_NC => "AARCH64_MOVW_PREL_G0_NC", + R_AARCH64_MOVW_PREL_G1 => "AARCH64_MOVW_PREL_G1", + R_AARCH64_MOVW_PREL_G1_NC => "AARCH64_MOVW_PREL_G1_NC", + R_AARCH64_MOVW_PREL_G2 => "AARCH64_MOVW_PREL_G2", + R_AARCH64_MOVW_PREL_G2_NC => "AARCH64_MOVW_PREL_G2_NC", + R_AARCH64_MOVW_PREL_G3 => "AARCH64_MOVW_PREL_G3", + 
R_AARCH64_LDST128_ABS_LO12_NC => "AARCH64_LDST128_ABS_LO12_NC", + R_AARCH64_MOVW_GOTOFF_G0 => "AARCH64_MOVW_GOTOFF_G0", + R_AARCH64_MOVW_GOTOFF_G0_NC => "AARCH64_MOVW_GOTOFF_G0_NC", + R_AARCH64_MOVW_GOTOFF_G1 => "AARCH64_MOVW_GOTOFF_G1", + R_AARCH64_MOVW_GOTOFF_G1_NC => "AARCH64_MOVW_GOTOFF_G1_NC", + R_AARCH64_MOVW_GOTOFF_G2 => "AARCH64_MOVW_GOTOFF_G2", + R_AARCH64_MOVW_GOTOFF_G2_NC => "AARCH64_MOVW_GOTOFF_G2_NC", + R_AARCH64_MOVW_GOTOFF_G3 => "AARCH64_MOVW_GOTOFF_G3", + R_AARCH64_GOTREL64 => "AARCH64_GOTREL64", + R_AARCH64_GOTREL32 => "AARCH64_GOTREL32", + R_AARCH64_GOT_LD_PREL19 => "AARCH64_GOT_LD_PREL19", + R_AARCH64_LD64_GOTOFF_LO15 => "AARCH64_LD64_GOTOFF_LO15", + R_AARCH64_ADR_GOT_PAGE => "AARCH64_ADR_GOT_PAGE", + R_AARCH64_LD64_GOT_LO12_NC => "AARCH64_LD64_GOT_LO12_NC", + R_AARCH64_LD64_GOTPAGE_LO15 => "AARCH64_LD64_GOTPAGE_LO15", + R_AARCH64_TLSGD_ADR_PREL21 => "AARCH64_TLSGD_ADR_PREL21", + R_AARCH64_TLSGD_ADR_PAGE21 => "AARCH64_TLSGD_ADR_PAGE21", + R_AARCH64_TLSGD_ADD_LO12_NC => "AARCH64_TLSGD_ADD_LO12_NC", + R_AARCH64_TLSGD_MOVW_G1 => "AARCH64_TLSGD_MOVW_G1", + R_AARCH64_TLSGD_MOVW_G0_NC => "AARCH64_TLSGD_MOVW_G0_NC", + R_AARCH64_TLSLD_ADR_PREL21 => "AARCH64_TLSLD_ADR_PREL21", + R_AARCH64_TLSLD_ADR_PAGE21 => "AARCH64_TLSLD_ADR_PAGE21", + R_AARCH64_TLSLD_ADD_LO12_NC => "AARCH64_TLSLD_ADD_LO12_NC", + R_AARCH64_TLSLD_MOVW_G1 => "AARCH64_TLSLD_MOVW_G1", + R_AARCH64_TLSLD_MOVW_G0_NC => "AARCH64_TLSLD_MOVW_G0_NC", + R_AARCH64_TLSLD_LD_PREL19 => "AARCH64_TLSLD_LD_PREL19", + R_AARCH64_TLSLD_MOVW_DTPREL_G2 => "AARCH64_TLSLD_MOVW_DTPREL_G2", + R_AARCH64_TLSLD_MOVW_DTPREL_G1 => "AARCH64_TLSLD_MOVW_DTPREL_G1", + R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC => "AARCH64_TLSLD_MOVW_DTPREL_G1_NC", + R_AARCH64_TLSLD_MOVW_DTPREL_G0 => "AARCH64_TLSLD_MOVW_DTPREL_G0", + R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC => "AARCH64_TLSLD_MOVW_DTPREL_G0_NC", + R_AARCH64_TLSLD_ADD_DTPREL_HI12 => "AARCH64_TLSLD_ADD_DTPREL_HI12", + R_AARCH64_TLSLD_ADD_DTPREL_LO12 => "AARCH64_TLSLD_ADD_DTPREL_LO12", + 
R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC => "AARCH64_TLSLD_ADD_DTPREL_LO12_NC", + R_AARCH64_TLSLD_LDST8_DTPREL_LO12 => "AARCH64_TLSLD_LDST8_DTPREL_LO12", + R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC => "AARCH64_TLSLD_LDST8_DTPREL_LO12_NC", + R_AARCH64_TLSLD_LDST16_DTPREL_LO12 => "AARCH64_TLSLD_LDST16_DTPREL_LO12", + R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC => "AARCH64_TLSLD_LDST16_DTPREL_LO12_NC", + R_AARCH64_TLSLD_LDST32_DTPREL_LO12 => "AARCH64_TLSLD_LDST32_DTPREL_LO12", + R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC => "AARCH64_TLSLD_LDST32_DTPREL_LO12_NC", + R_AARCH64_TLSLD_LDST64_DTPREL_LO12 => "AARCH64_TLSLD_LDST64_DTPREL_LO12", + R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC => "AARCH64_TLSLD_LDST64_DTPREL_LO12_NC", + R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 => "AARCH64_TLSIE_MOVW_GOTTPREL_G1", + R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC => "AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC", + R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 => "AARCH64_TLSIE_ADR_GOTTPREL_PAGE21", + R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC => "AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC", + R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 => "AARCH64_TLSIE_LD_GOTTPREL_PREL19", + R_AARCH64_TLSLE_MOVW_TPREL_G2 => "AARCH64_TLSLE_MOVW_TPREL_G2", + R_AARCH64_TLSLE_MOVW_TPREL_G1 => "AARCH64_TLSLE_MOVW_TPREL_G1", + R_AARCH64_TLSLE_MOVW_TPREL_G1_NC => "AARCH64_TLSLE_MOVW_TPREL_G1_NC", + R_AARCH64_TLSLE_MOVW_TPREL_G0 => "AARCH64_TLSLE_MOVW_TPREL_G0", + R_AARCH64_TLSLE_MOVW_TPREL_G0_NC => "AARCH64_TLSLE_MOVW_TPREL_G0_NC", + R_AARCH64_TLSLE_ADD_TPREL_HI12 => "AARCH64_TLSLE_ADD_TPREL_HI12", + R_AARCH64_TLSLE_ADD_TPREL_LO12 => "AARCH64_TLSLE_ADD_TPREL_LO12", + R_AARCH64_TLSLE_ADD_TPREL_LO12_NC => "AARCH64_TLSLE_ADD_TPREL_LO12_NC", + R_AARCH64_TLSLE_LDST8_TPREL_LO12 => "AARCH64_TLSLE_LDST8_TPREL_LO12", + R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC => "AARCH64_TLSLE_LDST8_TPREL_LO12_NC", + R_AARCH64_TLSLE_LDST16_TPREL_LO12 => "AARCH64_TLSLE_LDST16_TPREL_LO12", + R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC => "AARCH64_TLSLE_LDST16_TPREL_LO12_NC", + R_AARCH64_TLSLE_LDST32_TPREL_LO12 => 
"AARCH64_TLSLE_LDST32_TPREL_LO12", + R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC => "AARCH64_TLSLE_LDST32_TPREL_LO12_NC", + R_AARCH64_TLSLE_LDST64_TPREL_LO12 => "AARCH64_TLSLE_LDST64_TPREL_LO12", + R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC => "AARCH64_TLSLE_LDST64_TPREL_LO12_NC", + R_AARCH64_TLSDESC_LD_PREL19 => "AARCH64_TLSDESC_LD_PREL19", + R_AARCH64_TLSDESC_ADR_PREL21 => "AARCH64_TLSDESC_ADR_PREL21", + R_AARCH64_TLSDESC_ADR_PAGE21 => "AARCH64_TLSDESC_ADR_PAGE21", + R_AARCH64_TLSDESC_LD64_LO12 => "AARCH64_TLSDESC_LD64_LO12", + R_AARCH64_TLSDESC_ADD_LO12 => "AARCH64_TLSDESC_ADD_LO12", + R_AARCH64_TLSDESC_OFF_G1 => "AARCH64_TLSDESC_OFF_G1", + R_AARCH64_TLSDESC_OFF_G0_NC => "AARCH64_TLSDESC_OFF_G0_NC", + R_AARCH64_TLSDESC_LDR => "AARCH64_TLSDESC_LDR", + R_AARCH64_TLSDESC_ADD => "AARCH64_TLSDESC_ADD", + R_AARCH64_TLSDESC_CALL => "AARCH64_TLSDESC_CALL", + R_AARCH64_TLSLE_LDST128_TPREL_LO12 => "AARCH64_TLSLE_LDST128_TPREL_LO12", + R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC => "AARCH64_TLSLE_LDST128_TPREL_LO12_NC", + R_AARCH64_TLSLD_LDST128_DTPREL_LO12 => "AARCH64_TLSLD_LDST128_DTPREL_LO12", + R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC => "AARCH64_TLSLD_LDST128_DTPREL_LO12_NC", + R_AARCH64_COPY => "AARCH64_COPY", + R_AARCH64_GLOB_DAT => "AARCH64_GLOB_DAT", + R_AARCH64_JUMP_SLOT => "AARCH64_JUMP_SLOT", + R_AARCH64_RELATIVE => "AARCH64_RELATIVE", + R_AARCH64_TLS_DTPMOD => "AARCH64_TLS_DTPMOD", + R_AARCH64_TLS_DTPREL => "AARCH64_TLS_DTPREL", + R_AARCH64_TLS_TPREL => "AARCH64_TLS_TPREL", + R_AARCH64_TLSDESC => "AARCH64_TLSDESC", + R_AARCH64_IRELATIVE => "AARCH64_IRELATIVE", + _ => "R_UNKNOWN_AARCH64", + }}, + // arm + EM_ARM => { match typ { + R_ARM_PC24 => "ARM_PC24", + R_ARM_ABS32 => "ARM_ABS32", + R_ARM_REL32 => "ARM_REL32", + R_ARM_PC13 => "ARM_PC13", + R_ARM_ABS16 => "ARM_ABS16", + R_ARM_ABS12 => "ARM_ABS12", + R_ARM_THM_ABS5 => "ARM_THM_ABS5", + R_ARM_ABS8 => "ARM_ABS8", + R_ARM_SBREL32 => "ARM_SBREL32", + R_ARM_THM_PC22 => "ARM_THM_PC22", + R_ARM_THM_PC8 => "ARM_THM_PC8", + 
R_ARM_AMP_VCALL9 => "ARM_AMP_VCALL9", + R_ARM_TLS_DESC => "ARM_TLS_DESC", + R_ARM_THM_SWI8 => "ARM_THM_SWI8", + R_ARM_XPC25 => "ARM_XPC25", + R_ARM_THM_XPC22 => "ARM_THM_XPC22", + R_ARM_TLS_DTPMOD32 => "ARM_TLS_DTPMOD32", + R_ARM_TLS_DTPOFF32 => "ARM_TLS_DTPOFF32", + R_ARM_TLS_TPOFF32 => "ARM_TLS_TPOFF32", + R_ARM_COPY => "ARM_COPY", + R_ARM_GLOB_DAT => "ARM_GLOB_DAT", + R_ARM_JUMP_SLOT => "ARM_JUMP_SLOT", + R_ARM_RELATIVE => "ARM_RELATIVE", + R_ARM_GOTOFF => "ARM_GOTOFF", + R_ARM_GOTPC => "ARM_GOTPC", + R_ARM_GOT32 => "ARM_GOT32", + R_ARM_PLT32 => "ARM_PLT32", + R_ARM_CALL => "ARM_CALL", + R_ARM_JUMP24 => "ARM_JUMP24", + R_ARM_THM_JUMP24 => "ARM_THM_JUMP24", + R_ARM_BASE_ABS => "ARM_BASE_ABS", + R_ARM_ALU_PCREL_7_0 => "ARM_ALU_PCREL_7_0", + R_ARM_ALU_PCREL_15_8 => "ARM_ALU_PCREL_15_8", + R_ARM_ALU_PCREL_23_15 => "ARM_ALU_PCREL_23_15", + R_ARM_LDR_SBREL_11_0 => "ARM_LDR_SBREL_11_0", + R_ARM_ALU_SBREL_19_12 => "ARM_ALU_SBREL_19_12", + R_ARM_ALU_SBREL_27_20 => "ARM_ALU_SBREL_27_20", + R_ARM_TARGET1 => "ARM_TARGET1", + R_ARM_SBREL31 => "ARM_SBREL31", + R_ARM_V4BX => "ARM_V4BX", + R_ARM_TARGET2 => "ARM_TARGET2", + R_ARM_PREL31 => "ARM_PREL31", + R_ARM_MOVW_ABS_NC => "ARM_MOVW_ABS_NC", + R_ARM_MOVT_ABS => "ARM_MOVT_ABS", + R_ARM_MOVW_PREL_NC => "ARM_MOVW_PREL_NC", + R_ARM_MOVT_PREL => "ARM_MOVT_PREL", + R_ARM_THM_MOVW_ABS_NC => "ARM_THM_MOVW_ABS_NC", + R_ARM_THM_MOVT_ABS => "ARM_THM_MOVT_ABS", + R_ARM_THM_MOVW_PREL_NC => "ARM_THM_MOVW_PREL_NC", + R_ARM_THM_MOVT_PREL => "ARM_THM_MOVT_PREL", + R_ARM_THM_JUMP19 => "ARM_THM_JUMP19", + R_ARM_THM_JUMP6 => "ARM_THM_JUMP6", + R_ARM_THM_ALU_PREL_11_0 => "ARM_THM_ALU_PREL_11_0", + R_ARM_THM_PC12 => "ARM_THM_PC12", + R_ARM_ABS32_NOI => "ARM_ABS32_NOI", + R_ARM_REL32_NOI => "ARM_REL32_NOI", + R_ARM_ALU_PC_G0_NC => "ARM_ALU_PC_G0_NC", + R_ARM_ALU_PC_G0 => "ARM_ALU_PC_G0", + R_ARM_ALU_PC_G1_NC => "ARM_ALU_PC_G1_NC", + R_ARM_ALU_PC_G1 => "ARM_ALU_PC_G1", + R_ARM_ALU_PC_G2 => "ARM_ALU_PC_G2", + R_ARM_LDR_PC_G1 => "ARM_LDR_PC_G1", + 
R_ARM_LDR_PC_G2 => "ARM_LDR_PC_G2", + R_ARM_LDRS_PC_G0 => "ARM_LDRS_PC_G0", + R_ARM_LDRS_PC_G1 => "ARM_LDRS_PC_G1", + R_ARM_LDRS_PC_G2 => "ARM_LDRS_PC_G2", + R_ARM_LDC_PC_G0 => "ARM_LDC_PC_G0", + R_ARM_LDC_PC_G1 => "ARM_LDC_PC_G1", + R_ARM_LDC_PC_G2 => "ARM_LDC_PC_G2", + R_ARM_ALU_SB_G0_NC => "ARM_ALU_SB_G0_NC", + R_ARM_ALU_SB_G0 => "ARM_ALU_SB_G0", + R_ARM_ALU_SB_G1_NC => "ARM_ALU_SB_G1_NC", + R_ARM_ALU_SB_G1 => "ARM_ALU_SB_G1", + R_ARM_ALU_SB_G2 => "ARM_ALU_SB_G2", + R_ARM_LDR_SB_G0 => "ARM_LDR_SB_G0", + R_ARM_LDR_SB_G1 => "ARM_LDR_SB_G1", + R_ARM_LDR_SB_G2 => "ARM_LDR_SB_G2", + R_ARM_LDRS_SB_G0 => "ARM_LDRS_SB_G0", + R_ARM_LDRS_SB_G1 => "ARM_LDRS_SB_G1", + R_ARM_LDRS_SB_G2 => "ARM_LDRS_SB_G2", + R_ARM_LDC_SB_G0 => "ARM_LDC_SB_G0", + R_ARM_LDC_SB_G1 => "ARM_LDC_SB_G1", + R_ARM_LDC_SB_G2 => "ARM_LDC_SB_G2", + R_ARM_MOVW_BREL_NC => "ARM_MOVW_BREL_NC", + R_ARM_MOVT_BREL => "ARM_MOVT_BREL", + R_ARM_MOVW_BREL => "ARM_MOVW_BREL", + R_ARM_THM_MOVW_BREL_NC => "ARM_THM_MOVW_BREL_NC", + R_ARM_THM_MOVT_BREL => "ARM_THM_MOVT_BREL", + R_ARM_THM_MOVW_BREL => "ARM_THM_MOVW_BREL", + R_ARM_TLS_GOTDESC => "ARM_TLS_GOTDESC", + R_ARM_TLS_CALL => "ARM_TLS_CALL", + R_ARM_TLS_DESCSEQ => "ARM_TLS_DESCSEQ", + R_ARM_THM_TLS_CALL => "ARM_THM_TLS_CALL", + R_ARM_PLT32_ABS => "ARM_PLT32_ABS", + R_ARM_GOT_ABS => "ARM_GOT_ABS", + R_ARM_GOT_PREL => "ARM_GOT_PREL", + R_ARM_GOT_BREL12 => "ARM_GOT_BREL12", + R_ARM_GOTOFF12 => "ARM_GOTOFF12", + R_ARM_GOTRELAX => "ARM_GOTRELAX", + R_ARM_GNU_VTENTRY => "ARM_GNU_VTENTRY", + R_ARM_GNU_VTINHERIT => "ARM_GNU_VTINHERIT", + R_ARM_THM_PC11 => "ARM_THM_PC11", + R_ARM_THM_PC9 => "ARM_THM_PC9", + R_ARM_TLS_GD32 => "ARM_TLS_GD32", + R_ARM_TLS_LDM32 => "ARM_TLS_LDM32", + R_ARM_TLS_LDO32 => "ARM_TLS_LDO32", + R_ARM_TLS_IE32 => "ARM_TLS_IE32", + R_ARM_TLS_LE32 => "ARM_TLS_LE32", + R_ARM_TLS_LDO12 => "ARM_TLS_LDO12", + R_ARM_TLS_LE12 => "ARM_TLS_LE12", + R_ARM_TLS_IE12GP => "ARM_TLS_IE12GP", + R_ARM_ME_TOO => "ARM_ME_TOO", + R_ARM_THM_TLS_DESCSEQ16 => 
"ARM_THM_TLS_DESCSEQ16", + R_ARM_THM_TLS_DESCSEQ32 => "ARM_THM_TLS_DESCSEQ32", + R_ARM_THM_GOT_BREL12 => "ARM_THM_GOT_BREL12", + R_ARM_IRELATIVE => "ARM_IRELATIVE", + R_ARM_RXPC25 => "ARM_RXPC25", + R_ARM_RSBREL32 => "ARM_RSBREL32", + R_ARM_THM_RPC22 => "ARM_THM_RPC22", + R_ARM_RREL32 => "ARM_RREL32", + R_ARM_RABS22 => "ARM_RABS22", + R_ARM_RPC24 => "ARM_RPC24", + R_ARM_RBASE => "ARM_RBASE", + _ => "R_UNKNOWN_ARM", + }}, + // MIPS + EM_MIPS | EM_MIPS_RS3_LE | EM_MIPS_X => { match typ { + R_MIPS_NONE => "R_MIPS_NONE", + R_MIPS_16 => "R_MIPS_16", + R_MIPS_32 => "R_MIPS_32", + R_MIPS_REL32 => "R_MIPS_REL32", + R_MIPS_26 => "R_MIPS_26", + R_MIPS_HI16 => "R_MIPS_HI16", + R_MIPS_LO16 => "R_MIPS_LO16", + R_MIPS_GPREL16 => "R_MIPS_GPREL16", + R_MIPS_LITERAL => "R_MIPS_LITERAL", + R_MIPS_GOT16 => "R_MIPS_GOT16", + R_MIPS_PC16 => "R_MIPS_PC16", + R_MIPS_CALL16 => "R_MIPS_CALL16", + R_MIPS_GPREL32 => "R_MIPS_GPREL32", + R_MIPS_SHIFT5 => "R_MIPS_SHIFT5", + R_MIPS_SHIFT6 => "R_MIPS_SHIFT6", + R_MIPS_64 => "R_MIPS_64", + R_MIPS_GOT_DISP => "R_MIPS_GOT_DISP", + R_MIPS_GOT_PAGE => "R_MIPS_GOT_PAGE", + R_MIPS_GOT_OFST => "R_MIPS_GOT_OFST", + R_MIPS_GOT_HI16 => "R_MIPS_GOT_HI16", + R_MIPS_GOT_LO16 => "R_MIPS_GOT_LO16", + R_MIPS_SUB => "R_MIPS_SUB", + R_MIPS_INSERT_A => "R_MIPS_INSERT_A", + R_MIPS_INSERT_B => "R_MIPS_INSERT_B", + R_MIPS_DELETE => "R_MIPS_DELETE", + R_MIPS_HIGHER => "R_MIPS_HIGHER", + R_MIPS_HIGHEST => "R_MIPS_HIGHEST", + R_MIPS_CALL_HI16 => "R_MIPS_CALL_HI16", + R_MIPS_CALL_LO16 => "R_MIPS_CALL_LO16", + R_MIPS_SCN_DISP => "R_MIPS_SCN_DISP", + R_MIPS_REL16 => "R_MIPS_REL16", + R_MIPS_ADD_IMMEDIATE => "R_MIPS_ADD_IMMEDIATE", + R_MIPS_PJUMP => "R_MIPS_PJUMP", + R_MIPS_RELGOT => "R_MIPS_RELGOT", + R_MIPS_JALR => "R_MIPS_JALR", + R_MIPS_TLS_DTPMOD32 => "R_MIPS_TLS_DTPMOD32", + R_MIPS_TLS_DTPREL32 => "R_MIPS_TLS_DTPREL32", + R_MIPS_TLS_DTPMOD64 => "R_MIPS_TLS_DTPMOD64", + R_MIPS_TLS_DTPREL64 => "R_MIPS_TLS_DTPREL64", + R_MIPS_TLS_GD => "R_MIPS_TLS_GD", + R_MIPS_TLS_LDM => 
"R_MIPS_TLS_LDM", + R_MIPS_TLS_DTPREL_HI16 => "R_MIPS_TLS_DTPREL_HI16", + R_MIPS_TLS_DTPREL_LO16 => "R_MIPS_TLS_DTPREL_LO16", + R_MIPS_TLS_GOTTPREL => "R_MIPS_TLS_GOTTPREL", + R_MIPS_TLS_TPREL32 => "R_MIPS_TLS_TPREL32", + R_MIPS_TLS_TPREL64 => "R_MIPS_TLS_TPREL64", + R_MIPS_TLS_TPREL_HI16 => "R_MIPS_TLS_TPREL_HI16", + R_MIPS_TLS_TPREL_LO16 => "R_MIPS_TLS_TPREL_LO16", + R_MIPS_GLOB_DAT => "R_MIPS_GLOB_DAT", + R_MIPS_COPY => "R_MIPS_COPY", + R_MIPS_JUMP_SLOT => "R_MIPS_JUMP_SLOT", + _ => "R_UNKNOWN_MIPS", + }}, + // RISC-V + EM_RISCV => { match typ { + R_RISCV_NONE => "R_RISCV_NONE", + R_RISCV_32 => "R_RISCV_32", + R_RISCV_64 => "R_RISCV_64", + R_RISCV_RELATIVE => "R_RISCV_RELATIVE", + R_RISCV_COPY => "R_RISCV_COPY", + R_RISCV_JUMP_SLOT => "R_RISCV_JUMP_SLOT", + R_RISCV_TLS_DTPMOD32 => "R_RISCV_TLS_DTPMOD32", + R_RISCV_TLS_DTPMOD64 => "R_RISCV_TLS_DTPMOD64", + R_RISCV_TLS_DTPREL32 => "R_RISCV_TLS_DTPREL32", + R_RISCV_TLS_DTPREL64 => "R_RISCV_TLS_DTPREL64", + R_RISCV_TLS_TPREL32 => "R_RISCV_TLS_TPREL32", + R_RISCV_TLS_TPREL64 => "R_RISCV_TLS_TPREL64", + R_RISCV_BRANCH => "R_RISCV_BRANCH", + R_RISCV_JAL => "R_RISCV_JAL", + R_RISCV_CALL => "R_RISCV_CALL", + R_RISCV_CALL_PLT => "R_RISCV_CALL_PLT", + R_RISCV_GOT_HI20 => "R_RISCV_GOT_HI20", + R_RISCV_TLS_GOT_HI20 => "R_RISCV_TLS_GOT_HI20", + R_RISCV_TLS_GD_HI20 => "R_RISCV_TLS_GD_HI20", + R_RISCV_PCREL_HI20 => "R_RISCV_PCREL_HI20", + R_RISCV_PCREL_LO12_I => "R_RISCV_PCREL_LO12_I", + R_RISCV_PCREL_LO12_S => "R_RISCV_PCREL_LO12_S", + R_RISCV_HI20 => "R_RISCV_HI20", + R_RISCV_LO12_I => "R_RISCV_LO12_I", + R_RISCV_LO12_S => "R_RISCV_LO12_S", + R_RISCV_TPREL_HI20 => "R_RISCV_TPREL_HI20", + R_RISCV_TPREL_LO12_I => "R_RISCV_TPREL_LO12_I", + R_RISCV_TPREL_LO12_S => "R_RISCV_TPREL_LO12_S", + R_RISCV_TPREL_ADD => "R_RISCV_TPREL_ADD", + R_RISCV_ADD8 => "R_RISCV_ADD8", + R_RISCV_ADD16 => "R_RISCV_ADD16", + R_RISCV_ADD32 => "R_RISCV_ADD32", + R_RISCV_ADD64 => "R_RISCV_ADD64", + R_RISCV_SUB8 => "R_RISCV_SUB8", + R_RISCV_SUB16 => 
"R_RISCV_SUB16", + R_RISCV_SUB32 => "R_RISCV_SUB32", + R_RISCV_SUB64 => "R_RISCV_SUB64", + R_RISCV_GNU_VTINHERIT => "R_RISCV_GNU_VTINHERIT", + R_RISCV_GNU_VTENTRY => "R_RISCV_GNU_VTENTRY", + R_RISCV_ALIGN => "R_RISCV_ALIGN", + R_RISCV_RVC_BRANCH => "R_RISCV_RVC_BRANCH", + R_RISCV_RVC_JUMP => "R_RISCV_RVC_JUMP", + R_RISCV_RVC_LUI => "R_RISCV_RVC_LUI", + R_RISCV_GPREL_I => "R_RISCV_GPREL_I", + R_RISCV_GPREL_S => "R_RISCV_GPREL_S", + R_RISCV_TPREL_I => "R_RISCV_TPREL_I", + R_RISCV_TPREL_S => "R_RISCV_TPREL_S", + R_RISCV_RELAX => "R_RISCV_RELAX", + R_RISCV_SUB6 => "R_RISCV_SUB6", + R_RISCV_SET6 => "R_RISCV_SET6", + R_RISCV_SET8 => "R_RISCV_SET8", + R_RISCV_SET16 => "R_RISCV_SET16", + R_RISCV_SET32 => "R_RISCV_SET32", + _ => "R_UNKNOWN_RISCV", + }}, + _ => "R_UNKNOWN", + } +} diff --git a/third_party/rust/goblin/src/elf/dynamic.rs b/third_party/rust/goblin/src/elf/dynamic.rs new file mode 100644 index 0000000000..c410469c6a --- /dev/null +++ b/third_party/rust/goblin/src/elf/dynamic.rs @@ -0,0 +1,807 @@ +macro_rules! elf_dyn { + ($size:ty) => { + // XXX: Do not import scroll traits here. 
+ // See: https://github.com/rust-lang/rust/issues/65090#issuecomment-538668155 + + #[repr(C)] + #[derive(Copy, Clone, PartialEq, Default)] + #[cfg_attr( + feature = "alloc", + derive(scroll::Pread, scroll::Pwrite, scroll::SizeWith) + )] + /// An entry in the dynamic array + pub struct Dyn { + /// Dynamic entry type + pub d_tag: $size, + /// Integer value + pub d_val: $size, + } + + use plain; + unsafe impl plain::Plain for Dyn {} + }; +} + +// TODO: figure out what's the best, most friendly + safe API choice here - u32s or u64s +// remember that DT_TAG is "pointer sized"/used as address sometimes Original rationale: I +// decided to use u64 instead of u32 due to pattern matching use case seems safer to cast the +// elf32's d_tag from u32 -> u64 at runtime instead of casting the elf64's d_tag from u64 -> +// u32 at runtime + +/// Marks end of dynamic section +pub const DT_NULL: u64 = 0; +/// Name of needed library +pub const DT_NEEDED: u64 = 1; +/// Size in bytes of PLT relocs +pub const DT_PLTRELSZ: u64 = 2; +/// Processor defined value +pub const DT_PLTGOT: u64 = 3; +/// Address of symbol hash table +pub const DT_HASH: u64 = 4; +/// Address of string table +pub const DT_STRTAB: u64 = 5; +/// Address of symbol table +pub const DT_SYMTAB: u64 = 6; +/// Address of Rela relocs +pub const DT_RELA: u64 = 7; +/// Total size of Rela relocs +pub const DT_RELASZ: u64 = 8; +/// Size of one Rela reloc +pub const DT_RELAENT: u64 = 9; +/// Size of string table +pub const DT_STRSZ: u64 = 10; +/// Size of one symbol table entry +pub const DT_SYMENT: u64 = 11; +/// Address of init function +pub const DT_INIT: u64 = 12; +/// Address of termination function +pub const DT_FINI: u64 = 13; +/// Name of shared object +pub const DT_SONAME: u64 = 14; +/// Library search path (deprecated) +pub const DT_RPATH: u64 = 15; +/// Start symbol search here +pub const DT_SYMBOLIC: u64 = 16; +/// Address of Rel relocs +pub const DT_REL: u64 = 17; +/// Total size of Rel relocs +pub const DT_RELSZ: 
u64 = 18; +/// Size of one Rel reloc +pub const DT_RELENT: u64 = 19; +/// Type of reloc in PLT +pub const DT_PLTREL: u64 = 20; +/// For debugging; unspecified +pub const DT_DEBUG: u64 = 21; +/// Reloc might modify .text +pub const DT_TEXTREL: u64 = 22; +/// Address of PLT relocs +pub const DT_JMPREL: u64 = 23; +/// Process relocations of object +pub const DT_BIND_NOW: u64 = 24; +/// Array with addresses of init fct +pub const DT_INIT_ARRAY: u64 = 25; +/// Array with addresses of fini fct +pub const DT_FINI_ARRAY: u64 = 26; +/// Size in bytes of DT_INIT_ARRAY +pub const DT_INIT_ARRAYSZ: u64 = 27; +/// Size in bytes of DT_FINI_ARRAY +pub const DT_FINI_ARRAYSZ: u64 = 28; +/// Library search path +pub const DT_RUNPATH: u64 = 29; +/// Flags for the object being loaded +pub const DT_FLAGS: u64 = 30; +/// Start of encoded range +pub const DT_ENCODING: u64 = 32; +/// Array with addresses of preinit fct +pub const DT_PREINIT_ARRAY: u64 = 32; +/// size in bytes of DT_PREINIT_ARRAY +pub const DT_PREINIT_ARRAYSZ: u64 = 33; +/// Number used +pub const DT_NUM: u64 = 34; +/// Start of OS-specific +pub const DT_LOOS: u64 = 0x6000_000d; +/// End of OS-specific +pub const DT_HIOS: u64 = 0x6fff_f000; +/// Start of processor-specific +pub const DT_LOPROC: u64 = 0x7000_0000; +/// End of processor-specific +pub const DT_HIPROC: u64 = 0x7fff_ffff; +// Most used by any processor +// pub const DT_PROCNUM: u64 = DT_MIPS_NUM; + +/// DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the +/// Dyn.d_un.d_ptr field of the Elf*_Dyn structure. +/// +/// If any adjustment is made to the ELF object after it has been +/// built these entries will need to be adjusted. 
+pub const DT_ADDRRNGLO: u64 = 0x6fff_fe00; +/// GNU-style hash table +pub const DT_GNU_HASH: u64 = 0x6fff_fef5; +/// +pub const DT_TLSDESC_PLT: u64 = 0x6fff_fef6; +/// +pub const DT_TLSDESC_GOT: u64 = 0x6fff_fef7; +/// Start of conflict section +pub const DT_GNU_CONFLICT: u64 = 0x6fff_fef8; +/// Library list +pub const DT_GNU_LIBLIST: u64 = 0x6fff_fef9; +/// Configuration information +pub const DT_CONFIG: u64 = 0x6fff_fefa; +/// Dependency auditing +pub const DT_DEPAUDIT: u64 = 0x6fff_fefb; +/// Object auditing +pub const DT_AUDIT: u64 = 0x6fff_fefc; +/// PLT padding +pub const DT_PLTPAD: u64 = 0x6fff_fefd; +/// Move table +pub const DT_MOVETAB: u64 = 0x6fff_fefe; +/// Syminfo table +pub const DT_SYMINFO: u64 = 0x6fff_feff; +/// +pub const DT_ADDRRNGHI: u64 = 0x6fff_feff; + +//DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) /* Reverse order! */ +pub const DT_ADDRNUM: u64 = 11; + +/// The versioning entry types. The next are defined as part of the GNU extension +pub const DT_VERSYM: u64 = 0x6fff_fff0; +pub const DT_RELACOUNT: u64 = 0x6fff_fff9; +pub const DT_RELCOUNT: u64 = 0x6fff_fffa; +/// State flags, see DF_1_* below +pub const DT_FLAGS_1: u64 = 0x6fff_fffb; +/// Address of version definition table +pub const DT_VERDEF: u64 = 0x6fff_fffc; +/// Number of version definitions +pub const DT_VERDEFNUM: u64 = 0x6fff_fffd; +/// Address of table with needed versions +pub const DT_VERNEED: u64 = 0x6fff_fffe; +/// Number of needed versions +pub const DT_VERNEEDNUM: u64 = 0x6fff_ffff; + +/// Converts a tag to its string representation. 
+#[inline] +pub fn tag_to_str(tag: u64) -> &'static str { + match tag { + DT_NULL => "DT_NULL", + DT_NEEDED => "DT_NEEDED", + DT_PLTRELSZ => "DT_PLTRELSZ", + DT_PLTGOT => "DT_PLTGOT", + DT_HASH => "DT_HASH", + DT_STRTAB => "DT_STRTAB", + DT_SYMTAB => "DT_SYMTAB", + DT_RELA => "DT_RELA", + DT_RELASZ => "DT_RELASZ", + DT_RELAENT => "DT_RELAENT", + DT_STRSZ => "DT_STRSZ", + DT_SYMENT => "DT_SYMENT", + DT_INIT => "DT_INIT", + DT_FINI => "DT_FINI", + DT_SONAME => "DT_SONAME", + DT_RPATH => "DT_RPATH", + DT_SYMBOLIC => "DT_SYMBOLIC", + DT_REL => "DT_REL", + DT_RELSZ => "DT_RELSZ", + DT_RELENT => "DT_RELENT", + DT_PLTREL => "DT_PLTREL", + DT_DEBUG => "DT_DEBUG", + DT_TEXTREL => "DT_TEXTREL", + DT_JMPREL => "DT_JMPREL", + DT_BIND_NOW => "DT_BIND_NOW", + DT_INIT_ARRAY => "DT_INIT_ARRAY", + DT_FINI_ARRAY => "DT_FINI_ARRAY", + DT_INIT_ARRAYSZ => "DT_INIT_ARRAYSZ", + DT_FINI_ARRAYSZ => "DT_FINI_ARRAYSZ", + DT_RUNPATH => "DT_RUNPATH", + DT_FLAGS => "DT_FLAGS", + DT_PREINIT_ARRAY => "DT_PREINIT_ARRAY", + DT_PREINIT_ARRAYSZ => "DT_PREINIT_ARRAYSZ", + DT_NUM => "DT_NUM", + DT_LOOS => "DT_LOOS", + DT_HIOS => "DT_HIOS", + DT_LOPROC => "DT_LOPROC", + DT_HIPROC => "DT_HIPROC", + DT_VERSYM => "DT_VERSYM", + DT_RELACOUNT => "DT_RELACOUNT", + DT_RELCOUNT => "DT_RELCOUNT", + DT_GNU_HASH => "DT_GNU_HASH", + DT_VERDEF => "DT_VERDEF", + DT_VERDEFNUM => "DT_VERDEFNUM", + DT_VERNEED => "DT_VERNEED", + DT_VERNEEDNUM => "DT_VERNEEDNUM", + DT_FLAGS_1 => "DT_FLAGS_1", + _ => "UNKNOWN_TAG", + } +} + +// Values of `d_un.d_val` in the DT_FLAGS entry +/// Object may use DF_ORIGIN. +pub const DF_ORIGIN: u64 = 0x0000_0001; +/// Symbol resolutions starts here. +pub const DF_SYMBOLIC: u64 = 0x0000_0002; +/// Object contains text relocations. +pub const DF_TEXTREL: u64 = 0x0000_0004; +/// No lazy binding for this object. +pub const DF_BIND_NOW: u64 = 0x0000_0008; +/// Module uses the static TLS model. 
+pub const DF_STATIC_TLS: u64 = 0x0000_0010; + +pub fn df_tag_to_str(tag: u64) -> &'static str { + match tag { + DF_ORIGIN => "DF_ORIGIN", + DF_SYMBOLIC => "DF_SYMBOLIC", + DF_TEXTREL => "DF_TEXTREL", + DF_BIND_NOW => "DF_BIND_NOW", + DF_STATIC_TLS => "DF_STATIC_TLS", + _ => "UNKNOWN_TAG", + } +} + +/// === State flags === +/// selectable in the `d_un.d_val` element of the DT_FLAGS_1 entry in the dynamic section. +/// +/// Set RTLD_NOW for this object. +pub const DF_1_NOW: u64 = 0x0000_0001; +/// Set RTLD_GLOBAL for this object. +pub const DF_1_GLOBAL: u64 = 0x0000_0002; +/// Set RTLD_GROUP for this object. +pub const DF_1_GROUP: u64 = 0x0000_0004; +/// Set RTLD_NODELETE for this object. +pub const DF_1_NODELETE: u64 = 0x0000_0008; +/// Trigger filtee loading at runtime. +pub const DF_1_LOADFLTR: u64 = 0x0000_0010; +/// Set RTLD_INITFIRST for this object. +pub const DF_1_INITFIRST: u64 = 0x0000_0020; +/// Set RTLD_NOOPEN for this object. +pub const DF_1_NOOPEN: u64 = 0x0000_0040; +/// $ORIGIN must be handled. +pub const DF_1_ORIGIN: u64 = 0x0000_0080; +/// Direct binding enabled. +pub const DF_1_DIRECT: u64 = 0x0000_0100; +pub const DF_1_TRANS: u64 = 0x0000_0200; +/// Object is used to interpose. +pub const DF_1_INTERPOSE: u64 = 0x0000_0400; +/// Ignore default lib search path. +pub const DF_1_NODEFLIB: u64 = 0x0000_0800; +/// Object can't be dldump'ed. +pub const DF_1_NODUMP: u64 = 0x0000_1000; +/// Configuration alternative created. +pub const DF_1_CONFALT: u64 = 0x0000_2000; +/// Filtee terminates filters search. +pub const DF_1_ENDFILTEE: u64 = 0x0000_4000; +/// Disp reloc applied at build time. +pub const DF_1_DISPRELDNE: u64 = 0x0000_8000; +/// Disp reloc applied at run-time. +pub const DF_1_DISPRELPND: u64 = 0x0001_0000; +/// Object has no-direct binding. 
+pub const DF_1_NODIRECT: u64 = 0x0002_0000; +pub const DF_1_IGNMULDEF: u64 = 0x0004_0000; +pub const DF_1_NOKSYMS: u64 = 0x0008_0000; +pub const DF_1_NOHDR: u64 = 0x0010_0000; +/// Object is modified after built. +pub const DF_1_EDITED: u64 = 0x0020_0000; +pub const DF_1_NORELOC: u64 = 0x0040_0000; +/// Object has individual interposers. +pub const DF_1_SYMINTPOSE: u64 = 0x0080_0000; +/// Global auditing required. +pub const DF_1_GLOBAUDIT: u64 = 0x0100_0000; +/// Singleton dyn are used. +pub const DF_1_SINGLETON: u64 = 0x0200_0000; +/// Object is a Position Independent Executable (PIE). +pub const DF_1_PIE: u64 = 0x0800_0000; + +pub fn df_1_tag_to_str(tag: u64) -> &'static str { + match tag { + DF_1_NOW => "DF_1_NOW", + DF_1_GLOBAL => "DF_1_GLOBAL", + DF_1_GROUP => "DF_1_GROUP", + DF_1_NODELETE => "DF_1_NODELETE", + DF_1_LOADFLTR => "DF_1_LOADFLTR", + DF_1_INITFIRST => "DF_1_INITFIRST", + DF_1_NOOPEN => "DF_1_NOOPEN", + DF_1_ORIGIN => "DF_1_ORIGIN", + DF_1_DIRECT => "DF_1_DIRECT", + DF_1_TRANS => "DF_1_TRANS", + DF_1_INTERPOSE => "DF_1_INTERPOSE", + DF_1_NODEFLIB => "DF_1_NODEFLIB", + DF_1_NODUMP => "DF_1_NODUMP", + DF_1_CONFALT => "DF_1_CONFALT", + DF_1_ENDFILTEE => "DF_1_ENDFILTEE", + DF_1_DISPRELDNE => "DF_1_DISPRELDNE", + DF_1_DISPRELPND => "DF_1_DISPRELPND", + DF_1_NODIRECT => "DF_1_NODIRECT", + DF_1_IGNMULDEF => "DF_1_IGNMULDEF", + DF_1_NOKSYMS => "DF_1_NOKSYMS", + DF_1_NOHDR => "DF_1_NOHDR", + DF_1_EDITED => "DF_1_EDITED", + DF_1_NORELOC => "DF_1_NORELOC", + DF_1_SYMINTPOSE => "DF_1_SYMINTPOSE", + DF_1_GLOBAUDIT => "DF_1_GLOBAUDIT", + DF_1_SINGLETON => "DF_1_SINGLETON", + DF_1_PIE => "DF_1_PIE", + _ => "UNKNOWN_TAG", + } +} + +if_alloc! 
{ + use core::fmt; + use scroll::ctx; + use core::result; + use crate::container::{Ctx, Container}; + use crate::strtab::Strtab; + use alloc::vec::Vec; + + #[derive(Default, PartialEq, Clone)] + pub struct Dyn { + pub d_tag: u64, + pub d_val: u64, + } + + impl Dyn { + #[inline] + pub fn size(container: Container) -> usize { + use scroll::ctx::SizeWith; + Self::size_with(&Ctx::from(container)) + } + } + + impl fmt::Debug for Dyn { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Dyn") + .field("d_tag", &tag_to_str(self.d_tag)) + .field("d_val", &format_args!("0x{:x}", self.d_val)) + .finish() + } + } + + impl ctx::SizeWith<Ctx> for Dyn { + fn size_with(&Ctx { container, .. }: &Ctx) -> usize { + match container { + Container::Little => { + dyn32::SIZEOF_DYN + }, + Container::Big => { + dyn64::SIZEOF_DYN + }, + } + } + } + + impl<'a> ctx::TryFromCtx<'a, Ctx> for Dyn { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], Ctx { container, le}: Ctx) -> result::Result<(Self, usize), Self::Error> { + use scroll::Pread; + let dynamic = match container { + Container::Little => { + (bytes.pread_with::<dyn32::Dyn>(0, le)?.into(), dyn32::SIZEOF_DYN) + }, + Container::Big => { + (bytes.pread_with::<dyn64::Dyn>(0, le)?.into(), dyn64::SIZEOF_DYN) + } + }; + Ok(dynamic) + } + } + + impl ctx::TryIntoCtx<Ctx> for Dyn { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], Ctx { container, le}: Ctx) -> result::Result<usize, Self::Error> { + use scroll::Pwrite; + match container { + Container::Little => { + let dynamic: dyn32::Dyn = self.into(); + Ok(bytes.pwrite_with(dynamic, 0, le)?) + }, + Container::Big => { + let dynamic: dyn64::Dyn = self.into(); + Ok(bytes.pwrite_with(dynamic, 0, le)?) 
+ } + } + } + } + + #[derive(Debug)] + pub struct Dynamic { + pub dyns: Vec<Dyn>, + pub info: DynamicInfo, + } + + impl Dynamic { + #[cfg(feature = "endian_fd")] + /// Returns a vector of dynamic entries from the underlying byte `bytes`, with `endianness`, using the provided `phdrs` + pub fn parse(bytes: &[u8], phdrs: &[crate::elf::program_header::ProgramHeader], ctx: Ctx) -> crate::error::Result<Option<Self>> { + use scroll::ctx::SizeWith; + use scroll::Pread; + use crate::elf::program_header; + for phdr in phdrs { + if phdr.p_type == program_header::PT_DYNAMIC { + let offset = phdr.p_offset as usize; + let filesz = phdr.p_filesz as usize; + // Ensure offset and filesz are valid. + let bytes = if filesz > 0 { + bytes + .pread_with::<&[u8]>(offset, filesz) + .map_err(|_| crate::error::Error::Malformed(format!("Invalid PT_DYNAMIC size (offset {:#x}, filesz {:#x})", + offset, filesz)))? + } else { + &[] + }; + let size = Dyn::size_with(&ctx); + // the validity of `count` was implicitly checked by reading `bytes`. + let count = filesz / size; + let mut dyns = Vec::with_capacity(count); + let mut offset = 0; + for _ in 0..count { + let dynamic = bytes.gread_with::<Dyn>(&mut offset, ctx)?; + let tag = dynamic.d_tag; + dyns.push(dynamic); + if tag == DT_NULL { break } + } + let mut info = DynamicInfo::default(); + for dynamic in &dyns { + info.update(phdrs, dynamic); + } + return Ok(Some(Dynamic { dyns: dyns, info: info, })); + } + } + Ok(None) + } + + pub fn get_libraries<'a>(&self, strtab: &Strtab<'a>) -> Vec<&'a str> { + use log::warn; + let count = self.info.needed_count.min(self.dyns.len()); + let mut needed = Vec::with_capacity(count); + for dynamic in &self.dyns { + if dynamic.d_tag as u64 == DT_NEEDED { + if let Some(lib) = strtab.get_at(dynamic.d_val as usize) { + needed.push(lib) + } else { + warn!("Invalid DT_NEEDED {}", dynamic.d_val) + } + } + } + needed + } + } +} + +macro_rules! 
elf_dyn_std_impl { + ($size:ident, $phdr:ty) => { + + #[cfg(test)] + mod tests { + use super::*; + #[test] + fn size_of() { + assert_eq!(::std::mem::size_of::<Dyn>(), SIZEOF_DYN); + } + } + + if_alloc! { + use core::fmt; + use core::slice; + use alloc::vec::Vec; + + use crate::elf::program_header::{PT_DYNAMIC}; + use crate::strtab::Strtab; + + use crate::elf::dynamic::Dyn as ElfDyn; + + if_std! { + use std::fs::File; + use std::io::{Read, Seek}; + use std::io::SeekFrom::Start; + use crate::error::Result; + } + + impl From<ElfDyn> for Dyn { + fn from(dynamic: ElfDyn) -> Self { + Dyn { + d_tag: dynamic.d_tag as $size, + d_val: dynamic.d_val as $size, + } + } + } + impl From<Dyn> for ElfDyn { + fn from(dynamic: Dyn) -> Self { + ElfDyn { + d_tag: u64::from(dynamic.d_tag), + d_val: u64::from(dynamic.d_val), + } + } + } + + impl fmt::Debug for Dyn { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Dyn") + .field("d_tag", &tag_to_str(u64::from(self.d_tag))) + .field("d_val", &format_args!("0x{:x}", self.d_val)) + .finish() + } + } + + /// Returns a vector of dynamic entries from the given fd and program headers + #[cfg(feature = "std")] + pub fn from_fd(mut fd: &File, phdrs: &[$phdr]) -> Result<Option<Vec<Dyn>>> { + for phdr in phdrs { + if phdr.p_type == PT_DYNAMIC { + // FIXME: validate filesz before allocating + let filesz = phdr.p_filesz as usize; + let dync = filesz / SIZEOF_DYN; + let mut dyns = vec![Dyn::default(); dync]; + fd.seek(Start(u64::from(phdr.p_offset)))?; + unsafe { + fd.read_exact(plain::as_mut_bytes(&mut *dyns))?; + } + dyns.dedup(); + return Ok(Some(dyns)); + } + } + Ok(None) + } + + /// Given a bias and a memory address (typically for a _correctly_ mmap'd binary in memory), returns the `_DYNAMIC` array as a slice of that memory + pub unsafe fn from_raw<'a>(bias: usize, vaddr: usize) -> &'a [Dyn] { + let dynp = vaddr.wrapping_add(bias) as *const Dyn; + let mut idx = 0; + while u64::from((*dynp.offset(idx)).d_tag) != DT_NULL { 
+ idx += 1; + } + slice::from_raw_parts(dynp, idx as usize) + } + + // TODO: these bare functions have always seemed awkward, but not sure where they should go... + /// Maybe gets and returns the dynamic array with the same lifetime as the [phdrs], using the provided bias with wrapping addition. + /// If the bias is wrong, it will either segfault or give you incorrect values, beware + pub unsafe fn from_phdrs(bias: usize, phdrs: &[$phdr]) -> Option<&[Dyn]> { + for phdr in phdrs { + // FIXME: change to casting to u64 similar to DT_*? + if phdr.p_type as u32 == PT_DYNAMIC { + return Some(from_raw(bias, phdr.p_vaddr as usize)); + } + } + None + } + + /// Gets the needed libraries from the `_DYNAMIC` array, with the str slices lifetime tied to the dynamic array/strtab's lifetime(s) + pub unsafe fn get_needed<'a>(dyns: &[Dyn], strtab: *const Strtab<'a>, count: usize) -> Vec<&'a str> { + let mut needed = Vec::with_capacity(count.min(dyns.len())); + for dynamic in dyns { + if u64::from(dynamic.d_tag) == DT_NEEDED { + let lib = &(*strtab)[dynamic.d_val as usize]; + needed.push(lib); + } + } + needed + } + } + }; +} + +macro_rules! 
elf_dynamic_info_std_impl {
    ($size:ident, $phdr:ty) => {
        /// Convert a virtual memory address to a file offset
        ///
        /// Scans `phdrs` for the first `PT_LOAD` segment whose
        /// `[p_vaddr, p_vaddr + p_memsz)` range contains `address` and returns
        /// `p_offset + (address - p_vaddr)`. Returns `None` when no segment matches
        /// or when that sum overflows.
        fn vm_to_offset(phdrs: &[$phdr], address: $size) -> Option<$size> {
            for ph in phdrs {
                if ph.p_type == crate::elf::program_header::PT_LOAD && address >= ph.p_vaddr {
                    let offset = address - ph.p_vaddr;
                    if offset < ph.p_memsz {
                        return ph.p_offset.checked_add(offset);
                    }
                }
            }
            None
        }

        /// Important dynamic linking info generated via a single pass through the `_DYNAMIC` array
        ///
        /// Address-valued entries are stored as file offsets (translated through the
        /// `PT_LOAD` program headers); when an address cannot be translated the field
        /// is `0`, or `None` for the `Option` fields. Size and count entries are
        /// stored as found.
        #[derive(Default, PartialEq)]
        pub struct DynamicInfo {
            pub rela: usize,
            pub relasz: usize,
            pub relaent: $size,
            pub relacount: usize,
            pub rel: usize,
            pub relsz: usize,
            pub relent: $size,
            pub relcount: usize,
            pub gnu_hash: Option<$size>,
            pub hash: Option<$size>,
            pub strtab: usize,
            pub strsz: usize,
            pub symtab: usize,
            pub syment: usize,
            pub pltgot: Option<$size>,
            pub pltrelsz: usize,
            pub pltrel: $size,
            pub jmprel: usize,
            pub verdef: $size,
            pub verdefnum: $size,
            pub verneed: $size,
            pub verneednum: $size,
            pub versym: $size,
            pub init: $size,
            pub fini: $size,
            pub init_array: $size,
            pub init_arraysz: usize,
            pub fini_array: $size,
            pub fini_arraysz: usize,
            pub needed_count: usize,
            pub flags: $size,
            pub flags_1: $size,
            pub soname: usize,
            pub textrel: bool,
        }

        impl DynamicInfo {
            /// Folds one dynamic entry into this summary.
            ///
            /// `phdrs` is used to translate address-valued entries to file offsets.
            /// Tags this summary does not track are ignored.
            #[inline]
            pub fn update(&mut self, phdrs: &[$phdr], dynamic: &Dyn) {
                match u64::from(dynamic.d_tag) {
                    DT_RELA => self.rela = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, // .rela.dyn
                    DT_RELASZ => self.relasz = dynamic.d_val as usize,
                    DT_RELAENT => self.relaent = dynamic.d_val as _,
                    DT_RELACOUNT => self.relacount = dynamic.d_val as usize,
                    DT_REL => self.rel = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize, // .rel.dyn
                    DT_RELSZ => self.relsz = dynamic.d_val as usize,
                    DT_RELENT => self.relent = dynamic.d_val as _,
                    DT_RELCOUNT => self.relcount = dynamic.d_val as usize,
                    DT_GNU_HASH => self.gnu_hash = vm_to_offset(phdrs, dynamic.d_val),
                    DT_HASH => self.hash = vm_to_offset(phdrs, dynamic.d_val),
                    DT_STRTAB => {
                        self.strtab = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize
                    }
                    DT_STRSZ => self.strsz = dynamic.d_val as usize,
                    DT_SYMTAB => {
                        self.symtab = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize
                    }
                    DT_SYMENT => self.syment = dynamic.d_val as usize,
                    DT_PLTGOT => self.pltgot = vm_to_offset(phdrs, dynamic.d_val),
                    DT_PLTRELSZ => self.pltrelsz = dynamic.d_val as usize,
                    DT_PLTREL => self.pltrel = dynamic.d_val as _,
                    DT_JMPREL => {
                        self.jmprel = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0) as usize
                    } // .rela.plt
                    DT_VERDEF => self.verdef = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0),
                    // DT_VERDEFNUM is a count, not an address: keep the raw value
                    // (same treatment as DT_VERNEEDNUM) instead of translating it.
                    DT_VERDEFNUM => self.verdefnum = dynamic.d_val as _,
                    DT_VERNEED => self.verneed = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0),
                    DT_VERNEEDNUM => self.verneednum = dynamic.d_val as _,
                    DT_VERSYM => self.versym = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0),
                    DT_INIT => self.init = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0),
                    DT_FINI => self.fini = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0),
                    DT_INIT_ARRAY => {
                        self.init_array = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0)
                    }
                    DT_INIT_ARRAYSZ => self.init_arraysz = dynamic.d_val as _,
                    DT_FINI_ARRAY => {
                        self.fini_array = vm_to_offset(phdrs, dynamic.d_val).unwrap_or(0)
                    }
                    DT_FINI_ARRAYSZ => self.fini_arraysz = dynamic.d_val as _,
                    DT_NEEDED => self.needed_count += 1,
                    DT_FLAGS => self.flags = dynamic.d_val as _,
                    DT_FLAGS_1 => self.flags_1 = dynamic.d_val as _,
                    DT_SONAME => self.soname = dynamic.d_val as _,
                    DT_TEXTREL => self.textrel = true,
                    _ => (),
                }
            }

            /// Builds the summary by applying `update` to every entry of `dynamic`, in order.
            pub fn new(dynamic: &[Dyn], phdrs: &[$phdr]) -> DynamicInfo {
                let mut info = DynamicInfo::default();
                for dyna in dynamic {
                    info.update(phdrs, dyna);
                }
                info
            }
        }

        if_alloc!
{
            impl fmt::Debug for DynamicInfo {
                /// Prints every field of the summary; offsets are shown in hex and the
                /// `flags` / `flags_1` words are followed by the names of the set bits.
                fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    let gnu_hash = self.gnu_hash.unwrap_or(0);
                    let hash = self.hash.unwrap_or(0);
                    let pltgot = self.pltgot.unwrap_or(0);

                    let flags: Vec<&'static str> = [DF_ORIGIN, DF_SYMBOLIC, DF_TEXTREL, DF_BIND_NOW, DF_STATIC_TLS,][..]
                        .iter()
                        .filter(|f| (self.flags as u64 & *f) != 0)
                        .map(|f| df_tag_to_str(*f))
                        .collect();

                    let flags_1: Vec<&'static str> = [
                        DF_1_NOW,
                        DF_1_GLOBAL,
                        DF_1_GROUP,
                        DF_1_NODELETE,
                        DF_1_LOADFLTR,
                        DF_1_INITFIRST,
                        DF_1_NOOPEN,
                        DF_1_ORIGIN,
                        DF_1_DIRECT,
                        DF_1_TRANS,
                        DF_1_INTERPOSE,
                        DF_1_NODEFLIB,
                        DF_1_NODUMP,
                        DF_1_CONFALT,
                        DF_1_ENDFILTEE,
                        DF_1_DISPRELDNE,
                        DF_1_DISPRELPND,
                        DF_1_NODIRECT,
                        DF_1_IGNMULDEF,
                        DF_1_NOKSYMS,
                        DF_1_NOHDR,
                        DF_1_EDITED,
                        DF_1_NORELOC,
                        DF_1_SYMINTPOSE,
                        DF_1_GLOBAUDIT,
                        DF_1_SINGLETON,
                        DF_1_PIE,
                    ][..]
                        .iter()
                        .filter(|f| (self.flags_1 as u64 & *f) != 0)
                        .map(|f| df_1_tag_to_str(*f))
                        .collect();

                    f.debug_struct("DynamicInfo")
                        .field("rela", &format_args!("0x{:x}", self.rela))
                        .field("relasz", &self.relasz)
                        .field("relaent", &self.relaent)
                        .field("relacount", &self.relacount)
                        // The Rel counterparts were previously missing from the output.
                        .field("rel", &format_args!("0x{:x}", self.rel))
                        .field("relsz", &self.relsz)
                        .field("relent", &self.relent)
                        .field("relcount", &self.relcount)
                        .field("gnu_hash", &format_args!("0x{:x}", gnu_hash))
                        .field("hash", &format_args!("0x{:x}", hash))
                        .field("strtab", &format_args!("0x{:x}", self.strtab))
                        .field("strsz", &self.strsz)
                        .field("symtab", &format_args!("0x{:x}", self.symtab))
                        .field("syment", &self.syment)
                        .field("pltgot", &format_args!("0x{:x}", pltgot))
                        .field("pltrelsz", &self.pltrelsz)
                        .field("pltrel", &self.pltrel)
                        .field("jmprel", &format_args!("0x{:x}", self.jmprel))
                        .field("verdef", &format_args!("0x{:x}", self.verdef))
                        .field("verdefnum", &self.verdefnum)
                        .field("verneed", &format_args!("0x{:x}", self.verneed))
                        .field("verneednum", &self.verneednum)
                        .field("versym", &format_args!("0x{:x}", self.versym))
                        .field("init", &format_args!("0x{:x}", self.init))
                        .field("fini", &format_args!("0x{:x}", self.fini))
                        .field("init_array", &format_args!("{:#x}", self.init_array))
                        .field("init_arraysz", &self.init_arraysz)
                        // The fini array fields were previously missing from the output.
                        .field("fini_array", &format_args!("{:#x}", self.fini_array))
                        .field("fini_arraysz", &self.fini_arraysz)
                        .field("needed_count", &self.needed_count)
                        .field("flags", &format_args!("{:#0width$x} {:?}", self.flags, flags, width = core::mem::size_of_val(&self.flags)))
                        .field("flags_1", &format_args!("{:#0width$x} {:?}", self.flags_1, flags_1, width = core::mem::size_of_val(&self.flags_1)))
                        .field("soname", &self.soname)
                        .field("textrel", &self.textrel)
                        .finish()
                }
            }
        }
    };
}

if_alloc! {
    elf_dynamic_info_std_impl!(u64, crate::elf::program_header::ProgramHeader);
}

pub mod dyn32 {
    pub use crate::elf::dynamic::*;

    elf_dyn!(u32);

    pub const SIZEOF_DYN: usize = 8;

    elf_dyn_std_impl!(u32, crate::elf32::program_header::ProgramHeader);
    elf_dynamic_info_std_impl!(
        u32,
        crate::elf::program_header::program_header32::ProgramHeader
    );
}

pub mod dyn64 {
    pub use crate::elf::dynamic::*;

    elf_dyn!(u64);

    pub const SIZEOF_DYN: usize = 16;

    elf_dyn_std_impl!(u64, crate::elf64::program_header::ProgramHeader);
    elf_dynamic_info_std_impl!(
        u64,
        crate::elf::program_header::program_header64::ProgramHeader
    );
}
diff --git a/third_party/rust/goblin/src/elf/gnu_hash.rs b/third_party/rust/goblin/src/elf/gnu_hash.rs new file mode 100644 index 0000000000..d785382c26 --- /dev/null +++ b/third_party/rust/goblin/src/elf/gnu_hash.rs @@ -0,0 +1,220 @@
//! A Gnu Hash table has 4 sections:
//!
//! 1. Header
//! 2. Bloom Filter
//! 3. Hash Buckets
//! 4. Chains
//!
//! The header is an array of four `u32`s:
//!
//! 1. nbuckets
//! 2. symndx
//! 3. maskwords
//! 4. shift2
//!
//! See more:
//! * http://www.linker-aliens.org/blogs/ali/entry/gnu_hash_elf_sections
//! or https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
//!
* https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ + +/// GNU hash function: accepts a symbol name and returns a value that may be +/// used to compute a bucket index. +/// +/// Consequently, if the hashing function returns the value `x` for some name, +/// `buckets[x % nbuckets]` gives an index, `y`, into both the symbol table +/// and the chain table. +pub fn hash(symbol: &str) -> u32 { + const HASH_SEED: u32 = 5381; + symbol.bytes().fold(HASH_SEED, |hash, b| { + hash.wrapping_mul(33).wrapping_add(u32::from(b)) + }) +} + +#[cfg(test)] +mod tests { + use super::hash; + #[test] + fn test_hash() { + assert_eq!(hash(""), 0x0000_1505); + assert_eq!(hash("printf"), 0x156b_2bb8); + assert_eq!(hash("exit"), 0x7c96_7e3f); + assert_eq!(hash("syscall"), 0xbac2_12a0); + assert_eq!(hash("flapenguin.me"), 0x8ae9_f18e); + } +} + +macro_rules! elf_gnu_hash_impl { + ($IntTy:ty) => { + use crate::elf::sym::Sym; + use crate::strtab::Strtab; + use core::fmt; + use core::mem; + use core::slice; + + const INT_SIZE: usize = mem::size_of::<$IntTy>(); + const U32_SIZE: usize = mem::size_of::<u32>(); + /// Size of a bits mask in bloom filter + const ELFCLASS_BITS: u32 = INT_SIZE as u32 * 8; + + /// A better hash table for the ELF used by GNU systems in GNU-compatible software. + pub struct GnuHash<'a> { + /// Index of the first symbol in the `.dynsym` table which is accessible with + /// the hash table + symindex: u32, + /// Shift count used in the bloom filter + shift2: u32, + /// 2 bit bloom filter on `chains` + // Either 32 or 64-bit depending on the class of object + bloom_filter: &'a [$IntTy], + /// GNU hash table bucket array; indexes start at 0. This array holds symbol + /// table indexes and contains the index of hashes in `chains` + buckets: &'a [u32], + /// Hash values; indexes start at 0. This array holds symbol table indexes. 
+ chains: &'a [u32], // => chains[dynsyms.len() - symindex] + dynsyms: &'a [Sym], + } + + impl fmt::Debug for GnuHash<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("GnuHash") + .field("nbuckets", &self.buckets.len()) + .field("symindex", &self.symindex) + .field("maskwords", &(self.bloom_filter.len() - 1)) + .field("shift2", &self.shift2) + .field("bloom_filter", &self.bloom_filter.as_ptr()) + .field("bucket", &self.buckets.as_ptr()) + .field("chains", &self.chains.as_ptr()) + .finish() + } + } + + impl<'a> GnuHash<'a> { + /// Initialize a GnuHash from a pointer to `.hash` (or `.gnu.hash`) section + /// and total number of dynamic symbols. + /// # Safety + /// + /// This function creates a `GnuHash` directly from a raw pointer + pub unsafe fn from_raw_table( + hashtab: &'a [u8], + dynsyms: &'a [Sym], + ) -> Result<Self, &'static str> { + if hashtab.as_ptr() as usize % INT_SIZE != 0 { + return Err("hashtab is not aligned with 64-bit"); + } + + if hashtab.len() <= 16 { + return Err("failed to read in number of buckets"); + } + + let [nbuckets, symindex, maskwords, shift2] = + (hashtab.as_ptr() as *const u32 as *const [u32; 4]).read(); + + if !maskwords.is_power_of_two() { + return Err("maskwords must be a power of two"); + } + + let hashtab = &hashtab[16..]; + { + // SAFETY: Condition to check for an overflow + // size_of(chains) + size_of(buckets) + size_of(bloom_filter) == size_of(hashtab) + + if dynsyms.len() <= symindex as usize { + return Err("symindex must be smaller than dynsyms.len()"); + } + let chains_size = (dynsyms.len() - symindex as usize).checked_mul(U32_SIZE); + let buckets_size = (nbuckets as usize).checked_mul(U32_SIZE); + let bloom_size = (maskwords as usize).checked_mul(INT_SIZE); + + let total_size = match (chains_size, buckets_size, bloom_size) { + (Some(a), Some(b), Some(c)) => { + a.checked_add(b).and_then(|t| t.checked_add(c)) + } + _ => None, + }; + match total_size { + Some(size) if size == hashtab.len() => 
{} + _ => return Err("index out of bound or non-complete hash section"), + } + } + + let bloom_filter_ptr = hashtab.as_ptr() as *const $IntTy; + let buckets_ptr = bloom_filter_ptr.add(maskwords as usize) as *const u32; + let chains_ptr = buckets_ptr.add(nbuckets as usize); + let bloom_filter = slice::from_raw_parts(bloom_filter_ptr, maskwords as usize); + let buckets = slice::from_raw_parts(buckets_ptr, nbuckets as usize); + let chains = slice::from_raw_parts(chains_ptr, dynsyms.len() - symindex as usize); + Ok(Self { + symindex, + shift2, + bloom_filter, + buckets, + chains, + dynsyms, + }) + } + + /// Locate the hash chain, and corresponding hash value element. + #[cold] + fn lookup(&self, symbol: &str, hash: u32, dynstrtab: &Strtab) -> Option<&'a Sym> { + const MASK_LOWEST_BIT: u32 = 0xffff_fffe; + let bucket = self.buckets[hash as usize % self.buckets.len()]; + + // Empty hash chain, symbol not present + if bucket < self.symindex { + return None; + } + // Walk the chain until the symbol is found or the chain is exhausted. + let chain_idx = bucket - self.symindex; + let hash = hash & MASK_LOWEST_BIT; + let chains = &self.chains.get((chain_idx as usize)..)?; + let dynsyms = &self.dynsyms.get((bucket as usize)..)?; + for (hash2, symb) in chains.iter().zip(dynsyms.iter()) { + if (hash == (hash2 & MASK_LOWEST_BIT)) + && (symbol == &dynstrtab[symb.st_name as usize]) + { + return Some(symb); + } + // Chain ends with an element with the lowest bit set to 1. + if hash2 & 1 == 1 { + break; + } + } + None + } + + /// Check if symbol maybe is in the hash table, or definitely not in it. + #[inline] + fn check_maybe_match(&self, hash: u32) -> bool { + const MASK: u32 = ELFCLASS_BITS - 1; + let hash2 = hash >> self.shift2; + // `x & (N - 1)` is equivalent to `x % N` iff `N = 2^y`. 
+ let bitmask: $IntTy = 1 << (hash & (MASK)) | 1 << (hash2 & MASK); + let bloom_idx = (hash / ELFCLASS_BITS) & (self.bloom_filter.len() as u32 - 1); + let bitmask_word = self.bloom_filter[bloom_idx as usize]; + (bitmask_word & bitmask) == bitmask + } + + /// Given a symbol, a hash of that symbol, a dynamic string table and + /// a `dynstrtab` to cross-reference names, maybe returns a Sym. + pub fn find(&self, symbol: &str, dynstrtab: &Strtab) -> Option<&'a Sym> { + let hash = self::hash(symbol); + self.find_with_hash(symbol, hash, dynstrtab) + } + + /// This function will not check if the passed `hash` is really + /// the hash of `symbol` + pub fn find_with_hash( + &self, + symbol: &str, + hash: u32, + dynstrtab: &Strtab, + ) -> Option<&'a Sym> { + if self.check_maybe_match(hash) { + self.lookup(symbol, hash, dynstrtab) + } else { + None + } + } + } + }; +} diff --git a/third_party/rust/goblin/src/elf/header.rs b/third_party/rust/goblin/src/elf/header.rs new file mode 100644 index 0000000000..2c05e8ffc6 --- /dev/null +++ b/third_party/rust/goblin/src/elf/header.rs @@ -0,0 +1,630 @@ +include!("constants_header.rs"); + +macro_rules! 
/// Generates the fixed-layout (`#[repr(C)]`) ELF file header for one ELF class.
///
/// `$size` is the integer type used for the address/offset fields (`e_entry`,
/// `e_phoff`, `e_shoff`). The expansion declares `Header`, marks it as
/// `plain::Plain`, adds `Header::from_bytes`, and implements `fmt::Debug`.
/// `SIZEOF_EHDR`, `SIZEOF_IDENT` and `et_to_str` must be in scope at the
/// expansion site.
macro_rules! elf_header {
    ($size:ident) => {
        use core::fmt;

        // Field order and `#[repr(C)]` define the in-memory layout that
        // `from_bytes` reinterprets; do not reorder.
        #[repr(C)]
        #[derive(Clone, Copy, Default, PartialEq)]
        pub struct Header {
            /// Magic number and other info
            pub e_ident: [u8; SIZEOF_IDENT],
            /// Object file type
            pub e_type: u16,
            /// Architecture
            pub e_machine: u16,
            /// Object file version
            pub e_version: u32,
            /// Entry point virtual address
            pub e_entry: $size,
            /// Program header table file offset
            pub e_phoff: $size,
            /// Section header table file offset
            pub e_shoff: $size,
            /// Processor-specific flags
            pub e_flags: u32,
            /// ELF header size in bytes
            pub e_ehsize: u16,
            /// Program header table entry size
            pub e_phentsize: u16,
            /// Program header table entry count
            pub e_phnum: u16,
            /// Section header table entry size
            pub e_shentsize: u16,
            /// Section header table entry count
            pub e_shnum: u16,
            /// Section header string table index
            pub e_shstrndx: u16,
        }

        use plain;
        // Declare that this is a plain type.
        unsafe impl plain::Plain for Header {}

        impl Header {
            /// Returns the corresponding ELF header from the given byte array.
            ///
            /// The bytes are reinterpreted in place; no copy or byte-order
            /// conversion is performed.
            ///
            /// # Panics
            ///
            /// Panics if `bytes` is not suitably aligned for `Header`, because
            /// the result of `plain::from_bytes` is unwrapped.
            pub fn from_bytes(bytes: &[u8; SIZEOF_EHDR]) -> &Header {
                // FIXME: Length is ensured correct because it's encoded in the type,
                // but it can still panic due to invalid alignment.
                plain::from_bytes(bytes).unwrap()
            }
        }
        impl fmt::Debug for Header {
            /// Formats the header with `e_type` as its symbolic name and the
            /// machine, version, address/offset and flag fields in hexadecimal.
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_struct("Header")
                    .field("e_ident", &format_args!("{:?}", self.e_ident))
                    .field("e_type", &et_to_str(self.e_type))
                    .field("e_machine", &format_args!("0x{:x}", self.e_machine))
                    .field("e_version", &format_args!("0x{:x}", self.e_version))
                    .field("e_entry", &format_args!("0x{:x}", self.e_entry))
                    .field("e_phoff", &format_args!("0x{:x}", self.e_phoff))
                    .field("e_shoff", &format_args!("0x{:x}", self.e_shoff))
                    .field("e_flags", &format_args!("{:x}", self.e_flags))
                    .field("e_ehsize", &self.e_ehsize)
                    .field("e_phentsize", &self.e_phentsize)
                    .field("e_phnum", &self.e_phnum)
                    .field("e_shentsize", &self.e_shentsize)
                    .field("e_shnum", &self.e_shnum)
                    .field("e_shstrndx", &self.e_shstrndx)
                    .finish()
            }
        }
    };
}
+pub const ELFDATANONE: u8 = 0; +/// 2's complement, little endian. +pub const ELFDATA2LSB: u8 = 1; +/// 2's complement, big endian. +pub const ELFDATA2MSB: u8 = 2; + +/// File version byte index. +pub const EI_VERSION: usize = 6; +/// Current ELF version. +pub const EV_CURRENT: u8 = 1; + +/// OS ABI byte index. +pub const EI_OSABI: usize = 7; +/// UNIX System V ABI. +pub const ELFOSABI_NONE: u8 = 0; +/// UNIX System V ABI. +/// +/// Alias. +pub const ELFOSABI_SYSV: u8 = ELFOSABI_NONE; +/// HP-UX. +pub const ELFOSABI_HPUX: u8 = 1; +/// NetBSD. +pub const ELFOSABI_NETBSD: u8 = 2; +/// Object uses GNU ELF extensions. +pub const ELFOSABI_GNU: u8 = 3; +/// Object uses GNU ELF extensions. +/// +/// Alias. +pub const ELFOSABI_LINUX: u8 = ELFOSABI_GNU; +/// Sun Solaris. +pub const ELFOSABI_SOLARIS: u8 = 6; +/// IBM AIX. +pub const ELFOSABI_AIX: u8 = 7; +/// SGI Irix. +pub const ELFOSABI_IRIX: u8 = 8; +/// FreeBSD +pub const ELFOSABI_FREEBSD: u8 = 9; +/// Compaq TRU64 UNIX. +pub const ELFOSABI_TRU64: u8 = 10; +/// Novell Modesto. +pub const ELFOSABI_MODESTO: u8 = 11; +/// OpenBSD. +pub const ELFOSABI_OPENBSD: u8 = 12; +/// ARM EABI. +pub const ELFOSABI_ARM_AEABI: u8 = 64; +/// ARM. +pub const ELFOSABI_ARM: u8 = 97; +/// Standalone (embedded) application. +pub const ELFOSABI_STANDALONE: u8 = 255; + +/// ABI version byte index. +pub const EI_ABIVERSION: usize = 8; + +/// Number of bytes in an identifier. +pub const SIZEOF_IDENT: usize = 16; + +/// Convert a ELF class byte to the associated string. +#[inline] +pub fn class_to_str(et: u8) -> &'static str { + match et { + ELFCLASSNONE => "NONE", + ELFCLASS32 => "ELF32", + ELFCLASS64 => "ELF64", + _ => "UNKNOWN_CLASS", + } +} + +/// Convert an ET value to their associated string. +#[inline] +pub fn et_to_str(et: u16) -> &'static str { + match et { + ET_NONE => "NONE", + ET_REL => "REL", + ET_EXEC => "EXEC", + ET_DYN => "DYN", + ET_CORE => "CORE", + ET_NUM => "NUM", + _ => "UNKNOWN_ET", + } +} + +if_alloc! 
{ + use crate::error; + use scroll::{ctx, Endian}; + use core::fmt; + use crate::container::{Ctx, Container}; + use alloc::string::ToString; + + #[derive(Copy, Clone, PartialEq)] + /// An ELF header + pub struct Header { + pub e_ident : [u8; SIZEOF_IDENT], + pub e_type : u16, + pub e_machine : u16, + pub e_version : u32, + pub e_entry : u64, + pub e_phoff : u64, + pub e_shoff : u64, + pub e_flags : u32, + pub e_ehsize : u16, + pub e_phentsize : u16, + pub e_phnum : u16, + pub e_shentsize : u16, + pub e_shnum : u16, + pub e_shstrndx : u16, + } + + impl Header { + /// Return the size of the underlying program header, given a `container` + #[inline] + pub fn size(ctx: Ctx) -> usize { + use scroll::ctx::SizeWith; + Self::size_with(&ctx) + } + /// Returns the container type this header specifies + pub fn container(&self) -> error::Result<Container> { + use crate::error::Error; + match self.e_ident[EI_CLASS] { + ELFCLASS32 => { Ok(Container::Little) }, + ELFCLASS64 => { Ok(Container::Big) }, + class => Err(Error::Malformed(format!("Invalid class in Header: {}", class))) + } + } + /// Returns the byte order this header specifies + pub fn endianness(&self) -> error::Result<scroll::Endian> { + use crate::error::Error; + match self.e_ident[EI_DATA] { + ELFDATA2LSB => { Ok(scroll::LE) }, + ELFDATA2MSB => { Ok(scroll::BE) }, + class => Err(Error::Malformed(format!("Invalid endianness in Header: {}", class))) + } + } + pub fn new(ctx: Ctx) -> Self { + use crate::elf32; + use crate::elf64; + let (typ, ehsize, phentsize, shentsize) = match ctx.container { + Container::Little => { + (ELFCLASS32, header32::SIZEOF_EHDR, + elf32::program_header::SIZEOF_PHDR, + elf32::section_header::SIZEOF_SHDR) + }, + Container::Big => { + (ELFCLASS64, header64::SIZEOF_EHDR, + elf64::program_header::SIZEOF_PHDR, + elf64::section_header::SIZEOF_SHDR) + } + }; + let byteorder = match ctx.le { Endian::Little => ELFDATA2LSB, Endian::Big => ELFDATA2MSB }; + Header { + e_ident: [ + 127, + 69, + 76, + 70, 
+ typ, + byteorder, + EV_CURRENT, + ELFOSABI_NONE, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + e_type: ET_DYN, + e_machine: EM_NONE, + e_version: 1, + e_entry: 0x0, + e_phoff: 0x0, + e_shoff: 0x0, + e_flags: 0, + e_ehsize: ehsize as u16, + e_phentsize: phentsize as u16, + e_phnum: 0, + e_shentsize: shentsize as u16, + e_shnum: 0, + e_shstrndx: 0, + } + } + } + + impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Header") + .field("e_ident", &format_args!("{:?}", self.e_ident)) + .field("e_type", &et_to_str(self.e_type)) + .field("e_machine", &format_args!("0x{:x}", self.e_machine)) + .field("e_version", &format_args!("0x{:x}", self.e_version)) + .field("e_entry", &format_args!("0x{:x}", self.e_entry)) + .field("e_phoff", &format_args!("0x{:x}", self.e_phoff)) + .field("e_shoff", &format_args!("0x{:x}", self.e_shoff)) + .field("e_flags", &format_args!("{:x}", self.e_flags)) + .field("e_ehsize", &self.e_ehsize) + .field("e_phentsize", &self.e_phentsize) + .field("e_phnum", &self.e_phnum) + .field("e_shentsize", &self.e_shentsize) + .field("e_shnum", &self.e_shnum) + .field("e_shstrndx", &self.e_shstrndx) + .finish() + } + } + + impl ctx::SizeWith<crate::container::Ctx> for Header { + fn size_with(ctx: &crate::container::Ctx) -> usize { + match ctx.container { + Container::Little => { + header32::SIZEOF_EHDR + }, + Container::Big => { + header64::SIZEOF_EHDR + }, + } + } + } + + impl<'a> ctx::TryFromCtx<'a, scroll::Endian> for Header { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], _ctx: scroll::Endian) -> error::Result<(Self, usize)> { + use scroll::Pread; + if bytes.len() < SIZEOF_IDENT { + return Err(error::Error::Malformed("Too small".to_string())); + } + let ident: &[u8] = &bytes[..SIZEOF_IDENT]; + if &ident[0..SELFMAG] != ELFMAG { + let magic: u64 = ident.pread_with(0, scroll::LE)?; + return Err(error::Error::BadMagic(magic)); + } + let class = ident[EI_CLASS]; + match class { + 
ELFCLASS32 => { + Ok((Header::from(bytes.pread::<header32::Header>(0)?), header32::SIZEOF_EHDR)) + }, + ELFCLASS64 => { + Ok((Header::from(bytes.pread::<header64::Header>(0)?), header64::SIZEOF_EHDR)) + }, + _ => { + Err(error::Error::Malformed(format!("invalid ELF class {:x}", class))) + } + } + } + } + + impl ctx::TryIntoCtx<scroll::Endian> for Header { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], _ctx: scroll::Endian) -> Result<usize, Self::Error> { + use scroll::Pwrite; + match self.container()? { + Container::Little => { + bytes.pwrite(header32::Header::from(self), 0) + }, + Container::Big => { + bytes.pwrite(header64::Header::from(self), 0) + } + } + } + } + impl ctx::IntoCtx<crate::container::Ctx> for Header { + fn into_ctx(self, bytes: &mut [u8], ctx: crate::container::Ctx) { + use scroll::Pwrite; + match ctx.container { + Container::Little => { + bytes.pwrite_with(header32::Header::from(self), 0, ctx.le).unwrap() + }, + Container::Big => { + bytes.pwrite_with(header64::Header::from(self), 0, ctx.le).unwrap() + } + }; + } + } +} // end if_alloc + +macro_rules! elf_header_std_impl { + ($size:expr, $width:ty) => { + + if_alloc! { + use crate::elf::header::Header as ElfHeader; + use crate::error::Error; + #[cfg(any(feature = "std", feature = "endian_fd"))] + use crate::error::Result; + + use scroll::{ctx, Pread}; + + use core::result; + + if_std! 
/// Generates conversions, scroll (de)serialisation impls and loaders for the
/// class-specific `Header` produced by `elf_header!`.
///
/// * `$size`  - size in bytes of the on-disk header (used for the read buffer
///   in `from_fd`).
/// * `$width` - integer type of the address/offset fields; the unified
///   header's `u64` fields are cast to it with `as`.
macro_rules! elf_header_std_impl {
    ($size:expr, $width:ty) => {

        if_alloc! {
            use crate::elf::header::Header as ElfHeader;
            use crate::error::Error;
            #[cfg(any(feature = "std", feature = "endian_fd"))]
            use crate::error::Result;

            use scroll::{ctx, Pread};

            use core::result;

            if_std! {
                use std::fs::File;
                use std::io::{Read};
            }

            impl From<ElfHeader> for Header {
                /// Narrows the unified header into the class-specific one; the
                /// 64-bit address/offset fields are cast to `$width` with `as`.
                fn from(eh: ElfHeader) -> Self {
                    Header {
                        e_ident: eh.e_ident,
                        e_type: eh.e_type,
                        e_machine: eh.e_machine,
                        e_version: eh.e_version,
                        e_entry: eh.e_entry as $width,
                        e_phoff: eh.e_phoff as $width,
                        e_shoff: eh.e_shoff as $width,
                        e_flags: eh.e_flags,
                        e_ehsize: eh.e_ehsize,
                        e_phentsize: eh.e_phentsize,
                        e_phnum: eh.e_phnum,
                        e_shentsize: eh.e_shentsize,
                        e_shnum: eh.e_shnum,
                        e_shstrndx: eh.e_shstrndx,
                    }
                }
            }

            impl From<Header> for ElfHeader {
                /// Widens the class-specific header into the unified one.
                fn from(eh: Header) -> Self {
                    ElfHeader {
                        e_ident: eh.e_ident,
                        e_type: eh.e_type,
                        e_machine: eh.e_machine,
                        e_version: eh.e_version,
                        e_entry: u64::from(eh.e_entry),
                        e_phoff: u64::from(eh.e_phoff),
                        e_shoff: u64::from(eh.e_shoff),
                        e_flags: eh.e_flags,
                        e_ehsize: eh.e_ehsize,
                        e_phentsize: eh.e_phentsize,
                        e_phnum: eh.e_phnum,
                        e_shentsize: eh.e_shentsize,
                        e_shnum: eh.e_shnum,
                        e_shstrndx: eh.e_shstrndx,
                    }
                }
            }

            impl<'a> ctx::TryFromCtx<'a, scroll::Endian> for Header {
                type Error = crate::error::Error;
                /// Reads a header from the start of `bytes`. The endianness
                /// argument is ignored: byte order is taken from
                /// `e_ident[EI_DATA]` of the data being read.
                fn try_from_ctx(bytes: &'a [u8], _: scroll::Endian) -> result::Result<(Self, usize), Self::Error> {
                    let mut elf_header = Header::default();
                    let offset = &mut 0;
                    bytes.gread_inout(offset, &mut elf_header.e_ident)?;
                    let endianness =
                        match elf_header.e_ident[EI_DATA] {
                            ELFDATA2LSB => scroll::LE,
                            ELFDATA2MSB => scroll::BE,
                            d => return Err(Error::Malformed(format!("invalid ELF endianness DATA type {:x}", d)).into()),
                        };
                    elf_header.e_type =      bytes.gread_with(offset, endianness)?;
                    elf_header.e_machine =   bytes.gread_with(offset, endianness)?;
                    elf_header.e_version =   bytes.gread_with(offset, endianness)?;
                    elf_header.e_entry =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_phoff =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_shoff =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_flags =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_ehsize =    bytes.gread_with(offset, endianness)?;
                    elf_header.e_phentsize = bytes.gread_with(offset, endianness)?;
                    elf_header.e_phnum =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_shentsize = bytes.gread_with(offset, endianness)?;
                    elf_header.e_shnum =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_shstrndx =  bytes.gread_with(offset, endianness)?;
                    Ok((elf_header, SIZEOF_EHDR))
                }
            }

            impl ctx::TryIntoCtx<scroll::Endian> for Header {
                type Error = crate::error::Error;
                /// a Pwrite impl for Header: **note** we use the endianness value in the header, and not a parameter
                fn try_into_ctx(self, bytes: &mut [u8], _endianness: scroll::Endian) -> result::Result<usize, Self::Error> {
                    use scroll::{Pwrite};
                    let offset = &mut 0;
                    let endianness =
                        match self.e_ident[EI_DATA] {
                            ELFDATA2LSB => scroll::LE,
                            ELFDATA2MSB => scroll::BE,
                            d => return Err(Error::Malformed(format!("invalid ELF DATA type {:x}", d)).into()),
                        };
                    // The identification bytes are written verbatim, one byte at a time.
                    for &ident_byte in self.e_ident.iter() {
                        bytes.gwrite(ident_byte, offset)?;
                    }
                    bytes.gwrite_with(self.e_type      , offset, endianness)?;
                    bytes.gwrite_with(self.e_machine   , offset, endianness)?;
                    bytes.gwrite_with(self.e_version   , offset, endianness)?;
                    bytes.gwrite_with(self.e_entry     , offset, endianness)?;
                    bytes.gwrite_with(self.e_phoff     , offset, endianness)?;
                    bytes.gwrite_with(self.e_shoff     , offset, endianness)?;
                    bytes.gwrite_with(self.e_flags     , offset, endianness)?;
                    bytes.gwrite_with(self.e_ehsize    , offset, endianness)?;
                    bytes.gwrite_with(self.e_phentsize , offset, endianness)?;
                    bytes.gwrite_with(self.e_phnum     , offset, endianness)?;
                    bytes.gwrite_with(self.e_shentsize , offset, endianness)?;
                    bytes.gwrite_with(self.e_shnum     , offset, endianness)?;
                    bytes.gwrite_with(self.e_shstrndx  , offset, endianness)?;
                    Ok(SIZEOF_EHDR)
                }
            }

            impl Header {
                /// Load a header from a file. **You must** ensure the seek is at the correct position.
                ///
                /// Reads exactly `$size` bytes and reinterprets them as a `Header`
                /// without any byte-order conversion.
                ///
                /// # Errors
                ///
                /// Returns the I/O error if the file cannot supply `$size` bytes, or
                /// `Error::Malformed` if the buffer is shorter than `Header`.
                #[cfg(feature = "std")]
                pub fn from_fd(bytes: &mut File) -> Result<Header> {
                    let mut raw = [0u8; $size];
                    bytes.read_exact(&mut raw)?;
                    // A `[u8; N]` local is only guaranteed 1-byte alignment, so viewing it
                    // through `Header::from_bytes` could panic on the alignment check.
                    // Copying into an owned `Header` has no alignment requirement.
                    let mut elf_header = Header::default();
                    plain::copy_from_bytes(&mut elf_header, &raw).map_err(|_| {
                        Error::Malformed(format!(
                            "ELF header buffer of {} bytes is too short",
                            raw.len()
                        ))
                    })?;
                    Ok(elf_header)
                }

                #[cfg(feature = "endian_fd")]
                /// Parses an ELF header from the given bytes
                ///
                /// Byte order is taken from `e_ident[EI_DATA]`.
                ///
                /// # Errors
                ///
                /// Fails if `bytes` is too short or the data-encoding byte is neither
                /// `ELFDATA2LSB` nor `ELFDATA2MSB`.
                pub fn parse(bytes: &[u8]) -> Result<Header> {
                    use super::{EI_DATA, ELFDATA2LSB, ELFDATA2MSB};

                    let mut elf_header = Header::default();
                    let offset = &mut 0;
                    // Same ident read as `try_from_ctx`: fills all of `e_ident`.
                    bytes.gread_inout(offset, &mut elf_header.e_ident)?;
                    let endianness =
                        match elf_header.e_ident[EI_DATA] {
                            ELFDATA2LSB => scroll::LE,
                            ELFDATA2MSB => scroll::BE,
                            d => return Err(Error::Malformed(format!("invalid ELF DATA type {:x}", d)).into()),
                        };
                    elf_header.e_type =      bytes.gread_with(offset, endianness)?;
                    elf_header.e_machine =   bytes.gread_with(offset, endianness)?;
                    elf_header.e_version =   bytes.gread_with(offset, endianness)?;
                    elf_header.e_entry =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_phoff =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_shoff =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_flags =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_ehsize =    bytes.gread_with(offset, endianness)?;
                    elf_header.e_phentsize = bytes.gread_with(offset, endianness)?;
                    elf_header.e_phnum =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_shentsize = bytes.gread_with(offset, endianness)?;
                    elf_header.e_shnum =     bytes.gread_with(offset, endianness)?;
                    elf_header.e_shstrndx =  bytes.gread_with(offset, endianness)?;
                    Ok(elf_header)
                }
            }
        } // end if_alloc
    };
}
elf_header_test { + ($class:expr) => { + #[cfg(test)] + mod tests { + use super::*; + use crate::container::{Container, Ctx}; + use crate::elf::header::Header as ElfHeader; + use alloc::vec::Vec; + use scroll::{Pread, Pwrite}; + #[test] + fn size_of() { + assert_eq!(::std::mem::size_of::<Header>(), SIZEOF_EHDR); + } + #[test] + fn header_read_write() { + let crt1: Vec<u8> = if $class == ELFCLASS64 { + include!("../../etc/crt1.rs") + } else { + include!("../../etc/crt132.rs") + }; + let header: Header = crt1.pread(0).unwrap(); + assert_eq!(header.e_type, ET_REL); + println!("header: {:?}", &header); + let mut bytes = [0u8; SIZEOF_EHDR]; + bytes.pwrite(header, 0).unwrap(); + let header2: Header = bytes.pread(0).unwrap(); + assert_eq!(header, header2); + } + #[test] + fn elfheader_read_write() { + let (container, crt1): (Container, Vec<u8>) = if $class == ELFCLASS64 { + (Container::Big, include!("../../etc/crt1.rs")) + } else { + (Container::Little, include!("../../etc/crt132.rs")) + }; + let header: Header = crt1.pread(0).unwrap(); + assert_eq!(header.e_type, ET_REL); + println!("header: {:?}", &header); + let mut bytes = [0u8; SIZEOF_EHDR]; + let header_ = Header::from(header.clone()); + bytes.pwrite(header_, 0).unwrap(); + let header2: Header = bytes.pread(0).unwrap(); + assert_eq!(header, header2); + let header = ElfHeader::new(Ctx::from(container)); + println!("header: {:?}", &header); + + let mut bytes = vec![0; 100]; + bytes.pwrite(header, 0).unwrap(); + } + } + }; +} + +pub mod header32 { + pub use super::*; + + pub const SIZEOF_EHDR: usize = 52; + pub const ELFCLASS: u8 = ELFCLASS32; + + elf_header!(u32); + elf_header_std_impl!(SIZEOF_EHDR, u32); + elf_header_test!(ELFCLASS); +} + +pub mod header64 { + pub use super::*; + + pub const SIZEOF_EHDR: usize = 64; + pub const ELFCLASS: u8 = ELFCLASS64; + + elf_header!(u64); + elf_header_std_impl!(SIZEOF_EHDR, u64); + elf_header_test!(ELFCLASS); +} diff --git a/third_party/rust/goblin/src/elf/mod.rs 
b/third_party/rust/goblin/src/elf/mod.rs new file mode 100644 index 0000000000..70c49146ff --- /dev/null +++ b/third_party/rust/goblin/src/elf/mod.rs @@ -0,0 +1,571 @@ +//! The generic ELF module, which gives access to ELF constants and other helper functions, which are independent of ELF bithood. Also defines an `Elf` struct which implements a unified parser that returns a wrapped `Elf64` or `Elf32` binary. +//! +//! To access the exact 32-bit or 64-bit versions, use [goblin::elf32::Header](header/header32/struct.Header.html)/[goblin::elf64::Header](header/header64/struct.Header.html), etc., for the various 32/64-bit structs. +//! +//! # Example +//! +//! ```rust +//! use std::fs::File; +//! +//! pub fn read (bytes: &[u8]) { +//! match goblin::elf::Elf::parse(&bytes) { +//! Ok(binary) => { +//! let entry = binary.entry; +//! for ph in binary.program_headers { +//! if ph.p_type == goblin::elf::program_header::PT_LOAD { +//! // TODO: you should validate p_filesz before allocating. +//! let mut _buf = vec![0u8; ph.p_filesz as usize]; +//! // read responsibly +//! } +//! } +//! }, +//! Err(_) => () +//! } +//! } +//! ``` +//! +//! This will properly access the underlying 32-bit or 64-bit binary automatically. Note that since +//! 32-bit binaries typically have shorter 32-bit values in some cases (specifically for addresses and pointer +//! values), these values are upcasted to u64/i64s when appropriate. +//! +//! See [goblin::elf::Elf](struct.Elf.html) for more information. +//! +//! You are still free to use the specific 32-bit or 64-bit versions by accessing them through `goblin::elf64`, etc., but you will have to parse and/or construct the various components yourself. +//! In other words, there is no unified 32/64-bit `Elf` struct. +//! +//! # Note +//! To use the automagic ELF datatype union parser, you _must_ enable/opt-in to the `elf64`, `elf32`, and +//! `endian_fd` features if you disable `default`. 
+ +#[macro_use] +pub(crate) mod gnu_hash; + +// These are shareable values for the 32/64 bit implementations. +// +// They are publicly re-exported by the pub-using module +pub mod compression_header; +pub mod header; +pub mod program_header; +pub mod section_header; +#[macro_use] +pub mod sym; +pub mod dynamic; +#[macro_use] +pub mod reloc; +pub mod note; +#[cfg(all(any(feature = "elf32", feature = "elf64"), feature = "alloc"))] +pub mod symver; + +macro_rules! if_sylvan { + ($($i:item)*) => ($( + #[cfg(all(feature = "elf32", feature = "elf64", feature = "endian_fd"))] + $i + )*) +} + +if_sylvan! { + use scroll::{ctx, Pread, Endian}; + use crate::strtab::Strtab; + use crate::error; + use crate::container::{Container, Ctx}; + use alloc::vec::Vec; + use core::cmp; + + pub use header::Header; + pub use program_header::ProgramHeader; + pub use section_header::SectionHeader; + pub use sym::Symtab; + pub use sym::Sym; + pub use dynamic::Dyn; + pub use dynamic::Dynamic; + pub use reloc::Reloc; + pub use reloc::RelocSection; + pub use symver::{VersymSection, VerdefSection, VerneedSection}; + + pub type ProgramHeaders = Vec<ProgramHeader>; + pub type SectionHeaders = Vec<SectionHeader>; + pub type ShdrIdx = usize; + + #[derive(Debug)] + /// An ELF binary. The underlying data structures are read according to the headers byte order and container size (32 or 64). + pub struct Elf<'a> { + /// The ELF header, which provides a rudimentary index into the rest of the binary + pub header: Header, + /// The program headers; they primarily tell the kernel and the dynamic linker + /// how to load this binary + pub program_headers: ProgramHeaders, + /// The sections headers. These are strippable, never count on them being + /// here unless you're a static linker! 
+ pub section_headers: SectionHeaders, + /// The section header string table + pub shdr_strtab: Strtab<'a>, + /// The string table for the dynamically accessible symbols + pub dynstrtab: Strtab<'a>, + /// The dynamically accessible symbols, i.e., exports, imports. + /// This is what the dynamic linker uses to dynamically load and link your binary, + /// or find imported symbols for binaries which dynamically link against your library + pub dynsyms: Symtab<'a>, + /// The debugging symbol table + pub syms: Symtab<'a>, + /// The string table for the symbol table + pub strtab: Strtab<'a>, + /// Contains dynamic linking information, with the _DYNAMIC array + a preprocessed DynamicInfo for that array + pub dynamic: Option<Dynamic>, + /// The dynamic relocation entries (strings, copy-data, etc.) with an addend + pub dynrelas: RelocSection<'a>, + /// The dynamic relocation entries without an addend + pub dynrels: RelocSection<'a>, + /// The plt relocation entries (procedure linkage table). For 32-bit binaries these are usually Rel (no addend) + pub pltrelocs: RelocSection<'a>, + /// Section relocations by section index (only present if this is a relocatable object file) + pub shdr_relocs: Vec<(ShdrIdx, RelocSection<'a>)>, + /// The binary's soname, if it has one + pub soname: Option<&'a str>, + /// The binary's program interpreter (e.g., dynamic linker), if it has one + pub interpreter: Option<&'a str>, + /// A list of this binary's dynamic libraries it uses, if there are any + pub libraries: Vec<&'a str>, + /// A list of runtime search paths for this binary's dynamic libraries it uses, if there + /// are any. (deprecated) + pub rpaths: Vec<&'a str>, + /// A list of runtime search paths for this binary's dynamic libraries it uses, if there + /// are any. 
+ pub runpaths: Vec<&'a str>, + /// Whether this is a 64-bit elf or not + pub is_64: bool, + /// Whether this is a shared object or not + pub is_lib: bool, + /// The binaries entry point address, if it has one + pub entry: u64, + /// Whether the binary is little endian or not + pub little_endian: bool, + /// Contains the symbol version information from the optional section + /// [`SHT_GNU_VERSYM`][section_header::SHT_GNU_VERSYM] (GNU extenstion). + pub versym : Option<VersymSection<'a>>, + /// Contains the version definition information from the optional section + /// [`SHT_GNU_VERDEF`][section_header::SHT_GNU_VERDEF] (GNU extenstion). + pub verdef : Option<VerdefSection<'a>>, + /// Contains the version needed information from the optional section + /// [`SHT_GNU_VERNEED`][section_header::SHT_GNU_VERNEED] (GNU extenstion). + pub verneed : Option<VerneedSection<'a>>, + ctx: Ctx, + } + + impl<'a> Elf<'a> { + /// Try to iterate notes in PT_NOTE program headers; returns `None` if there aren't any note headers in this binary + pub fn iter_note_headers(&self, data: &'a [u8]) -> Option<note::NoteIterator<'a>> { + let mut iters = vec![]; + for phdr in &self.program_headers { + if phdr.p_type == program_header::PT_NOTE { + let offset = phdr.p_offset as usize; + let alignment = phdr.p_align as usize; + + iters.push(note::NoteDataIterator { + data, + offset, + size: offset.saturating_add(phdr.p_filesz as usize), + ctx: (alignment, self.ctx) + }); + } + } + + if iters.is_empty() { + None + } else { + Some(note::NoteIterator { + iters: iters, + index: 0, + }) + } + } + /// Try to iterate notes in SHT_NOTE sections; returns `None` if there aren't any note sections in this binary + /// + /// If a section_name is given, only the section with the according name is iterated. 
+ pub fn iter_note_sections( + &self, + data: &'a [u8], + section_name: Option<&str>, + ) -> Option<note::NoteIterator<'a>> { + let mut iters = vec![]; + for sect in &self.section_headers { + if sect.sh_type != section_header::SHT_NOTE { + continue; + } + + if section_name.is_some() && self.shdr_strtab.get_at(sect.sh_name) != section_name { + continue; + } + + let offset = sect.sh_offset as usize; + let alignment = sect.sh_addralign as usize; + iters.push(note::NoteDataIterator { + data, + offset, + size: offset.saturating_add(sect.sh_size as usize), + ctx: (alignment, self.ctx) + }); + } + + if iters.is_empty() { + None + } else { + Some(note::NoteIterator { + iters: iters, + index: 0, + }) + } + } + pub fn is_object_file(&self) -> bool { + self.header.e_type == header::ET_REL + } + + /// Parses the contents to get the Header only. This `bytes` buffer should contain at least the length for parsing Header. + pub fn parse_header(bytes: &'a [u8]) -> error::Result<Header> { + bytes.pread::<Header>(0) + } + + /// Lazy parse the ELF contents. This function mainly just assembles an Elf struct. Once we have the struct, we can choose to parse whatever we want. 
+ pub fn lazy_parse(header: Header) -> error::Result<Self> { + let misc = parse_misc(&header)?; + + Ok(Elf { + header, + program_headers: vec![], + section_headers: Default::default(), + shdr_strtab: Default::default(), + dynamic: None, + dynsyms: Default::default(), + dynstrtab: Strtab::default(), + syms: Default::default(), + strtab: Default::default(), + dynrelas: Default::default(), + dynrels: Default::default(), + pltrelocs: Default::default(), + shdr_relocs: Default::default(), + soname: None, + interpreter: None, + libraries: vec![], + rpaths: vec![], + runpaths: vec![], + is_64: misc.is_64, + is_lib: misc.is_lib, + entry: misc.entry, + little_endian: misc.little_endian, + ctx: misc.ctx, + versym: None, + verdef: None, + verneed: None, + }) + } + + /// Parses the contents of the byte stream in `bytes`, and maybe returns a unified binary + pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + let header = Self::parse_header(bytes)?; + let misc = parse_misc(&header)?; + let ctx = misc.ctx; + + let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?; + + let mut interpreter = None; + for ph in &program_headers { + if ph.p_type == program_header::PT_INTERP && ph.p_filesz != 0 { + let count = (ph.p_filesz - 1) as usize; + let offset = ph.p_offset as usize; + interpreter = bytes.pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count)).ok(); + } + } + + let section_headers = SectionHeader::parse(bytes, header.e_shoff as usize, header.e_shnum as usize, ctx)?; + + let get_strtab = |section_headers: &[SectionHeader], mut section_idx: usize| { + if section_idx == section_header::SHN_XINDEX as usize { + if section_headers.is_empty() { + return Ok(Strtab::default()) + } + section_idx = section_headers[0].sh_link as usize; + } + + if section_idx >= section_headers.len() { + // FIXME: warn! 
here + Ok(Strtab::default()) + } else { + let shdr = §ion_headers[section_idx]; + shdr.check_size(bytes.len())?; + Strtab::parse(bytes, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0) + } + }; + + let strtab_idx = header.e_shstrndx as usize; + let shdr_strtab = get_strtab(§ion_headers, strtab_idx)?; + + let mut syms = Symtab::default(); + let mut strtab = Strtab::default(); + if let Some(shdr) = section_headers.iter().rfind(|shdr| shdr.sh_type as u32 == section_header::SHT_SYMTAB) { + let size = shdr.sh_entsize; + let count = if size == 0 { 0 } else { shdr.sh_size / size }; + syms = Symtab::parse(bytes, shdr.sh_offset as usize, count as usize, ctx)?; + strtab = get_strtab(§ion_headers, shdr.sh_link as usize)?; + } + + let mut is_pie = false; + let mut soname = None; + let mut libraries = vec![]; + let mut rpaths = vec![]; + let mut runpaths = vec![]; + let mut dynsyms = Symtab::default(); + let mut dynrelas = RelocSection::default(); + let mut dynrels = RelocSection::default(); + let mut pltrelocs = RelocSection::default(); + let mut dynstrtab = Strtab::default(); + let dynamic = Dynamic::parse(bytes, &program_headers, ctx)?; + if let Some(ref dynamic) = dynamic { + let dyn_info = &dynamic.info; + + is_pie = dyn_info.flags_1 & dynamic::DF_1_PIE != 0; + dynstrtab = Strtab::parse(bytes, + dyn_info.strtab, + dyn_info.strsz, + 0x0)?; + + if dyn_info.soname != 0 { + // FIXME: warn! 
here + soname = dynstrtab.get_at(dyn_info.soname); + } + if dyn_info.needed_count > 0 { + libraries = dynamic.get_libraries(&dynstrtab); + } + for dyn_ in &dynamic.dyns { + if dyn_.d_tag == dynamic::DT_RPATH { + if let Some(path) = dynstrtab.get_at(dyn_.d_val as usize) { + rpaths.push(path); + } + } else if dyn_.d_tag == dynamic::DT_RUNPATH { + if let Some(path) = dynstrtab.get_at(dyn_.d_val as usize) { + runpaths.push(path); + } + } + } + // parse the dynamic relocations + dynrelas = RelocSection::parse(bytes, dyn_info.rela, dyn_info.relasz, true, ctx)?; + dynrels = RelocSection::parse(bytes, dyn_info.rel, dyn_info.relsz, false, ctx)?; + let is_rela = dyn_info.pltrel as u64 == dynamic::DT_RELA; + pltrelocs = RelocSection::parse(bytes, dyn_info.jmprel, dyn_info.pltrelsz, is_rela, ctx)?; + + let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash { + gnu_hash_len(bytes, gnu_hash as usize, ctx)? + } else if let Some(hash) = dyn_info.hash { + hash_len(bytes, hash as usize, header.e_machine, ctx)? 
+ } else { + 0 + }; + let max_reloc_sym = dynrelas.iter() + .chain(dynrels.iter()) + .chain(pltrelocs.iter()) + .fold(0, |num, reloc| cmp::max(num, reloc.r_sym)); + if max_reloc_sym != 0 { + num_syms = cmp::max(num_syms, max_reloc_sym + 1); + } + dynsyms = Symtab::parse(bytes, dyn_info.symtab, num_syms, ctx)?; + } + + let mut shdr_relocs = vec![]; + for (idx, section) in section_headers.iter().enumerate() { + let is_rela = section.sh_type == section_header::SHT_RELA; + if is_rela || section.sh_type == section_header::SHT_REL { + section.check_size(bytes.len())?; + let sh_relocs = RelocSection::parse(bytes, section.sh_offset as usize, section.sh_size as usize, is_rela, ctx)?; + shdr_relocs.push((idx, sh_relocs)); + } + } + + let versym = symver::VersymSection::parse(bytes, §ion_headers, ctx)?; + let verdef = symver::VerdefSection::parse(bytes, §ion_headers, ctx)?; + let verneed = symver::VerneedSection::parse(bytes, §ion_headers, ctx)?; + + let is_lib = misc.is_lib && !is_pie; + + Ok(Elf { + header, + program_headers, + section_headers, + shdr_strtab, + dynamic, + dynsyms, + dynstrtab, + syms, + strtab, + dynrelas, + dynrels, + pltrelocs, + shdr_relocs, + soname, + interpreter, + libraries, + rpaths, + runpaths, + is_64: misc.is_64, + is_lib, + entry: misc.entry, + little_endian: misc.little_endian, + ctx: ctx, + versym, + verdef, + verneed, + }) + } + } + + impl<'a> ctx::TryFromCtx<'a, (usize, Endian)> for Elf<'a> { + type Error = crate::error::Error; + fn try_from_ctx(src: &'a [u8], (_, _): (usize, Endian)) -> Result<(Elf<'a>, usize), Self::Error> { + let elf = Elf::parse(src)?; + Ok((elf, src.len())) + } + } + + fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> error::Result<usize> { + let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize; + let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize; + let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? 
as usize; + // We could handle min_chain==0 if we really had to, but it shouldn't happen. + if buckets_num == 0 || min_chain == 0 || bloom_size == 0 { + return Err(error::Error::Malformed(format!("Invalid DT_GNU_HASH: buckets_num={} min_chain={} bloom_size={}", + buckets_num, min_chain, bloom_size))); + } + // Find the last bucket. + let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 }; + let mut max_chain = 0; + for bucket in 0..buckets_num { + let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize; + if max_chain < chain { + max_chain = chain; + } + } + if max_chain < min_chain { + return Ok(0); + } + // Find the last chain within the bucket. + let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4; + loop { + let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?; + max_chain += 1; + chain_offset += 4; + if hash & 1 != 0 { + return Ok(max_chain); + } + } + } + + fn hash_len(bytes: &[u8], offset: usize, machine: u16, ctx: Ctx) -> error::Result<usize> { + // Based on readelf code. + let nchain = if (machine == header::EM_FAKE_ALPHA || machine == header::EM_S390) && ctx.container.is_big() { + bytes.pread_with::<u64>(offset.saturating_add(4), ctx.le)? as usize + } else { + bytes.pread_with::<u32>(offset.saturating_add(4), ctx.le)? 
as usize + }; + Ok(nchain) + } + + struct Misc { + is_64: bool, + is_lib: bool, + entry: u64, + little_endian: bool, + ctx: Ctx, + } + + fn parse_misc(header: &Header) -> error::Result<Misc> { + let entry = header.e_entry as usize; + let is_lib = header.e_type == header::ET_DYN; + let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB; + let endianness = scroll::Endian::from(is_lsb); + let class = header.e_ident[header::EI_CLASS]; + if class != header::ELFCLASS64 && class != header::ELFCLASS32 { + return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}", + class, + header.e_ident[header::EI_DATA]))); + } + let is_64 = class == header::ELFCLASS64; + let container = if is_64 { Container::Big } else { Container::Little }; + let ctx = Ctx::new(container, endianness); + + Ok(Misc{ + is_64, + is_lib, + entry: entry as u64, + little_endian:is_lsb, + ctx, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_crt1_64bit() { + let crt1: Vec<u8> = include!("../../etc/crt1.rs"); + match Elf::parse(&crt1) { + Ok(binary) => { + assert!(binary.is_64); + assert!(!binary.is_lib); + assert_eq!(binary.entry, 0); + assert!(binary.syms.get(1000).is_none()); + assert!(binary.syms.get(5).is_some()); + let syms = binary.syms.to_vec(); + assert!(!binary.section_headers.is_empty()); + for (i, sym) in syms.iter().enumerate() { + if i == 11 { + let symtab = binary.strtab; + println!("sym: {:?}", &sym); + assert_eq!(&symtab[sym.st_name], "_start"); + break; + } + } + assert!(!syms.is_empty()); + } + Err(err) => { + panic!("failed: {}", err); + } + } + } + + #[test] + fn parse_crt1_32bit() { + let crt1: Vec<u8> = include!("../../etc/crt132.rs"); + match Elf::parse(&crt1) { + Ok(binary) => { + assert!(!binary.is_64); + assert!(!binary.is_lib); + assert_eq!(binary.entry, 0); + assert!(binary.syms.get(1000).is_none()); + assert!(binary.syms.get(5).is_some()); + let syms = binary.syms.to_vec(); + 
assert!(!binary.section_headers.is_empty()); + for (i, sym) in syms.iter().enumerate() { + if i == 11 { + let symtab = binary.strtab; + println!("sym: {:?}", &sym); + assert_eq!(&symtab[sym.st_name], "__libc_csu_fini"); + break; + } + } + assert!(!syms.is_empty()); + } + Err(err) => { + panic!("failed: {}", err); + } + } + } + + // See https://github.com/m4b/goblin/issues/257 + #[test] + #[allow(unused)] + fn no_use_statement_conflict() { + use crate::elf::section_header::*; + use crate::elf::*; + + fn f(_: SectionHeader) {} + } +} diff --git a/third_party/rust/goblin/src/elf/note.rs b/third_party/rust/goblin/src/elf/note.rs new file mode 100644 index 0000000000..2e2864d25f --- /dev/null +++ b/third_party/rust/goblin/src/elf/note.rs @@ -0,0 +1,319 @@ +// Defined note types for GNU systems. + +#[cfg(feature = "log")] +use log::debug; +#[cfg(feature = "alloc")] +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// ABI information. +/// +/// The descriptor consists of words: +/// * word 0: OS descriptor +/// * word 1: major version of the ABI +/// * word 2: minor version of the ABI +/// * word 3: subminor version of the ABI +pub const NT_GNU_ABI_TAG: u32 = 1; + +/// Old name +pub const ELF_NOTE_ABI: u32 = NT_GNU_ABI_TAG; +// Known OSes. These values can appear in word 0 of an +// `NT_GNU_ABI_TAG` note section entry. +pub const ELF_NOTE_OS_LINUX: u32 = 0; +pub const ELF_NOTE_OS_GNU: u32 = 1; +pub const ELF_NOTE_OS_SOLARIS2: u32 = 2; +pub const ELF_NOTE_OS_FREEBSD: u32 = 3; + +/// Synthetic `hwcap` information. +/// +/// The descriptor begins with two words: +/// * word 0: number of entries +/// * word 1: bitmask of enabled entries +/// +/// Then follow variable-length entries, one byte followed by a '\0'-terminated +/// `hwcap` name string. The byte gives the bit number to test if enabled, +/// `(1U << bit) & bitmask`. +pub const NT_GNU_HWCAP: u32 = 2; + +/// Build ID bits as generated by ld --build-id. 
+/// +/// The descriptor consists of any nonzero number of bytes. +pub const NT_GNU_BUILD_ID: u32 = 3; + +/// Version note generated by GNU gold containing a version string. +pub const NT_GNU_GOLD_VERSION: u32 = 4; + +/// Program property note +pub const NT_GNU_PROPERTY_TYPE_0: u32 = 5; + +///Contains copy of prstatus struct. +pub const NT_PRSTATUS: u32 = 1; + +///Contains copy of prpsinfo struct. +pub const NT_PRPSINFO: u32 = 3; + +///Fields of siginfo_t. +pub const NT_SIGINFO: u32 = 0x5349_4749; + +///Description of mapped files. +pub const NT_FILE: u32 = 0x4649_4c45; + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "alloc", derive(Pread, Pwrite, IOread, IOwrite, SizeWith))] +#[repr(C)] +/// Note section contents. Each entry in the note section begins with a header +/// of a fixed form. +pub struct Nhdr32 { + /// Length of the note's name (includes the terminator) + pub n_namesz: u32, + /// Length of the note's descriptor + pub n_descsz: u32, + /// Type of the note + pub n_type: u32, +} + +// Declare that this is a plain type. +unsafe impl plain::Plain for Nhdr32 {} + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(feature = "alloc", derive(Pread, Pwrite, IOread, IOwrite, SizeWith))] +#[repr(C)] +/// Note section contents. Each entry in the note section begins with a header +/// of a fixed form. +pub struct Nhdr64 { + /// Length of the note's name (includes the terminator) + pub n_namesz: u64, + /// Length of the note's descriptor. + pub n_descsz: u64, + /// Type of the note. + pub n_type: u64, +} + +// Declare that this is a plain type. +unsafe impl plain::Plain for Nhdr64 {} + +if_alloc! 
{ + use crate::error; + use crate::container; + use scroll::ctx; + use alloc::vec::Vec; + + /// An iterator over ELF binary notes in a note section or segment + pub struct NoteDataIterator<'a> { + pub data: &'a [u8], + pub size: usize, + pub offset: usize, + pub ctx: (usize, container::Ctx), // (alignment, ctx) + } + + impl<'a> Iterator for NoteDataIterator<'a> { + type Item = error::Result<Note<'a>>; + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.size { + None + } else { + debug!("NoteIterator - {:#x}", self.offset); + match self.data.gread_with(&mut self.offset, self.ctx) { + Ok(res) => Some(Ok(res)), + Err(e) => Some(Err(e)) + } + } + } + } + + /// An iterator over ELF binary notes + pub struct NoteIterator<'a> { + pub iters: Vec<NoteDataIterator<'a>>, + pub index: usize, + } + + impl<'a> Iterator for NoteIterator<'a> { + type Item = error::Result<Note<'a>>; + fn next(&mut self) -> Option<Self::Item> { + while self.index < self.iters.len() { + if let Some(note_result) = self.iters[self.index].next() { + return Some(note_result); + } + + self.index += 1; + } + + None + } + } + + #[derive(Debug)] + struct NoteHeader { + n_namesz: usize, + n_descsz: usize, + n_type: u32, + } + + impl From<Nhdr32> for NoteHeader { + fn from(header: Nhdr32) -> Self { + NoteHeader { + n_namesz: header.n_namesz as usize, + n_descsz: header.n_descsz as usize, + n_type: header.n_type, + } + } + } + + impl From<Nhdr64> for NoteHeader { + fn from(header: Nhdr64) -> Self { + NoteHeader { + n_namesz: header.n_namesz as usize, + n_descsz: header.n_descsz as usize, + n_type: header.n_type as u32, + } + } + } + + fn align(alignment: usize, offset: &mut usize) { + let diff = *offset % alignment; + if diff != 0 { + *offset += alignment - diff; + } + } + + /// A 32/64 bit Note struct, with the name and desc pre-parsed + #[derive(Debug)] + pub struct Note<'a> { + /// The type of this note + pub n_type: u32, + /// NUL terminated string, where `namesz` includes the terminator + 
pub name: &'a str, // needs padding such that namesz + padding % {wordsize} == 0 + /// arbitrary data of length `descsz` + pub desc: &'a [u8], // needs padding such that descsz + padding % {wordsize} == 0 + } + + impl<'a> Note<'a> { + pub fn type_to_str(&self) -> &'static str { + match self.n_type { + NT_GNU_ABI_TAG => "NT_GNU_ABI_TAG", + NT_GNU_HWCAP => "NT_GNU_HWCAP", + NT_GNU_BUILD_ID => "NT_GNU_BUILD_ID", + NT_GNU_GOLD_VERSION => "NT_GNU_GOLD_VERSION", + NT_GNU_PROPERTY_TYPE_0 => "NT_GNU_PROPERTY_0", + _ => "NT_UNKNOWN" + } + } + } + + impl<'a> ctx::TryFromCtx<'a, (usize, container::Ctx)> for Note<'a> { + type Error = error::Error; + fn try_from_ctx(bytes: &'a [u8], (alignment, ctx): (usize, container::Ctx)) -> Result<(Self, usize), Self::Error> { + let offset = &mut 0; + let mut alignment = alignment; + if alignment < 4 { + alignment = 4; + } + let header: NoteHeader = { + match alignment { + 4|8 => bytes.gread_with::<Nhdr32>(offset, ctx.le)?.into(), + _ => return Err(error::Error::Malformed(format!("Notes has unimplemented alignment requirement: {:#x}", alignment))) + } + }; + debug!("{:?} - {:#x}", header, *offset); + // -1 because includes \0 terminator + let name = bytes.gread_with::<&'a str>(offset, ctx::StrCtx::Length(header.n_namesz.saturating_sub(1)))?; + if header.n_namesz > 0 { + *offset += 1; + } + align(alignment, offset); + debug!("note name {} - {:#x}", name, *offset); + let desc = bytes.gread_with::<&'a [u8]>(offset, header.n_descsz)?; + align(alignment, offset); + debug!("desc {:?} - {:#x}", desc, *offset); + Ok((Note { + name, + desc, + n_type: header.n_type, + }, *offset)) + } + } + + #[cfg(test)] + mod tests { + use super::*; + + static NOTE_DATA: [u8; 132] = [0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4e, 0x55, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x4e, 
0x55, 0x00, + 0xbc, 0xfc, 0x66, 0xcd, 0xc7, 0xd5, 0x14, 0x7b, + 0x53, 0xb1, 0x10, 0x11, 0x94, 0x86, 0x8e, 0xf9, + 0x4f, 0xe8, 0xdd, 0xdb, 0x04, 0x00, 0x00, 0x00, + 0x30, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x47, 0x4E, 0x55, 0x00, 0x02, 0x80, 0x00, 0xC0, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0xC0, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01, 0xC0, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00]; + + static CONTEXT: (usize, container::Ctx) = (4, container::Ctx { + container: container::Container::Big, + le: ::scroll::Endian::Little, + }); + + fn make_note_iter(start: usize, end: usize) -> NoteDataIterator<'static> { + NoteDataIterator { + data: &NOTE_DATA, + size: end, + offset: start, + ctx: CONTEXT, + } + } + + #[test] + fn iter_single_section() { + let mut notes = NoteIterator { + iters: vec![make_note_iter(0, 132)], + index: 0, + }; + + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_ABI_TAG); + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_BUILD_ID); + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_PROPERTY_TYPE_0); + assert!(notes.next().is_none()); + } + + #[test] + fn iter_multiple_sections() { + let mut notes = NoteIterator { + iters: vec![make_note_iter(0, 32), make_note_iter(32, 68), make_note_iter(68, 132)], + index: 0, + }; + + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_ABI_TAG); + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_BUILD_ID); + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_PROPERTY_TYPE_0); + assert!(notes.next().is_none()); + } + + #[test] + fn skip_empty_sections() { + let mut notes = NoteIterator { + iters: vec![ + make_note_iter(0, 32), + make_note_iter(0, 0), + make_note_iter(32, 68), + ], + index: 0, + }; + + assert_eq!(notes.next().unwrap().unwrap().n_type, NT_GNU_ABI_TAG); + assert_eq!(notes.next().unwrap().unwrap().n_type, 
NT_GNU_BUILD_ID); + assert!(notes.next().is_none()); + } + + #[test] + fn ignore_no_sections() { + let mut notes = NoteIterator { iters: vec![], index: 0 }; + assert!(notes.next().is_none()); + } + } +} diff --git a/third_party/rust/goblin/src/elf/program_header.rs b/third_party/rust/goblin/src/elf/program_header.rs new file mode 100644 index 0000000000..80d499ca68 --- /dev/null +++ b/third_party/rust/goblin/src/elf/program_header.rs @@ -0,0 +1,430 @@ +/* Legal values for p_type (segment type). */ + +/// Program header table entry unused +pub const PT_NULL: u32 = 0; +/// Loadable program segment +pub const PT_LOAD: u32 = 1; +/// Dynamic linking information +pub const PT_DYNAMIC: u32 = 2; +/// Program interpreter +pub const PT_INTERP: u32 = 3; +/// Auxiliary information +pub const PT_NOTE: u32 = 4; +/// Reserved +pub const PT_SHLIB: u32 = 5; +/// Entry for header table itself +pub const PT_PHDR: u32 = 6; +/// Thread-local storage segment +pub const PT_TLS: u32 = 7; +/// Number of defined types +pub const PT_NUM: u32 = 8; +/// Start of OS-specific +pub const PT_LOOS: u32 = 0x6000_0000; +/// GCC .eh_frame_hdr segment +pub const PT_GNU_EH_FRAME: u32 = 0x6474_e550; +/// GNU property notes for linker and run-time loaders +pub const PT_GNU_PROPERTY: u32 = 0x6474_e553; +/// Indicates stack executability +pub const PT_GNU_STACK: u32 = 0x6474_e551; +/// Read-only after relocation +pub const PT_GNU_RELRO: u32 = 0x6474_e552; +/// Sun Specific segment +pub const PT_LOSUNW: u32 = 0x6fff_fffa; +/// Sun Specific segment +pub const PT_SUNWBSS: u32 = 0x6fff_fffa; +/// Stack segment +pub const PT_SUNWSTACK: u32 = 0x6fff_fffb; +/// End of OS-specific +pub const PT_HISUNW: u32 = 0x6fff_ffff; +/// End of OS-specific +pub const PT_HIOS: u32 = 0x6fff_ffff; +/// Start of processor-specific +pub const PT_LOPROC: u32 = 0x7000_0000; +/// ARM unwind segment +pub const PT_ARM_EXIDX: u32 = 0x7000_0001; +/// End of processor-specific +pub const PT_HIPROC: u32 = 0x7fff_ffff; + +/* Legal values 
for p_flags (segment flags). */ + +/// Segment is executable +pub const PF_X: u32 = 1; +/// Segment is writable +pub const PF_W: u32 = 1 << 1; +/// Segment is readable +pub const PF_R: u32 = 1 << 2; +/// Bits reserved for OS-specific usage +pub const PF_MASKOS: u32 = 0x0ff0_0000; +/// Bits reserved for processor-specific usage +pub const PF_MASKPROC: u32 = 0xf000_0000; + +pub fn pt_to_str(pt: u32) -> &'static str { + match pt { + PT_NULL => "PT_NULL", + PT_LOAD => "PT_LOAD", + PT_DYNAMIC => "PT_DYNAMIC", + PT_INTERP => "PT_INTERP", + PT_NOTE => "PT_NOTE", + PT_SHLIB => "PT_SHLIB", + PT_PHDR => "PT_PHDR", + PT_TLS => "PT_TLS", + PT_NUM => "PT_NUM", + PT_LOOS => "PT_LOOS", + PT_GNU_EH_FRAME => "PT_GNU_EH_FRAME", + PT_GNU_PROPERTY => "PT_GNU_PROPERTY", + PT_GNU_STACK => "PT_GNU_STACK", + PT_GNU_RELRO => "PT_GNU_RELRO", + PT_SUNWBSS => "PT_SUNWBSS", + PT_SUNWSTACK => "PT_SUNWSTACK", + PT_HIOS => "PT_HIOS", + PT_LOPROC => "PT_LOPROC", + PT_HIPROC => "PT_HIPROC", + PT_ARM_EXIDX => "PT_ARM_EXIDX", + _ => "UNKNOWN_PT", + } +} + +if_alloc! 
{ + use core::fmt; + use scroll::ctx; + use core::result; + use core::ops::Range; + use crate::container::{Ctx, Container}; + use alloc::vec::Vec; + + #[derive(Default, PartialEq, Clone)] + /// A unified ProgramHeader - convertable to and from 32-bit and 64-bit variants + pub struct ProgramHeader { + pub p_type : u32, + pub p_flags : u32, + pub p_offset: u64, + pub p_vaddr : u64, + pub p_paddr : u64, + pub p_filesz: u64, + pub p_memsz : u64, + pub p_align : u64, + } + + impl ProgramHeader { + /// Return the size of the underlying program header, given a `Ctx` + #[inline] + pub fn size(ctx: Ctx) -> usize { + use scroll::ctx::SizeWith; + Self::size_with(&ctx) + } + /// Create a new `PT_LOAD` ELF program header + pub fn new() -> Self { + ProgramHeader { + p_type : PT_LOAD, + p_flags : 0, + p_offset: 0, + p_vaddr : 0, + p_paddr : 0, + p_filesz: 0, + p_memsz : 0, + //TODO: check if this is true for 32-bit pt_load + p_align : 2 << 20, + } + } + /// Returns this program header's file offset range + pub fn file_range(&self) -> Range<usize> { + self.p_offset as usize..self.p_offset.saturating_add(self.p_filesz) as usize + } + /// Returns this program header's virtual memory range + pub fn vm_range(&self) -> Range<usize> { + self.p_vaddr as usize..self.p_vaddr.saturating_add(self.p_memsz) as usize + } + /// Sets the executable flag + pub fn executable(&mut self) { + self.p_flags |= PF_X; + } + /// Sets the write flag + pub fn write(&mut self) { + self.p_flags |= PF_W; + } + /// Sets the read flag + pub fn read(&mut self) { + self.p_flags |= PF_R; + } + /// Whether this program header is executable + pub fn is_executable(&self) -> bool { + self.p_flags & PF_X != 0 + } + /// Whether this program header is readable + pub fn is_read(&self) -> bool { + self.p_flags & PF_R != 0 + } + /// Whether this program header is writable + pub fn is_write(&self) -> bool { + self.p_flags & PF_W != 0 + } + #[cfg(feature = "endian_fd")] + pub fn parse(bytes: &[u8], mut offset: usize, count: 
usize, ctx: Ctx) -> crate::error::Result<Vec<ProgramHeader>> { + use scroll::Pread; + // Sanity check to avoid OOM + if count > bytes.len() / Self::size(ctx) { + return Err(crate::error::Error::BufferTooShort(count, "program headers")); + } + let mut program_headers = Vec::with_capacity(count); + for _ in 0..count { + let phdr = bytes.gread_with(&mut offset, ctx)?; + program_headers.push(phdr); + } + Ok(program_headers) + } + } + + impl fmt::Debug for ProgramHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("ProgramHeader") + .field("p_type", &pt_to_str(self.p_type)) + .field("p_flags", &format_args!("0x{:x}", self.p_flags)) + .field("p_offset", &format_args!("0x{:x}", self.p_offset)) + .field("p_vaddr", &format_args!("0x{:x}", self.p_vaddr)) + .field("p_paddr", &format_args!("0x{:x}", self.p_paddr)) + .field("p_filesz", &format_args!("0x{:x}", self.p_filesz)) + .field("p_memsz", &format_args!("0x{:x}", self.p_memsz)) + .field("p_align", &self.p_align) + .finish() + } + } + + impl ctx::SizeWith<Ctx> for ProgramHeader { + fn size_with(ctx: &Ctx) -> usize { + match ctx.container { + Container::Little => { + program_header32::SIZEOF_PHDR + }, + Container::Big => { + program_header64::SIZEOF_PHDR + }, + } + } + } + + impl<'a> ctx::TryFromCtx<'a, Ctx> for ProgramHeader { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], Ctx { container, le}: Ctx) -> result::Result<(Self, usize), Self::Error> { + use scroll::Pread; + let res = match container { + Container::Little => { + (bytes.pread_with::<program_header32::ProgramHeader>(0, le)?.into(), program_header32::SIZEOF_PHDR) + }, + Container::Big => { + (bytes.pread_with::<program_header64::ProgramHeader>(0, le)?.into(), program_header64::SIZEOF_PHDR) + } + }; + Ok(res) + } + } + + impl ctx::TryIntoCtx<Ctx> for ProgramHeader { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result<usize, Self::Error> { + use 
scroll::Pwrite; + match container { + Container::Little => { + let phdr: program_header32::ProgramHeader = self.into(); + Ok(bytes.pwrite_with(phdr, 0, le)?) + }, + Container::Big => { + let phdr: program_header64::ProgramHeader = self.into(); + Ok(bytes.pwrite_with(phdr, 0, le)?) + } + } + } + } +} // end if_alloc + +macro_rules! elf_program_header_std_impl { + ($size:ty) => { + #[cfg(test)] + mod tests { + use super::*; + #[test] + fn size_of() { + assert_eq!(::std::mem::size_of::<ProgramHeader>(), SIZEOF_PHDR); + } + } + + if_alloc! { + + + use crate::elf::program_header::ProgramHeader as ElfProgramHeader; + #[cfg(any(feature = "std", feature = "endian_fd"))] + use crate::error::Result; + + use plain::Plain; + + if_std! { + use std::fs::File; + use std::io::{Seek, Read}; + use std::io::SeekFrom::Start; + } + + impl From<ProgramHeader> for ElfProgramHeader { + fn from(ph: ProgramHeader) -> Self { + ElfProgramHeader { + p_type : ph.p_type, + p_flags : ph.p_flags, + p_offset : u64::from(ph.p_offset), + p_vaddr : u64::from(ph.p_vaddr), + p_paddr : u64::from(ph.p_paddr), + p_filesz : u64::from(ph.p_filesz), + p_memsz : u64::from(ph.p_memsz), + p_align : u64::from(ph.p_align), + } + } + } + + impl From<ElfProgramHeader> for ProgramHeader { + fn from(ph: ElfProgramHeader) -> Self { + ProgramHeader { + p_type : ph.p_type, + p_flags : ph.p_flags, + p_offset : ph.p_offset as $size, + p_vaddr : ph.p_vaddr as $size, + p_paddr : ph.p_paddr as $size, + p_filesz : ph.p_filesz as $size, + p_memsz : ph.p_memsz as $size, + p_align : ph.p_align as $size, + } + } + } + } // end if_alloc + + use core::fmt; + use core::slice; + + impl fmt::Debug for ProgramHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("ProgramHeader") + .field("p_type", &pt_to_str(self.p_type)) + .field("p_flags", &format_args!("0x{:x}", self.p_flags)) + .field("p_offset", &format_args!("0x{:x}", self.p_offset)) + .field("p_vaddr", &format_args!("0x{:x}", self.p_vaddr)) + 
.field("p_paddr", &format_args!("0x{:x}", self.p_paddr)) + .field("p_filesz", &format_args!("0x{:x}", self.p_filesz)) + .field("p_memsz", &format_args!("0x{:x}", self.p_memsz)) + .field("p_align", &self.p_align) + .finish() + } + } + + impl ProgramHeader { + #[cfg(feature = "endian_fd")] + pub fn parse( + bytes: &[u8], + mut offset: usize, + count: usize, + ctx: ::scroll::Endian, + ) -> Result<Vec<ProgramHeader>> { + use scroll::Pread; + let mut program_headers = vec![ProgramHeader::default(); count]; + let offset = &mut offset; + bytes.gread_inout_with(offset, &mut program_headers, ctx)?; + Ok(program_headers) + } + + #[cfg(feature = "alloc")] + pub fn from_bytes(bytes: &[u8], phnum: usize) -> Vec<ProgramHeader> { + let mut phdrs = vec![ProgramHeader::default(); phnum]; + phdrs + .copy_from_bytes(bytes) + .expect("buffer is too short for given number of entries"); + phdrs + } + + /// # Safety + /// + /// This function creates a `ProgramHeader` directly from a raw pointer + pub unsafe fn from_raw_parts<'a>( + phdrp: *const ProgramHeader, + phnum: usize, + ) -> &'a [ProgramHeader] { + slice::from_raw_parts(phdrp, phnum) + } + + #[cfg(feature = "std")] + pub fn from_fd(fd: &mut File, offset: u64, count: usize) -> Result<Vec<ProgramHeader>> { + let mut phdrs = vec![ProgramHeader::default(); count]; + fd.seek(Start(offset))?; + unsafe { + fd.read_exact(plain::as_mut_bytes(&mut *phdrs))?; + } + Ok(phdrs) + } + } + }; +} + +#[cfg(feature = "alloc")] +use scroll::{Pread, Pwrite, SizeWith}; + +pub mod program_header32 { + pub use crate::elf::program_header::*; + + #[repr(C)] + #[derive(Copy, Clone, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// A 32-bit ProgramHeader typically specifies how to map executable and data segments into memory + pub struct ProgramHeader { + /// Segment type + pub p_type: u32, + /// Segment file offset + pub p_offset: u32, + /// Segment virtual address + pub p_vaddr: u32, + /// Segment physical 
address + pub p_paddr: u32, + /// Segment size in file + pub p_filesz: u32, + /// Segment size in memory + pub p_memsz: u32, + /// Segment flags + pub p_flags: u32, + /// Segment alignment + pub p_align: u32, + } + + pub const SIZEOF_PHDR: usize = 32; + + // Declare that this is a plain type. + unsafe impl plain::Plain for ProgramHeader {} + + elf_program_header_std_impl!(u32); +} + +pub mod program_header64 { + pub use crate::elf::program_header::*; + + #[repr(C)] + #[derive(Copy, Clone, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// A 64-bit ProgramHeader typically specifies how to map executable and data segments into memory + pub struct ProgramHeader { + /// Segment type + pub p_type: u32, + /// Segment flags + pub p_flags: u32, + /// Segment file offset + pub p_offset: u64, + /// Segment virtual address + pub p_vaddr: u64, + /// Segment physical address + pub p_paddr: u64, + /// Segment size in file + pub p_filesz: u64, + /// Segment size in memory + pub p_memsz: u64, + /// Segment alignment + pub p_align: u64, + } + + pub const SIZEOF_PHDR: usize = 56; + + // Declare that this is a plain type. + unsafe impl plain::Plain for ProgramHeader {} + + elf_program_header_std_impl!(u64); +} diff --git a/third_party/rust/goblin/src/elf/reloc.rs b/third_party/rust/goblin/src/elf/reloc.rs new file mode 100644 index 0000000000..eeb3e1a2ac --- /dev/null +++ b/third_party/rust/goblin/src/elf/reloc.rs @@ -0,0 +1,522 @@ +//! # Relocation computations +//! +//! The following notation is used to describe relocation computations +//! specific to x86_64 ELF. +//! +//! * A: The addend used to compute the value of the relocatable field. +//! * B: The base address at which a shared object is loaded into memory +//! during execution. Generally, a shared object file is built with a +//! base virtual address of 0. However, the execution address of the +//! shared object is different. +//! 
* G: The offset into the global offset table at which the address of +//! the relocation entry's symbol resides during execution. +//! * GOT: The address of the global offset table. +//! * L: The section offset or address of the procedure linkage table entry +//! for a symbol. +//! * P: The section offset or address of the storage unit being relocated, +//! computed using r_offset. +//! * S: The value of the symbol whose index resides in the relocation entry. +//! * Z: The size of the symbol whose index resides in the relocation entry. +//! +//! Below are some common x86_64 relocation computations you might find useful: +//! +//! | Relocation | Value | Size | Formula | +//! |:--------------------------|:------|:----------|:------------------| +//! | `R_X86_64_NONE` | 0 | NONE | NONE | +//! | `R_X86_64_64` | 1 | 64 | S + A | +//! | `R_X86_64_PC32` | 2 | 32 | S + A - P | +//! | `R_X86_64_GOT32` | 3 | 32 | G + A | +//! | `R_X86_64_PLT32` | 4 | 32 | L + A - P | +//! | `R_X86_64_COPY` | 5 | NONE | NONE | +//! | `R_X86_64_GLOB_DAT` | 6 | 64 | S | +//! | `R_X86_64_JUMP_SLOT` | 7 | 64 | S | +//! | `R_X86_64_RELATIVE` | 8 | 64 | B + A | +//! | `R_X86_64_GOTPCREL` | 9 | 32 | G + GOT + A - P | +//! | `R_X86_64_32` | 10 | 32 | S + A | +//! | `R_X86_64_32S` | 11 | 32 | S + A | +//! | `R_X86_64_16` | 12 | 16 | S + A | +//! | `R_X86_64_PC16` | 13 | 16 | S + A - P | +//! | `R_X86_64_8` | 14 | 8 | S + A | +//! | `R_X86_64_PC8` | 15 | 8 | S + A - P | +//! | `R_X86_64_DTPMOD64` | 16 | 64 | | +//! | `R_X86_64_DTPOFF64` | 17 | 64 | | +//! | `R_X86_64_TPOFF64` | 18 | 64 | | +//! | `R_X86_64_TLSGD` | 19 | 32 | | +//! | `R_X86_64_TLSLD` | 20 | 32 | | +//! | `R_X86_64_DTPOFF32` | 21 | 32 | | +//! | `R_X86_64_GOTTPOFF` | 22 | 32 | | +//! | `R_X86_64_TPOFF32` | 23 | 32 | | +//! | `R_X86_64_PC64` | 24 | 64 | S + A - P | +//! | `R_X86_64_GOTOFF64` | 25 | 64 | S + A - GOT | +//! | `R_X86_64_GOTPC32` | 26 | 32 | GOT + A - P | +//! | `R_X86_64_SIZE32` | 32 | 32 | Z + A | +//! 
| `R_X86_64_SIZE64` | 33 | 64 | Z + A | +//! | `R_X86_64_GOTPC32_TLSDESC` | 34 | 32 | | +//! | `R_X86_64_TLSDESC_CALL` | 35 | NONE | | +//! | `R_X86_64_TLSDESC` | 36 | 64 × 2 | | +//! | `R_X86_64_IRELATIVE` | 37 | 64 | indirect (B + A) | +//! +//! TLS information is at http://people.redhat.com/aoliva/writeups/TLS/RFC-TLSDESC-x86.txt +//! +//! `R_X86_64_IRELATIVE` is similar to `R_X86_64_RELATIVE` except that +//! the value used in this relocation is the program address returned by the function, +//! which takes no arguments, at the address of the result of the corresponding +//! `R_X86_64_RELATIVE` relocation. +//! +//! Read more https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-54839.html + +include!("constants_relocation.rs"); + +macro_rules! elf_reloc { + ($size:ident, $isize:ty) => { + use core::fmt; + #[cfg(feature = "alloc")] + use scroll::{Pread, Pwrite, SizeWith}; + #[repr(C)] + #[derive(Clone, Copy, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// Relocation with an explicit addend + pub struct Rela { + /// Address + pub r_offset: $size, + /// Relocation type and symbol index + pub r_info: $size, + /// Addend + pub r_addend: $isize, + } + #[repr(C)] + #[derive(Clone, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// Relocation without an addend + pub struct Rel { + /// address + pub r_offset: $size, + /// relocation type and symbol index + pub r_info: $size, + } + use plain; + unsafe impl plain::Plain for Rela {} + unsafe impl plain::Plain for Rel {} + + impl fmt::Debug for Rela { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let sym = r_sym(self.r_info); + let typ = r_type(self.r_info); + f.debug_struct("Rela") + .field("r_offset", &format_args!("{:x}", self.r_offset)) + .field("r_info", &format_args!("{:x}", self.r_info)) + .field("r_addend", &format_args!("{:x}", self.r_addend)) + .field("r_typ", &typ) + .field("r_sym", &sym) + .finish() + } + } 
+ impl fmt::Debug for Rel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let sym = r_sym(self.r_info); + let typ = r_type(self.r_info); + f.debug_struct("Rel") + .field("r_offset", &format_args!("{:x}", self.r_offset)) + .field("r_info", &format_args!("{:x}", self.r_info)) + .field("r_typ", &typ) + .field("r_sym", &sym) + .finish() + } + } + }; +} + +macro_rules! elf_rela_std_impl { + ($size:ident, $isize:ty) => { + if_alloc! { + use crate::elf::reloc::Reloc; + + use core::slice; + + if_std! { + use crate::error::Result; + + use std::fs::File; + use std::io::{Read, Seek}; + use std::io::SeekFrom::Start; + } + + impl From<Rela> for Reloc { + fn from(rela: Rela) -> Self { + Reloc { + r_offset: u64::from(rela.r_offset), + r_addend: Some(i64::from(rela.r_addend)), + r_sym: r_sym(rela.r_info) as usize, + r_type: r_type(rela.r_info), + } + } + } + + impl From<Rel> for Reloc { + fn from(rel: Rel) -> Self { + Reloc { + r_offset: u64::from(rel.r_offset), + r_addend: None, + r_sym: r_sym(rel.r_info) as usize, + r_type: r_type(rel.r_info), + } + } + } + + impl From<Reloc> for Rela { + fn from(rela: Reloc) -> Self { + let r_info = r_info(rela.r_sym as $size, $size::from(rela.r_type)); + Rela { + r_offset: rela.r_offset as $size, + r_info: r_info, + r_addend: rela.r_addend.unwrap_or(0) as $isize, + } + } + } + + impl From<Reloc> for Rel { + fn from(rel: Reloc) -> Self { + let r_info = r_info(rel.r_sym as $size, $size::from(rel.r_type)); + Rel { + r_offset: rel.r_offset as $size, + r_info: r_info, + } + } + } + + /// Gets the rela entries given a rela pointer and the _size_ of the rela section in the binary, + /// in bytes. + /// Assumes the pointer is valid and can safely return a slice of memory pointing to the relas because: + /// 1. `ptr` points to memory received from the kernel (i.e., it loaded the executable), _or_ + /// 2. The binary has already been mmapped (i.e., it's a `SharedObject`), and hence it's safe to return a slice of that memory. + /// 3. 
Or if you obtained the pointer in some other lawful manner + pub unsafe fn from_raw_rela<'a>(ptr: *const Rela, size: usize) -> &'a [Rela] { + slice::from_raw_parts(ptr, size / SIZEOF_RELA) + } + + /// Gets the rel entries given a rel pointer and the _size_ of the rel section in the binary, + /// in bytes. + /// Assumes the pointer is valid and can safely return a slice of memory pointing to the rels because: + /// 1. `ptr` points to memory received from the kernel (i.e., it loaded the executable), _or_ + /// 2. The binary has already been mmapped (i.e., it's a `SharedObject`), and hence it's safe to return a slice of that memory. + /// 3. Or if you obtained the pointer in some other lawful manner + pub unsafe fn from_raw_rel<'a>(ptr: *const Rel, size: usize) -> &'a [Rel] { + slice::from_raw_parts(ptr, size / SIZEOF_REL) + } + + #[cfg(feature = "std")] + pub fn from_fd(fd: &mut File, offset: usize, size: usize) -> Result<Vec<Rela>> { + let count = size / SIZEOF_RELA; + let mut relocs = vec![Rela::default(); count]; + fd.seek(Start(offset as u64))?; + unsafe { + fd.read_exact(plain::as_mut_bytes(&mut *relocs))?; + } + Ok(relocs) + } + } // end if_alloc + }; +} + +pub mod reloc32 { + + pub use crate::elf::reloc::*; + + elf_reloc!(u32, i32); + + pub const SIZEOF_RELA: usize = 4 + 4 + 4; + pub const SIZEOF_REL: usize = 4 + 4; + + #[inline(always)] + pub fn r_sym(info: u32) -> u32 { + info >> 8 + } + + #[inline(always)] + pub fn r_type(info: u32) -> u32 { + info & 0xff + } + + #[inline(always)] + pub fn r_info(sym: u32, typ: u32) -> u32 { + (sym << 8) + (typ & 0xff) + } + + elf_rela_std_impl!(u32, i32); +} + +pub mod reloc64 { + pub use crate::elf::reloc::*; + + elf_reloc!(u64, i64); + + pub const SIZEOF_RELA: usize = 8 + 8 + 8; + pub const SIZEOF_REL: usize = 8 + 8; + + #[inline(always)] + pub fn r_sym(info: u64) -> u32 { + (info >> 32) as u32 + } + + #[inline(always)] + pub fn r_type(info: u64) -> u32 { + (info & 0xffff_ffff) as u32 + } + + #[inline(always)] + pub fn 
r_info(sym: u64, typ: u64) -> u64 { + (sym << 32) + typ + } + + elf_rela_std_impl!(u64, i64); +} + +////////////////////////////// +// Generic Reloc +///////////////////////////// +if_alloc! { + use scroll::{ctx, Pread}; + use scroll::ctx::SizeWith; + use core::fmt; + use core::result; + use crate::container::{Ctx, Container}; + use alloc::vec::Vec; + + #[derive(Clone, Copy, PartialEq, Default)] + /// A unified ELF relocation structure + pub struct Reloc { + /// Address + pub r_offset: u64, + /// Addend + pub r_addend: Option<i64>, + /// The index into the corresponding symbol table - either dynamic or regular + pub r_sym: usize, + /// The relocation type + pub r_type: u32, + } + + impl Reloc { + pub fn size(is_rela: bool, ctx: Ctx) -> usize { + use scroll::ctx::SizeWith; + Reloc::size_with(&(is_rela, ctx)) + } + } + + type RelocCtx = (bool, Ctx); + + impl ctx::SizeWith<RelocCtx> for Reloc { + fn size_with( &(is_rela, Ctx { container, .. }): &RelocCtx) -> usize { + match container { + Container::Little => { + if is_rela { reloc32::SIZEOF_RELA } else { reloc32::SIZEOF_REL } + }, + Container::Big => { + if is_rela { reloc64::SIZEOF_RELA } else { reloc64::SIZEOF_REL } + } + } + } + } + + impl<'a> ctx::TryFromCtx<'a, RelocCtx> for Reloc { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], (is_rela, Ctx { container, le }): RelocCtx) -> result::Result<(Self, usize), Self::Error> { + use scroll::Pread; + let reloc = match container { + Container::Little => { + if is_rela { + (bytes.pread_with::<reloc32::Rela>(0, le)?.into(), reloc32::SIZEOF_RELA) + } else { + (bytes.pread_with::<reloc32::Rel>(0, le)?.into(), reloc32::SIZEOF_REL) + } + }, + Container::Big => { + if is_rela { + (bytes.pread_with::<reloc64::Rela>(0, le)?.into(), reloc64::SIZEOF_RELA) + } else { + (bytes.pread_with::<reloc64::Rel>(0, le)?.into(), reloc64::SIZEOF_REL) + } + } + }; + Ok(reloc) + } + } + + impl ctx::TryIntoCtx<RelocCtx> for Reloc { + type Error = crate::error::Error; + /// 
Writes the relocation into `bytes` + fn try_into_ctx(self, bytes: &mut [u8], (is_rela, Ctx {container, le}): RelocCtx) -> result::Result<usize, Self::Error> { + use scroll::Pwrite; + match container { + Container::Little => { + if is_rela { + let rela: reloc32::Rela = self.into(); + Ok(bytes.pwrite_with(rela, 0, le)?) + } else { + let rel: reloc32::Rel = self.into(); + Ok(bytes.pwrite_with(rel, 0, le)?) + } + }, + Container::Big => { + if is_rela { + let rela: reloc64::Rela = self.into(); + Ok(bytes.pwrite_with(rela, 0, le)?) + } else { + let rel: reloc64::Rel = self.into(); + Ok(bytes.pwrite_with(rel, 0, le)?) + } + }, + } + } + } + + impl ctx::IntoCtx<(bool, Ctx)> for Reloc { + /// Writes the relocation into `bytes` + fn into_ctx(self, bytes: &mut [u8], ctx: RelocCtx) { + use scroll::Pwrite; + bytes.pwrite_with(self, 0, ctx).unwrap(); + } + } + + impl fmt::Debug for Reloc { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Reloc") + .field("r_offset", &format_args!("{:x}", self.r_offset)) + .field("r_addend", &format_args!("{:x}", self.r_addend.unwrap_or(0))) + .field("r_sym", &self.r_sym) + .field("r_type", &self.r_type) + .finish() + } + } + + #[derive(Default)] + /// An ELF section containing relocations, allowing lazy iteration over symbols. + pub struct RelocSection<'a> { + bytes: &'a [u8], + count: usize, + ctx: RelocCtx, + start: usize, + end: usize, + } + + impl<'a> fmt::Debug for RelocSection<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let len = self.bytes.len(); + fmt.debug_struct("RelocSection") + .field("bytes", &len) + .field("range", &format!("{:#x}..{:#x}", self.start, self.end)) + .field("count", &self.count) + .field("Relocations", &self.to_vec()) + .finish() + } + } + + impl<'a> RelocSection<'a> { + #[cfg(feature = "endian_fd")] + /// Parse a REL or RELA section of size `filesz` from `offset`. 
+ pub fn parse(bytes: &'a [u8], offset: usize, filesz: usize, is_rela: bool, ctx: Ctx) -> crate::error::Result<RelocSection<'a>> { + // TODO: better error message when too large (see symtab implementation) + let bytes = if filesz != 0 { + bytes.pread_with::<&'a [u8]>(offset, filesz)? + } else { + &[] + }; + + Ok(RelocSection { + bytes: bytes, + count: filesz / Reloc::size(is_rela, ctx), + ctx: (is_rela, ctx), + start: offset, + end: offset + filesz, + }) + } + + /// Try to parse a single relocation from the binary, at `index`. + #[inline] + pub fn get(&self, index: usize) -> Option<Reloc> { + if index >= self.count { + None + } else { + Some(self.bytes.pread_with(index * Reloc::size_with(&self.ctx), self.ctx).unwrap()) + } + } + + /// The number of relocations in the section. + #[inline] + pub fn len(&self) -> usize { + self.count + } + + /// Returns true if section has no relocations. + #[inline] + pub fn is_empty(&self) -> bool { + self.count == 0 + } + + /// Iterate over all relocations. + pub fn iter(&self) -> RelocIterator<'a> { + self.into_iter() + } + + /// Parse all relocations into a vector. + pub fn to_vec(&self) -> Vec<Reloc> { + self.iter().collect() + } + } + + impl<'a, 'b> IntoIterator for &'b RelocSection<'a> { + type Item = <RelocIterator<'a> as Iterator>::Item; + type IntoIter = RelocIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + RelocIterator { + bytes: self.bytes, + offset: 0, + index: 0, + count: self.count, + ctx: self.ctx, + } + } + } + + pub struct RelocIterator<'a> { + bytes: &'a [u8], + offset: usize, + index: usize, + count: usize, + ctx: RelocCtx, + } + + impl<'a> fmt::Debug for RelocIterator<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("RelocIterator") + .field("bytes", &"<... 
redacted ...>") + .field("offset", &self.offset) + .field("index", &self.index) + .field("count", &self.count) + .field("ctx", &self.ctx) + .finish() + } + } + + impl<'a> Iterator for RelocIterator<'a> { + type Item = Reloc; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.count { + None + } else { + self.index += 1; + Some(self.bytes.gread_with(&mut self.offset, self.ctx).unwrap()) + } + } + } + + impl<'a> ExactSizeIterator for RelocIterator<'a> { + #[inline] + fn len(&self) -> usize { + self.count - self.index + } + } +} // end if_alloc diff --git a/third_party/rust/goblin/src/elf/section_header.rs b/third_party/rust/goblin/src/elf/section_header.rs new file mode 100644 index 0000000000..981706153c --- /dev/null +++ b/third_party/rust/goblin/src/elf/section_header.rs @@ -0,0 +1,581 @@ +macro_rules! elf_section_header { + ($size:ident) => { + // XXX: Do not import scroll traits here. + // See: https://github.com/rust-lang/rust/issues/65090#issuecomment-538668155 + + #[repr(C)] + #[derive(Copy, Clone, Eq, PartialEq, Default)] + #[cfg_attr( + feature = "alloc", + derive(scroll::Pread, scroll::Pwrite, scroll::SizeWith) + )] + /// Section Headers are typically used by humans and static linkers for additional information or how to relocate the object + /// + /// **NOTE** section headers are strippable from a binary without any loss of portability/executability; _do not_ rely on them being there! 
+ pub struct SectionHeader { + /// Section name (string tbl index) + pub sh_name: u32, + /// Section type + pub sh_type: u32, + /// Section flags + pub sh_flags: $size, + /// Section virtual addr at execution + pub sh_addr: $size, + /// Section file offset + pub sh_offset: $size, + /// Section size in bytes + pub sh_size: $size, + /// Link to another section + pub sh_link: u32, + /// Additional section information + pub sh_info: u32, + /// Section alignment + pub sh_addralign: $size, + /// Entry size if section holds table + pub sh_entsize: $size, + } + + use plain; + // Declare that this is a plain type. + unsafe impl plain::Plain for SectionHeader {} + + impl ::core::fmt::Debug for SectionHeader { + fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result { + f.debug_struct("SectionHeader") + .field("sh_name", &self.sh_name) + .field("sh_type", &sht_to_str(self.sh_type)) + .field("sh_flags", &format_args!("0x{:x}", self.sh_flags)) + .field("sh_addr", &format_args!("0x{:x}", self.sh_addr)) + .field("sh_offset", &format_args!("0x{:x}", self.sh_offset)) + .field("sh_size", &format_args!("0x{:x}", self.sh_size)) + .field("sh_link", &format_args!("0x{:x}", self.sh_link)) + .field("sh_info", &format_args!("0x{:x}", self.sh_info)) + .field("sh_addralign", &format_args!("0x{:x}", self.sh_addralign)) + .field("sh_entsize", &format_args!("0x{:x}", self.sh_entsize)) + .finish() + } + } + }; +} + +/// Undefined section. +pub const SHN_UNDEF: u32 = 0; +/// Start of reserved indices. +pub const SHN_LORESERVE: u32 = 0xff00; +/// Start of processor-specific. +pub const SHN_LOPROC: u32 = 0xff00; +/// Order section before all others (Solaris). +pub const SHN_BEFORE: u32 = 0xff00; +/// Order section after all others (Solaris). +pub const SHN_AFTER: u32 = 0xff01; +/// End of processor-specific. +pub const SHN_HIPROC: u32 = 0xff1f; +/// Start of OS-specific. +pub const SHN_LOOS: u32 = 0xff20; +/// End of OS-specific. 
+pub const SHN_HIOS: u32 = 0xff3f; +/// Associated symbol is absolute. +pub const SHN_ABS: u32 = 0xfff1; +/// Associated symbol is common. +pub const SHN_COMMON: u32 = 0xfff2; +/// Index is in extra table. +pub const SHN_XINDEX: u32 = 0xffff; +/// End of reserved indices. +pub const SHN_HIRESERVE: u32 = 0xffff; + +// === Legal values for sh_type (section type). === +/// Section header table entry unused. +pub const SHT_NULL: u32 = 0; +/// Program data. +pub const SHT_PROGBITS: u32 = 1; +/// Symbol table. +pub const SHT_SYMTAB: u32 = 2; +/// String table. +pub const SHT_STRTAB: u32 = 3; +/// Relocation entries with addends. +pub const SHT_RELA: u32 = 4; +/// Symbol hash table. +pub const SHT_HASH: u32 = 5; +/// Dynamic linking information. +pub const SHT_DYNAMIC: u32 = 6; +/// Notes. +pub const SHT_NOTE: u32 = 7; +/// Program space with no data (bss). +pub const SHT_NOBITS: u32 = 8; +/// Relocation entries, no addends. +pub const SHT_REL: u32 = 9; +/// Reserved. +pub const SHT_SHLIB: u32 = 10; +/// Dynamic linker symbol table. +pub const SHT_DYNSYM: u32 = 11; +/// Array of constructors. +pub const SHT_INIT_ARRAY: u32 = 14; +/// Array of destructors. +pub const SHT_FINI_ARRAY: u32 = 15; +/// Array of pre-constructors. +pub const SHT_PREINIT_ARRAY: u32 = 16; +/// Section group. +pub const SHT_GROUP: u32 = 17; +/// Extended section indeces. +pub const SHT_SYMTAB_SHNDX: u32 = 18; +/// Number of defined types. +pub const SHT_NUM: u32 = 19; +/// Start OS-specific. +pub const SHT_LOOS: u32 = 0x6000_0000; +/// Object attributes. +pub const SHT_GNU_ATTRIBUTES: u32 = 0x6fff_fff5; +/// GNU-style hash table. +pub const SHT_GNU_HASH: u32 = 0x6fff_fff6; +/// Prelink library list. +pub const SHT_GNU_LIBLIST: u32 = 0x6fff_fff7; +/// Checksum for DSO content. +pub const SHT_CHECKSUM: u32 = 0x6fff_fff8; +/// Sun-specific low bound. 
+pub const SHT_LOSUNW: u32 = 0x6fff_fffa; +pub const SHT_SUNW_MOVE: u32 = 0x6fff_fffa; +pub const SHT_SUNW_COMDAT: u32 = 0x6fff_fffb; +pub const SHT_SUNW_SYMINFO: u32 = 0x6fff_fffc; +/// Version definition section. +pub const SHT_GNU_VERDEF: u32 = 0x6fff_fffd; +/// Version needs section. +pub const SHT_GNU_VERNEED: u32 = 0x6fff_fffe; +/// Version symbol table. +pub const SHT_GNU_VERSYM: u32 = 0x6fff_ffff; +/// Sun-specific high bound. +pub const SHT_HISUNW: u32 = 0x6fff_ffff; +/// End OS-specific type. +pub const SHT_HIOS: u32 = 0x6fff_ffff; +/// Start of processor-specific. +pub const SHT_LOPROC: u32 = 0x7000_0000; +/// X86-64 unwind information. +pub const SHT_X86_64_UNWIND: u32 = 0x7000_0001; +/// End of processor-specific. +pub const SHT_HIPROC: u32 = 0x7fff_ffff; +/// Start of application-specific. +pub const SHT_LOUSER: u32 = 0x8000_0000; +/// End of application-specific. +pub const SHT_HIUSER: u32 = 0x8fff_ffff; + +// Legal values for sh_flags (section flags) +/// Writable. +pub const SHF_WRITE: u32 = 0x1; +/// Occupies memory during execution. +pub const SHF_ALLOC: u32 = 0x2; +/// Executable. +pub const SHF_EXECINSTR: u32 = 0x4; +/// Might be merged. +pub const SHF_MERGE: u32 = 0x10; +/// Contains nul-terminated strings. +pub const SHF_STRINGS: u32 = 0x20; +/// `sh_info' contains SHT index. +pub const SHF_INFO_LINK: u32 = 0x40; +/// Preserve order after combining. +pub const SHF_LINK_ORDER: u32 = 0x80; +/// Non-standard OS specific handling required. +pub const SHF_OS_NONCONFORMING: u32 = 0x100; +/// Section is member of a group. +pub const SHF_GROUP: u32 = 0x200; +/// Section hold thread-local data. +pub const SHF_TLS: u32 = 0x400; +/// Section with compressed data. +pub const SHF_COMPRESSED: u32 = 0x800; +/// OS-specific.. +pub const SHF_MASKOS: u32 = 0x0ff0_0000; +/// Processor-specific. +pub const SHF_MASKPROC: u32 = 0xf000_0000; +/// Special ordering requirement (Solaris). 
+pub const SHF_ORDERED: u32 = 1 << 30; +/// Number of "regular" section header flags +pub const SHF_NUM_REGULAR_FLAGS: usize = 12; +/// Section is excluded unless referenced or allocated (Solaris). +pub const SHF_EXCLUDE: u32 = 0x80000000; // 1U << 31 + +pub const SHF_FLAGS: [u32; SHF_NUM_REGULAR_FLAGS] = [ + SHF_WRITE, + SHF_ALLOC, + SHF_EXECINSTR, + SHF_MERGE, + SHF_STRINGS, + SHF_INFO_LINK, + SHF_LINK_ORDER, + SHF_OS_NONCONFORMING, + SHF_GROUP, + SHF_TLS, + SHF_COMPRESSED, + SHF_ORDERED, +]; + +pub fn sht_to_str(sht: u32) -> &'static str { + match sht { + SHT_NULL => "SHT_NULL", + SHT_PROGBITS => "SHT_PROGBITS", + SHT_SYMTAB => "SHT_SYMTAB", + SHT_STRTAB => "SHT_STRTAB", + SHT_RELA => "SHT_RELA", + SHT_HASH => "SHT_HASH", + SHT_DYNAMIC => "SHT_DYNAMIC", + SHT_NOTE => "SHT_NOTE", + SHT_NOBITS => "SHT_NOBITS", + SHT_REL => "SHT_REL", + SHT_SHLIB => "SHT_SHLIB", + SHT_DYNSYM => "SHT_DYNSYM", + SHT_INIT_ARRAY => "SHT_INIT_ARRAY", + SHT_FINI_ARRAY => "SHT_FINI_ARRAY", + SHT_PREINIT_ARRAY => "SHT_PREINIT_ARRAY", + SHT_GROUP => "SHT_GROUP", + SHT_SYMTAB_SHNDX => "SHT_SYMTAB_SHNDX", + SHT_NUM => "SHT_NUM", + SHT_LOOS => "SHT_LOOS", + SHT_GNU_ATTRIBUTES => "SHT_GNU_ATTRIBUTES", + SHT_GNU_HASH => "SHT_GNU_HASH", + SHT_GNU_LIBLIST => "SHT_GNU_LIBLIST", + SHT_CHECKSUM => "SHT_CHECKSUM", + SHT_SUNW_MOVE => "SHT_SUNW_MOVE", + SHT_SUNW_COMDAT => "SHT_SUNW_COMDAT", + SHT_SUNW_SYMINFO => "SHT_SUNW_SYMINFO", + SHT_GNU_VERDEF => "SHT_GNU_VERDEF", + SHT_GNU_VERNEED => "SHT_GNU_VERNEED", + SHT_GNU_VERSYM => "SHT_GNU_VERSYM", + SHT_LOPROC => "SHT_LOPROC", + SHT_X86_64_UNWIND => "SHT_X86_64_UNWIND", + SHT_HIPROC => "SHT_HIPROC", + SHT_LOUSER => "SHT_LOUSER", + SHT_HIUSER => "SHT_HIUSER", + _ => "UNKNOWN_SHT", + } +} + +pub fn shf_to_str(shf: u32) -> &'static str { + match shf { + SHF_WRITE => "SHF_WRITE", + SHF_ALLOC => "SHF_ALLOC", + SHF_EXECINSTR => "SHF_EXECINSTR", + SHF_MERGE => "SHF_MERGE", + SHF_STRINGS => "SHF_STRINGS", + SHF_INFO_LINK => "SHF_INFO_LINK", + SHF_LINK_ORDER => 
"SHF_LINK_ORDER", + SHF_OS_NONCONFORMING => "SHF_OS_NONCONFORMING", + SHF_GROUP => "SHF_GROUP", + SHF_TLS => "SHF_TLS", + SHF_COMPRESSED => "SHF_COMPRESSED", + //SHF_MASKOS..SHF_MASKPROC => "SHF_OSFLAG", + SHF_ORDERED => "SHF_ORDERED", + _ => "SHF_UNKNOWN", + } +} + +macro_rules! elf_section_header_std_impl { ($size:ty) => { + + #[cfg(test)] + mod tests { + use super::*; + #[test] + fn size_of() { + assert_eq!(::std::mem::size_of::<SectionHeader>(), SIZEOF_SHDR); + } + } + + if_alloc! { + use crate::elf::section_header::SectionHeader as ElfSectionHeader; + + use plain::Plain; + use alloc::vec::Vec; + + if_std! { + use crate::error::Result; + + use std::fs::File; + use std::io::{Read, Seek}; + use std::io::SeekFrom::Start; + } + + impl From<SectionHeader> for ElfSectionHeader { + fn from(sh: SectionHeader) -> Self { + ElfSectionHeader { + sh_name: sh.sh_name as usize, + sh_type: sh.sh_type, + sh_flags: u64::from(sh.sh_flags), + sh_addr: u64::from(sh.sh_addr), + sh_offset: u64::from(sh.sh_offset), + sh_size: u64::from(sh.sh_size), + sh_link: sh.sh_link, + sh_info: sh.sh_info, + sh_addralign: u64::from(sh.sh_addralign), + sh_entsize: u64::from(sh.sh_entsize), + } + } + } + impl From<ElfSectionHeader> for SectionHeader { + fn from(sh: ElfSectionHeader) -> Self { + SectionHeader { + sh_name : sh.sh_name as u32, + sh_type : sh.sh_type, + sh_flags : sh.sh_flags as $size, + sh_addr : sh.sh_addr as $size, + sh_offset : sh.sh_offset as $size, + sh_size : sh.sh_size as $size, + sh_link : sh.sh_link, + sh_info : sh.sh_info, + sh_addralign: sh.sh_addralign as $size, + sh_entsize : sh.sh_entsize as $size, + } + } + } + + impl SectionHeader { + // FIXME: > 65535 sections + pub fn from_bytes(bytes: &[u8], shnum: usize) -> Vec<SectionHeader> { + let mut shdrs = vec![SectionHeader::default(); shnum]; + shdrs.copy_from_bytes(bytes).expect("buffer is too short for given number of entries"); + shdrs + } + + #[cfg(feature = "std")] + // FIXME: > 65535 sections + pub fn from_fd(fd: &mut 
File, offset: u64, shnum: usize) -> Result<Vec<SectionHeader>> { + let mut shdrs = vec![SectionHeader::default(); shnum]; + fd.seek(Start(offset))?; + unsafe { + fd.read_exact(plain::as_mut_bytes(&mut *shdrs))?; + } + Ok(shdrs) + } + } + } // end if_alloc +};} + +pub mod section_header32 { + pub use crate::elf::section_header::*; + + elf_section_header!(u32); + + pub const SIZEOF_SHDR: usize = 40; + + elf_section_header_std_impl!(u32); +} + +pub mod section_header64 { + + pub use crate::elf::section_header::*; + + elf_section_header!(u64); + + pub const SIZEOF_SHDR: usize = 64; + + elf_section_header_std_impl!(u64); +} + +/////////////////////////////// +// Std/analysis/Unified Structs +/////////////////////////////// + +if_alloc! { + use crate::error; + use core::fmt; + use core::result; + use core::ops::Range; + use scroll::ctx; + use crate::container::{Container, Ctx}; + + #[cfg(feature = "endian_fd")] + use alloc::vec::Vec; + + #[derive(Default, PartialEq, Clone)] + /// A unified SectionHeader - convertable to and from 32-bit and 64-bit variants + pub struct SectionHeader { + /// Section name (string tbl index) + pub sh_name: usize, + /// Section type + pub sh_type: u32, + /// Section flags + pub sh_flags: u64, + /// Section virtual addr at execution + pub sh_addr: u64, + /// Section file offset + pub sh_offset: u64, + /// Section size in bytes + pub sh_size: u64, + /// Link to another section + pub sh_link: u32, + /// Additional section information + pub sh_info: u32, + /// Section alignment + pub sh_addralign: u64, + /// Entry size if section holds table + pub sh_entsize: u64, + } + + impl SectionHeader { + /// Return the size of the underlying program header, given a `container` + #[inline] + pub fn size(ctx: Ctx) -> usize { + use scroll::ctx::SizeWith; + Self::size_with(&ctx) + } + pub fn new() -> Self { + SectionHeader { + sh_name: 0, + sh_type: SHT_PROGBITS, + sh_flags: u64::from(SHF_ALLOC), + sh_addr: 0, + sh_offset: 0, + sh_size: 0, + sh_link: 0, + 
sh_info: 0, + sh_addralign: 2 << 8, + sh_entsize: 0, + } + } + /// Returns this section header's file offset range, + /// if the section occupies space in fhe file. + pub fn file_range(&self) -> Option<Range<usize>> { + // Sections with type SHT_NOBITS have no data in the file itself, + // they only exist in memory. + if self.sh_type == SHT_NOBITS { + None + } else { + Some(self.sh_offset as usize..(self.sh_offset as usize).saturating_add(self.sh_size as usize)) + } + } + /// Returns this section header's virtual memory range + pub fn vm_range(&self) -> Range<usize> { + self.sh_addr as usize..(self.sh_addr as usize).saturating_add(self.sh_size as usize) + } + /// Parse `count` section headers from `bytes` at `offset`, using the given `ctx` + #[cfg(feature = "endian_fd")] + pub fn parse(bytes: &[u8], mut offset: usize, mut count: usize, ctx: Ctx) -> error::Result<Vec<SectionHeader>> { + use scroll::Pread; + // Zero offset means no section headers, not even the null section header. + if offset == 0 { + return Ok(Vec::new()); + } + let empty_sh = bytes.gread_with::<SectionHeader>(&mut offset, ctx)?; + if count == 0 as usize { + // Zero count means either no section headers if offset is also zero (checked + // above), or the number of section headers overflows SHN_LORESERVE, in which + // case the count is stored in the sh_size field of the null section header. 
+ count = empty_sh.sh_size as usize; + } + + // Sanity check to avoid OOM + if count > bytes.len() / Self::size(ctx) { + return Err(error::Error::BufferTooShort(count, "section headers")); + } + let mut section_headers = Vec::with_capacity(count); + section_headers.push(empty_sh); + for _ in 1..count { + let shdr = bytes.gread_with(&mut offset, ctx)?; + section_headers.push(shdr); + } + Ok(section_headers) + } + pub fn check_size(&self, size: usize) -> error::Result<()> { + if self.sh_type == SHT_NOBITS || self.sh_size == 0 { + return Ok(()); + } + let (end, overflow) = self.sh_offset.overflowing_add(self.sh_size); + if overflow || end > size as u64 { + let message = format!("Section {} size ({}) + offset ({}) is out of bounds. Overflowed: {}", + self.sh_name, self.sh_offset, self.sh_size, overflow); + return Err(error::Error::Malformed(message)); + } + let (_, overflow) = self.sh_addr.overflowing_add(self.sh_size); + if overflow { + let message = format!("Section {} size ({}) + addr ({}) is out of bounds. 
Overflowed: {}", + self.sh_name, self.sh_addr, self.sh_size, overflow); + return Err(error::Error::Malformed(message)); + } + Ok(()) + } + pub fn is_relocation(&self) -> bool { + self.sh_type == SHT_RELA + } + pub fn is_executable(&self) -> bool { + self.is_alloc() && self.sh_flags as u32 & SHF_EXECINSTR == SHF_EXECINSTR + } + pub fn is_writable(&self) -> bool { + self.is_alloc() && self.sh_flags as u32 & SHF_WRITE == SHF_WRITE + } + pub fn is_alloc(&self) -> bool { + self.sh_flags as u32 & SHF_ALLOC == SHF_ALLOC + } + } + + impl fmt::Debug for SectionHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("SectionHeader") + .field("sh_name", &self.sh_name) + .field("sh_type", &sht_to_str(self.sh_type)) + .field("sh_flags", &format_args!("0x{:x}", self.sh_flags)) + .field("sh_addr", &format_args!("0x{:x}", self.sh_addr)) + .field("sh_offset", &format_args!("0x{:x}", self.sh_offset)) + .field("sh_size", &format_args!("0x{:x}", self.sh_size)) + .field("sh_link", &format_args!("0x{:x}", self.sh_link)) + .field("sh_info", &format_args!("0x{:x}", self.sh_info)) + .field("sh_addralign", &format_args!("0x{:x}", self.sh_addralign)) + .field("sh_entsize", &format_args!("0x{:x}", self.sh_entsize)) + .finish() + } + } + + impl ctx::SizeWith<Ctx> for SectionHeader { + fn size_with( &Ctx { container, .. 
}: &Ctx) -> usize { + match container { + Container::Little => { + section_header32::SIZEOF_SHDR + }, + Container::Big => { + section_header64::SIZEOF_SHDR + }, + } + } + } + + impl<'a> ctx::TryFromCtx<'a, Ctx> for SectionHeader { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], Ctx {container, le}: Ctx) -> result::Result<(Self, usize), Self::Error> { + use scroll::Pread; + let res = match container { + Container::Little => { + (bytes.pread_with::<section_header32::SectionHeader>(0, le)?.into(), section_header32::SIZEOF_SHDR) + }, + Container::Big => { + (bytes.pread_with::<section_header64::SectionHeader>(0, le)?.into(), section_header64::SIZEOF_SHDR) + } + }; + Ok(res) + } + } + + impl ctx::TryIntoCtx<Ctx> for SectionHeader { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result<usize, Self::Error> { + use scroll::Pwrite; + match container { + Container::Little => { + let shdr: section_header32::SectionHeader = self.into(); + Ok(bytes.pwrite_with(shdr, 0, le)?) + }, + Container::Big => { + let shdr: section_header64::SectionHeader = self.into(); + Ok(bytes.pwrite_with(shdr, 0, le)?) + } + } + } + } + impl ctx::IntoCtx<Ctx> for SectionHeader { + fn into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) { + use scroll::Pwrite; + match container { + Container::Little => { + let shdr: section_header32::SectionHeader = self.into(); + bytes.pwrite_with(shdr, 0, le).unwrap(); + }, + Container::Big => { + let shdr: section_header64::SectionHeader = self.into(); + bytes.pwrite_with(shdr, 0, le).unwrap(); + } + } + } + } +} // end if_alloc diff --git a/third_party/rust/goblin/src/elf/sym.rs b/third_party/rust/goblin/src/elf/sym.rs new file mode 100644 index 0000000000..a13e7abcfd --- /dev/null +++ b/third_party/rust/goblin/src/elf/sym.rs @@ -0,0 +1,607 @@ +/// === Sym bindings === +/// Local symbol. +pub const STB_LOCAL: u8 = 0; +/// Global symbol. 
+pub const STB_GLOBAL: u8 = 1; +/// Weak symbol. +pub const STB_WEAK: u8 = 2; +/// Number of defined types.. +pub const STB_NUM: u8 = 3; +/// Start of OS-specific. +pub const STB_LOOS: u8 = 10; +/// Unique symbol.. +pub const STB_GNU_UNIQUE: u8 = 10; +/// End of OS-specific. +pub const STB_HIOS: u8 = 12; +/// Start of processor-specific. +pub const STB_LOPROC: u8 = 13; +/// End of processor-specific. +pub const STB_HIPROC: u8 = 15; + +/// === Sym types === +/// Symbol type is unspecified. +pub const STT_NOTYPE: u8 = 0; +/// Symbol is a data object. +pub const STT_OBJECT: u8 = 1; +/// Symbol is a code object. +pub const STT_FUNC: u8 = 2; +/// Symbol associated with a section. +pub const STT_SECTION: u8 = 3; +/// Symbol's name is file name. +pub const STT_FILE: u8 = 4; +/// Symbol is a common data object. +pub const STT_COMMON: u8 = 5; +/// Symbol is thread-local data object. +pub const STT_TLS: u8 = 6; +/// Number of defined types. +pub const STT_NUM: u8 = 7; +/// Start of OS-specific. +pub const STT_LOOS: u8 = 10; +/// Symbol is indirect code object. +pub const STT_GNU_IFUNC: u8 = 10; +/// End of OS-specific. +pub const STT_HIOS: u8 = 12; +/// Start of processor-specific. +pub const STT_LOPROC: u8 = 13; +/// End of processor-specific. +pub const STT_HIPROC: u8 = 15; + +/// === Sym visibility === +/// Default: Visibility is specified by the symbol's binding type +pub const STV_DEFAULT: u8 = 0; +/// Internal: use of this attribute is currently reserved. +pub const STV_INTERNAL: u8 = 1; +/// Hidden: Not visible to other components, necessarily protected. Binding scope becomes local +/// when the object is included in an executable or shared object. +pub const STV_HIDDEN: u8 = 2; +/// Protected: Symbol defined in current component is visible in other components, but cannot be preempted. +/// Any reference from within the defining component must be resolved to the definition in that +/// component. 
+pub const STV_PROTECTED: u8 = 3; +/// Exported: ensures a symbol remains global, cannot be demoted or eliminated by any other symbol +/// visibility technique. +pub const STV_EXPORTED: u8 = 4; +/// Singleton: ensures a symbol remains global, and that a single instance of the definition is +/// bound to by all references within a process. Cannot be demoted or eliminated. +pub const STV_SINGLETON: u8 = 5; +/// Eliminate: extends the hidden attribute. Not written in any symbol table of a dynamic +/// executable or shared object. +pub const STV_ELIMINATE: u8 = 6; + +/// Get the ST bind. +/// +/// This is the first four bits of the "info" byte. +#[inline] +pub fn st_bind(info: u8) -> u8 { + info >> 4 +} + +/// Get the ST type. +/// +/// This is the last four bits of the "info" byte. +#[inline] +pub fn st_type(info: u8) -> u8 { + info & 0xf +} + +/// Get the ST visibility. +/// +/// This is the last three bits of the "other" byte. +#[inline] +pub fn st_visibility(other: u8) -> u8 { + other & 0x7 +} + +/// Is this information defining an import? +#[inline] +pub fn is_import(info: u8, value: u64) -> bool { + let bind = st_bind(info); + bind == STB_GLOBAL && value == 0 +} + +/// Convenience function to get the &'static str type from the symbols `st_info`. +#[inline] +pub fn get_type(info: u8) -> &'static str { + type_to_str(st_type(info)) +} + +/// Get the string for some bind. +#[inline] +pub fn bind_to_str(typ: u8) -> &'static str { + match typ { + STB_LOCAL => "LOCAL", + STB_GLOBAL => "GLOBAL", + STB_WEAK => "WEAK", + STB_NUM => "NUM", + STB_GNU_UNIQUE => "GNU_UNIQUE", + _ => "UNKNOWN_STB", + } +} + +/// Get the string for some type. 
+#[inline] +pub fn type_to_str(typ: u8) -> &'static str { + match typ { + STT_NOTYPE => "NOTYPE", + STT_OBJECT => "OBJECT", + STT_FUNC => "FUNC", + STT_SECTION => "SECTION", + STT_FILE => "FILE", + STT_COMMON => "COMMON", + STT_TLS => "TLS", + STT_NUM => "NUM", + STT_GNU_IFUNC => "GNU_IFUNC", + _ => "UNKNOWN_STT", + } +} + +/// Get the string for some visibility +#[inline] +pub fn visibility_to_str(typ: u8) -> &'static str { + match typ { + STV_DEFAULT => "DEFAULT", + STV_INTERNAL => "INTERNAL", + STV_HIDDEN => "HIDDEN", + STV_PROTECTED => "PROTECTED", + STV_EXPORTED => "EXPORTED", + STV_SINGLETON => "SINGLETON", + STV_ELIMINATE => "ELIMINATE", + _ => "UNKNOWN_STV", + } +} + +macro_rules! elf_sym_std_impl { + ($size:ty) => { + #[cfg(test)] + mod tests { + use super::*; + #[test] + fn size_of() { + assert_eq!(::std::mem::size_of::<Sym>(), SIZEOF_SYM); + } + } + + use crate::elf::sym::Sym as ElfSym; + + use core::fmt; + use core::slice; + + impl Sym { + /// Checks whether this `Sym` has `STB_GLOBAL`/`STB_WEAK` bind and a `st_value` of 0 + #[inline] + pub fn is_import(&self) -> bool { + let bind = self.st_info >> 4; + (bind == STB_GLOBAL || bind == STB_WEAK) && self.st_value == 0 + } + /// Checks whether this `Sym` has type `STT_FUNC` + #[inline] + pub fn is_function(&self) -> bool { + st_type(self.st_info) == STT_FUNC + } + } + + impl From<Sym> for ElfSym { + #[inline] + fn from(sym: Sym) -> Self { + ElfSym { + st_name: sym.st_name as usize, + st_info: sym.st_info, + st_other: sym.st_other, + st_shndx: sym.st_shndx as usize, + st_value: u64::from(sym.st_value), + st_size: u64::from(sym.st_size), + } + } + } + + impl From<ElfSym> for Sym { + #[inline] + fn from(sym: ElfSym) -> Self { + Sym { + st_name: sym.st_name as u32, + st_info: sym.st_info, + st_other: sym.st_other, + st_shndx: sym.st_shndx as u16, + st_value: sym.st_value as $size, + st_size: sym.st_size as $size, + } + } + } + + impl fmt::Debug for Sym { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 
+ let bind = st_bind(self.st_info); + let typ = st_type(self.st_info); + let vis = st_visibility(self.st_other); + f.debug_struct("Sym") + .field("st_name", &self.st_name) + .field("st_value", &format_args!("{:x}", self.st_value)) + .field("st_size", &self.st_size) + .field( + "st_info", + &format_args!( + "{:x} {} {}", + self.st_info, + bind_to_str(bind), + type_to_str(typ) + ), + ) + .field( + "st_other", + &format_args!("{} {}", self.st_other, visibility_to_str(vis)), + ) + .field("st_shndx", &self.st_shndx) + .finish() + } + } + + /// # Safety + /// + /// This function creates a `Sym` slice directly from a raw pointer + #[inline] + pub unsafe fn from_raw<'a>(symp: *const Sym, count: usize) -> &'a [Sym] { + slice::from_raw_parts(symp, count) + } + + if_std! { + use crate::error::Result; + + use std::fs::File; + use std::io::{Read, Seek}; + use std::io::SeekFrom::Start; + + pub fn from_fd(fd: &mut File, offset: usize, count: usize) -> Result<Vec<Sym>> { + // TODO: AFAIK this shouldn't work, since i pass in a byte size... + let mut syms = vec![Sym::default(); count]; + fd.seek(Start(offset as u64))?; + unsafe { + fd.read_exact(plain::as_mut_bytes(&mut *syms))?; + } + syms.dedup(); + Ok(syms) + } + } + }; +} + +#[cfg(feature = "alloc")] +use scroll::{Pread, Pwrite, SizeWith}; + +pub mod sym32 { + pub use crate::elf::sym::*; + + #[repr(C)] + #[derive(Clone, Copy, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// 32-bit Sym - used for both static and dynamic symbol information in a binary + pub struct Sym { + /// Symbol name (string tbl index) + pub st_name: u32, + /// Symbol value + pub st_value: u32, + /// Symbol size + pub st_size: u32, + /// Symbol type and binding + pub st_info: u8, + /// Symbol visibility + pub st_other: u8, + /// Section index + pub st_shndx: u16, + } + + // Declare that the type is plain. 
+ unsafe impl plain::Plain for Sym {} + + pub const SIZEOF_SYM: usize = 4 + 1 + 1 + 2 + 4 + 4; + + elf_sym_std_impl!(u32); +} + +pub mod sym64 { + pub use crate::elf::sym::*; + + #[repr(C)] + #[derive(Clone, Copy, PartialEq, Default)] + #[cfg_attr(feature = "alloc", derive(Pread, Pwrite, SizeWith))] + /// 64-bit Sym - used for both static and dynamic symbol information in a binary + pub struct Sym { + /// Symbol name (string tbl index) + pub st_name: u32, + /// Symbol type and binding + pub st_info: u8, + /// Symbol visibility + pub st_other: u8, + /// Section index + pub st_shndx: u16, + /// Symbol value + pub st_value: u64, + /// Symbol size + pub st_size: u64, + } + + // Declare that the type is plain. + unsafe impl plain::Plain for Sym {} + + pub const SIZEOF_SYM: usize = 4 + 1 + 1 + 2 + 8 + 8; + + elf_sym_std_impl!(u64); +} + +use crate::container::{Container, Ctx}; +#[cfg(feature = "alloc")] +use crate::error::Result; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +use core::fmt; +use scroll::ctx; +use scroll::ctx::SizeWith; + +#[derive(Clone, Copy, PartialEq, Default)] +/// A unified Sym definition - convertible to and from 32-bit and 64-bit variants +pub struct Sym { + pub st_name: usize, + pub st_info: u8, + pub st_other: u8, + pub st_shndx: usize, + pub st_value: u64, + pub st_size: u64, +} + +impl Sym { + #[inline] + pub fn size(container: Container) -> usize { + Self::size_with(&Ctx::from(container)) + } + /// Checks whether this `Sym` has `STB_GLOBAL`/`STB_WEAK` bind and a `st_value` of 0 + #[inline] + pub fn is_import(&self) -> bool { + let bind = self.st_bind(); + (bind == STB_GLOBAL || bind == STB_WEAK) && self.st_value == 0 + } + /// Checks whether this `Sym` has type `STT_FUNC` + #[inline] + pub fn is_function(&self) -> bool { + st_type(self.st_info) == STT_FUNC + } + /// Get the ST bind. + /// + /// This is the first four bits of the "info" byte. + #[inline] + pub fn st_bind(&self) -> u8 { + self.st_info >> 4 + } + /// Get the ST type. 
+ /// + /// This is the last four bits of the "info" byte. + #[inline] + pub fn st_type(&self) -> u8 { + st_type(self.st_info) + } + /// Get the ST visibility. + /// + /// This is the last three bits of the "other" byte. + #[inline] + pub fn st_visibility(&self) -> u8 { + st_visibility(self.st_other) + } + #[cfg(feature = "endian_fd")] + /// Parse `count` vector of ELF symbols from `offset` + pub fn parse(bytes: &[u8], mut offset: usize, count: usize, ctx: Ctx) -> Result<Vec<Sym>> { + if count > bytes.len() / Sym::size_with(&ctx) { + return Err(crate::error::Error::BufferTooShort(count, "symbols")); + } + let mut syms = Vec::with_capacity(count); + for _ in 0..count { + let sym = bytes.gread_with(&mut offset, ctx)?; + syms.push(sym); + } + Ok(syms) + } +} + +impl fmt::Debug for Sym { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let bind = self.st_bind(); + let typ = self.st_type(); + let vis = self.st_visibility(); + f.debug_struct("Sym") + .field("st_name", &self.st_name) + .field( + "st_info", + &format_args!( + "0x{:x} {} {}", + self.st_info, + bind_to_str(bind), + type_to_str(typ) + ), + ) + .field( + "st_other", + &format_args!("{} {}", self.st_other, visibility_to_str(vis)), + ) + .field("st_shndx", &self.st_shndx) + .field("st_value", &format_args!("0x{:x}", self.st_value)) + .field("st_size", &self.st_size) + .finish() + } +} + +impl ctx::SizeWith<Ctx> for Sym { + #[inline] + fn size_with(&Ctx { container, .. }: &Ctx) -> usize { + match container { + Container::Little => sym32::SIZEOF_SYM, + Container::Big => sym64::SIZEOF_SYM, + } + } +} + +if_alloc! 
{ + use core::result; + + impl<'a> ctx::TryFromCtx<'a, Ctx> for Sym { + type Error = crate::error::Error; + #[inline] + fn try_from_ctx(bytes: &'a [u8], Ctx { container, le}: Ctx) -> result::Result<(Self, usize), Self::Error> { + let sym = match container { + Container::Little => { + (bytes.pread_with::<sym32::Sym>(0, le)?.into(), sym32::SIZEOF_SYM) + }, + Container::Big => { + (bytes.pread_with::<sym64::Sym>(0, le)?.into(), sym64::SIZEOF_SYM) + } + }; + Ok(sym) + } + } + + impl ctx::TryIntoCtx<Ctx> for Sym { + type Error = crate::error::Error; + #[inline] + fn try_into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) -> result::Result<usize, Self::Error> { + match container { + Container::Little => { + let sym: sym32::Sym = self.into(); + Ok(bytes.pwrite_with(sym, 0, le)?) + }, + Container::Big => { + let sym: sym64::Sym = self.into(); + Ok(bytes.pwrite_with(sym, 0, le)?) + } + } + } + } + + impl ctx::IntoCtx<Ctx> for Sym { + #[inline] + fn into_ctx(self, bytes: &mut [u8], Ctx {container, le}: Ctx) { + match container { + Container::Little => { + let sym: sym32::Sym = self.into(); + bytes.pwrite_with(sym, 0, le).unwrap(); + }, + Container::Big => { + let sym: sym64::Sym = self.into(); + bytes.pwrite_with(sym, 0, le).unwrap(); + } + } + } + } +} + +if_alloc! { + #[derive(Default)] + /// An ELF symbol table, allowing lazy iteration over symbols + pub struct Symtab<'a> { + bytes: &'a [u8], + count: usize, + ctx: Ctx, + start: usize, + end: usize, + } + + impl<'a> fmt::Debug for Symtab<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + let len = self.bytes.len(); + fmt.debug_struct("Symtab") + .field("bytes", &len) + .field("range", &format_args!("{:#x}..{:#x}", self.start, self.end)) + .field("count", &self.count) + .field("Symbols", &self.to_vec()) + .finish() + } + } + + impl<'a> Symtab<'a> { + /// Parse a table of `count` ELF symbols from `offset`. 
+ pub fn parse(bytes: &'a [u8], offset: usize, count: usize, ctx: Ctx) -> Result<Symtab<'a>> { + let size = count + .checked_mul(Sym::size_with(&ctx)) + .ok_or_else(|| crate::error::Error::Malformed( + format!("Too many ELF symbols (offset {:#x}, count {})", offset, count) + ))?; + // TODO: make this a better error message when too large + let bytes = bytes.pread_with(offset, size)?; + Ok(Symtab { bytes, count, ctx, start: offset, end: offset+size }) + } + + /// Try to parse a single symbol from the binary, at `index`. + #[inline] + pub fn get(&self, index: usize) -> Option<Sym> { + if index >= self.count { + None + } else { + Some(self.bytes.pread_with(index * Sym::size_with(&self.ctx), self.ctx).unwrap()) + } + } + + /// The number of symbols in the table. + #[inline] + pub fn len(&self) -> usize { + self.count + } + + /// Returns true if table has no symbols. + #[inline] + pub fn is_empty(&self) -> bool { + self.count == 0 + } + + /// Iterate over all symbols. + #[inline] + pub fn iter(&self) -> SymIterator<'a> { + self.into_iter() + } + + /// Parse all symbols into a vector. 
+ pub fn to_vec(&self) -> Vec<Sym> { + self.iter().collect() + } + } + + impl<'a, 'b> IntoIterator for &'b Symtab<'a> { + type Item = <SymIterator<'a> as Iterator>::Item; + type IntoIter = SymIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + SymIterator { + bytes: self.bytes, + offset: 0, + index: 0, + count: self.count, + ctx: self.ctx, + } + } + } + + /// An iterator over symbols in an ELF symbol table + pub struct SymIterator<'a> { + bytes: &'a [u8], + offset: usize, + index: usize, + count: usize, + ctx: Ctx, + } + + impl<'a> Iterator for SymIterator<'a> { + type Item = Sym; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.count { + None + } else { + self.index += 1; + Some(self.bytes.gread_with(&mut self.offset, self.ctx).unwrap()) + } + } + } + + impl<'a> ExactSizeIterator for SymIterator<'a> { + #[inline] + fn len(&self) -> usize { + self.count - self.index + } + } +} // end if_alloc diff --git a/third_party/rust/goblin/src/elf/symver.rs b/third_party/rust/goblin/src/elf/symver.rs new file mode 100644 index 0000000000..0dd55e3d25 --- /dev/null +++ b/third_party/rust/goblin/src/elf/symver.rs @@ -0,0 +1,880 @@ +//! Symbol versioning +//! +//! Implementation of the GNU symbol versioning extension according to +//! [LSB Core Specification - Symbol Versioning][lsb-symver]. +//! +//! # Examples +//! +//! List the dependencies of an ELF file that have [version needed][lsb-verneed] information along +//! with the versions needed for each dependency. +//! ```rust +//! use goblin::error::Error; +//! +//! pub fn show_verneed(bytes: &[u8]) -> Result<(), Error> { +//! let binary = goblin::elf::Elf::parse(&bytes)?; +//! +//! if let Some(verneed) = binary.verneed { +//! for need_file in verneed.iter() { +//! println!( +//! "Depend on {:?} with version(s):", +//! binary.dynstrtab.get_at(need_file.vn_file) +//! ); +//! for need_ver in need_file.iter() { +//! 
println!("{:?}", binary.dynstrtab.get_at(need_ver.vna_name)); +//! } +//! } +//! } +//! +//! Ok(()) +//! } +//! ``` +//! +//! List the [version defined][lsb-verdef] information of an ELF file, effectively listing the version +//! defined by this ELF file. +//! ```rust +//! use goblin::error::Error; +//! +//! pub fn show_verdef(bytes: &[u8]) -> Result<(), Error> { +//! let binary = goblin::elf::Elf::parse(&bytes)?; +//! +//! if let Some(verdef) = &binary.verdef { +//! for def in verdef.iter() { +//! for (n, aux) in def.iter().enumerate() { +//! let name = binary.dynstrtab.get_at(aux.vda_name); +//! match n { +//! 0 => print!("Name: {:?}", name), +//! 1 => print!(" Parent: {:?}", name), +//! _ => print!(", {:?}", name), +//! } +//! } +//! print!("\n"); +//! } +//! } +//! +//! Ok(()) +//! } +//! ``` +//! +//! [lsb-symver]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html +//! [lsb-verneed]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#SYMVERRQMTS +//! [lsb-verdef]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#SYMVERDEFS + +use crate::container; +use crate::elf::section_header::{SectionHeader, SHT_GNU_VERDEF, SHT_GNU_VERNEED, SHT_GNU_VERSYM}; +use crate::error::Result; +use core::iter::FusedIterator; +use scroll::Pread; + +/****************** + * ELF Constants * + ******************/ + +// Versym constants. + +/// Constant describing a local symbol, see [`Versym::is_local`]. +pub const VER_NDX_LOCAL: u16 = 0; +/// Constant describing a global symbol, see [`Versym::is_global`]. +pub const VER_NDX_GLOBAL: u16 = 1; +/// Bitmask to check hidden bit, see [`Versym::is_hidden`]. +pub const VERSYM_HIDDEN: u16 = 0x8000; +/// Bitmask to get version information, see [`Versym::version`]. +pub const VERSYM_VERSION: u16 = 0x7fff; + +// Verdef constants. + +/// Bitmask to check `base` flag in [`Verdef::vd_flags`]. 
+pub const VER_FLG_BASE: u16 = 0x1; +/// Bitmask to check `weak` flag in [`Verdef::vd_flags`]. +pub const VER_FLG_WEAK: u16 = 0x2; +/// Bitmask to check `info` flag in [`Verdef::vd_flags`]. +pub const VER_FLG_INFO: u16 = 0x4; + +/******************** + * ELF Structures * + ********************/ + +/// An ELF `Symbol Version` entry. +/// +/// https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#SYMVERTBL +#[repr(C)] +#[derive(Debug, Pread)] +struct ElfVersym { + vs_val: u16, +} + +/// An ELF `Version Definition` entry Elfxx_Verdef. +/// +/// https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERDEFENTRIES +#[repr(C)] +#[derive(Debug, Pread)] +struct ElfVerdef { + vd_version: u16, + vd_flags: u16, + vd_ndx: u16, + vd_cnt: u16, + vd_hash: u32, + vd_aux: u32, + vd_next: u32, +} + +/// An ELF `Version Definition Auxiliary` entry Elfxx_Verdaux. +/// +/// https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERDEFEXTS +#[repr(C)] +#[derive(Debug, Pread)] +struct ElfVerdaux { + vda_name: u32, + vda_next: u32, +} + +/// An ELF `Version Need` entry Elfxx_Verneed. +/// +/// https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERNEEDFIG +#[repr(C)] +#[derive(Debug, Pread)] +struct ElfVerneed { + vn_version: u16, + vn_cnt: u16, + vn_file: u32, + vn_aux: u32, + vn_next: u32, +} + +/// An ELF `Version Need Auxiliary` entry Elfxx_Vernaux. +/// +/// https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERNEEDEXTFIG +#[repr(C)] +#[derive(Debug, Pread)] +struct ElfVernaux { + vna_hash: u32, + vna_flags: u16, + vna_other: u16, + vna_name: u32, + vna_next: u32, +} + +/******************** + * Symbol Version * + ********************/ + +/// Helper struct to iterate over [Symbol Version][Versym] entries. 
+#[derive(Debug)] +pub struct VersymSection<'a> { + bytes: &'a [u8], + ctx: container::Ctx, +} + +impl<'a> VersymSection<'a> { + pub fn parse( + bytes: &'a [u8], + shdrs: &[SectionHeader], + ctx: container::Ctx, + ) -> Result<Option<VersymSection<'a>>> { + // Get fields needed from optional `symbol version` section. + let (offset, size) = + if let Some(shdr) = shdrs.iter().find(|shdr| shdr.sh_type == SHT_GNU_VERSYM) { + (shdr.sh_offset as usize, shdr.sh_size as usize) + } else { + return Ok(None); + }; + + // Get a slice of bytes of the `version definition` section content. + let bytes: &'a [u8] = bytes.pread_with(offset, size)?; + + Ok(Some(VersymSection { bytes, ctx })) + } + + /// Get an iterator over the [`Versym`] entries. + #[inline] + pub fn iter(&'a self) -> VersymIter<'a> { + self.into_iter() + } + + /// True if there are no [`Versym`] entries. + #[inline] + pub fn is_empty(&self) -> bool { + self.bytes.is_empty() + } + + /// Number of [`Versym`] entries. + #[inline] + pub fn len(&self) -> usize { + let entsize = core::mem::size_of::<ElfVersym>(); + + self.bytes.len() / entsize + } + + /// Get [`Versym`] entry at index. + #[inline] + pub fn get_at(&self, idx: usize) -> Option<Versym> { + let entsize = core::mem::size_of::<ElfVersym>(); + let offset = idx.checked_mul(entsize)?; + + self.bytes + .pread_with::<ElfVersym>(offset, self.ctx.le) + .ok() + .map(Versym::from) + } +} + +impl<'a> IntoIterator for &'_ VersymSection<'a> { + type Item = <VersymIter<'a> as Iterator>::Item; + type IntoIter = VersymIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + VersymIter { + bytes: self.bytes, + offset: 0, + ctx: self.ctx, + } + } +} + +/// Iterator over the [`Versym`] entries from the [`SHT_GNU_VERSYM`] section. 
+pub struct VersymIter<'a> { + bytes: &'a [u8], + offset: usize, + ctx: container::Ctx, +} + +impl<'a> Iterator for VersymIter<'a> { + type Item = Versym; + + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.bytes.len() { + None + } else { + self.bytes + .gread_with::<ElfVersym>(&mut self.offset, self.ctx.le) + .ok() + .map(Versym::from) + .or_else(|| { + // self.bytes are not a multiple of ElfVersym. + // Adjust offset to continue yielding Nones. + self.offset = self.bytes.len(); + None + }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = (self.bytes.len() - self.offset) / core::mem::size_of::<Self::Item>(); + (len, Some(len)) + } +} + +impl ExactSizeIterator for VersymIter<'_> {} + +impl FusedIterator for VersymIter<'_> {} + +/// An ELF [Symbol Version][lsb-versym] entry. +/// +/// [lsb-versym]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#SYMVERTBL +#[derive(Debug)] +pub struct Versym { + pub vs_val: u16, +} + +impl Versym { + /// Returns true if the symbol is local and not available outside the object according to + /// [`VER_NDX_LOCAL`]. + #[inline] + pub fn is_local(&self) -> bool { + self.vs_val == VER_NDX_LOCAL + } + + /// Returns true if the symbol is defined in this object and globally available according + /// to [`VER_NDX_GLOBAL`]. + #[inline] + pub fn is_global(&self) -> bool { + self.vs_val == VER_NDX_GLOBAL + } + + /// Returns true if the `hidden` bit is set according to the [`VERSYM_HIDDEN`] bitmask. + #[inline] + pub fn is_hidden(&self) -> bool { + (self.vs_val & VERSYM_HIDDEN) == VERSYM_HIDDEN + } + + /// Returns the symbol version index according to the [`VERSYM_VERSION`] bitmask. 
+ #[inline] + pub fn version(&self) -> u16 { + self.vs_val & VERSYM_VERSION + } +} + +impl From<ElfVersym> for Versym { + fn from(ElfVersym { vs_val }: ElfVersym) -> Self { + Versym { vs_val } + } +} + +/************************ + * Version Definition * + ************************/ + +/// Helper struct to iterate over [Version Definition][Verdef] and [Version Definition +/// Auxiliary][Verdaux] entries. +#[derive(Debug)] +pub struct VerdefSection<'a> { + /// String table used to resolve version strings. + bytes: &'a [u8], + count: usize, + ctx: container::Ctx, +} + +impl<'a> VerdefSection<'a> { + pub fn parse( + bytes: &'a [u8], + shdrs: &[SectionHeader], + ctx: container::Ctx, + ) -> Result<Option<VerdefSection<'a>>> { + // Get fields needed from optional `version definition` section. + let (offset, size, count) = + if let Some(shdr) = shdrs.iter().find(|shdr| shdr.sh_type == SHT_GNU_VERDEF) { + ( + shdr.sh_offset as usize, + shdr.sh_size as usize, + shdr.sh_info as usize, // Encodes the number of ElfVerdef entries. + ) + } else { + return Ok(None); + }; + + // Get a slice of bytes of the `version definition` section content. + let bytes: &'a [u8] = bytes.pread_with(offset, size)?; + + Ok(Some(VerdefSection { bytes, count, ctx })) + } + + /// Get an iterator over the [`Verdef`] entries. + #[inline] + pub fn iter(&'a self) -> VerdefIter<'a> { + self.into_iter() + } +} + +impl<'a> IntoIterator for &'_ VerdefSection<'a> { + type Item = <VerdefIter<'a> as Iterator>::Item; + type IntoIter = VerdefIter<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + VerdefIter { + bytes: self.bytes, + count: self.count, + index: 0, + offset: 0, + ctx: self.ctx, + } + } +} + +/// Iterator over the [`Verdef`] entries from the [`SHT_GNU_VERDEF`] section. 
+pub struct VerdefIter<'a> { + bytes: &'a [u8], + count: usize, + index: usize, + offset: usize, + ctx: container::Ctx, +} + +impl<'a> Iterator for VerdefIter<'a> { + type Item = Verdef<'a>; + + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.count { + None + } else { + self.index += 1; + + let do_next = |iter: &mut Self| { + let ElfVerdef { + vd_version, + vd_flags, + vd_ndx, + vd_cnt, + vd_hash, + vd_aux, + vd_next, + } = iter.bytes.pread_with(iter.offset, iter.ctx.le).ok()?; + + // Validate offset to first ElfVerdaux entry. + let offset = iter.offset.checked_add(vd_aux as usize)?; + + // Validate if offset is valid index into bytes slice. + if offset >= iter.bytes.len() { + return None; + } + + // Get a slice of bytes starting with the first ElfVerdaux entry. + let bytes: &'a [u8] = &iter.bytes[offset..]; + + // Bump the offset to the next ElfVerdef entry. + iter.offset = iter.offset.checked_add(vd_next as usize)?; + + // Start yielding None on the next call if there is no next offset. + if vd_next == 0 { + iter.index = iter.count; + } + + Some(Verdef { + vd_version, + vd_flags, + vd_ndx, + vd_cnt, + vd_hash, + vd_aux, + vd_next, + bytes, + ctx: iter.ctx, + }) + }; + + do_next(self).or_else(|| { + // Adjust current index to count in case of an error. + self.index = self.count; + None + }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = self.count - self.index; + (0, Some(len)) + } +} + +impl ExactSizeIterator for VerdefIter<'_> {} + +impl FusedIterator for VerdefIter<'_> {} + +/// An ELF [Version Definition][lsb-verdef] entry . +/// +/// [lsb-verdef]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERDEFENTRIES +#[derive(Debug)] +pub struct Verdef<'a> { + /// Version revision. This field shall be set to 1. + pub vd_version: u16, + /// Version information flag bitmask. + pub vd_flags: u16, + /// Version index numeric value referencing the SHT_GNU_versym section. 
+ pub vd_ndx: u16, + /// Number of associated verdaux array entries. + pub vd_cnt: u16, + /// Version name hash value (ELF hash function). + pub vd_hash: u32, + /// Offset in bytes to a corresponding entry in an array of Elfxx_Verdaux structures. + pub vd_aux: u32, + /// Offset to the next verdef entry, in bytes. + pub vd_next: u32, + + bytes: &'a [u8], + ctx: container::Ctx, +} + +impl<'a> Verdef<'a> { + /// Get an iterator over the [`Verdaux`] entries of this [`Verdef`] entry. + #[inline] + pub fn iter(&'a self) -> VerdauxIter<'a> { + self.into_iter() + } +} + +impl<'a> IntoIterator for &'_ Verdef<'a> { + type Item = <VerdauxIter<'a> as Iterator>::Item; + type IntoIter = VerdauxIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + VerdauxIter { + bytes: self.bytes, + count: self.vd_cnt, + index: 0, + offset: 0, + ctx: self.ctx, + } + } +} + +/// Iterator over the [`Verdaux`] entries for an specific [`Verdef`] entry. +pub struct VerdauxIter<'a> { + bytes: &'a [u8], + count: u16, + index: u16, + offset: usize, + ctx: container::Ctx, +} + +impl<'a> Iterator for VerdauxIter<'a> { + type Item = Verdaux; + + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.count { + None + } else { + self.index += 1; + + let do_next = |iter: &mut Self| { + let ElfVerdaux { vda_name, vda_next } = + iter.bytes.pread_with(iter.offset, iter.ctx.le).ok()?; + + // Bump the offset to the next ElfVerdaux entry. + iter.offset = iter.offset.checked_add(vda_next as usize)?; + + // Start yielding None on the next call if there is no next offset. + if vda_next == 0 { + iter.index = iter.count; + } + + Some(Verdaux { + vda_name: vda_name as usize, + vda_next, + }) + }; + + do_next(self).or_else(|| { + // Adjust current index to count in case of an error. 
+ self.index = self.count; + None + }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = usize::from(self.count - self.index); + (0, Some(len)) + } +} + +impl ExactSizeIterator for VerdauxIter<'_> {} + +impl FusedIterator for VerdauxIter<'_> {} + +/// An ELF [Version Definition Auxiliary][lsb-verdaux] entry. +/// +/// [lsb-verdaux]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERDEFEXTS +#[derive(Debug)] +pub struct Verdaux { + /// Offset to the version or dependency name string in the section header, in bytes. + pub vda_name: usize, + /// Offset to the next verdaux entry, in bytes. + pub vda_next: u32, +} + +/************************** + * Version Requirements * + **************************/ + +/// Helper struct to iterate over [Version Needed][Verneed] and [Version Needed +/// Auxiliary][Vernaux] entries. +#[derive(Debug)] +pub struct VerneedSection<'a> { + bytes: &'a [u8], + count: usize, + ctx: container::Ctx, +} + +impl<'a> VerneedSection<'a> { + /// Try to parse the optional [`SHT_GNU_VERNEED`] section. + pub fn parse( + bytes: &'a [u8], + shdrs: &[SectionHeader], + ctx: container::Ctx, + ) -> Result<Option<VerneedSection<'a>>> { + // Get fields needed from optional `version needed` section. + let (offset, size, count) = + if let Some(shdr) = shdrs.iter().find(|shdr| shdr.sh_type == SHT_GNU_VERNEED) { + ( + shdr.sh_offset as usize, + shdr.sh_size as usize, + shdr.sh_info as usize, // Encodes the number of ElfVerneed entries. + ) + } else { + return Ok(None); + }; + + // Get a slice of bytes of the `version needed` section content. + let bytes: &'a [u8] = bytes.pread_with(offset, size)?; + + Ok(Some(VerneedSection { bytes, count, ctx })) + } + + /// Get an iterator over the [`Verneed`] entries. 
+ #[inline] + pub fn iter(&'a self) -> VerneedIter<'a> { + self.into_iter() + } +} + +impl<'a> IntoIterator for &'_ VerneedSection<'a> { + type Item = <VerneedIter<'a> as Iterator>::Item; + type IntoIter = VerneedIter<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + VerneedIter { + bytes: self.bytes, + count: self.count, + index: 0, + offset: 0, + ctx: self.ctx, + } + } +} + +/// Iterator over the [`Verneed`] entries from the [`SHT_GNU_VERNEED`] section. +pub struct VerneedIter<'a> { + bytes: &'a [u8], + count: usize, + index: usize, + offset: usize, + ctx: container::Ctx, +} + +impl<'a> Iterator for VerneedIter<'a> { + type Item = Verneed<'a>; + + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.count { + None + } else { + self.index += 1; + + let do_next = |iter: &mut Self| { + let ElfVerneed { + vn_version, + vn_cnt, + vn_file, + vn_aux, + vn_next, + } = iter.bytes.pread_with(iter.offset, iter.ctx.le).ok()?; + + // Validate offset to first ElfVernaux entry. + let offset = iter.offset.checked_add(vn_aux as usize)?; + + // Validate if offset is valid index into bytes slice. + if offset >= iter.bytes.len() { + return None; + } + + // Get a slice of bytes starting with the first ElfVernaux entry. + let bytes: &'a [u8] = &iter.bytes[offset..]; + + // Bump the offset to the next ElfVerneed entry. + iter.offset = iter.offset.checked_add(vn_next as usize)?; + + // Start yielding None on the next call if there is no next offset. + if vn_next == 0 { + iter.index = iter.count; + } + + Some(Verneed { + vn_version, + vn_cnt, + vn_file: vn_file as usize, + vn_aux, + vn_next, + bytes, + ctx: iter.ctx, + }) + }; + + do_next(self).or_else(|| { + // Adjust current index to count in case of an error. 
+ self.index = self.count; + None + }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = self.count - self.index; + (0, Some(len)) + } +} + +impl ExactSizeIterator for VerneedIter<'_> {} + +impl FusedIterator for VerneedIter<'_> {} + +/// An ELF [Version Need][lsb-verneed] entry . +/// +/// [lsb-verneed]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERNEEDFIG +#[derive(Debug)] +pub struct Verneed<'a> { + /// Version of structure. This value is currently set to 1, and will be reset if the versioning + /// implementation is incompatibly altered. + pub vn_version: u16, + /// Number of associated verneed array entries. + pub vn_cnt: u16, + /// Offset to the file name string in the section header, in bytes. + pub vn_file: usize, + /// Offset to a corresponding entry in the vernaux array, in bytes. + pub vn_aux: u32, + /// Offset to the next verneed entry, in bytes. + pub vn_next: u32, + + bytes: &'a [u8], + ctx: container::Ctx, +} + +impl<'a> Verneed<'a> { + /// Get an iterator over the [`Vernaux`] entries of this [`Verneed`] entry. + #[inline] + pub fn iter(&'a self) -> VernauxIter<'a> { + self.into_iter() + } +} + +impl<'a> IntoIterator for &'_ Verneed<'a> { + type Item = <VernauxIter<'a> as Iterator>::Item; + type IntoIter = VernauxIter<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + VernauxIter { + bytes: self.bytes, + count: self.vn_cnt, + index: 0, + offset: 0, + ctx: self.ctx, + } + } +} + +/// Iterator over the [`Vernaux`] entries for an specific [`Verneed`] entry. 
+pub struct VernauxIter<'a> { + bytes: &'a [u8], + count: u16, + index: u16, + offset: usize, + ctx: container::Ctx, +} + +impl<'a> Iterator for VernauxIter<'a> { + type Item = Vernaux; + + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.count { + None + } else { + self.index += 1; + + let do_next = |iter: &mut Self| { + let ElfVernaux { + vna_hash, + vna_flags, + vna_other, + vna_name, + vna_next, + } = iter.bytes.pread_with(iter.offset, iter.ctx.le).ok()?; + + // Bump the offset to the next ElfVernaux entry. + iter.offset = iter.offset.checked_add(vna_next as usize)?; + + // Start yielding None on the next call if there is no next offset. + if vna_next == 0 { + iter.index = iter.count; + } + + Some(Vernaux { + vna_hash, + vna_flags, + vna_other, + vna_name: vna_name as usize, + vna_next, + }) + }; + + do_next(self).or_else(|| { + // Adjust current index to count in case of an error. + self.index = self.count; + None + }) + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = usize::from(self.count - self.index); + (0, Some(len)) + } +} + +impl ExactSizeIterator for VernauxIter<'_> {} + +impl FusedIterator for VernauxIter<'_> {} + +/// An ELF [Version Need Auxiliary][lsb-vernaux] entry. +/// +/// [lsb-vernaux]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERNEEDEXTFIG +#[derive(Debug)] +pub struct Vernaux { + /// Dependency name hash value (ELF hash function). + pub vna_hash: u32, + /// Dependency information flag bitmask. + pub vna_flags: u16, + /// Object file version identifier used in the .gnu.version symbol version array. Bit number 15 + /// controls whether or not the object is hidden; if this bit is set, the object cannot be used + /// and the static linker will ignore the symbol's presence in the object. + pub vna_other: u16, + /// Offset to the dependency name string in the section header, in bytes. 
+ pub vna_name: usize, + /// Offset to the next vernaux entry, in bytes. + pub vna_next: u32, +} + +#[cfg(test)] +mod test { + use super::{ElfVerdaux, ElfVerdef, ElfVernaux, ElfVerneed, ElfVersym}; + use super::{Versym, VERSYM_HIDDEN, VER_NDX_GLOBAL, VER_NDX_LOCAL}; + use core::mem::size_of; + + #[test] + fn check_size() { + assert_eq!(2, size_of::<ElfVersym>()); + assert_eq!(20, size_of::<ElfVerdef>()); + assert_eq!(8, size_of::<ElfVerdaux>()); + assert_eq!(16, size_of::<ElfVerneed>()); + assert_eq!(16, size_of::<ElfVernaux>()); + } + + #[test] + fn check_versym() { + let local = Versym { + vs_val: VER_NDX_LOCAL, + }; + assert_eq!(true, local.is_local()); + assert_eq!(false, local.is_global()); + assert_eq!(false, local.is_hidden()); + assert_eq!(VER_NDX_LOCAL, local.version()); + + let global = Versym { + vs_val: VER_NDX_GLOBAL, + }; + assert_eq!(false, global.is_local()); + assert_eq!(true, global.is_global()); + assert_eq!(false, global.is_hidden()); + assert_eq!(VER_NDX_GLOBAL, global.version()); + + let hidden = Versym { + vs_val: VERSYM_HIDDEN, + }; + assert_eq!(false, hidden.is_local()); + assert_eq!(false, hidden.is_global()); + assert_eq!(true, hidden.is_hidden()); + assert_eq!(0, hidden.version()); + + let hidden = Versym { + vs_val: VERSYM_HIDDEN | 0x123, + }; + assert_eq!(false, hidden.is_local()); + assert_eq!(false, hidden.is_global()); + assert_eq!(true, hidden.is_hidden()); + assert_eq!(0x123, hidden.version()); + } +} diff --git a/third_party/rust/goblin/src/error.rs b/third_party/rust/goblin/src/error.rs new file mode 100644 index 0000000000..e2dd517e15 --- /dev/null +++ b/third_party/rust/goblin/src/error.rs @@ -0,0 +1,65 @@ +//! A custom Goblin error +//! 
+ +use alloc::string::String; +use core::fmt; +use core::result; +#[cfg(feature = "std")] +use std::{error, io}; + +#[non_exhaustive] +#[derive(Debug)] +/// A custom Goblin error +pub enum Error { + /// The binary is malformed somehow + Malformed(String), + /// The binary's magic is unknown or bad + BadMagic(u64), + /// An error emanating from reading and interpreting bytes + Scroll(scroll::Error), + /// An IO based error + #[cfg(feature = "std")] + IO(io::Error), + /// Buffer is too short to hold N items + BufferTooShort(usize, &'static str), +} + +#[cfg(feature = "std")] +impl error::Error for Error { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match *self { + Error::IO(ref io) => Some(io), + Error::Scroll(ref scroll) => Some(scroll), + _ => None, + } + } +} + +#[cfg(feature = "std")] +impl From<io::Error> for Error { + fn from(err: io::Error) -> Error { + Error::IO(err) + } +} + +impl From<scroll::Error> for Error { + fn from(err: scroll::Error) -> Error { + Error::Scroll(err) + } +} + +impl fmt::Display for Error { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + match *self { + #[cfg(feature = "std")] + Error::IO(ref err) => write!(fmt, "{}", err), + Error::Scroll(ref err) => write!(fmt, "{}", err), + Error::BadMagic(magic) => write!(fmt, "Invalid magic number: 0x{:x}", magic), + Error::Malformed(ref msg) => write!(fmt, "Malformed entity: {}", msg), + Error::BufferTooShort(n, item) => write!(fmt, "Buffer is too short for {} {}", n, item), + } + } +} + +/// An impish result +pub type Result<T> = result::Result<T, Error>; diff --git a/third_party/rust/goblin/src/lib.rs b/third_party/rust/goblin/src/lib.rs new file mode 100644 index 0000000000..25ab841322 --- /dev/null +++ b/third_party/rust/goblin/src/lib.rs @@ -0,0 +1,377 @@ +//! # libgoblin +//! +//! ![say the right +//! words](https://s-media-cache-ak0.pinimg.com/736x/1b/6a/aa/1b6aaa2bae005e2fed84b1a7c32ecb1b.jpg) +//! +//! 
`libgoblin` is a cross-platform trifecta of binary parsing and loading fun. It supports: +//! +//! * An ELF32/64 parser, and raw C structs +//! * A 32/64-bit, zero-copy, endian aware, Mach-o parser, and raw C structs +//! * A PE32/PE32+ (64-bit) parser, and raw C structs +//! * A Unix archive parser and loader +//! +//! Goblin requires at least `rustc` 1.36.0, uses the 2018 rust edition, and is developed on stable. +//! +//! Goblin primarily supports the following important use cases: +//! +//! 1. Core, std-free `#[repr(C)]` structs, tiny compile time, 32/64 (or both) at your leisure +//! +//! 2. Type punning. Define a function once on a type, but have it work on 32 or 64-bit variants - without really changing anything, and no macros! See `examples/automagic.rs` for a basic example. +//! +//! 3. `std` mode. This throws in read and write impls via `Pread` and `Pwrite`, reading from file, convenience allocations, extra methods, etc. This is for clients who can allocate and want to read binaries off disk. +//! +//! 4. `Endian_fd`. A truly terrible name :laughing: this is for binary analysis like in [panopticon](https://github.com/das-labor/panopticon) which needs to read binaries of foreign endianness, _or_ as a basis for constructing cross platform foreign architecture binutils, e.g. [cargo-sym](https://github.com/m4b/cargo-sym) and [bingrep](https://github.com/m4b/bingrep) are simple examples of this, but the sky is the limit. +//! +//! # Example +//! +//! ```rust +//! use goblin::{error, Object}; +//! use std::path::Path; +//! use std::env; +//! use std::fs; +//! +//! fn run () -> error::Result<()> { +//! for (i, arg) in env::args().enumerate() { +//! if i == 1 { +//! let path = Path::new(arg.as_str()); +//! let buffer = fs::read(path)?; +//! match Object::parse(&buffer)? { +//! Object::Elf(elf) => { +//! println!("elf: {:#?}", &elf); +//! }, +//! Object::PE(pe) => { +//! println!("pe: {:#?}", &pe); +//! }, +//! Object::Mach(mach) => { +//! 
println!("mach: {:#?}", &mach); +//! }, +//! Object::Archive(archive) => { +//! println!("archive: {:#?}", &archive); +//! }, +//! Object::Unknown(magic) => { println!("unknown magic: {:#x}", magic) } +//! } +//! } +//! } +//! Ok(()) +//! } +//! ``` +//! +//! # Feature Usage +//! +//! `libgoblin` is engineered to be tailored towards very different use-case scenarios, for example: +//! +//! * a no-std mode; just simply set default features to false +//! * a endian aware parsing and reading +//! * for binary loaders which don't require this, simply use `elf32` and `elf64` (and `std` of course) +//! +//! For example, if you are writing a 64-bit kernel, or just want a barebones C-like +//! header interface which defines the structures, just select `elf64`, `--cfg +//! feature=\"elf64\"`, which will compile without `std`. +//! +//! Similarly, if you want to use host endianness loading via the various `from_fd` methods, `--cfg +//! feature=\"std\"`, which will not use the `byteorder` extern crate, and read the bytes +//! from disk in the endianness of the host machine. +//! +//! If you want endian aware reading, and you don't use `default`, then you need to opt in as normal +//! via `endian_fd` + +#![cfg_attr(not(feature = "std"), no_std)] + +#[cfg(feature = "std")] +extern crate core; + +#[cfg(feature = "alloc")] +#[macro_use] +extern crate alloc; + +///////////////////////// +// Misc/Helper Modules +///////////////////////// + +#[allow(unused)] +macro_rules! if_std { + ($($i:item)*) => ($( + #[cfg(feature = "std")] + $i + )*) +} + +#[allow(unused)] +macro_rules! 
if_alloc { + ($($i:item)*) => ($( + #[cfg(feature = "alloc")] + $i + )*) +} + +#[cfg(feature = "alloc")] +pub mod error; + +pub mod strtab; + +/// Binary container size information and byte-order context +pub mod container { + pub use scroll::Endian; + + #[derive(Debug, Copy, Clone, PartialEq)] + /// The size of a binary container + pub enum Container { + Little, + Big, + } + + impl Container { + /// Is this a 64-bit container or not? + pub fn is_big(self) -> bool { + self == Container::Big + } + } + + #[cfg(not(target_pointer_width = "64"))] + /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not + pub const CONTAINER: Container = Container::Little; + + #[cfg(target_pointer_width = "64")] + /// The default binary container size - either `Big` or `Little`, depending on whether the host machine's pointer size is 64 or not + pub const CONTAINER: Container = Container::Big; + + impl Default for Container { + #[inline] + fn default() -> Self { + CONTAINER + } + } + + #[derive(Debug, Copy, Clone, PartialEq)] + /// A binary parsing context, including the container size and underlying byte endianness + pub struct Ctx { + pub container: Container, + pub le: scroll::Endian, + } + + impl Ctx { + /// Whether this binary container context is "big" or not + pub fn is_big(self) -> bool { + self.container.is_big() + } + /// Whether this binary container context is little endian or not + pub fn is_little_endian(self) -> bool { + self.le.is_little() + } + /// Create a new binary container context + pub fn new(container: Container, le: scroll::Endian) -> Self { + Ctx { container, le } + } + /// Return a dubious pointer/address byte size for the container + pub fn size(self) -> usize { + match self.container { + // TODO: require pointer size initialization/setting or default to container size with these values, e.g., avr pointer width will be smaller iirc + Container::Little => 4, + Container::Big => 8, + } 
/// Borrows the leading 16 bytes of `bytes` as a fixed-size array reference, which is the
/// shape `peek_bytes` expects.
/// Yields `None` when `bytes` holds fewer than 16 bytes.
#[allow(unused)]
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
    // `get` is the length check: it is `None` for anything shorter than 16 bytes.
    let head = bytes.get(..16)?;
    // The slice is exactly 16 bytes long here, so the conversion cannot fail in practice.
    head.try_into().ok()
}
{ + + /// Peeks at `bytes`, and returns a `Hint` + pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> { + use scroll::{Pread, LE}; + if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG { + let class = bytes[elf::header::EI_CLASS]; + let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB; + let is_64 = + if class == elf::header::ELFCLASS64 { + Some (true) + } else if class == elf::header::ELFCLASS32 { + Some (false) + } else { None }; + + Ok(Hint::Elf(HintData { is_lsb, is_64 })) + } else if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC { + Ok(Hint::Archive) + } else if (&bytes[0..2]).pread_with::<u16>(0, LE)? == pe::header::DOS_MAGIC { + Ok(Hint::PE) + } else { + mach::peek_bytes(bytes) + } + } + + /// Peeks at the underlying Read object. Requires the underlying bytes to have at least 16 byte length. Resets the seek to `Start` after reading. + #[cfg(feature = "std")] + pub fn peek<R: ::std::io::Read + ::std::io::Seek>(fd: &mut R) -> error::Result<Hint> { + use std::io::SeekFrom; + let mut bytes = [0u8; 16]; + fd.seek(SeekFrom::Start(0))?; + fd.read_exact(&mut bytes)?; + fd.seek(SeekFrom::Start(0))?; + peek_bytes(&bytes) + } + + #[derive(Debug)] + #[allow(clippy::large_enum_variant)] + /// A parseable object that goblin understands + pub enum Object<'a> { + /// An ELF32/ELF64! + Elf(elf::Elf<'a>), + /// A PE32/PE32+! + PE(pe::PE<'a>), + /// A 32/64-bit Mach-o binary _OR_ it is a multi-architecture binary container! + Mach(mach::Mach<'a>), + /// A Unix archive + Archive(archive::Archive<'a>), + /// None of the above, with the given magic value + Unknown(u64), + } + + impl<'a> Object<'a> { + /// Tries to parse an `Object` from `bytes` + pub fn parse(bytes: &[u8]) -> error::Result<Object> { + if let Some(hint_bytes) = take_hint_bytes(bytes) { + match peek_bytes(hint_bytes)? 
{ + Hint::Elf(_) => Ok(Object::Elf(elf::Elf::parse(bytes)?)), + Hint::Mach(_) | Hint::MachFat(_) => Ok(Object::Mach(mach::Mach::parse(bytes)?)), + Hint::Archive => Ok(Object::Archive(archive::Archive::parse(bytes)?)), + Hint::PE => Ok(Object::PE(pe::PE::parse(bytes)?)), + Hint::Unknown(magic) => Ok(Object::Unknown(magic)) + } + } else { + Err(error::Error::Malformed(format!("Object is too small."))) + } + } + } +} // end if_endian_fd + +///////////////////////// +// Binary Modules +///////////////////////// + +#[cfg(any(feature = "elf64", feature = "elf32"))] +#[macro_use] +pub mod elf; + +#[cfg(feature = "elf32")] +/// The ELF 32-bit struct definitions and associated values, re-exported for easy "type-punning" +pub mod elf32 { + pub use crate::elf::dynamic::dyn32 as dynamic; + pub use crate::elf::header::header32 as header; + pub use crate::elf::note::Nhdr32 as Note; + pub use crate::elf::program_header::program_header32 as program_header; + pub use crate::elf::reloc::reloc32 as reloc; + pub use crate::elf::section_header::section_header32 as section_header; + pub use crate::elf::sym::sym32 as sym; + + pub mod gnu_hash { + pub use crate::elf::gnu_hash::hash; + elf_gnu_hash_impl!(u32); + } +} + +#[cfg(feature = "elf64")] +/// The ELF 64-bit struct definitions and associated values, re-exported for easy "type-punning" +pub mod elf64 { + pub use crate::elf::dynamic::dyn64 as dynamic; + pub use crate::elf::header::header64 as header; + pub use crate::elf::note::Nhdr64 as Note; + pub use crate::elf::program_header::program_header64 as program_header; + pub use crate::elf::reloc::reloc64 as reloc; + pub use crate::elf::section_header::section_header64 as section_header; + pub use crate::elf::sym::sym64 as sym; + + pub mod gnu_hash { + pub use crate::elf::gnu_hash::hash; + elf_gnu_hash_impl!(u64); + } +} + +#[cfg(any(feature = "mach32", feature = "mach64"))] +pub mod mach; + +#[cfg(any(feature = "pe32", feature = "pe64"))] +pub mod pe; + +#[cfg(feature = "archive")] +pub 
mod archive; + +#[cfg(test)] +mod tests { + use super::*; + if_everything! { + #[test] + fn take_hint_bytes_long_enough() { + let bytes_array = [1; 32]; + let bytes = &bytes_array[..]; + assert!(take_hint_bytes(bytes).is_some()) + } + + #[test] + fn take_hint_bytes_not_long_enough() { + let bytes_array = [1; 8]; + let bytes = &bytes_array[..]; + assert!(take_hint_bytes(bytes).is_none()) + } + } +} diff --git a/third_party/rust/goblin/src/mach/bind_opcodes.rs b/third_party/rust/goblin/src/mach/bind_opcodes.rs new file mode 100644 index 0000000000..970ab10af1 --- /dev/null +++ b/third_party/rust/goblin/src/mach/bind_opcodes.rs @@ -0,0 +1,60 @@ +//! Bind opcodes are interpreted by the dynamic linker to efficiently collect every symbol imported by this binary, and from which library using two-level namespacing +//! +//! Some uses of external symbols do not need to be bound immediately. +//! Instead they can be lazily bound on first use. The lazy_bind +//! are contains a stream of BIND opcodes to bind all lazy symbols. +//! Normal use is that dyld ignores the lazy_bind section when +//! loading an image. Instead the static linker arranged for a +//! lazy pointer to initially point to a helper function which +//! pushes the offset into the lazy_bind area for the symbol +//! needing to be bound, then jumps to dyld which simply adds +//! the offset to lazy_bind_off to get the information on what +//! to bind. 
/// A raw bind opcode byte
pub type Opcode = u8;

// The following are used to encode binding information
pub const BIND_TYPE_POINTER: u8 = 1;
pub const BIND_TYPE_TEXT_ABSOLUTE32: u8 = 2;
pub const BIND_TYPE_TEXT_PCREL32: u8 = 3;
pub const BIND_SPECIAL_DYLIB_SELF: u8 = 0;
pub const BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE: u8 = 0xf; // -1
pub const BIND_SPECIAL_DYLIB_FLAT_LOOKUP: u8 = 0xe; // -2
pub const BIND_SYMBOL_FLAGS_WEAK_IMPORT: u8 = 0x1;
pub const BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION: u8 = 0x8;
pub const BIND_OPCODE_MASK: u8 = 0xF0;
pub const BIND_IMMEDIATE_MASK: u8 = 0x0F;
pub const BIND_OPCODE_DONE: Opcode = 0x00;
pub const BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: Opcode = 0x10;
pub const BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: Opcode = 0x20;
pub const BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: Opcode = 0x30;
pub const BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Opcode = 0x40;
pub const BIND_OPCODE_SET_TYPE_IMM: Opcode = 0x50;
pub const BIND_OPCODE_SET_ADDEND_SLEB: Opcode = 0x60;
pub const BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: Opcode = 0x70;
pub const BIND_OPCODE_ADD_ADDR_ULEB: Opcode = 0x80;
pub const BIND_OPCODE_DO_BIND: Opcode = 0x90;
pub const BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: Opcode = 0xA0;
pub const BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: Opcode = 0xB0;
pub const BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: Opcode = 0xC0;

/// Returns the name of the `BIND_OPCODE_*` constant equal to `opcode`.
///
/// The comparison is exact: the byte is not masked with `BIND_OPCODE_MASK` first, so any
/// value that is not one of the thirteen opcode constants yields `"UNKNOWN OPCODE"`.
pub fn opcode_to_str(opcode: Opcode) -> &'static str {
    // Every known opcode paired with its printable name.
    const NAMES: &[(Opcode, &str)] = &[
        (BIND_OPCODE_DONE, "BIND_OPCODE_DONE"),
        (
            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM,
            "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM",
        ),
        (
            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB,
            "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB",
        ),
        (
            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM,
            "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM",
        ),
        (
            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM,
            "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM",
        ),
        (BIND_OPCODE_SET_TYPE_IMM, "BIND_OPCODE_SET_TYPE_IMM"),
        (BIND_OPCODE_SET_ADDEND_SLEB, "BIND_OPCODE_SET_ADDEND_SLEB"),
        (
            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB,
            "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB",
        ),
        (BIND_OPCODE_ADD_ADDR_ULEB, "BIND_OPCODE_ADD_ADDR_ULEB"),
        (BIND_OPCODE_DO_BIND, "BIND_OPCODE_DO_BIND"),
        (
            BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB,
            "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB",
        ),
        (
            BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED,
            "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED",
        ),
        (
            BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
            "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB",
        ),
    ];

    NAMES
        .iter()
        .find(|&&(code, _)| code == opcode)
        .map_or("UNKNOWN OPCODE", |&(_, name)| name)
}
0x0005_0100; +pub const DYLD_IOS_VERSION_6_0: u32 = 0x0006_0000; +pub const DYLD_IOS_VERSION_6_1: u32 = 0x0006_0100; +pub const DYLD_IOS_VERSION_7_0: u32 = 0x0007_0000; +pub const DYLD_IOS_VERSION_7_1: u32 = 0x0007_0100; +pub const DYLD_IOS_VERSION_8_0: u32 = 0x0008_0000; +pub const DYLD_IOS_VERSION_9_0: u32 = 0x0009_0000; +pub const DYLD_IOS_VERSION_10_0: u32 = 0x000A_0000; +pub const DYLD_IOS_VERSION_11_0: u32 = 0x000B_0000; + +// Segment and Section Constants + +// The flags field of a section structure is separated into two parts a section +// type and section attributes. The section types are mutually exclusive (it +// can only have one type) but the section attributes are not (it may have more +// than one attribute). +/// 256 section types +pub const SECTION_TYPE: u32 = 0x0000_00ff; +/// 24 section attributes +pub const SECTION_ATTRIBUTES: u32 = 0xffff_ff00; + +// Constants for the type of a section +/// regular section +pub const S_REGULAR: u32 = 0x0; +/// zero fill on demand section +pub const S_ZEROFILL: u32 = 0x1; +/// section with only literal C strings +pub const S_CSTRING_LITERALS: u32 = 0x2; +/// section with only 4 byte literals +pub const S_4BYTE_LITERALS: u32 = 0x3; +/// section with only 8 byte literals +pub const S_8BYTE_LITERALS: u32 = 0x4; +/// section with only pointers to +pub const S_LITERAL_POINTERS: u32 = 0x5; + +// literals +// For the two types of symbol pointers sections and the symbol stubs section +// they have indirect symbol table entries. For each of the entries in the +// section the indirect symbol table entries, in corresponding order in the +// indirect symbol table, start at the index stored in the reserved1 field +// of the section structure. Since the indirect symbol table entries +// correspond to the entries in the section the number of indirect symbol table +// entries is inferred from the size of the section divided by the size of the +// entries in the section. 
For symbol pointers sections the size of the entries +// in the section is 4 bytes and for symbol stubs sections the byte size of the +// stubs is stored in the reserved2 field of the section structure. +/// section with only non-lazy symbol pointers +pub const S_NON_LAZY_SYMBOL_POINTERS: u32 = 0x6; +/// section with only lazy symbol pointers +pub const S_LAZY_SYMBOL_POINTERS: u32 = 0x7; +/// section with only symbol stubs, byte size of stub in the reserved2 field +pub const S_SYMBOL_STUBS: u32 = 0x8; +/// section with only function pointers for initialization +pub const S_MOD_INIT_FUNC_POINTERS: u32 = 0x9; +/// section with only function pointers for termination +pub const S_MOD_TERM_FUNC_POINTERS: u32 = 0xa; +/// section contains symbols that are to be coalesced +pub const S_COALESCED: u32 = 0xb; +/// zero fill on demand section that can be larger than 4 gigabytes) +pub const S_GB_ZEROFILL: u32 = 0xc; +/// section with only pairs of function pointers for interposing +pub const S_INTERPOSING: u32 = 0xd; +/// section with only 16 byte literals +pub const S_16BYTE_LITERALS: u32 = 0xe; +/// section contains DTrace Object Format +pub const S_DTRACE_DOF: u32 = 0xf; +/// section with only lazy symbol pointers to lazy loaded dylibs +pub const S_LAZY_DYLIB_SYMBOL_POINTERS: u32 = 0x10; + +// Section types to support thread local variables +/// template of initial values for TLVs +pub const S_THREAD_LOCAL_REGULAR: u32 = 0x11; +/// template of initial values for TLVs +pub const S_THREAD_LOCAL_ZEROFILL: u32 = 0x12; +/// TLV descriptors +pub const S_THREAD_LOCAL_VARIABLES: u32 = 0x13; +/// pointers to TLV descriptors +pub const S_THREAD_LOCAL_VARIABLE_POINTERS: u32 = 0x14; +/// functions to call to initialize TLV values +pub const S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: u32 = 0x15; + +// Constants for the section attributes part of the flags field of a section +// structure. 
+/// User setable attributes +pub const SECTION_ATTRIBUTES_USR: u32 = 0xff00_0000; +/// section contains only true machine instructions +pub const S_ATTR_PURE_INSTRUCTIONS: u32 = 0x8000_0000; +/// section contains coalesced symbols that are not to be in a ranlib table of contents +pub const S_ATTR_NO_TOC: u32 = 0x4000_0000; +/// ok to strip static symbols in this section in files with the MH_DYLDLINK flag +pub const S_ATTR_STRIP_STATIC_SYMS: u32 = 0x2000_0000; +/// no dead stripping +pub const S_ATTR_NO_DEAD_STRIP: u32 = 0x1000_0000; +/// blocks are live if they reference live blocks +pub const S_ATTR_LIVE_SUPPORT: u32 = 0x0800_0000; +/// Used with i386 code stubs written on by dyld +pub const S_ATTR_SELF_MODIFYING_CODE: u32 = 0x0400_0000; + +// If a segment contains any sections marked with S_ATTR_DEBUG then all +// sections in that segment must have this attribute. No section other than +// a section marked with this attribute may reference the contents of this +// section. A section with this attribute may contain no symbols and must have +// a section type S_REGULAR. The static linker will not copy section contents +// from sections with this attribute into its output file. These sections +// generally contain DWARF debugging info. +/// debug section +pub const S_ATTR_DEBUG: u32 = 0x0200_0000; +/// system setable attributes +pub const SECTION_ATTRIBUTES_SYS: u32 = 0x00ff_ff00; +/// section contains some machine instructions +pub const S_ATTR_SOME_INSTRUCTIONS: u32 = 0x0000_0400; +/// section has external relocation entries +pub const S_ATTR_EXT_RELOC: u32 = 0x0000_0200; +/// section has local relocation entries +pub const S_ATTR_LOC_RELOC: u32 = 0x0000_0100; + +// The names of segments and sections in them are mostly meaningless to the +// link-editor. But there are few things to support traditional UNIX +// executables that require the link-editor and assembler to use some names +// agreed upon by convention. 
+// The initial protection of the "__TEXT" segment has write protection turned +// off (not writeable). +// The link-editor will allocate common symbols at the end of the "__common" +// section in the "__DATA" segment. It will create the section and segment +// if needed. + +// The currently known segment names and the section names in those segments +/// the pagezero segment which has no protections and catches NULL references for MH_EXECUTE files +pub const SEG_PAGEZERO: &str = "__PAGEZERO"; +/// the tradition UNIX text segment +pub const SEG_TEXT: &str = "__TEXT"; +/// the real text part of the text section no headers, and no padding +pub const SECT_TEXT: &str = "__text"; +/// the fvmlib initialization section +pub const SECT_FVMLIB_INIT0: &str = "__fvmlib_init0"; +/// the section following the fvmlib initialization section +pub const SECT_FVMLIB_INIT1: &str = "__fvmlib_init1"; +/// the tradition UNIX data segment +pub const SEG_DATA: &str = "__DATA"; +/// the real initialized data section no padding, no bss overlap +pub const SECT_DATA: &str = "__data"; +/// the real uninitialized data sectionno padding +pub const SECT_BSS: &str = "__bss"; +/// the section common symbols are allocated in by the link editor +pub const SECT_COMMON: &str = "__common"; +/// objective-C runtime segment +pub const SEG_OBJC: &str = "__OBJC"; +/// symbol table +pub const SECT_OBJC_SYMBOLS: &str = "__symbol_table"; +/// module information +pub const SECT_OBJC_MODULES: &str = "__module_info"; +/// string table +pub const SECT_OBJC_STRINGS: &str = "__selector_strs"; +/// string table +pub const SECT_OBJC_REFS: &str = "__selector_refs"; +/// the icon segment +pub const SEG_ICON: &str = "__ICON"; +/// the icon headers +pub const SECT_ICON_HEADER: &str = "__header"; +/// the icons in tiff format +pub const SECT_ICON_TIFF: &str = "__tiff"; +/// the segment containing all structs created and maintained by the link editor. 
pub mod cputype {

    /// An alias for u32
    pub type CpuType = u32;
    /// An alias for u32
    pub type CpuSubType = u32;

    /// the mask for CPU feature flags
    pub const CPU_SUBTYPE_MASK: u32 = 0xff00_0000;
    /// mask for architecture bits
    pub const CPU_ARCH_MASK: CpuType = 0xff00_0000;
    /// the mask for 64 bit ABI
    pub const CPU_ARCH_ABI64: CpuType = 0x0100_0000;
    /// the mask for ILP32 ABI on 64 bit hardware
    pub const CPU_ARCH_ABI64_32: CpuType = 0x0200_0000;

    // CPU Types
    pub const CPU_TYPE_ANY: CpuType = !0;
    pub const CPU_TYPE_VAX: CpuType = 1;
    pub const CPU_TYPE_MC680X0: CpuType = 6;
    pub const CPU_TYPE_X86: CpuType = 7;
    pub const CPU_TYPE_I386: CpuType = CPU_TYPE_X86;
    pub const CPU_TYPE_X86_64: CpuType = CPU_TYPE_X86 | CPU_ARCH_ABI64;
    pub const CPU_TYPE_MIPS: CpuType = 8;
    pub const CPU_TYPE_MC98000: CpuType = 10;
    pub const CPU_TYPE_HPPA: CpuType = 11;
    pub const CPU_TYPE_ARM: CpuType = 12;
    pub const CPU_TYPE_ARM64: CpuType = CPU_TYPE_ARM | CPU_ARCH_ABI64;
    pub const CPU_TYPE_ARM64_32: CpuType = CPU_TYPE_ARM | CPU_ARCH_ABI64_32;
    pub const CPU_TYPE_MC88000: CpuType = 13;
    pub const CPU_TYPE_SPARC: CpuType = 14;
    pub const CPU_TYPE_I860: CpuType = 15;
    pub const CPU_TYPE_ALPHA: CpuType = 16;
    pub const CPU_TYPE_POWERPC: CpuType = 18;
    pub const CPU_TYPE_POWERPC64: CpuType = CPU_TYPE_POWERPC | CPU_ARCH_ABI64;

    // CPU Subtypes
    pub const CPU_SUBTYPE_MULTIPLE: CpuSubType = !0;
    pub const CPU_SUBTYPE_LITTLE_ENDIAN: CpuSubType = 0;
    pub const CPU_SUBTYPE_BIG_ENDIAN: CpuSubType = 1;
    pub const CPU_SUBTYPE_VAX_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_VAX780: CpuSubType = 1;
    pub const CPU_SUBTYPE_VAX785: CpuSubType = 2;
    pub const CPU_SUBTYPE_VAX750: CpuSubType = 3;
    pub const CPU_SUBTYPE_VAX730: CpuSubType = 4;
    pub const CPU_SUBTYPE_UVAXI: CpuSubType = 5;
    pub const CPU_SUBTYPE_UVAXII: CpuSubType = 6;
    pub const CPU_SUBTYPE_VAX8200: CpuSubType = 7;
    pub const CPU_SUBTYPE_VAX8500: CpuSubType = 8;
    pub const CPU_SUBTYPE_VAX8600: CpuSubType = 9;
    pub const CPU_SUBTYPE_VAX8650: CpuSubType = 10;
    pub const CPU_SUBTYPE_VAX8800: CpuSubType = 11;
    pub const CPU_SUBTYPE_UVAXIII: CpuSubType = 12;
    pub const CPU_SUBTYPE_MC680X0_ALL: CpuSubType = 1;
    pub const CPU_SUBTYPE_MC68030: CpuSubType = 1; /* compat */
    pub const CPU_SUBTYPE_MC68040: CpuSubType = 2;
    pub const CPU_SUBTYPE_MC68030_ONLY: CpuSubType = 3;

    /// Builds an Intel subtype value: the family number plus the model number shifted
    /// left by four bits.
    const fn intel_subtype(family: CpuSubType, model: CpuSubType) -> CpuSubType {
        family + (model << 4)
    }

    pub const CPU_SUBTYPE_I386_ALL: CpuSubType = intel_subtype(3, 0);
    pub const CPU_SUBTYPE_386: CpuSubType = intel_subtype(3, 0);
    pub const CPU_SUBTYPE_486: CpuSubType = intel_subtype(4, 0);
    pub const CPU_SUBTYPE_486SX: CpuSubType = intel_subtype(4, 8); // 8 << 4 = 128
    pub const CPU_SUBTYPE_586: CpuSubType = intel_subtype(5, 0);
    pub const CPU_SUBTYPE_PENT: CpuSubType = intel_subtype(5, 0);
    pub const CPU_SUBTYPE_PENTPRO: CpuSubType = intel_subtype(6, 1);
    pub const CPU_SUBTYPE_PENTII_M3: CpuSubType = intel_subtype(6, 3);
    pub const CPU_SUBTYPE_PENTII_M5: CpuSubType = intel_subtype(6, 5);
    pub const CPU_SUBTYPE_CELERON: CpuSubType = intel_subtype(7, 6);
    pub const CPU_SUBTYPE_CELERON_MOBILE: CpuSubType = intel_subtype(7, 7);
    pub const CPU_SUBTYPE_PENTIUM_3: CpuSubType = intel_subtype(8, 0);
    pub const CPU_SUBTYPE_PENTIUM_3_M: CpuSubType = intel_subtype(8, 1);
    pub const CPU_SUBTYPE_PENTIUM_3_XEON: CpuSubType = intel_subtype(8, 2);
    pub const CPU_SUBTYPE_PENTIUM_M: CpuSubType = intel_subtype(9, 0);
    pub const CPU_SUBTYPE_PENTIUM_4: CpuSubType = intel_subtype(10, 0);
    pub const CPU_SUBTYPE_PENTIUM_4_M: CpuSubType = intel_subtype(10, 1);
    pub const CPU_SUBTYPE_ITANIUM: CpuSubType = intel_subtype(11, 0);
    pub const CPU_SUBTYPE_ITANIUM_2: CpuSubType = intel_subtype(11, 1);
    pub const CPU_SUBTYPE_XEON: CpuSubType = intel_subtype(12, 0);
    pub const CPU_SUBTYPE_XEON_MP: CpuSubType = intel_subtype(12, 1);
    pub const CPU_SUBTYPE_INTEL_FAMILY_MAX: CpuSubType = 15;
    pub const CPU_SUBTYPE_INTEL_MODEL_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_X86_ALL: CpuSubType = 3;
    pub const CPU_SUBTYPE_X86_64_ALL: CpuSubType = 3;
    pub const CPU_SUBTYPE_X86_ARCH1: CpuSubType = 4;
    pub const CPU_SUBTYPE_X86_64_H: CpuSubType = 8;
    pub const CPU_SUBTYPE_MIPS_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_MIPS_R2300: CpuSubType = 1;
    pub const CPU_SUBTYPE_MIPS_R2600: CpuSubType = 2;
    pub const CPU_SUBTYPE_MIPS_R2800: CpuSubType = 3;
    pub const CPU_SUBTYPE_MIPS_R2000A: CpuSubType = 4;
    pub const CPU_SUBTYPE_MIPS_R2000: CpuSubType = 5;
    pub const CPU_SUBTYPE_MIPS_R3000A: CpuSubType = 6;
    pub const CPU_SUBTYPE_MIPS_R3000: CpuSubType = 7;
    pub const CPU_SUBTYPE_MC98000_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_MC98601: CpuSubType = 1;
    pub const CPU_SUBTYPE_HPPA_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_HPPA_7100: CpuSubType = 0;
    pub const CPU_SUBTYPE_HPPA_7100LC: CpuSubType = 1;
    pub const CPU_SUBTYPE_MC88000_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_MC88100: CpuSubType = 1;
    pub const CPU_SUBTYPE_MC88110: CpuSubType = 2;
    pub const CPU_SUBTYPE_SPARC_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_I860_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_I860_860: CpuSubType = 1;
    pub const CPU_SUBTYPE_POWERPC_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_POWERPC_601: CpuSubType = 1;
    pub const CPU_SUBTYPE_POWERPC_602: CpuSubType = 2;
    pub const CPU_SUBTYPE_POWERPC_603: CpuSubType = 3;
    pub const CPU_SUBTYPE_POWERPC_603E: CpuSubType = 4;
    pub const CPU_SUBTYPE_POWERPC_603EV: CpuSubType = 5;
    pub const CPU_SUBTYPE_POWERPC_604: CpuSubType = 6;
    pub const CPU_SUBTYPE_POWERPC_604E: CpuSubType = 7;
    pub const CPU_SUBTYPE_POWERPC_620: CpuSubType = 8;
    pub const CPU_SUBTYPE_POWERPC_750: CpuSubType = 9;
    pub const CPU_SUBTYPE_POWERPC_7400: CpuSubType = 10;
    pub const CPU_SUBTYPE_POWERPC_7450: CpuSubType = 11;
    pub const CPU_SUBTYPE_POWERPC_970: CpuSubType = 100;
    pub const CPU_SUBTYPE_ARM_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_ARM_V4T: CpuSubType = 5;
    pub const CPU_SUBTYPE_ARM_V6: CpuSubType = 6;
    pub const CPU_SUBTYPE_ARM_V5TEJ: CpuSubType = 7;
    pub const CPU_SUBTYPE_ARM_XSCALE: CpuSubType = 8;
    pub const CPU_SUBTYPE_ARM_V7: CpuSubType = 9;
    pub const CPU_SUBTYPE_ARM_V7F: CpuSubType = 10;
    pub const CPU_SUBTYPE_ARM_V7S: CpuSubType = 11;
    pub const CPU_SUBTYPE_ARM_V7K: CpuSubType = 12;
    pub const CPU_SUBTYPE_ARM_V6M: CpuSubType = 14;
    pub const CPU_SUBTYPE_ARM_V7M: CpuSubType = 15;
    pub const CPU_SUBTYPE_ARM_V7EM: CpuSubType = 16;
    pub const CPU_SUBTYPE_ARM_V8: CpuSubType = 13;
    pub const CPU_SUBTYPE_ARM64_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_ARM64_V8: CpuSubType = 1;
    pub const CPU_SUBTYPE_ARM64_E: CpuSubType = 2;
    pub const CPU_SUBTYPE_ARM64_32_ALL: CpuSubType = 0;
    pub const CPU_SUBTYPE_ARM64_32_V8: CpuSubType = 1;

    // Expands a list of `(name, cputype constant, cpusubtype constant)` rows into the two
    // lookup functions below, so the name -> types and types -> name tables share one source.
    macro_rules! cpu_flag_mapping {
        (
            $(($flag:expr, $ty:ident, $sub:ident),)*
        ) => {
            /// Looks up `name` among the canonical architecture names only (no aliases).
            fn get_arch_from_flag_no_alias(name: &str) -> Option<(CpuType, CpuSubType)> {
                match name {
                    $($flag => Some(($ty, $sub)),)*
                    _ => None
                }
            }

            /// Get the architecture name from cputype and cpusubtype
            ///
            /// The pair must equal one of the table rows exactly; anything else
            /// yields `None`.
            ///
            /// When the values come from an instance of
            /// [`goblin::mach::header::Header`](/goblin/mach/header/struct.Header.html),
            /// pass the results of the
            /// [`cputype()`](/goblin/mach/header/struct.Header.html#method.cputype) and
            /// [`cpusubtype()`](/goblin/mach/header/struct.Header.html#method.cpusubtype)
            /// methods rather than the raw `cputype` and `cpusubtype` fields.
            ///
            /// For example:
            ///
            /// ```rust
            /// use std::fs::read;
            /// use goblin::mach::constants::cputype::get_arch_name_from_types;
            /// use goblin::mach::Mach;
            ///
            /// read("path/to/macho").and_then(|buf| {
            ///     if let Ok(Mach::Binary(a)) = Mach::parse(&buf) {
            ///         println!("arch name: {}", get_arch_name_from_types(a.header.cputype(), a.header.cpusubtype()).unwrap());
            ///     }
            ///     Ok(())
            /// });
            /// ```
            pub fn get_arch_name_from_types(cputype: CpuType, cpusubtype: CpuSubType)
                -> Option<&'static str> {
                match (cputype, cpusubtype) {
                    $(($ty, $sub) => Some($flag),)*
                    _ => None
                }
            }
        }
    }

    /// Get the cputype and cpusubtype from a name
    ///
    /// Canonical names from the mapping table are tried first; if none matches, the
    /// aliases `"pentium"`, `"pentpro"` and `"x86"` are recognized as well.
    pub fn get_arch_from_flag(name: &str) -> Option<(CpuType, CpuSubType)> {
        if let Some(arch) = get_arch_from_flag_no_alias(name) {
            return Some(arch);
        }

        // we also handle some common aliases
        let alias = match name {
            // these are used by apple
            "pentium" => (CPU_TYPE_I386, CPU_SUBTYPE_PENT),
            "pentpro" => (CPU_TYPE_I386, CPU_SUBTYPE_PENTPRO),
            // these are used commonly for consistency
            "x86" => (CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL),
            _ => return None,
        };
        Some(alias)
    }

    cpu_flag_mapping! {
        // generic types
        ("any", CPU_TYPE_ANY, CPU_SUBTYPE_MULTIPLE),
        ("little", CPU_TYPE_ANY, CPU_SUBTYPE_LITTLE_ENDIAN),
        ("big", CPU_TYPE_ANY, CPU_SUBTYPE_BIG_ENDIAN),

        // macho names
        ("ppc64", CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_ALL),
        ("x86_64", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL),
        ("x86_64h", CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H),
        ("arm64", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL),
        ("arm64_32", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_ALL),
        ("ppc970-64", CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_970),
        ("ppc", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL),
        ("i386", CPU_TYPE_I386, CPU_SUBTYPE_I386_ALL),
        ("m68k", CPU_TYPE_MC680X0, CPU_SUBTYPE_MC680X0_ALL),
        ("hppa", CPU_TYPE_HPPA, CPU_SUBTYPE_HPPA_ALL),
        ("sparc", CPU_TYPE_SPARC, CPU_SUBTYPE_SPARC_ALL),
        ("m88k", CPU_TYPE_MC88000, CPU_SUBTYPE_MC88000_ALL),
        ("i860", CPU_TYPE_I860, CPU_SUBTYPE_I860_ALL),
        ("arm", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_ALL),
        ("ppc601", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_601),
        ("ppc603", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_603),
        ("ppc603e", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_603E),
        ("ppc603ev", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_603EV),
        ("ppc604", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_604),
        ("ppc604e", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_604E),
        ("ppc750", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_750),
        ("ppc7400", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_7400),
        ("ppc7450", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_7450),
        ("ppc970", CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_970),
        ("i486", CPU_TYPE_I386, CPU_SUBTYPE_486),
        ("i486SX", CPU_TYPE_I386, CPU_SUBTYPE_486SX),
        ("i586", CPU_TYPE_I386, CPU_SUBTYPE_586),
        ("i686", CPU_TYPE_I386, CPU_SUBTYPE_PENTPRO),
        ("pentIIm3", CPU_TYPE_I386, CPU_SUBTYPE_PENTII_M3),
        ("pentIIm5", CPU_TYPE_I386, CPU_SUBTYPE_PENTII_M5),
        ("pentium4", CPU_TYPE_I386, CPU_SUBTYPE_PENTIUM_4),
        ("m68030", CPU_TYPE_MC680X0, CPU_SUBTYPE_MC68030_ONLY),
        ("m68040", CPU_TYPE_MC680X0, CPU_SUBTYPE_MC68040),
        ("hppa7100LC", CPU_TYPE_HPPA, CPU_SUBTYPE_HPPA_7100LC),
        ("armv4t", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V4T),
        ("armv5", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V5TEJ),
        ("xscale", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_XSCALE),
        ("armv6", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6),
        ("armv6m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6M),
        ("armv7", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7),
        ("armv7f", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7F),
        ("armv7s", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S),
        ("armv7k", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7K),
        ("armv7m", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7M),
        ("armv7em", CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7EM),
        ("arm64v8", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_V8),
        ("arm64e", CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_E),
        ("arm64_32_v8", CPU_TYPE_ARM64_32, CPU_SUBTYPE_ARM64_32_V8),
    }
}
they're weak + +use crate::error; +use crate::mach::load_command; +use alloc::string::String; +use alloc::vec::Vec; +use core::fmt::{self, Debug}; +use core::ops::Range; +use scroll::{Pread, Uleb128}; + +type Flag = u64; + +// "The following are used on the flags byte of a terminal node +// in the export information." +pub const EXPORT_SYMBOL_FLAGS_KIND_MASK: Flag = 0x03; +pub const EXPORT_SYMBOL_FLAGS_KIND_REGULAR: Flag = 0x00; +pub const EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE: Flag = 0x02; // this is a symbol not present in the loader.h but only in the dyld compressed image loader source code, and only available with a #def macro for export flags but libobjc. def has this +pub const EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL: Flag = 0x01; +pub const EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION: Flag = 0x04; +pub const EXPORT_SYMBOL_FLAGS_REEXPORT: Flag = 0x08; +pub const EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER: Flag = 0x10; + +#[derive(Debug)] +pub enum SymbolKind { + Regular, + Absolute, + ThreadLocal, + UnknownSymbolKind(Flag), +} + +impl SymbolKind { + pub fn new(kind: Flag) -> SymbolKind { + match kind & EXPORT_SYMBOL_FLAGS_KIND_MASK { + 0x00 => SymbolKind::Regular, + 0x01 => SymbolKind::ThreadLocal, + 0x02 => SymbolKind::Absolute, + _ => SymbolKind::UnknownSymbolKind(kind), + } + } + pub fn to_str(&self) -> &'static str { + match self { + SymbolKind::Regular => "Regular", + SymbolKind::Absolute => "Absolute", + SymbolKind::ThreadLocal => "Thread_LOCAL", + SymbolKind::UnknownSymbolKind(_k) => "Unknown", + } + } +} + +#[derive(Debug)] +/// An export can be a regular export, a re-export, or a stub +pub enum ExportInfo<'a> { + /// A regular exported symbol, which is an address where it is found, and the flags associated with it + Regular { address: u64, flags: Flag }, + /// if lib_symbol_name None then same symbol name, otherwise reexport of lib_symbol_name with name in the trie + /// "If the string is zero length, then the symbol is re-export from the specified dylib with the same 
name" + Reexport { + lib: &'a str, + lib_symbol_name: Option<&'a str>, + flags: Flag, + }, + /// If the flags is `EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER`, then following the flags are two `Uleb128`s: the stub offset and the resolver offset. The stub is used by non-lazy pointers. The resolver is used by lazy pointers and must be called to get the actual address to use + Stub { + stub_offset: scroll::Uleb128, + resolver_offset: scroll::Uleb128, + flags: Flag, + }, +} + +impl<'a> ExportInfo<'a> { + /// Parse out the export info from `bytes`, at `offset` + pub fn parse( + bytes: &'a [u8], + libs: &[&'a str], + flags: Flag, + mut offset: usize, + ) -> error::Result<ExportInfo<'a>> { + use self::ExportInfo::*; + let regular = |offset| -> error::Result<ExportInfo> { + let address = bytes.pread::<Uleb128>(offset)?; + Ok(Regular { + address: address.into(), + flags, + }) + }; + let reexport = |mut offset| -> error::Result<ExportInfo<'a>> { + let lib_ordinal: u64 = { + let tmp = bytes.pread::<Uleb128>(offset)?; + offset += tmp.size(); + tmp.into() + }; + let lib_symbol_name = bytes.pread::<&str>(offset)?; + let lib = libs[lib_ordinal as usize]; + let lib_symbol_name = if lib_symbol_name == "" { + None + } else { + Some(lib_symbol_name) + }; + Ok(Reexport { + lib, + lib_symbol_name, + flags, + }) + }; + match SymbolKind::new(flags) { + SymbolKind::Regular => { + if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 { + reexport(offset) + } else if flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0 { + // 0x10 + let stub_offset = bytes.pread::<Uleb128>(offset)?; + offset += stub_offset.size(); + let resolver_offset = bytes.pread::<Uleb128>(offset)?; + Ok(Stub { + stub_offset, + resolver_offset, + flags, + }) + // else if (flags = kEXPORT_SYMBOL_FLAGS_WEAK_DEFINITION) then (*0x40 unused*) + } else { + regular(offset) + } + } + SymbolKind::ThreadLocal | SymbolKind::Absolute => { + if flags & EXPORT_SYMBOL_FLAGS_REEXPORT != 0 { + reexport(offset) + } else { + regular(offset) + } + } + 
SymbolKind::UnknownSymbolKind(_kind) => { + // 0x5f causes errors, but parsing as regular symbol resolves... + //Err(error::Error::Malformed(format!("Unknown kind {:#x} from flags {:#x} in get_symbol_type at offset {}", kind, flags, offset))) + regular(offset) + } + } + } +} + +#[derive(Debug)] +/// A finalized symbolic export reconstructed from the export trie +pub struct Export<'a> { + /// The reconsituted export name which dyld matches against + pub name: String, + /// The export info in the node data + pub info: ExportInfo<'a>, + /// How large this export is + pub size: usize, + /// The offset this symbol export is found in the binary + pub offset: u64, +} + +impl<'a> Export<'a> { + /// Create a new export from `name` and `info` + pub fn new(name: String, info: ExportInfo<'a>) -> Export<'a> { + let offset = match info { + ExportInfo::Regular { address, .. } => address, + _ => 0x0, + }; + Export { + name, + info, + size: 0, + offset, + } + } +} + +/// An export trie efficiently encodes all of the symbols exported by this binary for dynamic linking +pub struct ExportTrie<'a> { + data: &'a [u8], + location: Range<usize>, +} + +impl<'a> ExportTrie<'a> { + #[inline] + fn walk_nodes( + &self, + libs: &[&'a str], + branches: Vec<(String, usize)>, + acc: &mut Vec<Export<'a>>, + ) -> error::Result<()> { + for (symbol, next_node) in branches { + self.walk_trie(libs, symbol, next_node, acc)?; + } + Ok(()) + } + + // current_symbol can be a str iiuc + fn walk_branches( + &self, + nbranches: usize, + current_symbol: String, + mut offset: usize, + ) -> error::Result<Vec<(String, usize)>> { + if nbranches > self.data.len() { + return Err(error::Error::BufferTooShort(nbranches, "branches")); + } + let mut branches = Vec::with_capacity(nbranches); + //println!("\t@{:#x}", *offset); + for _i in 0..nbranches { + // additional offset calculations are relative to the base we received + let offset = &mut offset; + let string = self.data.pread::<&str>(*offset)?; + let mut key = 
current_symbol.clone(); + key.push_str(string); + // +1 for null terminator + *offset = *offset + string.len() + 1; + //println!("\t({}) string_len: {} offset: {:#x}", i, string.len(), *offset); + // value is relative to export trie base + let next_node = Uleb128::read(&self.data, offset)? as usize + self.location.start; + //println!("\t({}) string: {} next_node: {:#x}", _i, key, next_node); + branches.push((key, next_node)); + } + Ok(branches) + } + + fn walk_trie( + &self, + libs: &[&'a str], + current_symbol: String, + start: usize, + exports: &mut Vec<Export<'a>>, + ) -> error::Result<()> { + if start < self.location.end { + let mut offset = start; + let terminal_size = Uleb128::read(&self.data, &mut offset)?; + // let mut input = String::new(); + // ::std::io::stdin().read_line(&mut input).unwrap(); + // println!("@ {:#x} node: {:#x} current_symbol: {}", start, terminal_size, current_symbol); + if terminal_size == 0 { + let nbranches = Uleb128::read(&self.data, &mut offset)? as usize; + //println!("\t@ {:#x} BRAN {}", *offset, nbranches); + let branches = self.walk_branches(nbranches, current_symbol, offset)?; + self.walk_nodes(libs, branches, exports) + } else { + // terminal node, but the tricky part is that they can have children... + let pos = offset; + let children_start = &mut (pos + terminal_size as usize); + let nchildren = Uleb128::read(&self.data, children_start)? 
as usize; + let flags = Uleb128::read(&self.data, &mut offset)?; + //println!("\t@ {:#x} TERM {} flags: {:#x}", offset, nchildren, flags); + let info = ExportInfo::parse(&self.data, libs, flags, offset)?; + let export = Export::new(current_symbol.clone(), info); + //println!("\t{:?}", &export); + exports.push(export); + if nchildren == 0 { + // this branch is done + Ok(()) + } else { + // more branches to walk + let branches = + self.walk_branches(nchildren, current_symbol, *children_start)?; + self.walk_nodes(libs, branches, exports) + } + } + } else { + Ok(()) + } + } + + fn new_impl(bytes: &'a [u8], start: usize, size: usize) -> Self { + // FIXME: Ideally, this should validate `command`, but the best we can + // do for now is return an empty `Range`. + let location = match start + .checked_add(size) + .and_then(|end| bytes.get(start..end).map(|_| start..end)) + { + Some(location) => location, + None => { + log::warn!("Invalid `DyldInfo` `command`."); + 0..0 + } + }; + ExportTrie { + data: bytes, + location, + } + } + + /// Walk the export trie for symbols exported by this binary, using the provided `libs` to resolve re-exports + pub fn exports(&self, libs: &[&'a str]) -> error::Result<Vec<Export<'a>>> { + let offset = self.location.start; + let current_symbol = String::new(); + let mut exports = Vec::new(); + self.walk_trie(libs, current_symbol, offset, &mut exports)?; + Ok(exports) + } + + /// Create a new, lazy, zero-copy export trie from the `DyldInfo` `command` + pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self { + Self::new_impl( + bytes, + command.export_off as usize, + command.export_size as usize, + ) + } + + /// Create a new, lazy, zero-copy export trie from the `LinkeditDataCommand` `command` + pub fn new_from_linkedit_data_command( + bytes: &'a [u8], + command: &load_command::LinkeditDataCommand, + ) -> Self { + Self::new_impl(bytes, command.dataoff as usize, command.datasize as usize) + } +} + +impl<'a> Debug for 
ExportTrie<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("ExportTrie") + .field("data", &"<... redacted ...>") + .field( + "location", + &format_args!("{:#x}..{:#x}", self.location.start, self.location.end), + ) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn export_trie() { + const EXPORTS: [u8; 64] = [ + 0x00, 0x01, 0x5f, 0x00, 0x05, 0x00, 0x02, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x00, 0x1f, 0x6d, + 0x61, 0x00, 0x23, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x78, 0x69, 0x6d, 0x75, 0x6d, + 0x00, 0x30, 0x69, 0x6e, 0x00, 0x35, 0x03, 0x00, 0xc0, 0x1e, 0x00, 0x03, 0x00, 0xd0, + 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + let exports = &EXPORTS[..]; + let libs = vec!["/usr/lib/libderp.so", "/usr/lib/libthuglife.so"]; + let mut command = load_command::DyldInfoCommand::default(); + command.export_size = exports.len() as u32; + let trie = ExportTrie::new(&exports, &command); + println!("trie: {:#?}", &trie); + let exports = trie.exports(&libs).unwrap(); + println!("len: {} exports: {:#?}", exports.len(), &exports); + assert_eq!(exports.len() as usize, 3usize) + } + + #[test] + fn export_trie_linkedit_data() { + const EXPORTS: [u8; 64] = [ + 0x00, 0x01, 0x5f, 0x00, 0x05, 0x00, 0x02, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, + 0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x00, 0x1f, 0x6d, + 0x61, 0x00, 0x23, 0x02, 0x00, 0x00, 0x00, 0x00, 0x02, 0x78, 0x69, 0x6d, 0x75, 0x6d, + 0x00, 0x30, 0x69, 0x6e, 0x00, 0x35, 0x03, 0x00, 0xc0, 0x1e, 0x00, 0x03, 0x00, 0xd0, + 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + let exports = &EXPORTS[..]; + let libs = vec!["/usr/lib/libderp.so", "/usr/lib/libthuglife.so"]; + let command = load_command::LinkeditDataCommand { + datasize: exports.len() as u32, + ..Default::default() + }; + let trie = ExportTrie::new_from_linkedit_data_command(exports, &command); + 
println!("trie: {:#?}", &trie); + let exports = trie.exports(&libs).unwrap(); + println!("len: {} exports: {:#?}", exports.len(), &exports); + assert_eq!(exports.len() as usize, 3usize); + } + + #[test] + fn invalid_range() { + let mut command = load_command::DyldInfoCommand::default(); + command.export_off = 0xffff_ff00; + command.export_size = 0x00ff_ff00; + let trie = ExportTrie::new(&[], &command); + // FIXME: it would have been nice if this were an `Err`. + let exports = trie.exports(&[]).unwrap(); + assert_eq!(exports.len(), 0); + } +} diff --git a/third_party/rust/goblin/src/mach/fat.rs b/third_party/rust/goblin/src/mach/fat.rs new file mode 100644 index 0000000000..33d945748a --- /dev/null +++ b/third_party/rust/goblin/src/mach/fat.rs @@ -0,0 +1,132 @@ +//! A Mach-o fat binary is a multi-architecture binary container + +use core::fmt; + +if_std! { + use std::fs::File; + use std::io::{self, Read}; +} + +use crate::error; +use crate::mach::constants::cputype::{CpuSubType, CpuType, CPU_ARCH_ABI64, CPU_SUBTYPE_MASK}; +use scroll::{Pread, Pwrite, SizeWith}; + +pub const FAT_MAGIC: u32 = 0xcafe_babe; +pub const FAT_CIGAM: u32 = 0xbeba_feca; + +#[repr(C)] +#[derive(Clone, Copy, Default, Pread, Pwrite, SizeWith)] +/// The Mach-o `FatHeader` always has its data bigendian +pub struct FatHeader { + /// The magic number, `cafebabe` + pub magic: u32, + /// How many fat architecture headers there are + pub nfat_arch: u32, +} + +pub const SIZEOF_FAT_HEADER: usize = 8; + +impl fmt::Debug for FatHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("FatHeader") + .field("magic", &format_args!("0x{:x}", self.magic)) + .field("nfat_arch", &self.nfat_arch) + .finish() + } +} + +impl FatHeader { + /// Reinterpret a `FatHeader` from `bytes` + pub fn from_bytes(bytes: [u8; SIZEOF_FAT_HEADER]) -> FatHeader { + let mut offset = 0; + let magic = bytes.gread_with(&mut offset, scroll::BE).unwrap(); + let nfat_arch = bytes.gread_with(&mut offset, 
scroll::BE).unwrap(); + FatHeader { magic, nfat_arch } + } + + /// Reads a `FatHeader` from a `File` on disk + #[cfg(feature = "std")] + pub fn from_fd(fd: &mut File) -> io::Result<FatHeader> { + let mut header = [0; SIZEOF_FAT_HEADER]; + fd.read_exact(&mut header)?; + Ok(FatHeader::from_bytes(header)) + } + + /// Parse a mach-o fat header from the `bytes` + pub fn parse(bytes: &[u8]) -> error::Result<FatHeader> { + Ok(bytes.pread_with::<FatHeader>(0, scroll::BE)?) + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default, Pread, Pwrite, SizeWith)] +/// The Mach-o `FatArch` always has its data bigendian +pub struct FatArch { + /// What kind of CPU this binary is + pub cputype: u32, + pub cpusubtype: u32, + /// Where in the fat binary it starts + pub offset: u32, + /// How big the binary is + pub size: u32, + pub align: u32, +} + +pub const SIZEOF_FAT_ARCH: usize = 20; + +impl fmt::Debug for FatArch { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("FatArch") + .field("cputype", &self.cputype()) + .field("cmdsize", &self.cpusubtype()) + .field("offset", &format_args!("{:#x}", &self.offset)) + .field("size", &self.size) + .field("align", &self.align) + .finish() + } +} + +impl FatArch { + /// Get the slice of bytes this header describes from `bytes` + pub fn slice<'a>(&self, bytes: &'a [u8]) -> &'a [u8] { + // FIXME: This function should ideally validate the inputs and return a `Result`. + // Best we can do for now without `panic`ing is return an empty slice. 
+ let start = self.offset as usize; + match start + .checked_add(self.size as usize) + .and_then(|end| bytes.get(start..end)) + { + Some(slice) => slice, + None => { + log::warn!("invalid `FatArch` offset"); + &[] + } + } + } + + /// Returns the cpu type + pub fn cputype(&self) -> CpuType { + self.cputype + } + + /// Returns the cpu subtype with the capabilities removed + pub fn cpusubtype(&self) -> CpuSubType { + self.cpusubtype & !CPU_SUBTYPE_MASK + } + + /// Returns the capabilities of the CPU + pub fn cpu_caps(&self) -> u32 { + (self.cpusubtype & CPU_SUBTYPE_MASK) >> 24 + } + + /// Whether this fat architecture header describes a 64-bit binary + pub fn is_64(&self) -> bool { + (self.cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64 + } + + /// Parse a `FatArch` header from `bytes` at `offset` + pub fn parse(bytes: &[u8], offset: usize) -> error::Result<Self> { + let arch = bytes.pread_with::<FatArch>(offset, scroll::BE)?; + Ok(arch) + } +} diff --git a/third_party/rust/goblin/src/mach/header.rs b/third_party/rust/goblin/src/mach/header.rs new file mode 100644 index 0000000000..df8731f111 --- /dev/null +++ b/third_party/rust/goblin/src/mach/header.rs @@ -0,0 +1,438 @@ +//! 
A header contains minimal architecture information, the binary kind, the number of load commands, as well as an endianness hint + +use core::fmt; +use plain::Plain; +use scroll::ctx; +use scroll::ctx::SizeWith; +use scroll::{Pread, Pwrite, SizeWith}; + +use crate::container::{self, Container}; +use crate::error; +use crate::mach::constants::cputype::{CpuSubType, CpuType, CPU_SUBTYPE_MASK}; + +// Constants for the flags field of the mach_header +/// the object file has no undefined references +pub const MH_NOUNDEFS: u32 = 0x1; +/// the object file is the output of an incremental link against a base file and can't be +/// link edited again +pub const MH_INCRLINK: u32 = 0x2; +/// the object file is input for the dynamic linker and can't be staticly link edited again +pub const MH_DYLDLINK: u32 = 0x4; +/// the object file's undefined references are bound by the dynamic linker when loaded. +pub const MH_BINDATLOAD: u32 = 0x8; +/// the file has its dynamic undefined references prebound. +pub const MH_PREBOUND: u32 = 0x10; +/// the file has its read-only and read-write segments split +pub const MH_SPLIT_SEGS: u32 = 0x20; +/// the shared library init routine is to be run lazily via catching memory faults to its writeable +/// segments (obsolete) +pub const MH_LAZY_INIT: u32 = 0x40; +/// the image is using two-level name space bindings +pub const MH_TWOLEVEL: u32 = 0x80; +/// the executable is forcing all images to use flat name space bindings +pub const MH_FORCE_FLAT: u32 = 0x100; +/// this umbrella guarantees no multiple defintions of symbols in its sub-images so the +/// two-level namespace hints can always be used. +pub const MH_NOMULTIDEFS: u32 = 0x200; +/// do not have dyld notify the prebinding agent about this executable +pub const MH_NOFIXPREBINDING: u32 = 0x400; +/// the binary is not prebound but can have its prebinding redone. only used when MH_PREBOUND is not set. 
+pub const MH_PREBINDABLE: u32 = 0x800; +/// indicates that this binary binds to all two-level namespace modules of its dependent libraries. +/// Only used when MH_PREBINDABLE and MH_TWOLEVEL are both set. +pub const MH_ALLMODSBOUND: u32 = 0x1000; +/// safe to divide up the sections into sub-sections via symbols for dead code stripping +pub const MH_SUBSECTIONS_VIA_SYMBOLS: u32 = 0x2000; +/// the binary has been canonicalized via the unprebind operation +pub const MH_CANONICAL: u32 = 0x4000; +/// the final linked image contains external weak symbols +pub const MH_WEAK_DEFINES: u32 = 0x8000; +/// the final linked image uses weak symbols +pub const MH_BINDS_TO_WEAK: u32 = 0x10000; +/// When this bit is set, all stacks in the task will be given stack execution privilege. +/// Only used in MH_EXECUTE filetypes. +pub const MH_ALLOW_STACK_EXECUTION: u32 = 0x20000; +/// When this bit is set, the binary declares it is safe for use in processes with uid zero +pub const MH_ROOT_SAFE: u32 = 0x40000; +/// When this bit is set, the binary declares it is safe for use in processes when issetugid() is true +pub const MH_SETUID_SAFE: u32 = 0x80000; +/// When this bit is set on a dylib, the static linker does not need to examine dependent dylibs to +/// see if any are re-exported +pub const MH_NO_REEXPORTED_DYLIBS: u32 = 0x0010_0000; +/// When this bit is set, the OS will load the main executable at a random address. +/// Only used in MH_EXECUTE filetypes. +pub const MH_PIE: u32 = 0x0020_0000; +/// Only for use on dylibs. When linking against a dylib that has this bit set, the static linker +/// will automatically not create a LC_LOAD_DYLIB load command to the dylib if no symbols are being +/// referenced from the dylib. 
+pub const MH_DEAD_STRIPPABLE_DYLIB: u32 = 0x0040_0000; +/// Contains a section of type S_THREAD_LOCAL_VARIABLES +pub const MH_HAS_TLV_DESCRIPTORS: u32 = 0x0080_0000; +/// When this bit is set, the OS will run the main executable with a non-executable heap even on +/// platforms (e.g. i386) that don't require it. Only used in MH_EXECUTE filetypes. +pub const MH_NO_HEAP_EXECUTION: u32 = 0x0100_0000; + +// TODO: verify this number is correct, it was previously 0x02000000 which could indicate a typo/data entry error +/// The code was linked for use in an application extension. +pub const MH_APP_EXTENSION_SAFE: u32 = 0x0200_0000; + +#[inline(always)] +pub fn flag_to_str(flag: u32) -> &'static str { + match flag { + MH_NOUNDEFS => "MH_NOUNDEFS", + MH_INCRLINK => "MH_INCRLINK", + MH_DYLDLINK => "MH_DYLDLINK", + MH_BINDATLOAD => "MH_BINDATLOAD", + MH_PREBOUND => "MH_PREBOUND", + MH_SPLIT_SEGS => "MH_SPLIT_SEGS", + MH_LAZY_INIT => "MH_LAZY_INIT", + MH_TWOLEVEL => "MH_TWOLEVEL", + MH_FORCE_FLAT => "MH_FORCE_FLAT", + MH_NOMULTIDEFS => "MH_NOMULTIDEFS", + MH_NOFIXPREBINDING => "MH_NOFIXPREBINDING", + MH_PREBINDABLE => "MH_PREBINDABLE ", + MH_ALLMODSBOUND => "MH_ALLMODSBOUND", + MH_SUBSECTIONS_VIA_SYMBOLS => "MH_SUBSECTIONS_VIA_SYMBOLS", + MH_CANONICAL => "MH_CANONICAL", + MH_WEAK_DEFINES => "MH_WEAK_DEFINES", + MH_BINDS_TO_WEAK => "MH_BINDS_TO_WEAK", + MH_ALLOW_STACK_EXECUTION => "MH_ALLOW_STACK_EXECUTION", + MH_ROOT_SAFE => "MH_ROOT_SAFE", + MH_SETUID_SAFE => "MH_SETUID_SAFE", + MH_NO_REEXPORTED_DYLIBS => "MH_NO_REEXPORTED_DYLIBS", + MH_PIE => "MH_PIE", + MH_DEAD_STRIPPABLE_DYLIB => "MH_DEAD_STRIPPABLE_DYLIB", + MH_HAS_TLV_DESCRIPTORS => "MH_HAS_TLV_DESCRIPTORS", + MH_NO_HEAP_EXECUTION => "MH_NO_HEAP_EXECUTION", + MH_APP_EXTENSION_SAFE => "MH_APP_EXTENSION_SAFE", + _ => "UNKNOWN FLAG", + } +} + +/// Mach Header magic constant +pub const MH_MAGIC: u32 = 0xfeed_face; +pub const MH_CIGAM: u32 = 0xcefa_edfe; +/// Mach Header magic constant for 64-bit +pub const MH_MAGIC_64: u32 
= 0xfeed_facf; +pub const MH_CIGAM_64: u32 = 0xcffa_edfe; + +// Constants for the filetype field of the mach_header +/// relocatable object file +pub const MH_OBJECT: u32 = 0x1; +/// demand paged executable file +pub const MH_EXECUTE: u32 = 0x2; +/// fixed VM shared library file +pub const MH_FVMLIB: u32 = 0x3; +/// core file +pub const MH_CORE: u32 = 0x4; +/// preloaded executable file +pub const MH_PRELOAD: u32 = 0x5; +/// dynamically bound shared library +pub const MH_DYLIB: u32 = 0x6; +/// dynamic link editor +pub const MH_DYLINKER: u32 = 0x7; +/// dynamically bound bundle file +pub const MH_BUNDLE: u32 = 0x8; +/// shared library stub for static linking only, no section contents +pub const MH_DYLIB_STUB: u32 = 0x9; +/// companion file with only debug sections +pub const MH_DSYM: u32 = 0xa; +/// x86_64 kexts +pub const MH_KEXT_BUNDLE: u32 = 0xb; +/// set of mach-o's +pub const MH_FILESET: u32 = 0xc; + +pub fn filetype_to_str(filetype: u32) -> &'static str { + match filetype { + MH_OBJECT => "OBJECT", + MH_EXECUTE => "EXECUTE", + MH_FVMLIB => "FVMLIB", + MH_CORE => "CORE", + MH_PRELOAD => "PRELOAD", + MH_DYLIB => "DYLIB", + MH_DYLINKER => "DYLINKER", + MH_BUNDLE => "BUNDLE", + MH_DYLIB_STUB => "DYLIB_STUB", + MH_DSYM => "DSYM", + MH_KEXT_BUNDLE => "KEXT_BUNDLE", + MH_FILESET => "FILESET", + _ => "UNKNOWN FILETYPE", + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default, Debug, Pread, Pwrite, SizeWith)] +/// A 32-bit Mach-o header +pub struct Header32 { + /// mach magic number identifier + pub magic: u32, + /// cpu specifier + pub cputype: u32, + /// machine specifier + pub cpusubtype: u32, + /// type of file + pub filetype: u32, + /// number of load commands + pub ncmds: u32, + /// the size of all the load commands + pub sizeofcmds: u32, + /// flags + pub flags: u32, +} + +pub const SIZEOF_HEADER_32: usize = 0x1c; + +unsafe impl Plain for Header32 {} + +impl Header32 { + /// Transmutes the given byte array into the corresponding 32-bit Mach-o header + pub fn 
from_bytes(bytes: &[u8; SIZEOF_HEADER_32]) -> &Self { + plain::from_bytes(bytes).unwrap() + } + pub fn size(&self) -> usize { + SIZEOF_HEADER_32 + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default, Debug, Pread, Pwrite, SizeWith)] +/// A 64-bit Mach-o header +pub struct Header64 { + /// mach magic number identifier + pub magic: u32, + /// cpu specifier + pub cputype: u32, + /// machine specifier + pub cpusubtype: u32, + /// type of file + pub filetype: u32, + /// number of load commands + pub ncmds: u32, + /// the size of all the load commands + pub sizeofcmds: u32, + /// flags + pub flags: u32, + pub reserved: u32, +} + +unsafe impl Plain for Header64 {} + +pub const SIZEOF_HEADER_64: usize = 32; + +impl Header64 { + /// Transmutes the given byte array into the corresponding 64-bit Mach-o header + pub fn from_bytes(bytes: &[u8; SIZEOF_HEADER_64]) -> &Self { + plain::from_bytes(bytes).unwrap() + } + pub fn size(&self) -> usize { + SIZEOF_HEADER_64 + } +} + +#[repr(C)] +#[derive(Clone, Copy, Default)] +/// Generic sized header +pub struct Header { + pub magic: u32, + pub cputype: u32, + pub cpusubtype: u32, + /// type of file + pub filetype: u32, + /// number of load commands + pub ncmds: usize, + /// the size of all the load commands + pub sizeofcmds: u32, + /// flags + pub flags: u32, + pub reserved: u32, +} + +impl fmt::Debug for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("Header") + .field("magic", &format_args!("0x{:x}", self.magic)) + .field("cputype", &self.cputype()) + .field("cpusubtype", &format_args!("0x{:x}", self.cpusubtype())) + .field("filetype", &filetype_to_str(self.filetype)) + .field("ncmds", &self.ncmds) + .field("sizeofcmds", &self.sizeofcmds) + .field("flags", &format_args!("0x{:x}", self.flags)) + .field("reserved", &format_args!("0x{:x}", self.reserved)) + .finish() + } +} + +impl From<Header32> for Header { + fn from(header: Header32) -> Self { + Header { + magic: header.magic, + cputype: 
header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as usize, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + reserved: 0, + } + } +} + +impl From<Header> for Header32 { + fn from(header: Header) -> Self { + Header32 { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as u32, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + } + } +} + +impl From<Header64> for Header { + fn from(header: Header64) -> Self { + Header { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as usize, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + reserved: header.reserved, + } + } +} + +impl From<Header> for Header64 { + fn from(header: Header) -> Self { + Header64 { + magic: header.magic, + cputype: header.cputype, + cpusubtype: header.cpusubtype, + filetype: header.filetype, + ncmds: header.ncmds as u32, + sizeofcmds: header.sizeofcmds, + flags: header.flags, + reserved: header.reserved, + } + } +} + +impl Header { + pub fn new(ctx: container::Ctx) -> Self { + let mut header = Header::default(); + header.magic = if ctx.is_big() { MH_MAGIC_64 } else { MH_MAGIC }; + header + } + /// Returns the cpu type + pub fn cputype(&self) -> CpuType { + self.cputype + } + /// Returns the cpu subtype with the capabilities removed + pub fn cpusubtype(&self) -> CpuSubType { + self.cpusubtype & !CPU_SUBTYPE_MASK + } + /// Returns the capabilities of the CPU + pub fn cpu_caps(&self) -> u32 { + (self.cpusubtype & CPU_SUBTYPE_MASK) >> 24 + } +} + +impl ctx::SizeWith<container::Ctx> for Header { + fn size_with(container: &container::Ctx) -> usize { + match container.container { + Container::Little => SIZEOF_HEADER_32, + Container::Big => SIZEOF_HEADER_64, + } + } +} + +impl ctx::SizeWith<Container> for Header { + fn size_with(container: &Container) -> usize { + match 
container { + Container::Little => SIZEOF_HEADER_32, + Container::Big => SIZEOF_HEADER_64, + } + } +} + +impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Header { + type Error = crate::error::Error; + fn try_from_ctx( + bytes: &'a [u8], + container::Ctx { le, container }: container::Ctx, + ) -> error::Result<(Self, usize)> { + let size = bytes.len(); + if size < SIZEOF_HEADER_32 || size < SIZEOF_HEADER_64 { + let error = + error::Error::Malformed("bytes size is smaller than a Mach-o header".into()); + Err(error) + } else { + match container { + Container::Little => { + let header = bytes.pread_with::<Header32>(0, le)?; + Ok((Header::from(header), SIZEOF_HEADER_32)) + } + Container::Big => { + let header = bytes.pread_with::<Header64>(0, le)?; + Ok((Header::from(header), SIZEOF_HEADER_64)) + } + } + } + } +} + +impl ctx::TryIntoCtx<container::Ctx> for Header { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> error::Result<usize> { + match ctx.container { + Container::Little => { + bytes.pwrite_with(Header32::from(self), 0, ctx.le)?; + } + Container::Big => { + bytes.pwrite_with(Header64::from(self), 0, ctx.le)?; + } + }; + Ok(Header::size_with(&ctx)) + } +} + +impl ctx::IntoCtx<container::Ctx> for Header { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem::size_of; + + #[test] + fn test_parse_armv7_header() { + use crate::mach::constants::cputype::CPU_TYPE_ARM; + const CPU_SUBTYPE_ARM_V7: u32 = 9; + use super::Header; + use crate::container::{Container, Ctx, Endian}; + use scroll::Pread; + let bytes = b"\xce\xfa\xed\xfe\x0c\x00\x00\x00\t\x00\x00\x00\n\x00\x00\x00\x06\x00\x00\x00\x8c\r\x00\x00\x00\x00\x00\x00\x1b\x00\x00\x00\x18\x00\x00\x00\xe0\xf7B\xbb\x1c\xf50w\xa6\xf7u\xa3\xba("; + let header: Header = bytes + .pread_with(0, Ctx::new(Container::Little, Endian::Little)) + .unwrap(); + 
assert_eq!(header.cputype, CPU_TYPE_ARM); + assert_eq!(header.cpusubtype, CPU_SUBTYPE_ARM_V7); + } + + #[test] + fn sizeof_header32() { + assert_eq!(SIZEOF_HEADER_32, size_of::<Header32>()); + } + + #[test] + fn sizeof_header64() { + assert_eq!(SIZEOF_HEADER_64, size_of::<Header64>()); + } +} diff --git a/third_party/rust/goblin/src/mach/imports.rs b/third_party/rust/goblin/src/mach/imports.rs new file mode 100644 index 0000000000..3c31329a96 --- /dev/null +++ b/third_party/rust/goblin/src/mach/imports.rs @@ -0,0 +1,308 @@ +//! Dynamically linked symbolic imports + +// table of tuples: +// <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend> +// symbol flags are undocumented + +use alloc::vec::Vec; +use core::fmt::{self, Debug}; +use core::ops::Range; +use scroll::{Pread, Sleb128, Uleb128}; + +use crate::container; +use crate::error; +use crate::mach::bind_opcodes; +use crate::mach::load_command; +use crate::mach::segment; + +#[derive(Debug)] +/// Import binding information generated by running the Finite State Automaton programmed via `bind_opcodes` +struct BindInformation<'a> { + seg_index: u8, + seg_offset: u64, + bind_type: u8, + symbol_library_ordinal: u8, + symbol_name: &'a str, + symbol_flags: u8, + addend: i64, + special_dylib: u8, // seeing self = 0 assuming this means the symbol is imported from itself, because its... libSystem.B.dylib? 
+ is_lazy: bool, +} + +impl<'a> BindInformation<'a> { + pub fn new(is_lazy: bool) -> Self { + let mut bind_info = BindInformation::default(); + if is_lazy { + bind_info.is_lazy = true; + bind_info.bind_type = bind_opcodes::BIND_TYPE_POINTER; + } + bind_info + } + pub fn is_weak(&self) -> bool { + self.symbol_flags & bind_opcodes::BIND_SYMBOL_FLAGS_WEAK_IMPORT != 0 + } +} + +impl<'a> Default for BindInformation<'a> { + fn default() -> Self { + BindInformation { + seg_index: 0, + seg_offset: 0x0, + bind_type: 0x0, + special_dylib: 1, + symbol_library_ordinal: 0, + symbol_name: "", + symbol_flags: 0, + addend: 0, + is_lazy: false, + } + } +} + +#[derive(Debug)] +/// An dynamically linked symbolic import +pub struct Import<'a> { + /// The symbol name dyld uses to resolve this import + pub name: &'a str, + /// The library this symbol belongs to (thanks to two-level namespaces) + pub dylib: &'a str, + /// Whether the symbol is lazily resolved or not + pub is_lazy: bool, + /// The offset in the binary this import is found + pub offset: u64, + /// The size of this import + pub size: usize, + /// The virtual memory address at which this import is found + pub address: u64, + /// The addend of this import + pub addend: i64, + /// Whether this import is weak + pub is_weak: bool, + /// The offset in the stream of bind opcodes that caused this import + pub start_of_sequence_offset: u64, +} + +impl<'a> Import<'a> { + /// Create a new import from the import binding information in `bi` + fn new( + bi: &BindInformation<'a>, + libs: &[&'a str], + segments: &[segment::Segment], + start_of_sequence_offset: usize, + ) -> Import<'a> { + let (offset, address) = { + let segment = &segments[bi.seg_index as usize]; + ( + segment.fileoff + bi.seg_offset, + segment.vmaddr + bi.seg_offset, + ) + }; + let size = if bi.is_lazy { 8 } else { 0 }; + Import { + name: bi.symbol_name, + dylib: libs[bi.symbol_library_ordinal as usize], + is_lazy: bi.is_lazy, + offset, + size, + address, + addend: 
bi.addend, + is_weak: bi.is_weak(), + start_of_sequence_offset: start_of_sequence_offset as u64, + } + } +} + +/// An interpreter for mach BIND opcodes. +/// Runs on prebound (non lazy) symbols (usually dylib extern consts and extern variables), +/// and lazy symbols (usually dylib functions) +pub struct BindInterpreter<'a> { + data: &'a [u8], + location: Range<usize>, + lazy_location: Range<usize>, +} + +impl<'a> Debug for BindInterpreter<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("BindInterpreter") + .field("data", &"<... redacted ...>") + .field( + "location", + &format_args!("{:#x}..{:#x}", self.location.start, self.location.end), + ) + .field( + "lazy_location", + &format_args!( + "{:#x}..{:#x}", + self.lazy_location.start, self.lazy_location.end + ), + ) + .finish() + } +} + +impl<'a> BindInterpreter<'a> { + /// Construct a new import binding interpreter from `bytes` and the load `command` + pub fn new(bytes: &'a [u8], command: &load_command::DyldInfoCommand) -> Self { + let get_pos = |off: u32, size: u32| -> Range<usize> { + let start = off as usize; + start..start.saturating_add(size as usize) + }; + let location = get_pos(command.bind_off, command.bind_size); + let lazy_location = get_pos(command.lazy_bind_off, command.lazy_bind_size); + BindInterpreter { + data: bytes, + location, + lazy_location, + } + } + /// Return the imports in this binary + pub fn imports( + &self, + libs: &[&'a str], + segments: &[segment::Segment], + ctx: container::Ctx, + ) -> error::Result<Vec<Import<'a>>> { + let mut imports = Vec::new(); + self.run(false, libs, segments, ctx, &mut imports)?; + self.run(true, libs, segments, ctx, &mut imports)?; + Ok(imports) + } + fn run( + &self, + is_lazy: bool, + libs: &[&'a str], + segments: &[segment::Segment], + ctx: container::Ctx, + imports: &mut Vec<Import<'a>>, + ) -> error::Result<()> { + use crate::mach::bind_opcodes::*; + let location = if is_lazy { + &self.lazy_location + } else { + 
&self.location + }; + let mut bind_info = BindInformation::new(is_lazy); + let mut offset = location.start; + let mut start_of_sequence: usize = 0; + while offset < location.end { + let opcode = self.data.gread::<i8>(&mut offset)? as bind_opcodes::Opcode; + // let mut input = String::new(); + // ::std::io::stdin().read_line(&mut input).unwrap(); + // println!("opcode: {} ({:#x}) offset: {:#x}\n {:?}", opcode_to_str(opcode & BIND_OPCODE_MASK), opcode, offset - location.start - 1, &bind_info); + match opcode & BIND_OPCODE_MASK { + // we do nothing, don't update our records, and add a new, fresh record + BIND_OPCODE_DONE => { + bind_info = BindInformation::new(is_lazy); + start_of_sequence = offset - location.start; + } + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { + let symbol_library_ordinal = opcode & BIND_IMMEDIATE_MASK; + bind_info.symbol_library_ordinal = symbol_library_ordinal; + } + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + let symbol_library_ordinal = Uleb128::read(&self.data, &mut offset)?; + bind_info.symbol_library_ordinal = symbol_library_ordinal as u8; + } + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM => { + // dyld puts the immediate into the symbol_library_ordinal field... 
+ let special_dylib = opcode & BIND_IMMEDIATE_MASK; + // Printf.printf "special_dylib: 0x%x\n" special_dylib + bind_info.special_dylib = special_dylib; + } + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + let symbol_flags = opcode & BIND_IMMEDIATE_MASK; + let symbol_name = self.data.pread::<&str>(offset)?; + offset += symbol_name.len() + 1; // second time this \0 caused debug woes + bind_info.symbol_name = symbol_name; + bind_info.symbol_flags = symbol_flags; + } + BIND_OPCODE_SET_TYPE_IMM => { + let bind_type = opcode & BIND_IMMEDIATE_MASK; + bind_info.bind_type = bind_type; + } + BIND_OPCODE_SET_ADDEND_SLEB => { + let addend = Sleb128::read(&self.data, &mut offset)?; + bind_info.addend = addend; + } + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + let seg_index = opcode & BIND_IMMEDIATE_MASK; + // dyld sets the address to the segActualLoadAddress(segIndex) + uleb128 + // address = segActualLoadAddress(segmentIndex) + read_uleb128(p, end); + let seg_offset = Uleb128::read(&self.data, &mut offset)?; + bind_info.seg_index = seg_index; + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_ADD_ADDR_ULEB => { + let addr = Uleb128::read(&self.data, &mut offset)?; + let seg_offset = bind_info.seg_offset.wrapping_add(addr); + bind_info.seg_offset = seg_offset; + } + // record the record by placing its value into our list + BIND_OPCODE_DO_BIND => { + // from dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += sizeof(intptr_t); + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let seg_offset = bind_info.seg_offset.wrapping_add(ctx.size() as u64); + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB => { + // dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, 
segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += read_uleb128(p, end) + sizeof(intptr_t); + // we bind the old record, then increment bind info address for the next guy, plus the ptr offset *) + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let addr = Uleb128::read(&self.data, &mut offset)?; + let seg_offset = bind_info + .seg_offset + .wrapping_add(addr) + .wrapping_add(ctx.size() as u64); + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED => { + // dyld: + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += immediate*sizeof(intptr_t) + sizeof(intptr_t); + // break; + // similarly, we bind the old record, then perform address manipulation for the next record + imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let scale = opcode & BIND_IMMEDIATE_MASK; + let size = ctx.size() as u64; + let seg_offset = bind_info + .seg_offset + .wrapping_add(u64::from(scale) * size) + .wrapping_add(size); + bind_info.seg_offset = seg_offset; + } + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB => { + // dyld: + // count = read_uleb128(p, end); + // skip = read_uleb128(p, end); + // for (uint32_t i=0; i < count; ++i) { + // if ( address >= segmentEndAddress ) + // throwBadBindingAddress(address, segmentEndAddress, segmentIndex, start, end, p); + // (this->*handler)(context, address, type, symbolName, symboFlags, addend, libraryOrdinal, "", &last); + // address += skip + sizeof(intptr_t); + // } + // break; + let count = Uleb128::read(&self.data, &mut offset)?; + let skip = Uleb128::read(&self.data, &mut offset)?; + let skip_plus_size = skip + ctx.size() as u64; + for _i in 0..count { + 
imports.push(Import::new(&bind_info, libs, segments, start_of_sequence)); + let seg_offset = bind_info.seg_offset.wrapping_add(skip_plus_size); + bind_info.seg_offset = seg_offset; + } + } + _ => {} + } + } + Ok(()) + } +} diff --git a/third_party/rust/goblin/src/mach/load_command.rs b/third_party/rust/goblin/src/mach/load_command.rs new file mode 100644 index 0000000000..34a44e2bae --- /dev/null +++ b/third_party/rust/goblin/src/mach/load_command.rs @@ -0,0 +1,1857 @@ +//! Load commands tell the kernel and dynamic linker anything from how to load this binary into memory, what the entry point is, apple specific information, to which libraries it requires for dynamic linking + +use crate::error; +use core::convert::TryFrom; +use core::fmt::{self, Display}; +use scroll::{ctx, Endian}; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/////////////////////////////////////// +// Load Commands from mach-o/loader.h +// with some rusty additions +////////////////////////////////////// + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, SizeWith)] +/// Occurs at the beginning of every load command to serve as a sort of tagged union/enum discriminant +pub struct LoadCommandHeader { + pub cmd: u32, + pub cmdsize: u32, +} + +impl Display for LoadCommandHeader { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!( + fmt, + "LoadCommandHeader: {} size: {}", + cmd_to_str(self.cmd), + self.cmdsize + ) + } +} + +pub const SIZEOF_LOAD_COMMAND: usize = 8; + +pub type LcStr = u32; + +pub const SIZEOF_LC_STR: usize = 4; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Section32 { + /// name of this section + pub sectname: [u8; 16], + /// segment this section goes in + pub segname: [u8; 16], + /// memory address of this section + pub addr: u32, + /// size in bytes of this section + pub size: u32, + /// file offset of this section + pub offset: u32, + /// section alignment (power of 2) + pub align: u32, + 
/// file offset of relocation entries + pub reloff: u32, + /// number of relocation entries + pub nreloc: u32, + /// flags (section type and attributes) + pub flags: u32, + /// reserved (for offset or index) + pub reserved1: u32, + /// reserved (for count or sizeof) + pub reserved2: u32, +} + +pub const SIZEOF_SECTION_32: usize = 68; + +/// for 64-bit architectures +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Section64 { + /// name of this section + pub sectname: [u8; 16], + /// segment this section goes in + pub segname: [u8; 16], + /// memory address of this section + pub addr: u64, + /// size in bytes of this section + pub size: u64, + /// file offset of this section + pub offset: u32, + /// section alignment (power of 2) + pub align: u32, + /// file offset of relocation entries + pub reloff: u32, + /// number of relocation entries + pub nreloc: u32, + /// flags (section type and attributes + pub flags: u32, + /// reserved (for offset or index) + pub reserved1: u32, + /// reserved (for count or sizeof) + pub reserved2: u32, + /// reserved + pub reserved3: u32, +} + +pub const SIZEOF_SECTION_64: usize = 80; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SegmentCommand32 { + pub cmd: u32, + pub cmdsize: u32, + pub segname: [u8; 16], + pub vmaddr: u32, + pub vmsize: u32, + pub fileoff: u32, + pub filesize: u32, + pub maxprot: u32, + pub initprot: u32, + pub nsects: u32, + pub flags: u32, +} + +pub const SIZEOF_SEGMENT_COMMAND_32: usize = 56; + +impl SegmentCommand32 { + pub fn name(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) 
+ } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SegmentCommand64 { + pub cmd: u32, + pub cmdsize: u32, + pub segname: [u8; 16], + pub vmaddr: u64, + pub vmsize: u64, + pub fileoff: u64, + pub filesize: u64, + pub maxprot: u32, + pub initprot: u32, + pub nsects: u32, + pub flags: u32, +} + +pub const SIZEOF_SEGMENT_COMMAND_64: usize = 72; + +impl SegmentCommand64 { + pub fn name(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) + } +} +/// Fixed virtual memory shared libraries are identified by two things. The +/// target pathname (the name of the library as found for execution), and the +/// minor version number. The address of where the headers are loaded is in +/// header_addr. (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Fvmlib { + /// library's target pathname + pub name: u32, + /// library's minor version number + pub minor_version: u32, + /// library's header address + pub header_addr: u32, +} + +pub const SIZEOF_FVMLIB: usize = 12; + +/// A fixed virtual shared library (fipub constype == MH_FVMLIB in the mach header) +/// contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. +/// An object that uses a fixed virtual shared library also contains a +/// fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. +/// (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct FvmlibCommand { + /// LC_IDFVMLIB or LC_LOADFVMLIB + pub cmd: u32, + /// includes pathname string + pub cmdsize: u32, + /// the library identification + pub fvmlib: Fvmlib, +} + +pub const SIZEOF_FVMLIB_COMMAND: usize = 20; + +// /// Dynamicly linked shared libraries are identified by two things. The +// /// pathname (the name of the library as found for execution), and the +// /// compatibility version number. 
The pathname must match and the compatibility +// /// number in the user of the library must be greater than or equal to the +// /// library being used. The time stamp is used to record the time a library was +// /// built and copied into user so it can be use to determined if the library used +// /// at runtime is exactly the same as used to built the program. +// struct dylib { +// union lc_str name; // library's path name +// uint32_t timestamp; // library's build time stamp +// uint32_t current_version; // library's current version number +// uint32_t compatibility_version; // library's compatibility vers number +// } + +/// A dynamically linked shared library (fipub constype == MH_DYLIB in the mach header) +/// contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. +/// An object that uses a dynamically linked shared library also contains a +/// dylib_command (cmd == LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, or +/// LC_REEXPORT_DYLIB) for each library it uses. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Dylib { + /// library's path name + pub name: LcStr, + /// library's build time stamp + pub timestamp: u32, + /// library's current version number + pub current_version: u32, + /// library's compatibility vers number + pub compatibility_version: u32, +} + +pub const SIZEOF_DYLIB: usize = 16; + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibCommand { + /// LC_ID_DYLIB, LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB + pub cmd: u32, + /// includes pathname string + pub cmdsize: u32, + /// the library identification + pub dylib: Dylib, +} + +pub const SIZEOF_DYLIB_COMMAND: usize = 20; + +/// A dynamically linked shared library may be a subframework of an umbrella +/// framework. If so it will be linked with "-umbrella umbrella_name" where +/// Where "umbrella_name" is the name of the umbrella framework. 
A subframework +/// can only be linked against by its umbrella framework or other subframeworks +/// that are part of the same umbrella framework. Otherwise the static link +/// editor produces an error and states to link against the umbrella framework. +/// The name of the umbrella framework for subframeworks is recorded in the +/// following structure. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubFrameworkCommand { + /// LC_SUB_FRAMEWORK + pub cmd: u32, + /// includes umbrella string + pub cmdsize: u32, + /// the umbrella framework name + pub umbrella: u32, +} + +pub const SIZEOF_SUB_FRAMEWORK_COMMAND: usize = 12; + +/// For dynamically linked shared libraries that are subframework of an umbrella +/// framework they can allow clients other than the umbrella framework or other +/// subframeworks in the same umbrella framework. To do this the subframework +/// is built with "-allowable_client client_name" and an LC_SUB_CLIENT load +/// command is created for each -allowable_client flag. The client_name is +/// usually a framework name. It can also be a name used for bundles clients +/// where the bundle is built with "-client_name client_name". +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubClientCommand { + /// LC_SUB_CLIENT + pub cmd: u32, + /// includes client string + pub cmdsize: u32, + /// the client name + pub client: LcStr, +} + +pub const SIZEOF_SUB_CLIENT_COMMAND: usize = 12; + +/// A dynamically linked shared library may be a sub_umbrella of an umbrella +/// framework. If so it will be linked with "-sub_umbrella umbrella_name" where +/// Where "umbrella_name" is the name of the sub_umbrella framework. When +/// staticly linking when -twolevel_namespace is in effect a twolevel namespace +/// umbrella framework will only cause its subframeworks and those frameworks +/// listed as sub_umbrella frameworks to be implicited linked in. 
Any other +/// dependent dynamic libraries will not be linked it when -twolevel_namespace +/// is in effect. The primary library recorded by the static linker when +/// resolving a symbol in these libraries will be the umbrella framework. +/// Zero or more sub_umbrella frameworks may be use by an umbrella framework. +/// The name of a sub_umbrella framework is recorded in the following structure. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubUmbrellaCommand { + /// LC_SUB_UMBRELLA + pub cmd: u32, + /// includes sub_umbrella string + pub cmdsize: u32, + /// the sub_umbrella framework name + pub sub_umbrella: LcStr, +} + +pub const SIZEOF_SUB_UMBRELLA_COMMAND: usize = 12; + +/// A dynamically linked shared library may be a sub_library of another shared +/// library. If so it will be linked with "-sub_library library_name" where +/// Where "library_name" is the name of the sub_library shared library. When +/// staticly linking when -twolevel_namespace is in effect a twolevel namespace +/// shared library will only cause its subframeworks and those frameworks +/// listed as sub_umbrella frameworks and libraries listed as sub_libraries to +/// be implicited linked in. Any other dependent dynamic libraries will not be +/// linked it when -twolevel_namespace is in effect. The primary library +/// recorded by the static linker when resolving a symbol in these libraries +/// will be the umbrella framework (or dynamic library). Zero or more sub_library +/// shared libraries may be use by an umbrella framework or (or dynamic library). +/// The name of a sub_library framework is recorded in the following structure. +/// For example /usr/lib/libobjc_profile.A.dylib would be recorded as "libobjc". 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SubLibraryCommand { + /// LC_SUB_LIBRARY + pub cmd: u32, + /// includes sub_library string + pub cmdsize: u32, + /// the sub_library name + pub sub_library: LcStr, +} + +pub const SIZEOF_SUB_LIBRARY_COMMAND: usize = 12; + +/// A program (type == MH_EXECUTE) that is +/// prebound to its dynamic libraries has one of these for each library that +/// the static linker used in prebinding. It contains a bit vector for the +/// modules in the library. The bits indicate which modules are bound (1) and +/// which are not (0) from the library. The bit for module 0 is the low bit +/// of the first byte. So the bit for the Nth module is: +/// (linked_modules[N/8] >> N%8) & 1 +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct PreboundDylibCommand { + /// LC_PREBOUND_DYLIB + pub cmd: u32, + /// includes strings + pub cmdsize: u32, + /// library's path name + pub name: LcStr, + /// number of modules in library + pub nmodules: u32, + /// bit vector of linked modules + // TODO: fixme + pub linked_modules: LcStr, +} + +pub const SIZEOF_PREBOUND_DYLIB_COMMAND: usize = 20; + +/// The name of the dynamic linker +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylinkerCommand { + pub cmd: u32, + pub cmdsize: u32, + pub name: LcStr, +} + +pub const SIZEOF_DYLINKER_COMMAND: usize = 12; + +/// Thread commands contain machine-specific data structures suitable for +/// use in the thread state primitives. The machine specific data structures +/// follow the struct thread_command as follows. +/// Each flavor of machine specific data structure is preceded by an unsigned +/// long constant for the flavor of that data structure, an uint32_t +/// that is the count of longs of the size of the state data structure and then +/// the state data structure follows. This triple may be repeated for many +/// flavors. 
The constants for the flavors, counts and state data structure +/// definitions are expected to be in the header file <machine/thread_status.h>. +/// These machine specific data structures sizes must be multiples of +/// 4 bytes The cmdsize reflects the total size of the thread_command +/// and all of the sizes of the constants for the flavors, counts and state +/// data structures. +/// +/// For executable objects that are unix processes there will be one +/// thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. +/// This is the same as a LC_THREAD, except that a stack is automatically +/// created (based on the shell's limit for the stack size). CommandVariant arguments +/// and environment variables are copied onto that stack. +// unimplemented, see machine/thread_status.h for rest of values: +// uint32_t flavor flavor of thread state +// uint32_t count count of longs in thread state +// struct XXX_thread_state state thread state for this flavor +// ... +#[repr(C)] +#[derive(Copy)] +pub struct ThreadCommand { + /// LC_THREAD or LC_UNIXTHREAD + pub cmd: u32, + /// total size of this command + pub cmdsize: u32, + + /// flavor of thread state (but you also need to know the `cputype`) + pub flavor: u32, + + /// number of elements in `thread_state` that are valid + pub count: u32, + + /// The raw thread state, details of which varies by CPU + pub thread_state: [u32; 70], +} + +impl ThreadCommand { + pub fn instruction_pointer(&self, cputype: super::cputype::CpuType) -> error::Result<u64> { + // The thread command includes a `flavor` value which distinguishes between related thread + // states. However, `dyld` ignores this entirely, blindly interpreting the thread state + // values as a machine-specific set of registers matching the build configuration of the + // active `dyld` binary. + // + // Really the only thing that `dyld` cares is that the Mach header's `cputype`, so that's + // what we use here. 
+ match cputype { + super::cputype::CPU_TYPE_X86 => { + // struct i386_thread_state_t { + // uint32_t eax; + // uint32_t ebx; + // uint32_t ecx; + // uint32_t edx; + // uint32_t edi; + // uint32_t esi; + // uint32_t ebp; + // uint32_t esp; + // uint32_t ss; + // uint32_t eflags; + // uint32_t eip; + // uint32_t cs; + // uint32_t ds; + // uint32_t es; + // uint32_t fs; + // uint32_t gs; + // } + let eip: u32 = self.thread_state[10]; + Ok(u64::from(eip)) + } + super::cputype::CPU_TYPE_X86_64 => { + // struct x86_thread_state64_t { + // uint64_t rax; + // uint64_t rbx; + // uint64_t rcx; + // uint64_t rdx; + // uint64_t rdi; + // uint64_t rsi; + // uint64_t rbp; + // uint64_t rsp; + // uint64_t r8; + // uint64_t r9; + // uint64_t r10; + // uint64_t r11; + // uint64_t r12; + // uint64_t r13; + // uint64_t r14; + // uint64_t r15; + // uint64_t rip; + // uint64_t rflags; + // uint64_t cs; + // uint64_t fs; + // uint64_t gs; + // } + let rip: u64 = + (u64::from(self.thread_state[32])) | ((u64::from(self.thread_state[33])) << 32); + Ok(rip) + } + super::cputype::CPU_TYPE_ARM => { + // struct arm_thread_state32_t { + // uint32_t r[13]; + // uint32_t sp; + // uint32_t lr; + // uint32_t pc; + // uint32_t cpsr; + // } + let pc: u32 = self.thread_state[15]; + Ok(u64::from(pc)) + } + super::cputype::CPU_TYPE_ARM64 | super::cputype::CPU_TYPE_ARM64_32 => { + // struct arm_thread_state64_t { + // uint64_t x[29]; + // uint64_t fp; + // uint64_t lr; + // uint64_t sp; + // uint64_t pc; + // uint32_t cpsr; + // uint32_t pad; + // } + let pc: u64 = + (u64::from(self.thread_state[64])) | ((u64::from(self.thread_state[65])) << 32); + Ok(pc) + } + // https://github.com/m4b/goblin/issues/64 + // Probably a G4 + super::cputype::CPU_TYPE_POWERPC => Ok(u64::from(self.thread_state[0])), + // I think the G5 was the last motorola powerpc processor used by apple before switching to intel cpus. 
+ // unfortunately I don't have any binaries on hand to see what its thread state looks like :/ + // super::cputype::CPU_TYPE_POWERPC64 => { + // } + // Assuming above is added, I don't believe apple ever ported mach-o the mach kernel + // (and hence its binary format) to any other machines except the above, + // but I would be happy to learn otherwise + _ => Err(error::Error::Malformed(format!( + "unable to find instruction pointer for cputype {:?}", + cputype + ))), + } + } +} + +impl<'a> ctx::TryFromCtx<'a, Endian> for ThreadCommand { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], le: Endian) -> error::Result<(Self, usize)> { + let lc = bytes.pread_with::<LoadCommandHeader>(0, le)?; + + // read the thread state flavor and length of the thread state + let flavor: u32 = bytes.pread_with(8, le)?; + let count: u32 = bytes.pread_with(12, le)?; + + if count > 70 { + return Err(error::Error::Malformed(format!( + "thread command specifies {} longs for thread state but we handle only 70", + count + ))); + } + + // get a byte slice of the thread state + let thread_state_byte_length = count as usize * 4; + + // check the length + if bytes.len() < 16 + thread_state_byte_length { + return Err(error::Error::Malformed(format!( + "thread command specifies {} bytes for thread state but has only {}", + thread_state_byte_length, + bytes.len() + ))); + } + + let thread_state_bytes = &bytes[16..16 + thread_state_byte_length]; + + // read the thread state + let mut thread_state: [u32; 70] = [0; 70]; + for (i, state) in thread_state.iter_mut().enumerate().take(count as usize) { + *state = thread_state_bytes.pread_with(i * 4, le)?; + } + + Ok(( + ThreadCommand { + cmd: lc.cmd, + cmdsize: lc.cmdsize, + flavor, + count, + thread_state, + }, + lc.cmdsize as _, + )) + } +} + +impl Clone for ThreadCommand { + fn clone(&self) -> Self { + *self + } +} + +impl fmt::Debug for ThreadCommand { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + 
fmt.debug_struct("ThreadCommand") + .field("cmd", &self.cmd) + .field("cmdsize", &self.cmdsize) + .field("flavor", &self.flavor) + .field("count", &self.count) + .field("thread_state", &&self.thread_state[..]) + .finish() + } +} + +/// The routines command contains the address of the dynamic shared library +/// initialization routine and an index into the module table for the module +/// that defines the routine. Before any modules are used from the library the +/// dynamic linker fully binds the module that defines the initialization routine +/// and then calls it. This gets called before any module initialization +/// routines (used for C++ static constructors) in the library. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct RoutinesCommand32 { + /// LC_ROUTINES + pub cmd: u32, + /// total size of this command + pub cmdsize: u32, + /// address of initialization routine + pub init_address: u32, + /// index into the module table that the init routine is defined in + pub init_module: u32, + pub reserved1: u32, + pub reserved2: u32, + pub reserved3: u32, + pub reserved4: u32, + pub reserved5: u32, + pub reserved6: u32, +} + +/// The 64-bit routines command. Same use as above. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct RoutinesCommand64 { + /// LC_ROUTINES_64 + pub cmd: u32, + /// total size of this command + pub cmdsize: u32, + /// address of initialization routine + pub init_address: u64, + /// index into the module table that the init routine is defined in 8 bytes each + pub init_module: u64, + pub reserved1: u64, + pub reserved2: u64, + pub reserved3: u64, + pub reserved4: u64, + pub reserved5: u64, + pub reserved6: u64, +} + +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SymtabCommand { + pub cmd: u32, + pub cmdsize: u32, + pub symoff: u32, + pub nsyms: u32, + pub stroff: u32, + pub strsize: u32, +} + +impl Default for SymtabCommand { + fn default() -> Self { + SymtabCommand { + cmd: LC_SYMTAB, + cmdsize: SIZEOF_SYMTAB_COMMAND as u32, + symoff: 0, + nsyms: 0, + stroff: 0, + strsize: 0, + } + } +} + +impl SymtabCommand { + pub fn new() -> Self { + Default::default() + } +} + +pub const SIZEOF_SYMTAB_COMMAND: usize = 24; + +/// This is the second set of the symbolic information which is used to support +/// the data structures for the dynamically link editor. +/// +/// The original set of symbolic information in the symtab_command which contains +/// the symbol and string tables must also be present when this load command is +/// present. When this load command is present the symbol table is organized +/// into three groups of symbols: +/// local symbols (static and debugging symbols) - grouped by module +/// defined external symbols - grouped by module (sorted by name if not lib) +/// undefined external symbols (sorted by name if MH_BINDATLOAD is not set, +/// and in order the were seen by the static +/// linker if MH_BINDATLOAD is set) +/// In this load command there are offsets and counts to each of the three groups +/// of symbols. 
+/// +/// This load command contains a the offsets and sizes of the following new +/// symbolic information tables: +/// table of contents +/// module table +/// reference symbol table +/// indirect symbol table +/// The first three tables above (the table of contents, module table and +/// reference symbol table) are only present if the file is a dynamically linked +/// shared library. For executable and object modules, which are files +/// containing only one module, the information that would be in these three +/// tables is determined as follows: +/// table of contents - the defined external symbols are sorted by name +/// module table - the file contains only one module so everything in the +/// file is part of the module. +/// reference symbol table - is the defined and undefined external symbols +/// +/// For dynamically linked shared library files this load command also contains +/// offsets and sizes to the pool of relocation entries for all sections +/// separated into two groups: +/// external relocation entries +/// local relocation entries +/// For executable and object modules the relocation entries continue to hang +/// off the section structures. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DysymtabCommand { + pub cmd: u32, + pub cmdsize: u32, + /// index to local symbols + pub ilocalsym: u32, + /// number of local symbols + pub nlocalsym: u32, + /// index to externally defined symbols + pub iextdefsym: u32, + /// number of externally defined symbols + pub nextdefsym: u32, + /// index to undefined symbols + pub iundefsym: u32, + /// number of undefined symbols + pub nundefsym: u32, + /// file offset to table of contents + pub tocoff: u32, + /// number of entries in table of contents + pub ntoc: u32, + /// file offset to module table + pub modtaboff: u32, + /// number of module table entries + pub nmodtab: u32, + /// offset to referenced symbol table + pub extrefsymoff: u32, + /// number of referenced symbol table entries + pub nextrefsyms: u32, + /// file offset to the indirect symbol table + pub indirectsymoff: u32, + /// number of indirect symbol table entries + pub nindirectsyms: u32, + /// offset to external relocation entries + pub extreloff: u32, + /// number of external relocation entries + pub nextrel: u32, + /// offset to local relocation entries + pub locreloff: u32, + /// number of local relocation entries + pub nlocrel: u32, +} + +impl Default for DysymtabCommand { + fn default() -> Self { + DysymtabCommand { + cmd: LC_DYSYMTAB, + cmdsize: SIZEOF_DYSYMTAB_COMMAND as u32, + ilocalsym: 0, + nlocalsym: 0, + iextdefsym: 0, + nextdefsym: 0, + iundefsym: 0, + nundefsym: 0, + tocoff: 0, + ntoc: 0, + modtaboff: 0, + nmodtab: 0, + extrefsymoff: 0, + nextrefsyms: 0, + indirectsymoff: 0, + nindirectsyms: 0, + extreloff: 0, + nextrel: 0, + locreloff: 0, + nlocrel: 0, + } + } +} + +impl DysymtabCommand { + pub fn new() -> Self { + Default::default() + } +} + +pub const SIZEOF_DYSYMTAB_COMMAND: usize = 80; + +// TODO: unimplemented +/// a table of contents entry +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct 
DylibTableOfContents { + /// the defined external symbol (index into the symbol table) + pub symbol_index: u32, + /// index into the module table this symbol is defined in + pub module_index: u32, +} + +// TODO: unimplemented +/// a module table entry +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibModule { + /// the module name (index into string table) + pub module_name: u32, + ///index into externally defined symbols + pub iextdefsym: u32, + ///number of externally defined symbols + pub nextdefsym: u32, + /// index into reference symbol table + pub irefsym: u32, + ///number of reference symbol table entries + pub nrefsym: u32, + /// index into symbols for local symbols + pub ilocalsym: u32, + ///number of local symbols + pub nlocalsym: u32, + + /// index into external relocation entries + pub iextrel: u32, + /// number of external relocation entries + pub nextrel: u32, + + /// low 16 bits are the index into the init section, high 16 bits are the index into the term section + pub iinit_iterm: u32, + /// low 16 bits are the number of init section entries, high 16 bits are the number of term section entries + pub ninit_nterm: u32, + /// the (__OBJC,_module_info) section + pub objc_module_info_addr: u32, + /// the (__OBJC,__module_info) section + pub objc_module_info_size: u32, +} + +// TODO: unimplemented +/// a 64-bit module table entry +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibModule64 { + /// the module name (index into string table) + pub module_name: u32, + + /// index into externally defined symbols + pub iextdefsym: u32, + /// number of externally defined symbols + pub nextdefsym: u32, + /// index into reference symbol table + pub irefsym: u32, + /// number of reference symbol table entries + pub nrefsym: u32, + /// index into symbols for local symbols + pub ilocalsym: u32, + /// number of local symbols + pub nlocalsym: u32, + + /// index into 
external relocation entries + pub iextrel: u32, + /// number of external relocation entries + pub nextrel: u32, + + /// low 16 bits are the index into the init section, high 16 bits are the index into the term section + pub iinit_iterm: u32, + /// low 16 bits are the number of init section entries, high 16 bits are the number of term section entries + pub ninit_nterm: u32, + + /// the (__OBJC,__module_info) section + pub objc_module_info_size: u32, + /// the (__OBJC,__module_info) section + pub objc_module_info_addr: u64, +} + +/// The entries in the reference symbol table are used when loading the module +/// (both by the static and dynamic link editors) and if the module is unloaded +/// or replaced. Therefore all external symbols (defined and undefined) are +/// listed in the module's reference table. The flags describe the type of +/// reference that is being made. The constants for the flags are defined in +/// <mach-o/nlist.h> as they are also used for symbol table entries. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DylibReference { + /// 24 bits bit-field index into the symbol table + pub isym: [u8; 24], + /// flags to indicate the type of reference + pub flags: u64, +} + +/// The twolevel_hints_command contains the offset and number of hints in the +/// two-level namespace lookup hints table. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct TwolevelHintsCommand { + /// LC_TWOLEVEL_HINTS + pub cmd: u32, + /// sizeof(struct twolevel_hints_command) + pub cmdsize: u32, + /// offset to the hint table + pub offset: u32, + /// number of hints in the hint table + pub nhints: u32, +} + +/// The entries in the two-level namespace lookup hints table are twolevel_hint +/// structs. These provide hints to the dynamic link editor where to start +/// looking for an undefined symbol in a two-level namespace image. 
The +/// isub_image field is an index into the sub-images (sub-frameworks and +/// sub-umbrellas list) that made up the two-level image that the undefined +/// symbol was found in when it was built by the static link editor. If +/// isub-image is 0 the the symbol is expected to be defined in library and not +/// in the sub-images. If isub-image is non-zero it is an index into the array +/// of sub-images for the umbrella with the first index in the sub-images being +/// 1. The array of sub-images is the ordered list of sub-images of the umbrella +/// that would be searched for a symbol that has the umbrella recorded as its +/// primary library. The table of contents index is an index into the +/// library's table of contents. This is used as the starting point of the +/// binary search or a directed linear search. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct TwolevelHint { + /// index into the sub images + pub isub_image: u64, + /// 24 bit field index into the table of contents + pub itoc: [u8; 24], +} + +/// The prebind_cksum_command contains the value of the original check sum for +/// prebound files or zero. When a prebound file is first created or modified +/// for other than updating its prebinding information the value of the check sum +/// is set to zero. When the file has it prebinding re-done and if the value of +/// the check sum is zero the original check sum is calculated and stored in +/// cksum field of this load command in the output file. If when the prebinding +/// is re-done and the cksum field is non-zero it is left unchanged from the +/// input file. 
+// TODO: unimplemented +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct PrebindCksumCommand { + /// LC_PREBIND_CKSUM + pub cmd: u32, + /// sizeof(struct prebind_cksum_command) + pub cmdsize: u32, + /// the check sum or zero + pub cksum: u32, +} + +/// The uuid load command contains a single 128-bit unique random number that +/// identifies an object produced by the static link editor. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct UuidCommand { + /// LC_UUID + pub cmd: u32, + /// sizeof(struct uuid_command) + pub cmdsize: u32, + /// 16 bytes the 128-bit uuid + pub uuid: [u8; 16], +} + +pub const SIZEOF_UUID_COMMAND: usize = 24; + +/// The rpath_command contains a path which at runtime should be added to +/// the current run path used to find @rpath prefixed dylibs. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct RpathCommand { + /// LC_RPATH + pub cmd: u32, + /// includes string + pub cmdsize: u32, + /// path to add to run path + pub path: LcStr, +} + +pub const SIZEOF_RPATH_COMMAND: usize = 12; + +/// The linkedit_data_command contains the offsets and sizes of a blob +/// of data in the __LINKEDIT segment. +#[repr(C)] +#[derive(Default, Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct LinkeditDataCommand { + /// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, + /// LC_DYLIB_CODE_SIGN_DRS, LC_LINKER_OPTIMIZATION_HINT, LC_DYLD_EXPORTS_TRIE, or LC_DYLD_CHAINED_FIXUPS. + pub cmd: u32, + /// sizeof(struct linkedit_data_command) + pub cmdsize: u32, + /// file offset of data in __LINKEDIT segment + pub dataoff: u32, + /// file size of data in __LINKEDIT segment + pub datasize: u32, +} + +pub const SIZEOF_LINKEDIT_DATA_COMMAND: usize = 16; + +/// The encryption_info_command contains the file offset and size of an +/// of an encrypted segment. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct EncryptionInfoCommand32 { + /// LC_ENCRYPTION_INFO + pub cmd: u32, + /// sizeof(struct encryption_info_command) + pub cmdsize: u32, + /// file offset of encrypted range + pub cryptoff: u32, + /// file size of encrypted range + pub cryptsize: u32, + /// which enryption system, 0 means not-encrypted yet + pub cryptid: u32, +} + +pub const SIZEOF_ENCRYPTION_INFO_COMMAND_32: usize = 20; + +/// The encryption_info_command_64 contains the file offset and size of an +/// of an encrypted segment (for use in x86_64 targets). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct EncryptionInfoCommand64 { + /// LC_ENCRYPTION_INFO_64 + pub cmd: u32, + /// sizeof(struct encryption_info_command_64) + pub cmdsize: u32, + /// file offset of encrypted range + pub cryptoff: u32, + /// file size of encrypted range + pub cryptsize: u32, + /// which enryption system, 0 means not-encrypted yet + pub cryptid: u32, + /// padding to make this struct's size a multiple of 8 bytes + pub pad: u32, +} + +pub const SIZEOF_ENCRYPTION_INFO_COMMAND_64: usize = 24; + +/// An enumeration of platforms currently identifiable within a version_min_command. 
+#[non_exhaustive] +#[repr(u32)] +#[derive(Debug)] +pub enum Platform { + Macos = LC_VERSION_MIN_MACOSX, + Iphoneos = LC_VERSION_MIN_IPHONEOS, + Tvos = LC_VERSION_MIN_TVOS, + Watchos = LC_VERSION_MIN_WATCHOS, +} + +impl TryFrom<u32> for Platform { + type Error = error::Error; + + fn try_from(cmd: u32) -> Result<Self, Self::Error> { + Ok(match cmd { + LC_VERSION_MIN_MACOSX => Platform::Macos, + LC_VERSION_MIN_IPHONEOS => Platform::Iphoneos, + LC_VERSION_MIN_TVOS => Platform::Tvos, + LC_VERSION_MIN_WATCHOS => Platform::Watchos, + _ => { + return Err(error::Error::Malformed(format!( + "unknown platform for load command: {:x}", + cmd + ))) + } + }) + } +} + +/// The version_min_command contains the min OS version on which this +/// binary was built to run. +/// +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct VersionMinCommand { + /// LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS, or LC_VERSION_MIN_WATCHOS. + pub cmd: u32, + pub cmdsize: u32, + /// X.Y.Z is encoded in nibbles xxxx.yy.zz + pub version: u32, + /// X.Y.Z is encoded in nibbles xxxx.yy.zz + pub sdk: u32, +} + +impl VersionMinCommand { + pub fn new(platform: Platform) -> Self { + VersionMinCommand { + cmd: platform as u32, + cmdsize: SIZEOF_VERSION_MIN_COMMAND as u32, + version: 0, + sdk: 0, + } + } + + pub fn platform(&self) -> Platform { + // A panic here indicates an incomplete API change above: VersionMinCommand + // can only be constructed from one of the LC_VERSION_* commands or directly + // from a Platform, so an error indicates that a new one hasn't been correctly + // added to the Platform enum. 
+ Platform::try_from(self.cmd).expect("impossible platform (implementation error)") + } +} + +pub const SIZEOF_VERSION_MIN_COMMAND: usize = 16; + +#[repr(C)] +#[derive(Default, Debug, Clone, Copy, Pread, Pwrite, SizeWith)] +pub struct DyldInfoCommand { + /// LC_DYLD_INFO or LC_DYLD_INFO_ONLY + pub cmd: u32, + /// sizeof(struct dyld_info_command) + pub cmdsize: u32, + /// file offset to rebase info + pub rebase_off: u32, + /// size of rebase info + pub rebase_size: u32, + /// file offset to binding info + pub bind_off: u32, + /// size of binding info + pub bind_size: u32, + /// file offset to weak binding info + pub weak_bind_off: u32, + /// size of weak binding info + pub weak_bind_size: u32, + /// file offset to lazy binding info + pub lazy_bind_off: u32, + /// size of lazy binding infs + pub lazy_bind_size: u32, + /// file offset to lazy binding info + pub export_off: u32, + /// size of lazy binding infs + pub export_size: u32, +} + +pub const SIZEOF_DYLIB_INFO_COMMAND: usize = 48; + +/// The linker_option_command contains linker options embedded in object files. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct LinkerOptionCommand { + /// LC_LINKER_OPTION only used in MH_OBJECT fipub constypes + pub cmd: u32, + pub cmdsize: u32, + /// number of strings concatenation of zero terminated UTF8 strings. Zero filled at end to align + pub count: u32, +} + +pub const SIZEOF_LINKER_OPTION_COMMAND: usize = 12; + +/// The symseg_command contains the offset and size of the GNU style +/// symbol table information as described in the header file <symseg.h>. +/// The symbol roots of the symbol segments must also be aligned properly +/// in the file. So the requirement of keeping the offsets aligned to a +/// multiple of a 4 bytes translates to the length field of the symbol +/// roots also being a multiple of a long. Also the padding must again be +/// zeroed. (THIS IS OBSOLETE and no longer supported). 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SymsegCommand { + /// LC_SYMSEG + pub cmd: u32, + /// sizeof(struct symseg_command) + pub cmdsize: u32, + /// symbol segment offset + pub offset: u32, + /// symbol segment size in bytes + pub size: u32, +} + +pub const SIZEOF_SYMSEG_COMMAND: usize = 16; + +/// The ident_command contains a free format string table following the +/// ident_command structure. The strings are null terminated and the size of +/// the command is padded out with zero bytes to a multiple of 4 bytes/ +/// (THIS IS OBSOLETE and no longer supported). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct IdentCommand { + /// LC_IDENT + pub cmd: u32, + /// strings that follow this command + pub cmdsize: u32, +} + +pub const SIZEOF_IDENT_COMMAND: usize = 8; + +/// The fvmfile_command contains a reference to a file to be loaded at the +/// specified virtual address. (Presently, this command is reserved for +/// internal use. The kernel ignores this command when loading a program into +/// memory). +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct FvmfileCommand { + /// LC_FVMFILE + pub cmd: u32, + /// includes pathname string + pub cmdsize: u32, + /// files pathname + pub name: LcStr, + /// files virtual address + pub header_addr: u32, +} + +pub const SIZEOF_FVMFILE_COMMAND: usize = 16; + +/// The entry_point_command is a replacement for thread_command. +/// It is used for main executables to specify the location (file offset) +/// of main(). If -stack_size was used at link time, the stacksize +/// field will contain the stack size need for the main thread. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct EntryPointCommand { + pub cmd: u32, + pub cmdsize: u32, + /// uint64_t file __TEXT offset of main + pub entryoff: u64, + /// uint64_t if not zero, initial stack size + pub stacksize: u64, +} + +pub const SIZEOF_ENTRY_POINT_COMMAND: usize = 24; + +/// The build_version_command contains the min OS version on which this +/// binary was built to run for its platform. The list of known platforms and +/// tool values following it. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct BuildVersionCommand { + /// LC_BUILD_VERSION + pub cmd: u32, + pub cmdsize: u32, + /// platform + pub platform: u32, + /// X.Y.Z is encoded in nibbles xxxx.yy.zz + pub minos: u32, + /// X.Y.Z is encoded in nibbles xxxx.yy.zz + pub sdk: u32, + /// number of tool entries following this + pub ntools: u32, +} + +/// Build tool version +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct BuildToolVersion { + /// enum for the tool + pub tool: u32, + /// version number of the tool + pub version: u32, +} + +/// The LC_FILESET_ENTRY command is used for Mach-O filesets which contain +/// multiple Mach-O's, such as the dyld shared cache and kernelcache +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct FilesetEntryCommand { + /// LC_FILSET_ENTRY + pub cmd: u32, + pub cmdsize: u32, + /// memory address of the dylib + pub vmaddr: u64, + /// file offset of the dylib + pub fileoff: u64, + /// contained entry id + pub entry_id: LcStr, + /// reserved + pub reserved: u32, +} + +/// The source_version_command is an optional load command containing +/// the version of the sources used to build the binary. 
+#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct SourceVersionCommand { + /// LC_SOURCE_VERSION + pub cmd: u32, + pub cmdsize: u32, + /// A.B.C.D.E packed as a24.b10.c10.d10.e10 + pub version: u64, +} + +/// The LC_DATA_IN_CODE load commands uses a linkedit_data_command +/// to point to an array of data_in_code_entry entries. Each entry +/// describes a range of data in a code section. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct DataInCodeEntry { + /// from mach_header to start of data range + pub offset: u32, + /// number of bytes in data range + pub length: u16, + /// a DICE_KIND_* value + pub kind: u16, +} + +/// LC_NOTE commands describe a region of arbitrary data included in a Mach-O +/// file. Its initial use is to record extra data in MH_CORE files. +#[repr(C)] +#[derive(Debug, Clone, Copy, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct NoteCommand { + /// LC_NOTE + pub cmd: u32, + pub cmdsize: u32, + /// owner name for this LC_NOTE + pub data_owner: [u8; 16], + /// file offset of this data + pub offset: u64, + /// length of data region + pub size: u64, +} + +/////////////////////////////////////// +// Constants, et. 
al +/////////////////////////////////////// + +pub const LC_REQ_DYLD: u32 = 0x8000_0000; +pub const LC_LOAD_WEAK_DYLIB: u32 = 0x18 | LC_REQ_DYLD; +pub const LC_RPATH: u32 = 0x1c | LC_REQ_DYLD; +pub const LC_REEXPORT_DYLIB: u32 = 0x1f | LC_REQ_DYLD; +pub const LC_DYLD_INFO_ONLY: u32 = 0x22 | LC_REQ_DYLD; +pub const LC_LOAD_UPWARD_DYLIB: u32 = 0x23 | LC_REQ_DYLD; +pub const LC_MAIN: u32 = 0x28 | LC_REQ_DYLD; +pub const LC_DYLD_EXPORTS_TRIE: u32 = 0x33 | LC_REQ_DYLD; +pub const LC_DYLD_CHAINED_FIXUPS: u32 = 0x34 | LC_REQ_DYLD; +pub const LC_SEGMENT: u32 = 0x1; +pub const LC_SYMTAB: u32 = 0x2; +pub const LC_SYMSEG: u32 = 0x3; +pub const LC_THREAD: u32 = 0x4; +pub const LC_UNIXTHREAD: u32 = 0x5; +pub const LC_LOADFVMLIB: u32 = 0x6; +pub const LC_IDFVMLIB: u32 = 0x7; +pub const LC_IDENT: u32 = 0x8; +pub const LC_FVMFILE: u32 = 0x9; +pub const LC_PREPAGE: u32 = 0xa; +pub const LC_DYSYMTAB: u32 = 0xb; +pub const LC_LOAD_DYLIB: u32 = 0xc; +pub const LC_ID_DYLIB: u32 = 0xd; +pub const LC_LOAD_DYLINKER: u32 = 0xe; +pub const LC_ID_DYLINKER: u32 = 0xf; +pub const LC_PREBOUND_DYLIB: u32 = 0x10; +pub const LC_ROUTINES: u32 = 0x11; +pub const LC_SUB_FRAMEWORK: u32 = 0x12; +pub const LC_SUB_UMBRELLA: u32 = 0x13; +pub const LC_SUB_CLIENT: u32 = 0x14; +pub const LC_SUB_LIBRARY: u32 = 0x15; +pub const LC_TWOLEVEL_HINTS: u32 = 0x16; +pub const LC_PREBIND_CKSUM: u32 = 0x17; +pub const LC_SEGMENT_64: u32 = 0x19; +pub const LC_ROUTINES_64: u32 = 0x1a; +pub const LC_UUID: u32 = 0x1b; +pub const LC_CODE_SIGNATURE: u32 = 0x1d; +pub const LC_SEGMENT_SPLIT_INFO: u32 = 0x1e; +pub const LC_LAZY_LOAD_DYLIB: u32 = 0x20; +pub const LC_ENCRYPTION_INFO: u32 = 0x21; +pub const LC_DYLD_INFO: u32 = 0x22; +pub const LC_VERSION_MIN_MACOSX: u32 = 0x24; +pub const LC_VERSION_MIN_IPHONEOS: u32 = 0x25; +pub const LC_FUNCTION_STARTS: u32 = 0x26; +pub const LC_DYLD_ENVIRONMENT: u32 = 0x27; +pub const LC_DATA_IN_CODE: u32 = 0x29; +pub const LC_SOURCE_VERSION: u32 = 0x2A; +pub const LC_DYLIB_CODE_SIGN_DRS: u32 = 
0x2B; +pub const LC_ENCRYPTION_INFO_64: u32 = 0x2C; +pub const LC_LINKER_OPTION: u32 = 0x2D; +pub const LC_LINKER_OPTIMIZATION_HINT: u32 = 0x2E; +pub const LC_VERSION_MIN_TVOS: u32 = 0x2F; +pub const LC_VERSION_MIN_WATCHOS: u32 = 0x30; +pub const LC_NOTE: u32 = 0x31; +pub const LC_BUILD_VERSION: u32 = 0x32; +pub const LC_FILESET_ENTRY: u32 = 0x35 | LC_REQ_DYLD; +pub const PLATFORM_MACOS: u32 = 1; +pub const PLATFORM_IOS: u32 = 2; +pub const PLATFORM_TVOS: u32 = 3; +pub const PLATFORM_WATCHOS: u32 = 4; +pub const PLATFORM_BRIDGEOS: u32 = 5; +pub const PLATFORM_MACCATALYST: u32 = 6; +pub const PLATFORM_IOSSIMULATOR: u32 = 7; +pub const PLATFORM_TVOSSIMULATOR: u32 = 8; +pub const PLATFORM_WATCHOSSIMULATOR: u32 = 9; +pub const PLATFORM_DRIVERKIT: u32 = 10; +pub const TOOL_CLANG: u32 = 1; +pub const TOOL_SWIFT: u32 = 2; +pub const TOOL_LD: u32 = 3; + +pub fn cmd_to_str(cmd: u32) -> &'static str { + match cmd { + LC_SEGMENT => "LC_SEGMENT", + LC_SYMTAB => "LC_SYMTAB", + LC_SYMSEG => "LC_SYMSEG", + LC_THREAD => "LC_THREAD", + LC_UNIXTHREAD => "LC_UNIXTHREAD", + LC_LOADFVMLIB => "LC_LOADFVMLIB", + LC_IDFVMLIB => "LC_IDFVMLIB", + LC_IDENT => "LC_IDENT", + LC_FVMFILE => "LC_FVMFILE", + LC_PREPAGE => "LC_PREPAGE", + LC_DYSYMTAB => "LC_DYSYMTAB", + LC_LOAD_DYLIB => "LC_LOAD_DYLIB", + LC_ID_DYLIB => "LC_ID_DYLIB", + LC_LOAD_DYLINKER => "LC_LOAD_DYLINKER", + LC_ID_DYLINKER => "LC_ID_DYLINKER", + LC_PREBOUND_DYLIB => "LC_PREBOUND_DYLIB", + LC_ROUTINES => "LC_ROUTINES", + LC_SUB_FRAMEWORK => "LC_SUB_FRAMEWORK", + LC_SUB_UMBRELLA => "LC_SUB_UMBRELLA", + LC_SUB_CLIENT => "LC_SUB_CLIENT", + LC_SUB_LIBRARY => "LC_SUB_LIBRARY", + LC_TWOLEVEL_HINTS => "LC_TWOLEVEL_HINTS", + LC_PREBIND_CKSUM => "LC_PREBIND_CKSUM", + LC_LOAD_WEAK_DYLIB => "LC_LOAD_WEAK_DYLIB", + LC_SEGMENT_64 => "LC_SEGMENT_64", + LC_ROUTINES_64 => "LC_ROUTINES_64", + LC_UUID => "LC_UUID", + LC_RPATH => "LC_RPATH", + LC_CODE_SIGNATURE => "LC_CODE_SIGNATURE", + LC_SEGMENT_SPLIT_INFO => "LC_SEGMENT_SPLIT_INFO", + 
LC_REEXPORT_DYLIB => "LC_REEXPORT_DYLIB", + LC_LAZY_LOAD_DYLIB => "LC_LAZY_LOAD_DYLIB", + LC_ENCRYPTION_INFO => "LC_ENCRYPTION_INFO", + LC_DYLD_INFO => "LC_DYLD_INFO", + LC_DYLD_INFO_ONLY => "LC_DYLD_INFO_ONLY", + LC_LOAD_UPWARD_DYLIB => "LC_LOAD_UPWARD_DYLIB", + LC_VERSION_MIN_MACOSX => "LC_VERSION_MIN_MACOSX", + LC_VERSION_MIN_IPHONEOS => "LC_VERSION_MIN_IPHONEOS", + LC_FUNCTION_STARTS => "LC_FUNCTION_STARTS", + LC_DYLD_ENVIRONMENT => "LC_DYLD_ENVIRONMENT", + LC_MAIN => "LC_MAIN", + LC_DATA_IN_CODE => "LC_DATA_IN_CODE", + LC_SOURCE_VERSION => "LC_SOURCE_VERSION", + LC_DYLIB_CODE_SIGN_DRS => "LC_DYLIB_CODE_SIGN_DRS", + LC_ENCRYPTION_INFO_64 => "LC_ENCRYPTION_INFO_64", + LC_LINKER_OPTION => "LC_LINKER_OPTION", + LC_LINKER_OPTIMIZATION_HINT => "LC_LINKER_OPTIMIZATION_HINT", + LC_VERSION_MIN_TVOS => "LC_VERSION_MIN_TVOS", + LC_VERSION_MIN_WATCHOS => "LC_VERSION_MIN_WATCHOS", + LC_NOTE => "LC_NOTE", + LC_BUILD_VERSION => "LC_BUILD_VERSION", + LC_FILESET_ENTRY => "LC_FILESET_ENTRY", + LC_DYLD_EXPORTS_TRIE => "LC_DYLD_EXPORTS_TRIE", + LC_DYLD_CHAINED_FIXUPS => "LC_DYLD_CHAINED_FIXUPS", + _ => "LC_UNKNOWN", + } +} + +/////////////////////////////////////////// +// Typesafe Command Variants +/////////////////////////////////////////// + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +#[non_exhaustive] +/// The various load commands as a cast-free variant/enum +pub enum CommandVariant { + Segment32(SegmentCommand32), + Segment64(SegmentCommand64), + Uuid(UuidCommand), + Symtab(SymtabCommand), + Symseg(SymsegCommand), + Thread(ThreadCommand), + Unixthread(ThreadCommand), + LoadFvmlib(FvmlibCommand), + IdFvmlib(FvmlibCommand), + Ident(IdentCommand), + Fvmfile(FvmfileCommand), + Prepage(LoadCommandHeader), + Dysymtab(DysymtabCommand), + LoadDylib(DylibCommand), + IdDylib(DylibCommand), + LoadDylinker(DylinkerCommand), + IdDylinker(DylinkerCommand), + PreboundDylib(PreboundDylibCommand), + Routines32(RoutinesCommand32), + Routines64(RoutinesCommand64), + 
SubFramework(SubFrameworkCommand), + SubUmbrella(SubUmbrellaCommand), + SubClient(SubClientCommand), + SubLibrary(SubLibraryCommand), + TwolevelHints(TwolevelHintsCommand), + PrebindCksum(PrebindCksumCommand), + LoadWeakDylib(DylibCommand), + Rpath(RpathCommand), + CodeSignature(LinkeditDataCommand), + SegmentSplitInfo(LinkeditDataCommand), + ReexportDylib(DylibCommand), + LazyLoadDylib(DylibCommand), + EncryptionInfo32(EncryptionInfoCommand32), + EncryptionInfo64(EncryptionInfoCommand64), + DyldInfo(DyldInfoCommand), + DyldInfoOnly(DyldInfoCommand), + LoadUpwardDylib(DylibCommand), + VersionMinMacosx(VersionMinCommand), + VersionMinIphoneos(VersionMinCommand), + FunctionStarts(LinkeditDataCommand), + DyldEnvironment(DylinkerCommand), + Main(EntryPointCommand), + DataInCode(LinkeditDataCommand), + BuildVersion(BuildVersionCommand), + FilesetEntry(FilesetEntryCommand), + SourceVersion(SourceVersionCommand), + DylibCodeSignDrs(LinkeditDataCommand), + LinkerOption(LinkeditDataCommand), + LinkerOptimizationHint(LinkeditDataCommand), + VersionMinTvos(VersionMinCommand), + VersionMinWatchos(VersionMinCommand), + DyldExportsTrie(LinkeditDataCommand), + DyldChainedFixups(LinkeditDataCommand), + Note(NoteCommand), + Unimplemented(LoadCommandHeader), +} + +impl<'a> ctx::TryFromCtx<'a, Endian> for CommandVariant { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], le: Endian) -> error::Result<(Self, usize)> { + use self::CommandVariant::*; + let lc = bytes.pread_with::<LoadCommandHeader>(0, le)?; + let size = lc.cmdsize as usize; + //println!("offset {:#x} cmd: {:#x} size: {:?} ctx: {:?}", offset, lc.cmd, size, le); + if size > bytes.len() { + return Err(error::Error::Malformed(format!( + "{} has size larger than remainder of binary: {:?}", + &lc, + bytes.len() + ))); + } + match lc.cmd { + LC_SEGMENT => { + let comm = bytes.pread_with::<SegmentCommand32>(0, le)?; + Ok((Segment32(comm), size)) + } + LC_SEGMENT_64 => { + let comm = 
bytes.pread_with::<SegmentCommand64>(0, le)?; + Ok((Segment64(comm), size)) + } + LC_DYSYMTAB => { + let comm = bytes.pread_with::<DysymtabCommand>(0, le)?; + Ok((Dysymtab(comm), size)) + } + LC_LOAD_DYLINKER => { + let comm = bytes.pread_with::<DylinkerCommand>(0, le)?; + Ok((LoadDylinker(comm), size)) + } + LC_ID_DYLINKER => { + let comm = bytes.pread_with::<DylinkerCommand>(0, le)?; + Ok((IdDylinker(comm), size)) + } + LC_UUID => { + let comm = bytes.pread_with::<UuidCommand>(0, le)?; + Ok((Uuid(comm), size)) + } + LC_SYMTAB => { + let comm = bytes.pread_with::<SymtabCommand>(0, le)?; + Ok((Symtab(comm), size)) + } + LC_SYMSEG => { + let comm = bytes.pread_with::<SymsegCommand>(0, le)?; + Ok((Symseg(comm), size)) + } + LC_THREAD => { + let comm = bytes.pread_with::<ThreadCommand>(0, le)?; + Ok((Thread(comm), size)) + } + LC_UNIXTHREAD => { + let comm = bytes.pread_with::<ThreadCommand>(0, le)?; + Ok((Unixthread(comm), size)) + } + LC_LOADFVMLIB => { + let comm = bytes.pread_with::<FvmlibCommand>(0, le)?; + Ok((LoadFvmlib(comm), size)) + } + LC_IDFVMLIB => { + let comm = bytes.pread_with::<FvmlibCommand>(0, le)?; + Ok((IdFvmlib(comm), size)) + } + LC_IDENT => { + let comm = bytes.pread_with::<IdentCommand>(0, le)?; + Ok((Ident(comm), size)) + } + LC_FVMFILE => { + let comm = bytes.pread_with::<FvmfileCommand>(0, le)?; + Ok((Fvmfile(comm), size)) + } + LC_PREPAGE => { + let comm = bytes.pread_with::<LoadCommandHeader>(0, le)?; + Ok((Prepage(comm), size)) + } + LC_LOAD_DYLIB => { + let comm = bytes.pread_with::<DylibCommand>(0, le)?; + Ok((LoadDylib(comm), size)) + } + LC_ID_DYLIB => { + let comm = bytes.pread_with::<DylibCommand>(0, le)?; + Ok((IdDylib(comm), size)) + } + LC_PREBOUND_DYLIB => { + let comm = bytes.pread_with::<PreboundDylibCommand>(0, le)?; + Ok((PreboundDylib(comm), size)) + } + LC_ROUTINES => { + let comm = bytes.pread_with::<RoutinesCommand32>(0, le)?; + Ok((Routines32(comm), size)) + } + LC_ROUTINES_64 => { + let comm = 
bytes.pread_with::<RoutinesCommand64>(0, le)?; + Ok((Routines64(comm), size)) + } + LC_SUB_FRAMEWORK => { + let comm = bytes.pread_with::<SubFrameworkCommand>(0, le)?; + Ok((SubFramework(comm), size)) + } + LC_SUB_UMBRELLA => { + let comm = bytes.pread_with::<SubUmbrellaCommand>(0, le)?; + Ok((SubUmbrella(comm), size)) + } + LC_SUB_CLIENT => { + let comm = bytes.pread_with::<SubClientCommand>(0, le)?; + Ok((SubClient(comm), size)) + } + LC_SUB_LIBRARY => { + let comm = bytes.pread_with::<SubLibraryCommand>(0, le)?; + Ok((SubLibrary(comm), size)) + } + LC_TWOLEVEL_HINTS => { + let comm = bytes.pread_with::<TwolevelHintsCommand>(0, le)?; + Ok((TwolevelHints(comm), size)) + } + LC_PREBIND_CKSUM => { + let comm = bytes.pread_with::<PrebindCksumCommand>(0, le)?; + Ok((PrebindCksum(comm), size)) + } + LC_LOAD_WEAK_DYLIB => { + let comm = bytes.pread_with::<DylibCommand>(0, le)?; + Ok((LoadWeakDylib(comm), size)) + } + LC_RPATH => { + let comm = bytes.pread_with::<RpathCommand>(0, le)?; + Ok((Rpath(comm), size)) + } + LC_CODE_SIGNATURE => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((CodeSignature(comm), size)) + } + LC_SEGMENT_SPLIT_INFO => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((SegmentSplitInfo(comm), size)) + } + LC_REEXPORT_DYLIB => { + let comm = bytes.pread_with::<DylibCommand>(0, le)?; + Ok((ReexportDylib(comm), size)) + } + LC_LAZY_LOAD_DYLIB => { + let comm = bytes.pread_with::<DylibCommand>(0, le)?; + Ok((LazyLoadDylib(comm), size)) + } + LC_ENCRYPTION_INFO => { + let comm = bytes.pread_with::<EncryptionInfoCommand32>(0, le)?; + Ok((EncryptionInfo32(comm), size)) + } + LC_ENCRYPTION_INFO_64 => { + let comm = bytes.pread_with::<EncryptionInfoCommand64>(0, le)?; + Ok((EncryptionInfo64(comm), size)) + } + LC_DYLD_INFO => { + let comm = bytes.pread_with::<DyldInfoCommand>(0, le)?; + Ok((DyldInfo(comm), size)) + } + LC_DYLD_INFO_ONLY => { + let comm = bytes.pread_with::<DyldInfoCommand>(0, le)?; + 
Ok((DyldInfoOnly(comm), size)) + } + LC_LOAD_UPWARD_DYLIB => { + let comm = bytes.pread_with::<DylibCommand>(0, le)?; + Ok((LoadUpwardDylib(comm), size)) + } + LC_VERSION_MIN_MACOSX => { + let comm = bytes.pread_with::<VersionMinCommand>(0, le)?; + Ok((VersionMinMacosx(comm), size)) + } + LC_VERSION_MIN_IPHONEOS => { + let comm = bytes.pread_with::<VersionMinCommand>(0, le)?; + Ok((VersionMinIphoneos(comm), size)) + } + LC_FUNCTION_STARTS => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((FunctionStarts(comm), size)) + } + LC_DYLD_ENVIRONMENT => { + let comm = bytes.pread_with::<DylinkerCommand>(0, le)?; + Ok((DyldEnvironment(comm), size)) + } + LC_MAIN => { + let comm = bytes.pread_with::<EntryPointCommand>(0, le)?; + Ok((Main(comm), size)) + } + LC_DATA_IN_CODE => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((DataInCode(comm), size)) + } + LC_BUILD_VERSION => { + let comm = bytes.pread_with::<BuildVersionCommand>(0, le)?; + Ok((BuildVersion(comm), size)) + } + LC_FILESET_ENTRY => { + let comm = bytes.pread_with::<FilesetEntryCommand>(0, le)?; + Ok((FilesetEntry(comm), size)) + } + LC_SOURCE_VERSION => { + let comm = bytes.pread_with::<SourceVersionCommand>(0, le)?; + Ok((SourceVersion(comm), size)) + } + LC_DYLIB_CODE_SIGN_DRS => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((DylibCodeSignDrs(comm), size)) + } + LC_LINKER_OPTION => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((LinkerOption(comm), size)) + } + LC_LINKER_OPTIMIZATION_HINT => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((LinkerOptimizationHint(comm), size)) + } + LC_VERSION_MIN_TVOS => { + let comm = bytes.pread_with::<VersionMinCommand>(0, le)?; + Ok((VersionMinTvos(comm), size)) + } + LC_VERSION_MIN_WATCHOS => { + let comm = bytes.pread_with::<VersionMinCommand>(0, le)?; + Ok((VersionMinWatchos(comm), size)) + } + LC_DYLD_EXPORTS_TRIE => { + let comm = 
bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((DyldExportsTrie(comm), size)) + } + LC_DYLD_CHAINED_FIXUPS => { + let comm = bytes.pread_with::<LinkeditDataCommand>(0, le)?; + Ok((DyldChainedFixups(comm), size)) + } + LC_NOTE => { + let comm = bytes.pread_with::<NoteCommand>(0, le)?; + Ok((Note(comm), size)) + } + _ => Ok((Unimplemented(lc), size)), + } + } +} + +impl CommandVariant { + pub fn cmdsize(&self) -> usize { + use self::CommandVariant::*; + let cmdsize = match *self { + Segment32(comm) => comm.cmdsize, + Segment64(comm) => comm.cmdsize, + Uuid(comm) => comm.cmdsize, + Symtab(comm) => comm.cmdsize, + Symseg(comm) => comm.cmdsize, + Thread(comm) => comm.cmdsize, + Unixthread(comm) => comm.cmdsize, + LoadFvmlib(comm) => comm.cmdsize, + IdFvmlib(comm) => comm.cmdsize, + Ident(comm) => comm.cmdsize, + Fvmfile(comm) => comm.cmdsize, + Prepage(comm) => comm.cmdsize, + Dysymtab(comm) => comm.cmdsize, + LoadDylib(comm) => comm.cmdsize, + IdDylib(comm) => comm.cmdsize, + LoadDylinker(comm) => comm.cmdsize, + IdDylinker(comm) => comm.cmdsize, + PreboundDylib(comm) => comm.cmdsize, + Routines32(comm) => comm.cmdsize, + Routines64(comm) => comm.cmdsize, + SubFramework(comm) => comm.cmdsize, + SubUmbrella(comm) => comm.cmdsize, + SubClient(comm) => comm.cmdsize, + SubLibrary(comm) => comm.cmdsize, + TwolevelHints(comm) => comm.cmdsize, + PrebindCksum(comm) => comm.cmdsize, + LoadWeakDylib(comm) => comm.cmdsize, + Rpath(comm) => comm.cmdsize, + CodeSignature(comm) => comm.cmdsize, + SegmentSplitInfo(comm) => comm.cmdsize, + ReexportDylib(comm) => comm.cmdsize, + LazyLoadDylib(comm) => comm.cmdsize, + EncryptionInfo32(comm) => comm.cmdsize, + EncryptionInfo64(comm) => comm.cmdsize, + DyldInfo(comm) => comm.cmdsize, + DyldInfoOnly(comm) => comm.cmdsize, + LoadUpwardDylib(comm) => comm.cmdsize, + VersionMinMacosx(comm) => comm.cmdsize, + VersionMinIphoneos(comm) => comm.cmdsize, + FunctionStarts(comm) => comm.cmdsize, + DyldEnvironment(comm) => comm.cmdsize, + 
Main(comm) => comm.cmdsize, + DataInCode(comm) => comm.cmdsize, + BuildVersion(comm) => comm.cmdsize, + FilesetEntry(comm) => comm.cmdsize, + SourceVersion(comm) => comm.cmdsize, + DylibCodeSignDrs(comm) => comm.cmdsize, + LinkerOption(comm) => comm.cmdsize, + LinkerOptimizationHint(comm) => comm.cmdsize, + VersionMinTvos(comm) => comm.cmdsize, + VersionMinWatchos(comm) => comm.cmdsize, + DyldExportsTrie(comm) => comm.cmdsize, + DyldChainedFixups(comm) => comm.cmdsize, + Note(comm) => comm.cmdsize, + Unimplemented(comm) => comm.cmdsize, + }; + cmdsize as usize + } + pub fn cmd(&self) -> u32 { + use self::CommandVariant::*; + match *self { + Segment32(comm) => comm.cmd, + Segment64(comm) => comm.cmd, + Uuid(comm) => comm.cmd, + Symtab(comm) => comm.cmd, + Symseg(comm) => comm.cmd, + Thread(comm) => comm.cmd, + Unixthread(comm) => comm.cmd, + LoadFvmlib(comm) => comm.cmd, + IdFvmlib(comm) => comm.cmd, + Ident(comm) => comm.cmd, + Fvmfile(comm) => comm.cmd, + Prepage(comm) => comm.cmd, + Dysymtab(comm) => comm.cmd, + LoadDylib(comm) => comm.cmd, + IdDylib(comm) => comm.cmd, + LoadDylinker(comm) => comm.cmd, + IdDylinker(comm) => comm.cmd, + PreboundDylib(comm) => comm.cmd, + Routines32(comm) => comm.cmd, + Routines64(comm) => comm.cmd, + SubFramework(comm) => comm.cmd, + SubUmbrella(comm) => comm.cmd, + SubClient(comm) => comm.cmd, + SubLibrary(comm) => comm.cmd, + TwolevelHints(comm) => comm.cmd, + PrebindCksum(comm) => comm.cmd, + LoadWeakDylib(comm) => comm.cmd, + Rpath(comm) => comm.cmd, + CodeSignature(comm) => comm.cmd, + SegmentSplitInfo(comm) => comm.cmd, + ReexportDylib(comm) => comm.cmd, + LazyLoadDylib(comm) => comm.cmd, + EncryptionInfo32(comm) => comm.cmd, + EncryptionInfo64(comm) => comm.cmd, + DyldInfo(comm) => comm.cmd, + DyldInfoOnly(comm) => comm.cmd, + LoadUpwardDylib(comm) => comm.cmd, + VersionMinMacosx(comm) => comm.cmd, + VersionMinIphoneos(comm) => comm.cmd, + FunctionStarts(comm) => comm.cmd, + DyldEnvironment(comm) => comm.cmd, + Main(comm) 
=> comm.cmd, + DataInCode(comm) => comm.cmd, + BuildVersion(comm) => comm.cmd, + FilesetEntry(comm) => comm.cmd, + SourceVersion(comm) => comm.cmd, + DylibCodeSignDrs(comm) => comm.cmd, + LinkerOption(comm) => comm.cmd, + LinkerOptimizationHint(comm) => comm.cmd, + VersionMinTvos(comm) => comm.cmd, + VersionMinWatchos(comm) => comm.cmd, + DyldExportsTrie(comm) => comm.cmd, + DyldChainedFixups(comm) => comm.cmd, + Note(comm) => comm.cmd, + Unimplemented(comm) => comm.cmd, + } + } +} + +#[derive(Debug)] +/// A tagged LoadCommand union +pub struct LoadCommand { + /// The offset this load command occurs at + pub offset: usize, + /// Which load command this is inside a variant + pub command: CommandVariant, +} + +impl LoadCommand { + /// Parse a load command from `bytes` at `offset` with the `le` endianness + pub fn parse(bytes: &[u8], offset: &mut usize, le: scroll::Endian) -> error::Result<Self> { + let start = *offset; + let command = bytes.pread_with::<CommandVariant>(start, le)?; + let size = command.cmdsize(); + *offset = start + size; + Ok(LoadCommand { + offset: start, + command, + }) + } +} diff --git a/third_party/rust/goblin/src/mach/mod.rs b/third_party/rust/goblin/src/mach/mod.rs new file mode 100644 index 0000000000..1636000080 --- /dev/null +++ b/third_party/rust/goblin/src/mach/mod.rs @@ -0,0 +1,586 @@ +//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions +use alloc::vec::Vec; +use core::fmt; + +use log::debug; + +use scroll::ctx::SizeWith; +use scroll::{Pread, BE}; + +use crate::{archive, container}; +use crate::{error, take_hint_bytes}; + +pub mod bind_opcodes; +pub mod constants; +pub mod exports; +pub mod fat; +pub mod header; +pub mod imports; +pub mod load_command; +pub mod relocation; +pub mod segment; +pub mod symbols; + +pub use self::constants::cputype; + +/// Returns a big endian magical number +pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> { + Ok(bytes.pread_with::<u32>(offset, scroll::BE)?) 
+} + +/// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. +pub fn parse_magic_and_ctx( + bytes: &[u8], + offset: usize, +) -> error::Result<(u32, Option<container::Ctx>)> { + use crate::container::Container; + use crate::mach::header::*; + let magic = bytes.pread_with::<u32>(offset, BE)?; + let ctx = match magic { + MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { + let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64; + let le = scroll::Endian::from(is_lsb); + let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { + Container::Big + } else { + Container::Little + }; + Some(container::Ctx::new(container, le)) + } + _ => None, + }; + Ok((magic, ctx)) +} + +/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser +pub struct MachO<'a> { + /// The mach-o header + pub header: header::Header, + /// The load commands tell the kernel and dynamic linker how to use/interpret this binary + pub load_commands: Vec<load_command::LoadCommand>, + /// The load command "segments" - typically the pieces of the binary that are loaded into memory + pub segments: segment::Segments<'a>, + /// The "Nlist" style symbols in this binary - strippable + pub symbols: Option<symbols::Symbols<'a>>, + /// The dylibs this library depends on + pub libs: Vec<&'a str>, + /// The runtime search paths for dylibs this library depends on + pub rpaths: Vec<&'a str>, + /// The entry point (as a virtual memory address), 0 if none + pub entry: u64, + /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint + pub old_style_entry: bool, + /// The name of the dylib, if any + pub name: Option<&'a str>, + /// Are we a little-endian binary? 
+ pub little_endian: bool, + /// Are we a 64-bit binary + pub is_64: bool, + data: &'a [u8], + ctx: container::Ctx, + export_trie: Option<exports::ExportTrie<'a>>, + bind_interpreter: Option<imports::BindInterpreter<'a>>, +} + +impl<'a> fmt::Debug for MachO<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MachO") + .field("header", &self.header) + .field("load_commands", &self.load_commands) + .field("segments", &self.segments) + .field("entry", &self.entry) + .field("old_style_entry", &self.old_style_entry) + .field("libs", &self.libs) + .field("name", &self.name) + .field("little_endian", &self.little_endian) + .field("is_64", &self.is_64) + .field("symbols()", &self.symbols().collect::<Vec<_>>()) + .field("exports()", &self.exports()) + .field("imports()", &self.imports()) + .finish() + } +} + +impl<'a> MachO<'a> { + /// Is this a relocatable object file? + pub fn is_object_file(&self) -> bool { + self.header.filetype == header::MH_OBJECT + } + /// Return an iterator over all the symbols in this binary + pub fn symbols(&self) -> symbols::SymbolIterator<'a> { + if let Some(ref symbols) = self.symbols { + symbols.into_iter() + } else { + symbols::SymbolIterator::default() + } + } + /// Return a vector of the relocations in this binary + pub fn relocations( + &self, + ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> { + debug!("Iterating relocations"); + let mut relocs = Vec::new(); + for (_i, segment) in (&self.segments).into_iter().enumerate() { + for (j, section) in segment.into_iter().enumerate() { + let (section, _data) = section?; + if section.nreloc > 0 { + relocs.push((j, section.iter_relocations(self.data, self.ctx), section)); + } + } + } + Ok(relocs) + } + /// Return the exported symbols in this binary (if any) + pub fn exports(&self) -> error::Result<Vec<exports::Export>> { + if let Some(ref trie) = self.export_trie { + trie.exports(self.libs.as_slice()) + } else { + Ok(vec![]) + } + } + 
/// Return the imported symbols in this binary that dyld knows about (if any) + pub fn imports(&self) -> error::Result<Vec<imports::Import>> { + if let Some(ref interpreter) = self.bind_interpreter { + interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) + } else { + Ok(vec![]) + } + } + /// Parses the Mach-o binary from `bytes` at `offset` + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> { + let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; + let ctx = if let Some(ctx) = maybe_ctx { + ctx + } else { + return Err(error::Error::BadMagic(u64::from(magic))); + }; + debug!("Ctx: {:?}", ctx); + let offset = &mut offset; + let header: header::Header = bytes.pread_with(*offset, ctx)?; + debug!("Mach-o header: {:?}", header); + let little_endian = ctx.le.is_little(); + let is_64 = ctx.container.is_big(); + *offset += header::Header::size_with(&ctx.container); + let ncmds = header.ncmds; + + let sizeofcmds = header.sizeofcmds as usize; + // a load cmd is at least 2 * 4 bytes, (type, sizeof) + if ncmds > sizeofcmds / 8 || sizeofcmds > bytes.len() { + return Err(error::Error::BufferTooShort(ncmds, "load commands")); + } + + let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds); + let mut symbols = None; + let mut libs = vec!["self"]; + let mut rpaths = vec![]; + let mut export_trie = None; + let mut bind_interpreter = None; + let mut unixthread_entry_address = None; + let mut main_entry_offset = None; + let mut name = None; + let mut segments = segment::Segments::new(ctx); + for i in 0..ncmds { + let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; + debug!("{} - {:?}", i, cmd); + match cmd.command { + load_command::CommandVariant::Segment32(command) => { + // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? + segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) 
+ } + load_command::CommandVariant::Segment64(command) => { + segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?) + } + load_command::CommandVariant::Symtab(command) => { + symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); + } + load_command::CommandVariant::LoadDylib(command) + | load_command::CommandVariant::LoadUpwardDylib(command) + | load_command::CommandVariant::ReexportDylib(command) + | load_command::CommandVariant::LoadWeakDylib(command) + | load_command::CommandVariant::LazyLoadDylib(command) => { + let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs.push(lib); + } + load_command::CommandVariant::Rpath(command) => { + let rpath = bytes.pread::<&str>(cmd.offset + command.path as usize)?; + rpaths.push(rpath); + } + load_command::CommandVariant::DyldInfo(command) + | load_command::CommandVariant::DyldInfoOnly(command) => { + export_trie = Some(exports::ExportTrie::new(bytes, &command)); + bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command)); + } + load_command::CommandVariant::DyldExportsTrie(command) => { + export_trie = Some(exports::ExportTrie::new_from_linkedit_data_command( + bytes, &command, + )); + } + load_command::CommandVariant::Unixthread(command) => { + // dyld cares only about the first LC_UNIXTHREAD + if unixthread_entry_address.is_none() { + unixthread_entry_address = + Some(command.instruction_pointer(header.cputype)?); + } + } + load_command::CommandVariant::Main(command) => { + // dyld cares only about the first LC_MAIN + if main_entry_offset.is_none() { + main_entry_offset = Some(command.entryoff); + } + } + load_command::CommandVariant::IdDylib(command) => { + let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; + libs[0] = id; + name = Some(id); + } + _ => (), + } + cmds.push(cmd) + } + + // dyld prefers LC_MAIN over LC_UNIXTHREAD + // choose the same way here + let (entry, old_style_entry) = if let Some(offset) = main_entry_offset 
{ + // map the entrypoint offset to a virtual memory address + let base_address = segments + .iter() + .filter(|s| &s.segname[0..7] == b"__TEXT\0") + .map(|s| s.vmaddr - s.fileoff) + .next() + .ok_or_else(|| { + error::Error::Malformed(format!( + "image specifies LC_MAIN offset {} but has no __TEXT segment", + offset + )) + })?; + + (base_address + offset, false) + } else if let Some(address) = unixthread_entry_address { + (address, true) + } else { + (0, false) + }; + + Ok(MachO { + header, + load_commands: cmds, + segments, + symbols, + libs, + rpaths, + export_trie, + bind_interpreter, + entry, + old_style_entry, + name, + ctx, + is_64, + little_endian, + data: bytes, + }) + } +} + +/// A Mach-o multi architecture (Fat) binary container +pub struct MultiArch<'a> { + data: &'a [u8], + start: usize, + pub narches: usize, +} + +/// Iterator over the fat architecture headers in a `MultiArch` container +pub struct FatArchIterator<'a> { + index: usize, + data: &'a [u8], + narches: usize, + start: usize, +} + +/// A single architecture froma multi architecture binary container +/// ([MultiArch]). 
+#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +pub enum SingleArch<'a> { + MachO(MachO<'a>), + Archive(archive::Archive<'a>), +} + +impl<'a> Iterator for FatArchIterator<'a> { + type Item = error::Result<fat::FatArch>; + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.narches { + None + } else { + let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start; + let arch = self + .data + .pread_with::<fat::FatArch>(offset, scroll::BE) + .map_err(core::convert::Into::into); + self.index += 1; + Some(arch) + } + } +} + +/// Iterator over every entry contained in this `MultiArch` container +pub struct SingleArchIterator<'a> { + index: usize, + data: &'a [u8], + narches: usize, + start: usize, +} + +pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<crate::Hint> { + if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC { + Ok(crate::Hint::Archive) + } else { + let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?; + match magic { + header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => { + if let Some(ctx) = maybe_ctx { + Ok(crate::Hint::Mach(crate::HintData { + is_lsb: ctx.le.is_little(), + is_64: Some(ctx.container.is_big()), + })) + } else { + Err(error::Error::Malformed(format!( + "Correct mach magic {:#x} does not have a matching parsing context!", + magic + ))) + } + } + fat::FAT_MAGIC => { + // should probably verify this is always Big Endian... + let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize; + Ok(crate::Hint::MachFat(narchitectures)) + } + _ => Ok(crate::Hint::Unknown(bytes.pread::<u64>(0)?)), + } + } +} + +fn extract_multi_entry(bytes: &[u8]) -> error::Result<SingleArch> { + if let Some(hint_bytes) = take_hint_bytes(bytes) { + match peek_bytes(hint_bytes)? 
{ + crate::Hint::Mach(_) => { + let binary = MachO::parse(bytes, 0)?; + Ok(SingleArch::MachO(binary)) + } + crate::Hint::Archive => { + let archive = archive::Archive::parse(bytes)?; + Ok(SingleArch::Archive(archive)) + } + _ => Err(error::Error::Malformed(format!( + "multi-arch entry must be a Mach-O binary or an archive" + ))), + } + } else { + Err(error::Error::Malformed(format!("Object is too small"))) + } +} + +impl<'a> Iterator for SingleArchIterator<'a> { + type Item = error::Result<SingleArch<'a>>; + fn next(&mut self) -> Option<Self::Item> { + if self.index >= self.narches { + None + } else { + let index = self.index; + let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; + self.index += 1; + match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) { + Ok(arch) => { + let bytes = arch.slice(self.data); + Some(extract_multi_entry(bytes)) + } + Err(e) => Some(Err(e.into())), + } + } + } +} + +impl<'a, 'b> IntoIterator for &'b MultiArch<'a> { + type Item = error::Result<SingleArch<'a>>; + type IntoIter = SingleArchIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + SingleArchIterator { + index: 0, + data: self.data, + narches: self.narches, + start: self.start, + } + } +} + +impl<'a> MultiArch<'a> { + /// Lazily construct `Self` + pub fn new(bytes: &'a [u8]) -> error::Result<Self> { + let header = fat::FatHeader::parse(bytes)?; + Ok(MultiArch { + data: bytes, + start: fat::SIZEOF_FAT_HEADER, + narches: header.nfat_arch as usize, + }) + } + /// Iterate every fat arch header + pub fn iter_arches(&self) -> FatArchIterator { + FatArchIterator { + index: 0, + data: self.data, + narches: self.narches, + start: self.start, + } + } + /// Return all the architectures in this binary + pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> { + if self.narches > self.data.len() / fat::SIZEOF_FAT_ARCH { + return Err(error::Error::BufferTooShort(self.narches, "arches")); + } + + let mut arches = Vec::with_capacity(self.narches); + for arch in 
self.iter_arches() { + arches.push(arch?); + } + Ok(arches) + } + /// Try to get the Mach-o binary at `index` + pub fn get(&self, index: usize) -> error::Result<SingleArch<'a>> { + if index >= self.narches { + return Err(error::Error::Malformed(format!( + "Requested the {}-th binary, but there are only {} architectures in this container", + index, self.narches + ))); + } + let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; + let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?; + let bytes = arch.slice(self.data); + extract_multi_entry(bytes) + } + + pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>( + &'a self, + f: F, + ) -> Option<error::Result<SingleArch<'a>>> { + for (i, arch) in self.iter_arches().enumerate() { + if f(arch) { + return Some(self.get(i)); + } + } + None + } + /// Try and find the `cputype` in `Self`, if there is one + pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> { + for arch in self.iter_arches() { + let arch = arch?; + if arch.cputype == cputype { + return Ok(Some(arch)); + } + } + Ok(None) + } +} + +impl<'a> fmt::Debug for MultiArch<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("MultiArch") + .field("arches", &self.arches().unwrap_or_default()) + .field("data", &self.data.len()) + .finish() + } +} + +#[derive(Debug)] +#[allow(clippy::large_enum_variant)] +/// Either a collection of multiple architectures, or a single mach-o binary +pub enum Mach<'a> { + /// A "fat" multi-architecture binary container + Fat(MultiArch<'a>), + /// A regular Mach-o binary + Binary(MachO<'a>), +} + +impl<'a> Mach<'a> { + /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary + pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + let size = bytes.len(); + if size < 4 { + let error = error::Error::Malformed("size is smaller than a magical number".into()); + return Err(error); + } + let magic = peek(&bytes, 0)?; + match magic { + 
fat::FAT_MAGIC => { + let multi = MultiArch::new(bytes)?; + Ok(Mach::Fat(multi)) + } + // we might be a regular binary + _ => { + let binary = MachO::parse(bytes, 0)?; + Ok(Mach::Binary(binary)) + } + } + } +} + +#[cfg(test)] +mod test { + use super::{Mach, SingleArch}; + + #[test] + fn parse_multi_arch_of_macho_binaries() { + // Create via: + // clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c + // clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c + // lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64 + // strip hello_world_fat_binaries + let bytes = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/hello_world_fat_binaries" + )); + let mach = Mach::parse(bytes).expect("failed to parse input file"); + match mach { + Mach::Fat(fat) => { + assert!(fat.into_iter().count() > 0); + for entry in fat.into_iter() { + let entry = entry.expect("failed to read entry"); + match entry { + SingleArch::MachO(macho) => { + assert!(macho.symbols().count() > 0); + } + _ => panic!("expected MultiArchEntry::MachO, got {:?}", entry), + } + } + } + Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"), + } + } + + #[test] + fn parse_multi_arch_of_archives() { + // Created with: + // clang -c -o /tmp/hello_world.o hello_world.c + // ar -r /tmp/hello_world.a /tmp/hello_world.o + // lipo -create -output hello_world_fat_archives /tmp/hello_world.a + // strip hello_world_fat_archives + let bytes = include_bytes!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/assets/hello_world_fat_archives" + )); + let mach = Mach::parse(bytes).expect("failed to parse input file"); + match mach { + Mach::Fat(fat) => { + assert!(fat.into_iter().count() > 0); + for entry in fat.into_iter() { + let entry = entry.expect("failed to read entry"); + match entry { + SingleArch::Archive(archive) => { + assert!(!archive.members().is_empty()) + } + _ => panic!("expected MultiArchEntry::Archive, got {:?}", entry), + } + } + 
} + Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"), + } + } +} diff --git a/third_party/rust/goblin/src/mach/relocation.rs b/third_party/rust/goblin/src/mach/relocation.rs new file mode 100644 index 0000000000..8e39fb749f --- /dev/null +++ b/third_party/rust/goblin/src/mach/relocation.rs @@ -0,0 +1,222 @@ +// Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD +// format. The modifications from the original format were changing the value +// of the r_symbolnum field for "local" (r_extern == 0) relocation entries. +// This modification is required to support symbols in an arbitrary number of +// sections not just the three sections (text, data and bss) in a 4.3BSD file. +// Also the last 4 bits have had the r_type tag added to them. + +// The r_address is not really the address as it's name indicates but an offset. +// In 4.3BSD a.out objects this offset is from the start of the "segment" for +// which relocation entry is for (text or data). For Mach-O object files it is +// also an offset but from the start of the "section" for which the relocation +// entry is for. See comments in <mach-o/loader.h> about the r_address feild +// in images for used with the dynamic linker. + +// In 4.3BSD a.out objects if r_extern is zero then r_symbolnum is an ordinal +// for the segment the symbol being relocated is in. These ordinals are the +// symbol types N_TEXT, N_DATA, N_BSS or N_ABS. In Mach-O object files these +// ordinals refer to the sections in the object file in the order their section +// structures appear in the headers of the object file they are in. The first +// section has the ordinal 1, the second 2, and so on. This means that the +// same ordinal in two different object files could refer to two different +// sections. And further could have still different ordinals when combined +// by the link-editor. The value R_ABS is used for relocation entries for +// absolute symbols which need no further relocation. 
+use crate::mach; +use core::fmt; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +// TODO: armv7 relocations are scattered, must and r_address with 0x8000_0000 to check if its scattered or not +#[derive(Copy, Clone, Pread, Pwrite, IOwrite, SizeWith, IOread)] +#[repr(C)] +pub struct RelocationInfo { + /// Offset in the section to what is being relocated + pub r_address: i32, + /// Contains all of the relocation info as a bitfield. + /// r_symbolnum, 24 bits, r_pcrel 1 bit, r_length 2 bits, r_extern 1 bit, r_type 4 bits + pub r_info: u32, +} + +pub const SIZEOF_RELOCATION_INFO: usize = 8; + +impl RelocationInfo { + /// Symbol index if `r_extern` == 1 or section ordinal if `r_extern` == 0. In bits :24 + #[inline] + pub fn r_symbolnum(self) -> usize { + (self.r_info & 0x00ff_ffffu32) as usize + } + /// Was relocated pc relative already, 1 bit + #[inline] + pub fn r_pcrel(self) -> u8 { + ((self.r_info & 0x0100_0000u32) >> 24) as u8 + } + /// The length of the relocation, 0=byte, 1=word, 2=long, 3=quad, 2 bits + #[inline] + pub fn r_length(self) -> u8 { + ((self.r_info & 0x0600_0000u32) >> 25) as u8 + } + /// Does not include value of sym referenced, 1 bit + #[inline] + pub fn r_extern(self) -> u8 { + ((self.r_info & 0x0800_0000) >> 27) as u8 + } + /// Ff not 0, machine specific relocation type, in bits :4 + #[inline] + pub fn r_type(self) -> u8 { + ((self.r_info & 0xf000_0000) >> 28) as u8 + } + /// If true, this relocation is for a symbol; if false, or a section ordinal otherwise + #[inline] + pub fn is_extern(self) -> bool { + self.r_extern() == 1 + } + /// If true, this is a PIC relocation + #[inline] + pub fn is_pic(self) -> bool { + self.r_pcrel() > 0 + } + /// Returns a string representation of this relocation, given the machine `cputype` + pub fn to_str(self, cputype: mach::cputype::CpuType) -> &'static str { + reloc_to_str(self.r_type(), cputype) + } +} + +/// Absolute relocation type for Mach-O files +pub const R_ABS: u8 = 0; + +impl fmt::Debug for 
RelocationInfo { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("RelocationInfo") + .field("r_address", &format_args!("{:#x}", &self.r_address)) + .field("r_info", &format_args!("{:#x}", &self.r_info)) + .field("r_symbolnum", &format_args!("{:#x}", &self.r_symbolnum())) + .field("r_pcrel", &(self.r_pcrel())) + .field("r_length", &self.r_length()) + .field("r_extern", &self.r_extern()) + .field("r_type", &self.r_type()) + .finish() + } +} + +pub type RelocType = u8; + +/// Absolute address +pub const X86_64_RELOC_UNSIGNED: RelocType = 0; +/// Signed 32-bit displacement +pub const X86_64_RELOC_SIGNED: RelocType = 1; +/// A CALL/JMP instruction with 32-bit displacement +pub const X86_64_RELOC_BRANCH: RelocType = 2; +/// A MOVQ load of a GOT entry +pub const X86_64_RELOC_GOT_LOAD: RelocType = 3; +/// Other GOT references +pub const X86_64_RELOC_GOT: RelocType = 4; +/// Must be followed by a X86_64_RELOC_UNSIGNED relocation +pub const X86_64_RELOC_SUBTRACTOR: RelocType = 5; +/// for signed 32-bit displacement with a -1 addend +pub const X86_64_RELOC_SIGNED_1: RelocType = 6; +/// for signed 32-bit displacement with a -2 addend +pub const X86_64_RELOC_SIGNED_2: RelocType = 7; +/// for signed 32-bit displacement with a -4 addend +pub const X86_64_RELOC_SIGNED_4: RelocType = 8; +/// for thread local variables +pub const X86_64_RELOC_TLV: RelocType = 9; + +// x86 relocations +pub const GENERIC_RELOC_VANILLA: RelocType = 0; +pub const GENERIC_RELOC_PAIR: RelocType = 1; +pub const GENERIC_RELOC_SECTDIFF: RelocType = 2; +pub const GENERIC_RELOC_PB_LA_PTR: RelocType = 3; +pub const GENERIC_RELOC_LOCAL_SECTDIFF: RelocType = 4; +pub const GENERIC_RELOC_TLV: RelocType = 5; + +// arm relocations +pub const ARM_RELOC_VANILLA: RelocType = GENERIC_RELOC_VANILLA; +pub const ARM_RELOC_PAIR: RelocType = GENERIC_RELOC_PAIR; +pub const ARM_RELOC_SECTDIFF: RelocType = GENERIC_RELOC_SECTDIFF; +pub const ARM_RELOC_LOCAL_SECTDIFF: RelocType = 3; +pub const 
ARM_RELOC_PB_LA_PTR: RelocType = 4; +pub const ARM_RELOC_BR24: RelocType = 5; +pub const ARM_THUMB_RELOC_BR22: RelocType = 6; +/// Obsolete +pub const ARM_THUMB_32BIT_BRANCH: RelocType = 7; +pub const ARM_RELOC_HALF: RelocType = 8; +pub const ARM_RELOC_HALF_SECTDIFF: RelocType = 9; + +/// For pointers. +pub const ARM64_RELOC_UNSIGNED: RelocType = 0; +/// Must be followed by an ARM64_RELOC_UNSIGNED +pub const ARM64_RELOC_SUBTRACTOR: RelocType = 1; +/// A B/BL instruction with 26-bit displacement. +pub const ARM64_RELOC_BRANCH26: RelocType = 2; +/// PC-rel distance to page of target. +pub const ARM64_RELOC_PAGE21: RelocType = 3; +/// Offset within page, scaled by r_length. +pub const ARM64_RELOC_PAGEOFF12: RelocType = 4; +/// PC-rel distance to page of GOT slot. +pub const ARM64_RELOC_GOT_LOAD_PAGE21: RelocType = 5; +/// Offset within page of GOT slot, scaled by r_length. +pub const ARM64_RELOC_GOT_LOAD_PAGEOFF12: RelocType = 6; +/// For pointers to GOT slots. +pub const ARM64_RELOC_POINTER_TO_GOT: RelocType = 7; +/// PC-rel distance to page of TLVP slot. +pub const ARM64_RELOC_TLVP_LOAD_PAGE21: RelocType = 8; +/// Offset within page of TLVP slot, scaled by r_length. +pub const ARM64_RELOC_TLVP_LOAD_PAGEOFF12: RelocType = 9; +/// Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. 
+pub const ARM64_RELOC_ADDEND: RelocType = 10; + +pub fn reloc_to_str(reloc: RelocType, cputype: mach::cputype::CpuType) -> &'static str { + use crate::mach::constants::cputype::*; + match cputype { + CPU_TYPE_ARM64 | CPU_TYPE_ARM64_32 => match reloc { + ARM64_RELOC_UNSIGNED => "ARM64_RELOC_UNSIGNED", + ARM64_RELOC_SUBTRACTOR => "ARM64_RELOC_SUBTRACTOR", + ARM64_RELOC_BRANCH26 => "ARM64_RELOC_BRANCH26", + ARM64_RELOC_PAGE21 => "ARM64_RELOC_PAGE21", + ARM64_RELOC_PAGEOFF12 => "ARM64_RELOC_PAGEOFF12", + ARM64_RELOC_GOT_LOAD_PAGE21 => "ARM64_RELOC_GOT_LOAD_PAGE21", + ARM64_RELOC_GOT_LOAD_PAGEOFF12 => "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + ARM64_RELOC_POINTER_TO_GOT => "ARM64_RELOC_POINTER_TO_GOT", + ARM64_RELOC_TLVP_LOAD_PAGE21 => "ARM64_RELOC_TLVP_LOAD_PAGE21", + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + ARM64_RELOC_ADDEND => "ARM64_RELOC_ADDEND", + _ => "UNKNOWN", + }, + CPU_TYPE_X86_64 => match reloc { + X86_64_RELOC_UNSIGNED => "X86_64_RELOC_UNSIGNED", + X86_64_RELOC_SIGNED => "X86_64_RELOC_SIGNED", + X86_64_RELOC_BRANCH => "X86_64_RELOC_BRANCH", + X86_64_RELOC_GOT_LOAD => "X86_64_RELOC_GOT_LOAD", + X86_64_RELOC_GOT => "X86_64_RELOC_GOT", + X86_64_RELOC_SUBTRACTOR => "X86_64_RELOC_SUBTRACTOR", + X86_64_RELOC_SIGNED_1 => "X86_64_RELOC_SIGNED_1", + X86_64_RELOC_SIGNED_2 => "X86_64_RELOC_SIGNED_2", + X86_64_RELOC_SIGNED_4 => "X86_64_RELOC_SIGNED_4", + X86_64_RELOC_TLV => "X86_64_RELOC_TLV", + _ => "UNKNOWN", + }, + CPU_TYPE_ARM => match reloc { + ARM_RELOC_VANILLA => "ARM_RELOC_VANILLA", + ARM_RELOC_PAIR => "ARM_RELOC_PAIR", + ARM_RELOC_SECTDIFF => "ARM_RELOC_SECTDIFF", + ARM_RELOC_LOCAL_SECTDIFF => "ARM_RELOC_LOCAL_SECTDIFF", + ARM_RELOC_PB_LA_PTR => "ARM_RELOC_PB_LA_PTR", + ARM_RELOC_BR24 => "ARM_RELOC_BR24", + ARM_THUMB_RELOC_BR22 => "ARM_THUMB_RELOC_BR22", + ARM_THUMB_32BIT_BRANCH => "ARM_THUMB_32BIT_BRANCH", + ARM_RELOC_HALF => "ARM_RELOC_HALF", + ARM_RELOC_HALF_SECTDIFF => "ARM_RELOC_HALF_SECTDIFF", + _ => "UNKNOWN", + }, + 
CPU_TYPE_X86 => match reloc { + GENERIC_RELOC_VANILLA => "GENERIC_RELOC_VANILLA", + GENERIC_RELOC_PAIR => "GENERIC_RELOC_PAIR", + GENERIC_RELOC_SECTDIFF => "GENERIC_RELOC_SECTDIFF", + GENERIC_RELOC_PB_LA_PTR => "GENERIC_RELOC_PB_LA_PTR", + GENERIC_RELOC_LOCAL_SECTDIFF => "GENERIC_RELOC_LOCAL_SECTDIFF", + GENERIC_RELOC_TLV => "GENERIC_RELOC_TLV", + _ => "UNKNOWN", + }, + _ => "BAD_CPUTYPE", + } +} diff --git a/third_party/rust/goblin/src/mach/segment.rs b/third_party/rust/goblin/src/mach/segment.rs new file mode 100644 index 0000000000..3c87bd62ed --- /dev/null +++ b/third_party/rust/goblin/src/mach/segment.rs @@ -0,0 +1,558 @@ +use scroll::ctx::{self, SizeWith}; +use scroll::{Pread, Pwrite}; + +use log::{debug, warn}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt; +use core::ops::{Deref, DerefMut}; + +use crate::container; +use crate::error; + +use crate::mach::constants::{SECTION_TYPE, S_GB_ZEROFILL, S_THREAD_LOCAL_ZEROFILL, S_ZEROFILL}; +use crate::mach::load_command::{ + Section32, Section64, SegmentCommand32, SegmentCommand64, LC_SEGMENT, LC_SEGMENT_64, + SIZEOF_SECTION_32, SIZEOF_SECTION_64, SIZEOF_SEGMENT_COMMAND_32, SIZEOF_SEGMENT_COMMAND_64, +}; +use crate::mach::relocation::RelocationInfo; + +pub struct RelocationIterator<'a> { + data: &'a [u8], + nrelocs: usize, + offset: usize, + count: usize, + ctx: scroll::Endian, +} + +impl<'a> Iterator for RelocationIterator<'a> { + type Item = error::Result<RelocationInfo>; + fn next(&mut self) -> Option<Self::Item> { + if self.count >= self.nrelocs { + None + } else { + self.count += 1; + match self.data.gread_with(&mut self.offset, self.ctx) { + Ok(res) => Some(Ok(res)), + Err(e) => Some(Err(e.into())), + } + } + } +} + +/// Generalized 32/64 bit Section +#[derive(Default)] +pub struct Section { + /// name of this section + pub sectname: [u8; 16], + /// segment this section goes in + pub segname: [u8; 16], + /// memory address of this section + pub addr: u64, + /// size in bytes of this section + 
pub size: u64, + /// file offset of this section + pub offset: u32, + /// section alignment (power of 2) + pub align: u32, + /// file offset of relocation entries + pub reloff: u32, + /// number of relocation entries + pub nreloc: u32, + /// flags (section type and attributes + pub flags: u32, +} + +impl Section { + /// The name of this section + pub fn name(&self) -> error::Result<&str> { + Ok(self.sectname.pread::<&str>(0)?) + } + /// The containing segment's name + pub fn segname(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) + } + /// Iterate this sections relocations given `data`; `data` must be the original binary + pub fn iter_relocations<'b>( + &self, + data: &'b [u8], + ctx: container::Ctx, + ) -> RelocationIterator<'b> { + let offset = self.reloff as usize; + debug!( + "Relocations for {} starting at offset: {:#x}", + self.name().unwrap_or("BAD_SECTION_NAME"), + offset + ); + RelocationIterator { + offset, + nrelocs: self.nreloc as usize, + count: 0, + data, + ctx: ctx.le, + } + } +} + +impl From<Section> for Section64 { + fn from(section: Section) -> Self { + Section64 { + sectname: section.sectname, + segname: section.segname, + addr: section.addr as u64, + size: section.size as u64, + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + reserved1: 0, + reserved2: 0, + reserved3: 0, + } + } +} + +impl From<Section> for Section32 { + fn from(section: Section) -> Self { + Section32 { + sectname: section.sectname, + segname: section.segname, + addr: section.addr as u32, + size: section.size as u32, + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + reserved1: 0, + reserved2: 0, + } + } +} + +impl fmt::Debug for Section { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Section") + .field("sectname", &self.name().unwrap()) + .field("segname", 
&self.segname().unwrap()) + .field("addr", &self.addr) + .field("size", &self.size) + .field("offset", &self.offset) + .field("align", &self.align) + .field("reloff", &self.reloff) + .field("nreloc", &self.nreloc) + .field("flags", &self.flags) + .finish() + } +} + +impl From<Section32> for Section { + fn from(section: Section32) -> Self { + Section { + sectname: section.sectname, + segname: section.segname, + addr: u64::from(section.addr), + size: u64::from(section.size), + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + } + } +} + +impl From<Section64> for Section { + fn from(section: Section64) -> Self { + Section { + sectname: section.sectname, + segname: section.segname, + addr: section.addr, + size: section.size, + offset: section.offset, + align: section.align, + reloff: section.reloff, + nreloc: section.nreloc, + flags: section.flags, + } + } +} + +impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Section { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], ctx: container::Ctx) -> Result<(Self, usize), Self::Error> { + match ctx.container { + container::Container::Little => { + let section = Section::from(bytes.pread_with::<Section32>(0, ctx.le)?); + Ok((section, SIZEOF_SECTION_32)) + } + container::Container::Big => { + let section = Section::from(bytes.pread_with::<Section64>(0, ctx.le)?); + Ok((section, SIZEOF_SECTION_64)) + } + } + } +} + +impl ctx::SizeWith<container::Ctx> for Section { + fn size_with(ctx: &container::Ctx) -> usize { + match ctx.container { + container::Container::Little => SIZEOF_SECTION_32, + container::Container::Big => SIZEOF_SECTION_64, + } + } +} + +impl ctx::TryIntoCtx<container::Ctx> for Section { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> Result<usize, Self::Error> { + if ctx.is_big() { + bytes.pwrite_with::<Section64>(self.into(), 0, ctx.le)?; + } else { + 
bytes.pwrite_with::<Section32>(self.into(), 0, ctx.le)?; + } + Ok(Self::size_with(&ctx)) + } +} + +impl ctx::IntoCtx<container::Ctx> for Section { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +pub struct SectionIterator<'a> { + data: &'a [u8], + count: usize, + offset: usize, + idx: usize, + ctx: container::Ctx, +} + +pub type SectionData<'a> = &'a [u8]; + +impl<'a> ::core::iter::ExactSizeIterator for SectionIterator<'a> { + fn len(&self) -> usize { + self.count + } +} + +impl<'a> Iterator for SectionIterator<'a> { + type Item = error::Result<(Section, SectionData<'a>)>; + fn next(&mut self) -> Option<Self::Item> { + if self.idx >= self.count { + None + } else { + self.idx += 1; + match self.data.gread_with::<Section>(&mut self.offset, self.ctx) { + Ok(section) => { + let section_type = section.flags & SECTION_TYPE; + let data = if section_type == S_ZEROFILL + || section_type == S_GB_ZEROFILL + || section_type == S_THREAD_LOCAL_ZEROFILL + { + &[] + } else { + // it's not uncommon to encounter macho files where files are + // truncated but the sections are still remaining in the header. + // Because of this we want to not panic here but instead just + // slice down to a empty data slice. This way only if code + // actually needs to access those sections it will fall over. + self.data + .get(section.offset as usize..) 
+ .unwrap_or_else(|| { + warn!( + "section #{} offset {} out of bounds", + self.idx, section.offset + ); + &[] + }) + .get(..section.size as usize) + .unwrap_or_else(|| { + warn!("section #{} size {} out of bounds", self.idx, section.size); + &[] + }) + }; + Some(Ok((section, data))) + } + Err(e) => Some(Err(e)), + } + } + } +} + +impl<'a, 'b> IntoIterator for &'b Segment<'a> { + type Item = error::Result<(Section, SectionData<'a>)>; + type IntoIter = SectionIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + SectionIterator { + data: self.raw_data, + count: self.nsects as usize, + offset: self.offset + Segment::size_with(&self.ctx), + idx: 0, + ctx: self.ctx, + } + } +} + +/// Generalized 32/64 bit Segment Command +pub struct Segment<'a> { + pub cmd: u32, + pub cmdsize: u32, + pub segname: [u8; 16], + pub vmaddr: u64, + pub vmsize: u64, + pub fileoff: u64, + pub filesize: u64, + pub maxprot: u32, + pub initprot: u32, + pub nsects: u32, + pub flags: u32, + pub data: &'a [u8], + offset: usize, + raw_data: &'a [u8], + ctx: container::Ctx, +} + +impl<'a> From<Segment<'a>> for SegmentCommand64 { + fn from(segment: Segment<'a>) -> Self { + SegmentCommand64 { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: segment.vmaddr as u64, + vmsize: segment.vmsize as u64, + fileoff: segment.fileoff as u64, + filesize: segment.filesize as u64, + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + } + } +} + +impl<'a> From<Segment<'a>> for SegmentCommand32 { + fn from(segment: Segment<'a>) -> Self { + SegmentCommand32 { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: segment.vmaddr as u32, + vmsize: segment.vmsize as u32, + fileoff: segment.fileoff as u32, + filesize: segment.filesize as u32, + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + } + } +} + +impl<'a> fmt::Debug for Segment<'a> { 
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Segment") + .field("cmd", &self.cmd) + .field("cmdsize", &self.cmdsize) + .field("segname", &self.segname.pread::<&str>(0).unwrap()) + .field("vmaddr", &self.vmaddr) + .field("vmsize", &self.vmsize) + .field("fileoff", &self.fileoff) + .field("filesize", &self.filesize) + .field("maxprot", &self.maxprot) + .field("initprot", &self.initprot) + .field("nsects", &self.nsects) + .field("flags", &self.flags) + .field("data", &self.data.len()) + .field( + "sections()", + &self.sections().map(|sections| { + sections + .into_iter() + .map(|(section, _)| section) + .collect::<Vec<_>>() + }), + ) + .finish() + } +} + +impl<'a> ctx::SizeWith<container::Ctx> for Segment<'a> { + fn size_with(ctx: &container::Ctx) -> usize { + match ctx.container { + container::Container::Little => SIZEOF_SEGMENT_COMMAND_32, + container::Container::Big => SIZEOF_SEGMENT_COMMAND_64, + } + } +} + +impl<'a> ctx::TryIntoCtx<container::Ctx> for Segment<'a> { + type Error = crate::error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) -> Result<usize, Self::Error> { + let segment_size = Self::size_with(&ctx); + // should be able to write the section data inline after this, but not working at the moment + //let section_size = bytes.pwrite(data, segment_size)?; + //debug!("Segment size: {} raw section data size: {}", segment_size, data.len()); + if ctx.is_big() { + bytes.pwrite_with::<SegmentCommand64>(self.into(), 0, ctx.le)?; + } else { + bytes.pwrite_with::<SegmentCommand32>(self.into(), 0, ctx.le)?; + } + //debug!("Section size: {}", section_size); + Ok(segment_size) + } +} + +impl<'a> ctx::IntoCtx<container::Ctx> for Segment<'a> { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +/// Read data that belongs to a segment if the offset is within the boundaries of bytes. 
+fn segment_data(bytes: &[u8], fileoff: u64, filesize: u64) -> Result<&[u8], error::Error> { + let data: &[u8] = if filesize != 0 { + bytes.pread_with(fileoff as usize, filesize as usize)? + } else { + &[] + }; + Ok(data) +} + +impl<'a> Segment<'a> { + /// Create a new, blank segment, with cmd either `LC_SEGMENT_64`, or `LC_SEGMENT`, depending on `ctx`. + /// **NB** You are responsible for providing a correctly marshalled byte array as the sections. You should not use this for anything other than writing. + pub fn new(ctx: container::Ctx, sections: &'a [u8]) -> Self { + Segment { + cmd: if ctx.is_big() { + LC_SEGMENT_64 + } else { + LC_SEGMENT + }, + cmdsize: (Self::size_with(&ctx) + sections.len()) as u32, + segname: [0; 16], + vmaddr: 0, + vmsize: 0, + fileoff: 0, + filesize: 0, + maxprot: 0, + initprot: 0, + nsects: 0, + flags: 0, + data: sections, + offset: 0, + raw_data: &[], + ctx, + } + } + /// Get the name of this segment + pub fn name(&self) -> error::Result<&str> { + Ok(self.segname.pread::<&str>(0)?) 
+ } + /// Get the sections from this segment, erroring if any section couldn't be retrieved + pub fn sections(&self) -> error::Result<Vec<(Section, SectionData<'a>)>> { + let mut sections = Vec::new(); + for section in self.into_iter() { + sections.push(section?); + } + Ok(sections) + } + /// Convert the raw C 32-bit segment command to a generalized version + pub fn from_32( + bytes: &'a [u8], + segment: &SegmentCommand32, + offset: usize, + ctx: container::Ctx, + ) -> Result<Self, error::Error> { + Ok(Segment { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: u64::from(segment.vmaddr), + vmsize: u64::from(segment.vmsize), + fileoff: u64::from(segment.fileoff), + filesize: u64::from(segment.filesize), + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + data: segment_data( + bytes, + u64::from(segment.fileoff), + u64::from(segment.filesize), + )?, + offset, + raw_data: bytes, + ctx, + }) + } + /// Convert the raw C 64-bit segment command to a generalized version + pub fn from_64( + bytes: &'a [u8], + segment: &SegmentCommand64, + offset: usize, + ctx: container::Ctx, + ) -> Result<Self, error::Error> { + Ok(Segment { + cmd: segment.cmd, + cmdsize: segment.cmdsize, + segname: segment.segname, + vmaddr: segment.vmaddr, + vmsize: segment.vmsize, + fileoff: segment.fileoff, + filesize: segment.filesize, + maxprot: segment.maxprot, + initprot: segment.initprot, + nsects: segment.nsects, + flags: segment.flags, + data: segment_data(bytes, segment.fileoff, segment.filesize)?, + offset, + raw_data: bytes, + ctx, + }) + } +} + +#[derive(Debug, Default)] +/// An opaque 32/64-bit container for Mach-o segments +pub struct Segments<'a> { + segments: Vec<Segment<'a>>, +} + +impl<'a> Deref for Segments<'a> { + type Target = Vec<Segment<'a>>; + fn deref(&self) -> &Self::Target { + &self.segments + } +} + +impl<'a> DerefMut for Segments<'a> { + fn deref_mut(&mut self) -> &mut Self::Target 
{ + &mut self.segments + } +} + +impl<'a, 'b> IntoIterator for &'b Segments<'a> { + type Item = &'b Segment<'a>; + type IntoIter = ::core::slice::Iter<'b, Segment<'a>>; + fn into_iter(self) -> Self::IntoIter { + self.segments.iter() + } +} + +impl<'a> Segments<'a> { + /// Construct a new generalized segment container from this `ctx` + pub fn new(_ctx: container::Ctx) -> Self { + Segments { + segments: Vec::new(), + } + } + /// Get every section from every segment + // thanks to SpaceManic for figuring out the 'b lifetimes here :) + pub fn sections<'b>(&'b self) -> Box<dyn Iterator<Item = SectionIterator<'a>> + 'b> { + Box::new(self.segments.iter().map(|segment| segment.into_iter())) + } +} diff --git a/third_party/rust/goblin/src/mach/symbols.rs b/third_party/rust/goblin/src/mach/symbols.rs new file mode 100644 index 0000000000..adfc4d665b --- /dev/null +++ b/third_party/rust/goblin/src/mach/symbols.rs @@ -0,0 +1,488 @@ +//! "Nlist" style symbols in this binary - beware, like most symbol tables in most binary formats, they are strippable, and should not be relied upon, see the imports and exports modules for something more permanent. +//! +//! Symbols are essentially a type, offset, and the symbol name + +use crate::container::{self, Container}; +use crate::error; +use crate::mach::load_command; +use core::fmt::{self, Debug}; +use scroll::ctx; +use scroll::ctx::SizeWith; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +// The n_type field really contains four fields which are used via the following masks. +/// if any of these bits set, a symbolic debugging entry +pub const N_STAB: u8 = 0xe0; +/// private external symbol bit +pub const N_PEXT: u8 = 0x10; +/// mask for the type bits +pub const N_TYPE: u8 = 0x0e; +/// external symbol bit, set for external symbols +pub const N_EXT: u8 = 0x01; + +// If the type is N_SECT then the n_sect field contains an ordinal of the +// section the symbol is defined in. 
The sections are numbered from 1 and +// refer to sections in order they appear in the load commands for the file +// they are in. This means the same ordinal may very well refer to different +// sections in different files. + +// The n_value field for all symbol table entries (including N_STAB's) gets +// updated by the link editor based on the value of it's n_sect field and where +// the section n_sect references gets relocated. If the value of the n_sect +// field is NO_SECT then it's n_value field is not changed by the link editor. +/// symbol is not in any section +pub const NO_SECT: u8 = 0; +/// 1 thru 255 inclusive +pub const MAX_SECT: u8 = 255; + +/// undefined, n_sect == NO_SECT +pub const N_UNDF: u8 = 0x0; +/// absolute, n_sect == NO_SECT +pub const N_ABS: u8 = 0x2; +/// defined in section number n_sect +pub const N_SECT: u8 = 0xe; +/// prebound undefined (defined in a dylib) +pub const N_PBUD: u8 = 0xc; +/// indirect +pub const N_INDR: u8 = 0xa; + +// n_types when N_STAB +pub const N_GSYM: u8 = 0x20; +pub const N_FNAME: u8 = 0x22; +pub const N_FUN: u8 = 0x24; +pub const N_STSYM: u8 = 0x26; +pub const N_LCSYM: u8 = 0x28; +pub const N_BNSYM: u8 = 0x2e; +pub const N_PC: u8 = 0x30; +pub const N_AST: u8 = 0x32; +pub const N_OPT: u8 = 0x3c; +pub const N_RSYM: u8 = 0x40; +pub const N_SLINE: u8 = 0x44; +pub const N_ENSYM: u8 = 0x4e; +pub const N_SSYM: u8 = 0x60; +pub const N_SO: u8 = 0x64; +pub const N_OSO: u8 = 0x66; +pub const N_LSYM: u8 = 0x80; +pub const N_BINCL: u8 = 0x82; +pub const N_SOL: u8 = 0x84; +pub const N_PARAMS: u8 = 0x86; +pub const N_VERSION: u8 = 0x88; +pub const N_OLEVEL: u8 = 0x8a; +pub const N_PSYM: u8 = 0xa0; +pub const N_EINCL: u8 = 0xa2; +pub const N_ENTRY: u8 = 0xa4; +pub const N_LBRAC: u8 = 0xc0; +pub const N_EXCL: u8 = 0xc2; +pub const N_RBRAC: u8 = 0xe0; +pub const N_BCOMM: u8 = 0xe2; +pub const N_ECOMM: u8 = 0xe4; +pub const N_ECOML: u8 = 0xe8; +pub const N_LENG: u8 = 0xfe; + +pub const NLIST_TYPE_MASK: u8 = 0xe; +pub const 
NLIST_TYPE_GLOBAL: u8 = 0x1; +pub const NLIST_TYPE_LOCAL: u8 = 0x0; + +/// Mask for reference flags of `n_desc` field. +pub const REFERENCE_TYPE: u16 = 0xf; +/// This symbol is a reference to an external non-lazy (data) symbol. +pub const REFERENCE_FLAG_UNDEFINED_NON_LAZY: u16 = 0x0; +/// This symbol is a reference to an external lazy symbol—that is, to a function call. +pub const REFERENCE_FLAG_UNDEFINED_LAZY: u16 = 0x1; +/// This symbol is defined in this module. +pub const REFERENCE_FLAG_DEFINED: u16 = 0x2; +/// This symbol is defined in this module and is visible only to modules within this +/// shared library. +pub const REFERENCE_FLAG_PRIVATE_DEFINED: u16 = 0x3; +/// This symbol is defined in another module in this file, is a non-lazy (data) symbol, +/// and is visible only to modules within this shared library. +pub const REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY: u16 = 0x4; +/// This symbol is defined in another module in this file, is a lazy (function) symbol, +/// and is visible only to modules within this shared library. +pub const REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY: u16 = 0x5; + +// Additional flags of n_desc field. + +/// Must be set for any defined symbol that is referenced by dynamic-loader APIs +/// (such as dlsym and NSLookupSymbolInImage) and not ordinary undefined symbol +/// references. The `strip` tool uses this bit to avoid removing symbols that must +/// exist: If the symbol has this bit set, `strip` does not strip it. +pub const REFERENCED_DYNAMICALLY: u16 = 0x10; +/// Sometimes used by the dynamic linker at runtime in a fully linked image. Do not +/// set this bit in a fully linked image. +pub const N_DESC_DISCARDED: u16 = 0x20; +/// When set in a relocatable object file (file type MH_OBJECT) on a defined symbol, +/// indicates to the static linker to never dead-strip the symbol. +// (Note that the same bit (0x20) is used for two nonoverlapping purposes.) 
+pub const N_NO_DEAD_STRIP: u16 = 0x20; +/// Indicates that this undefined symbol is a weak reference. If the dynamic linker +/// cannot find a definition for this symbol, it sets the address of this symbol to 0. +/// The static linker sets this symbol given the appropriate weak-linking flags. +pub const N_WEAK_REF: u16 = 0x40; +/// Indicates that this symbol is a weak definition. If the static linker or the +/// dynamic linker finds another (non-weak) definition for this symbol, the weak +/// definition is ignored. Only symbols in a coalesced section can be marked as a +/// weak definition. +pub const N_WEAK_DEF: u16 = 0x80; + +pub fn n_type_to_str(n_type: u8) -> &'static str { + match n_type { + N_UNDF => "N_UNDF", + N_ABS => "N_ABS", + N_SECT => "N_SECT", + N_PBUD => "N_PBUD", + N_INDR => "N_INDR", + _ => "UNKNOWN_N_TYPE", + } +} + +#[repr(C)] +#[derive(Clone, Copy, Pread, Pwrite, SizeWith, IOread, IOwrite)] +pub struct Nlist32 { + /// index into the string table + pub n_strx: u32, + /// type flag, see below + pub n_type: u8, + /// section number or NO_SECT + pub n_sect: u8, + /// see <mach-o/stab.h> + pub n_desc: u16, + /// value of this symbol (or stab offset) + pub n_value: u32, +} + +pub const SIZEOF_NLIST_32: usize = 12; + +impl Debug for Nlist32 { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Nlist32") + .field("n_strx", &format_args!("{:04}", self.n_strx)) + .field("n_type", &format_args!("{:#02x}", self.n_type)) + .field("n_sect", &format_args!("{:#x}", self.n_sect)) + .field("n_desc", &format_args!("{:#03x}", self.n_desc)) + .field("n_value", &format_args!("{:#x}", self.n_value)) + .finish() + } +} + +#[repr(C)] +#[derive(Clone, Copy, Pread, Pwrite, SizeWith, IOread, IOwrite)] +pub struct Nlist64 { + /// index into the string table + pub n_strx: u32, + /// type flag, see below + pub n_type: u8, + /// section number or NO_SECT + pub n_sect: u8, + /// see <mach-o/stab.h> + pub n_desc: u16, + /// value of this symbol (or 
stab offset) + pub n_value: u64, +} + +pub const SIZEOF_NLIST_64: usize = 16; + +impl Debug for Nlist64 { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Nlist64") + .field("n_strx", &format_args!("{:04}", self.n_strx)) + .field("n_type", &format_args!("{:#02x}", self.n_type)) + .field("n_sect", &format_args!("{:#x}", self.n_sect)) + .field("n_desc", &format_args!("{:#03x}", self.n_desc)) + .field("n_value", &format_args!("{:#x}", self.n_value)) + .finish() + } +} + +#[derive(Debug, Clone)] +pub struct Nlist { + /// index into the string table + pub n_strx: usize, + /// type flag, see below + pub n_type: u8, + /// section number or NO_SECT + pub n_sect: usize, + /// see <mach-o/stab.h> + pub n_desc: u16, + /// value of this symbol (or stab offset) + pub n_value: u64, +} + +impl Nlist { + /// Gets this symbol's type in bits 0xe + pub fn get_type(&self) -> u8 { + self.n_type & N_TYPE + } + /// Gets the str representation of the type of this symbol + pub fn type_str(&self) -> &'static str { + n_type_to_str(self.get_type()) + } + /// Whether this symbol is global or not + pub fn is_global(&self) -> bool { + self.n_type & N_EXT != 0 + } + /// Whether this symbol is weak or not + pub fn is_weak(&self) -> bool { + self.n_desc & (N_WEAK_REF | N_WEAK_DEF) != 0 + } + /// Whether this symbol is undefined or not + pub fn is_undefined(&self) -> bool { + self.n_sect == 0 && self.n_type & N_TYPE == N_UNDF + } + /// Whether this symbol is a symbolic debugging entry + pub fn is_stab(&self) -> bool { + self.n_type & N_STAB != 0 + } +} + +impl ctx::SizeWith<container::Ctx> for Nlist { + fn size_with(ctx: &container::Ctx) -> usize { + match ctx.container { + Container::Little => SIZEOF_NLIST_32, + Container::Big => SIZEOF_NLIST_64, + } + } +} + +impl From<Nlist32> for Nlist { + fn from(nlist: Nlist32) -> Self { + Nlist { + n_strx: nlist.n_strx as usize, + n_type: nlist.n_type, + n_sect: nlist.n_sect as usize, + n_desc: nlist.n_desc, + n_value: 
u64::from(nlist.n_value), + } + } +} + +impl From<Nlist64> for Nlist { + fn from(nlist: Nlist64) -> Self { + Nlist { + n_strx: nlist.n_strx as usize, + n_type: nlist.n_type, + n_sect: nlist.n_sect as usize, + n_desc: nlist.n_desc, + n_value: nlist.n_value, + } + } +} + +impl From<Nlist> for Nlist32 { + fn from(nlist: Nlist) -> Self { + Nlist32 { + n_strx: nlist.n_strx as u32, + n_type: nlist.n_type, + n_sect: nlist.n_sect as u8, + n_desc: nlist.n_desc, + n_value: nlist.n_value as u32, + } + } +} + +impl From<Nlist> for Nlist64 { + fn from(nlist: Nlist) -> Self { + Nlist64 { + n_strx: nlist.n_strx as u32, + n_type: nlist.n_type, + n_sect: nlist.n_sect as u8, + n_desc: nlist.n_desc, + n_value: nlist.n_value, + } + } +} + +impl<'a> ctx::TryFromCtx<'a, container::Ctx> for Nlist { + type Error = crate::error::Error; + fn try_from_ctx( + bytes: &'a [u8], + container::Ctx { container, le }: container::Ctx, + ) -> crate::error::Result<(Self, usize)> { + let nlist = match container { + Container::Little => (bytes.pread_with::<Nlist32>(0, le)?.into(), SIZEOF_NLIST_32), + Container::Big => (bytes.pread_with::<Nlist64>(0, le)?.into(), SIZEOF_NLIST_64), + }; + Ok(nlist) + } +} + +impl ctx::TryIntoCtx<container::Ctx> for Nlist { + type Error = crate::error::Error; + fn try_into_ctx( + self, + bytes: &mut [u8], + container::Ctx { container, le }: container::Ctx, + ) -> Result<usize, Self::Error> { + let size = match container { + Container::Little => bytes.pwrite_with::<Nlist32>(self.into(), 0, le)?, + Container::Big => bytes.pwrite_with::<Nlist64>(self.into(), 0, le)?, + }; + Ok(size) + } +} + +impl ctx::IntoCtx<container::Ctx> for Nlist { + fn into_ctx(self, bytes: &mut [u8], ctx: container::Ctx) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct SymbolsCtx { + pub nsyms: usize, + pub strtab: usize, + pub ctx: container::Ctx, +} + +impl<'a, T: ?Sized> ctx::TryFromCtx<'a, SymbolsCtx, T> for Symbols<'a> +where + T: 
AsRef<[u8]>, +{ + type Error = crate::error::Error; + fn try_from_ctx( + bytes: &'a T, + SymbolsCtx { nsyms, strtab, ctx }: SymbolsCtx, + ) -> crate::error::Result<(Self, usize)> { + let data = bytes.as_ref(); + Ok(( + Symbols { + data, + start: 0, + nsyms, + strtab, + ctx, + }, + data.len(), + )) + } +} + +#[derive(Default)] +pub struct SymbolIterator<'a> { + data: &'a [u8], + nsyms: usize, + offset: usize, + count: usize, + ctx: container::Ctx, + strtab: usize, +} + +impl<'a> Iterator for SymbolIterator<'a> { + type Item = error::Result<(&'a str, Nlist)>; + fn next(&mut self) -> Option<Self::Item> { + if self.count >= self.nsyms { + None + } else { + self.count += 1; + match self.data.gread_with::<Nlist>(&mut self.offset, self.ctx) { + Ok(symbol) => match self.data.pread(self.strtab + symbol.n_strx) { + Ok(name) => Some(Ok((name, symbol))), + Err(e) => Some(Err(e.into())), + }, + Err(e) => Some(Err(e)), + } + } + } +} + +/// A zero-copy "nlist" style symbol table ("stab"), including the string table +pub struct Symbols<'a> { + data: &'a [u8], + start: usize, + nsyms: usize, + // TODO: we can use an actual strtab here and tie it to symbols lifetime + strtab: usize, + ctx: container::Ctx, +} + +impl<'a, 'b> IntoIterator for &'b Symbols<'a> { + type Item = <SymbolIterator<'a> as Iterator>::Item; + type IntoIter = SymbolIterator<'a>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> Symbols<'a> { + /// Creates a new symbol table with `count` elements, from the `start` offset, using the string table at `strtab`, with a _default_ ctx. 
+ //// + /// **Beware**, this will provide incorrect results if you construct this on a 32-bit mach binary, using a 64-bit machine; use `parse` instead if you want 32/64 bit support + pub fn new( + bytes: &'a [u8], + start: usize, + count: usize, + strtab: usize, + ) -> error::Result<Symbols<'a>> { + let nsyms = count; + Ok(Symbols { + data: bytes, + start, + nsyms, + strtab, + ctx: container::Ctx::default(), + }) + } + pub fn parse( + bytes: &'a [u8], + symtab: &load_command::SymtabCommand, + ctx: container::Ctx, + ) -> error::Result<Symbols<'a>> { + // we need to normalize the strtab offset before we receive the truncated bytes in pread_with + let strtab = symtab + .stroff + .checked_sub(symtab.symoff) + .ok_or_else(|| error::Error::Malformed("invalid symbol table offset".into()))?; + bytes.pread_with( + symtab.symoff as usize, + SymbolsCtx { + nsyms: symtab.nsyms as usize, + strtab: strtab as usize, + ctx, + }, + ) + } + + pub fn iter(&self) -> SymbolIterator<'a> { + SymbolIterator { + offset: self.start as usize, + nsyms: self.nsyms as usize, + count: 0, + data: self.data, + ctx: self.ctx, + strtab: self.strtab, + } + } + + /// Parses a single Nlist symbol from the binary, with its accompanying name + pub fn get(&self, index: usize) -> crate::error::Result<(&'a str, Nlist)> { + let sym: Nlist = self + .data + .pread_with(self.start + (index * Nlist::size_with(&self.ctx)), self.ctx)?; + let name = self.data.pread(self.strtab + sym.n_strx)?; + Ok((name, sym)) + } +} + +impl<'a> Debug for Symbols<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Symbols") + .field("data", &self.data.len()) + .field("start", &format_args!("{:#?}", self.start)) + .field("nsyms", &self.nsyms) + .field("strtab", &format_args!("{:#x}", self.strtab)) + .finish()?; + + writeln!(fmt, "Symbol List {{")?; + for (i, res) in self.iter().enumerate() { + match res { + Ok((name, nlist)) => writeln!( + fmt, + "{: >10x} {} sect: {:#x} type: {:#02x} desc: 
{:#03x}", + nlist.n_value, name, nlist.n_sect, nlist.n_type, nlist.n_desc + )?, + Err(error) => writeln!(fmt, " Bad symbol, index: {}, sym: {:?}", i, error)?, + } + } + writeln!(fmt, "}}") + } +} diff --git a/third_party/rust/goblin/src/pe/authenticode.rs b/third_party/rust/goblin/src/pe/authenticode.rs new file mode 100644 index 0000000000..e16b7997cd --- /dev/null +++ b/third_party/rust/goblin/src/pe/authenticode.rs @@ -0,0 +1,116 @@ +// Reference: +// https://learn.microsoft.com/en-us/windows-hardware/drivers/install/authenticode +// https://download.microsoft.com/download/9/c/5/9c5b2167-8017-4bae-9fde-d599bac8184a/Authenticode_PE.docx + +// Authenticode works by omiting sections of the PE binary from the digest +// those sections are: +// - checksum +// - data directory entry for certtable +// - certtable + +use core::ops::Range; + +use super::PE; + +impl PE<'_> { + /// [`authenticode_ranges`] returns the various ranges of the binary that are relevant for + /// signature. + pub fn authenticode_ranges(&self) -> ExcludedSectionsIter<'_> { + ExcludedSectionsIter { + pe: self, + state: IterState::default(), + } + } +} + +/// [`ExcludedSections`] holds the various ranges of the binary that are expected to be +/// excluded from the authenticode computation. 
+#[derive(Debug, Clone, Default)] +pub(super) struct ExcludedSections { + checksum: Range<usize>, + datadir_entry_certtable: Range<usize>, + certtable: Option<Range<usize>>, +} + +impl ExcludedSections { + pub(super) fn new( + checksum: Range<usize>, + datadir_entry_certtable: Range<usize>, + certtable: Option<Range<usize>>, + ) -> Self { + Self { + checksum, + datadir_entry_certtable, + certtable, + } + } +} + +pub struct ExcludedSectionsIter<'s> { + pe: &'s PE<'s>, + state: IterState, +} + +#[derive(Debug, PartialEq)] +enum IterState { + Initial, + DatadirEntry(usize), + CertTable(usize), + Final(usize), + Done, +} + +impl Default for IterState { + fn default() -> Self { + Self::Initial + } +} + +impl<'s> Iterator for ExcludedSectionsIter<'s> { + type Item = &'s [u8]; + + fn next(&mut self) -> Option<Self::Item> { + let bytes = &self.pe.bytes; + + if let Some(sections) = self.pe.authenticode_excluded_sections.as_ref() { + loop { + match self.state { + IterState::Initial => { + self.state = IterState::DatadirEntry(sections.checksum.end); + return Some(&bytes[..sections.checksum.start]); + } + IterState::DatadirEntry(start) => { + self.state = IterState::CertTable(sections.datadir_entry_certtable.end); + return Some(&bytes[start..sections.datadir_entry_certtable.start]); + } + IterState::CertTable(start) => { + if let Some(certtable) = sections.certtable.as_ref() { + self.state = IterState::Final(certtable.end); + return Some(&bytes[start..certtable.start]); + } else { + self.state = IterState::Final(start) + } + } + IterState::Final(start) => { + self.state = IterState::Done; + return Some(&bytes[start..]); + } + IterState::Done => return None, + } + } + } else { + loop { + match self.state { + IterState::Initial => { + self.state = IterState::Done; + return Some(bytes); + } + IterState::Done => return None, + _ => { + self.state = IterState::Done; + } + } + } + } + } +} diff --git a/third_party/rust/goblin/src/pe/certificate_table.rs 
/// Kind of content stored in an attribute certificate entry (`wCertificateType`).
///
/// Derives the same comparison and copy traits as [`AttributeCertificateRevision`] so both
/// header enums can be compared and passed by value alike.
#[repr(u16)]
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum AttributeCertificateType {
    /// WIN_CERT_TYPE_X509
    X509 = 0x0001,
    /// WIN_CERT_TYPE_PKCS_SIGNED_DATA
    PkcsSignedData = 0x0002,
    /// WIN_CERT_TYPE_RESERVED_1
    Reserved1 = 0x0003,
    /// WIN_CERT_TYPE_TS_STACK_SIGNED
    TsStackSigned = 0x0004,
}
u16 => { + AttributeCertificateType::Reserved1 + } + x if x == AttributeCertificateType::TsStackSigned as u16 => { + AttributeCertificateType::TsStackSigned + } + _ => { + return Err(error::Error::Malformed( + "Invalid attribute certificate type".to_string(), + )) + } + }) + } +} + +#[derive(Clone, Pread)] +struct AttributeCertificateHeader { + /// dwLength + length: u32, + revision: u16, + certificate_type: u16, +} + +const CERTIFICATE_DATA_OFFSET: u32 = 8; +#[derive(Debug)] +pub struct AttributeCertificate<'a> { + pub length: u32, + pub revision: AttributeCertificateRevision, + pub certificate_type: AttributeCertificateType, + pub certificate: &'a [u8], +} + +impl<'a> AttributeCertificate<'a> { + pub fn parse( + bytes: &'a [u8], + current_offset: &mut usize, + ) -> Result<AttributeCertificate<'a>, error::Error> { + // `current_offset` is moved sizeof(AttributeCertificateHeader) = 8 bytes further. + let header: AttributeCertificateHeader = bytes.gread_with(current_offset, scroll::LE)?; + let cert_size = usize::try_from(header.length.saturating_sub(CERTIFICATE_DATA_OFFSET)) + .map_err(|_err| { + error::Error::Malformed( + "Attribute certificate size do not fit in usize".to_string(), + ) + })?; + + if let Some(bytes) = bytes.get(*current_offset..(*current_offset + cert_size)) { + let attr = Self { + length: header.length, + revision: header.revision.try_into()?, + certificate_type: header.certificate_type.try_into()?, + certificate: bytes, + }; + // Moving past the certificate data. + // Prevent the current_offset to wrap and ensure current_offset is strictly increasing. + *current_offset = current_offset.saturating_add(cert_size); + // Round to the next 8-bytes. + *current_offset = (*current_offset + 7) & !7; + Ok(attr) + } else { + Err(error::Error::Malformed(format!( + "Unable to extract certificate. 
Probably cert_size:{} is malformed", + cert_size + ))) + } + } +} + +pub type CertificateDirectoryTable<'a> = Vec<AttributeCertificate<'a>>; +pub(crate) fn enumerate_certificates( + bytes: &[u8], + table_virtual_address: u32, + table_size: u32, +) -> Result<CertificateDirectoryTable, error::Error> { + let table_start_offset = usize::try_from(table_virtual_address).map_err(|_err| { + error::Error::Malformed("Certificate table RVA do not fit in a usize".to_string()) + })?; + // Here, we do not want wrapping semantics as it means that a too big table size or table start + // offset will provide table_end_offset such that table_end_offset < table_start_offset, which + // is not desirable at all. + let table_end_offset = + table_start_offset.saturating_add(usize::try_from(table_size).map_err(|_err| { + error::Error::Malformed("Certificate table size do not fit in a usize".to_string()) + })?); + let mut current_offset = table_start_offset; + let mut attrs = vec![]; + + // End offset cannot be further than the binary we have at hand. + if table_end_offset > bytes.len() { + return Err(error::Error::Malformed( + "End of attribute certificates table is after the end of the PE binary".to_string(), + )); + } + + // This is guaranteed to terminate, either by a malformed error being returned + // or because current_offset >= table_end_offset by virtue of current_offset being strictly + // increasing through `AttributeCertificate::parse`. 
+ while current_offset < table_end_offset { + attrs.push(AttributeCertificate::parse(bytes, &mut current_offset)?); + } + + Ok(attrs) +} diff --git a/third_party/rust/goblin/src/pe/characteristic.rs b/third_party/rust/goblin/src/pe/characteristic.rs new file mode 100644 index 0000000000..f5c1fe4e5d --- /dev/null +++ b/third_party/rust/goblin/src/pe/characteristic.rs @@ -0,0 +1,99 @@ +/* +type characteristic = + | IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_EXECUTABLE_IMAGE + | IMAGE_FILE_LINE_NUMS_STRIPPED + | IMAGE_FILE_LOCAL_SYMS_STRIPPED + | IMAGE_FILE_AGGRESSIVE_WS_TRIM + | IMAGE_FILE_LARGE_ADDRESS_AWARE + | RESERVED + | IMAGE_FILE_BYTES_REVERSED_LO + | IMAGE_FILE_32BIT_MACHINE + | IMAGE_FILE_DEBUG_STRIPPED + | IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP + | IMAGE_FILE_NET_RUN_FROM_SWAP + | IMAGE_FILE_SYSTEM + | IMAGE_FILE_DLL + | IMAGE_FILE_UP_SYSTEM_ONLY + | IMAGE_FILE_BYTES_REVERSED_HI + | UNKNOWN of int + +let get_characteristic = + function + | 0x0001 -> IMAGE_FILE_RELOCS_STRIPPED + | 0x0002 -> IMAGE_FILE_EXECUTABLE_IMAGE + | 0x0004 -> IMAGE_FILE_LINE_NUMS_STRIPPED + | 0x0008 -> IMAGE_FILE_LOCAL_SYMS_STRIPPED + | 0x0010 -> IMAGE_FILE_AGGRESSIVE_WS_TRIM + | 0x0020 -> IMAGE_FILE_LARGE_ADDRESS_AWARE + | 0x0040 -> RESERVED + | 0x0080 -> IMAGE_FILE_BYTES_REVERSED_LO + | 0x0100 -> IMAGE_FILE_32BIT_MACHINE + | 0x0200 -> IMAGE_FILE_DEBUG_STRIPPED + | 0x0400 -> IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP + | 0x0800 -> IMAGE_FILE_NET_RUN_FROM_SWAP + | 0x1000 -> IMAGE_FILE_SYSTEM + | 0x2000 -> IMAGE_FILE_DLL + | 0x4000 -> IMAGE_FILE_UP_SYSTEM_ONLY + | 0x8000 -> IMAGE_FILE_BYTES_REVERSED_HI + | x -> UNKNOWN x + +let characteristic_to_string = + function + | IMAGE_FILE_RELOCS_STRIPPED -> "IMAGE_FILE_RELOCS_STRIPPED" + | IMAGE_FILE_EXECUTABLE_IMAGE -> "IMAGE_FILE_EXECUTABLE_IMAGE" + | IMAGE_FILE_LINE_NUMS_STRIPPED -> "IMAGE_FILE_LINE_NUMS_STRIPPED" + | IMAGE_FILE_LOCAL_SYMS_STRIPPED -> "IMAGE_FILE_LOCAL_SYMS_STRIPPED" + | IMAGE_FILE_AGGRESSIVE_WS_TRIM -> "IMAGE_FILE_AGGRESSIVE_WS_TRIM" + | 
pub const IMAGE_FILE_RELOCS_STRIPPED: u16 = 0x0001;
pub const IMAGE_FILE_EXECUTABLE_IMAGE: u16 = 0x0002;
pub const IMAGE_FILE_LINE_NUMS_STRIPPED: u16 = 0x0004;
pub const IMAGE_FILE_LOCAL_SYMS_STRIPPED: u16 = 0x0008;
pub const IMAGE_FILE_AGGRESSIVE_WS_TRIM: u16 = 0x0010;
pub const IMAGE_FILE_LARGE_ADDRESS_AWARE: u16 = 0x0020;
pub const RESERVED: u16 = 0x0040;
pub const IMAGE_FILE_BYTES_REVERSED_LO: u16 = 0x0080;
pub const IMAGE_FILE_32BIT_MACHINE: u16 = 0x0100;
pub const IMAGE_FILE_DEBUG_STRIPPED: u16 = 0x0200;
pub const IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP: u16 = 0x0400;
pub const IMAGE_FILE_NET_RUN_FROM_SWAP: u16 = 0x0800;
pub const IMAGE_FILE_SYSTEM: u16 = 0x1000;
pub const IMAGE_FILE_DLL: u16 = 0x2000;
pub const IMAGE_FILE_UP_SYSTEM_ONLY: u16 = 0x4000;
pub const IMAGE_FILE_BYTES_REVERSED_HI: u16 = 0x8000;

/// Reports whether the `IMAGE_FILE_DLL` bit is set in `characteristics`.
pub fn is_dll(characteristics: u16) -> bool {
    // The flag is a single bit, so "any bit of the mask set" equals "mask fully set".
    (characteristics & IMAGE_FILE_DLL) != 0
}

/// Reports whether the `IMAGE_FILE_EXECUTABLE_IMAGE` bit is set in `characteristics`.
pub fn is_exe(characteristics: u16) -> bool {
    (characteristics & IMAGE_FILE_EXECUTABLE_IMAGE) != 0
}
get_import_table(&self) -> &Option<DataDirectory> { + let idx = 1; + &self.data_directories[idx] + } + pub fn get_resource_table(&self) -> &Option<DataDirectory> { + let idx = 2; + &self.data_directories[idx] + } + pub fn get_exception_table(&self) -> &Option<DataDirectory> { + let idx = 3; + &self.data_directories[idx] + } + pub fn get_certificate_table(&self) -> &Option<DataDirectory> { + let idx = 4; + &self.data_directories[idx] + } + pub fn get_base_relocation_table(&self) -> &Option<DataDirectory> { + let idx = 5; + &self.data_directories[idx] + } + pub fn get_debug_table(&self) -> &Option<DataDirectory> { + let idx = 6; + &self.data_directories[idx] + } + pub fn get_architecture(&self) -> &Option<DataDirectory> { + let idx = 7; + &self.data_directories[idx] + } + pub fn get_global_ptr(&self) -> &Option<DataDirectory> { + let idx = 8; + &self.data_directories[idx] + } + pub fn get_tls_table(&self) -> &Option<DataDirectory> { + let idx = 9; + &self.data_directories[idx] + } + pub fn get_load_config_table(&self) -> &Option<DataDirectory> { + let idx = 10; + &self.data_directories[idx] + } + pub fn get_bound_import_table(&self) -> &Option<DataDirectory> { + let idx = 11; + &self.data_directories[idx] + } + pub fn get_import_address_table(&self) -> &Option<DataDirectory> { + let idx = 12; + &self.data_directories[idx] + } + pub fn get_delay_import_descriptor(&self) -> &Option<DataDirectory> { + let idx = 13; + &self.data_directories[idx] + } + pub fn get_clr_runtime_header(&self) -> &Option<DataDirectory> { + let idx = 14; + &self.data_directories[idx] + } +} diff --git a/third_party/rust/goblin/src/pe/debug.rs b/third_party/rust/goblin/src/pe/debug.rs new file mode 100644 index 0000000000..311bc632cb --- /dev/null +++ b/third_party/rust/goblin/src/pe/debug.rs @@ -0,0 +1,186 @@ +use crate::error; +use scroll::{Pread, Pwrite, SizeWith}; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + 
+#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct DebugData<'a> { + pub image_debug_directory: ImageDebugDirectory, + pub codeview_pdb70_debug_info: Option<CodeviewPDB70DebugInfo<'a>>, +} + +impl<'a> DebugData<'a> { + pub fn parse( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Self> { + Self::parse_with_opts( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Self> { + let image_debug_directory = + ImageDebugDirectory::parse_with_opts(bytes, dd, sections, file_alignment, opts)?; + let codeview_pdb70_debug_info = + CodeviewPDB70DebugInfo::parse_with_opts(bytes, &image_debug_directory, opts)?; + + Ok(DebugData { + image_debug_directory, + codeview_pdb70_debug_info, + }) + } + + /// Return this executable's debugging GUID, suitable for matching against a PDB file. 
+ pub fn guid(&self) -> Option<[u8; 16]> { + self.codeview_pdb70_debug_info.map(|pdb70| pdb70.signature) + } +} + +// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680307(v=vs.85).aspx +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct ImageDebugDirectory { + pub characteristics: u32, + pub time_date_stamp: u32, + pub major_version: u16, + pub minor_version: u16, + pub data_type: u32, + pub size_of_data: u32, + pub address_of_raw_data: u32, + pub pointer_to_raw_data: u32, +} + +pub const IMAGE_DEBUG_TYPE_UNKNOWN: u32 = 0; +pub const IMAGE_DEBUG_TYPE_COFF: u32 = 1; +pub const IMAGE_DEBUG_TYPE_CODEVIEW: u32 = 2; +pub const IMAGE_DEBUG_TYPE_FPO: u32 = 3; +pub const IMAGE_DEBUG_TYPE_MISC: u32 = 4; +pub const IMAGE_DEBUG_TYPE_EXCEPTION: u32 = 5; +pub const IMAGE_DEBUG_TYPE_FIXUP: u32 = 6; +pub const IMAGE_DEBUG_TYPE_BORLAND: u32 = 9; + +impl ImageDebugDirectory { + #[allow(unused)] + fn parse( + bytes: &[u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Self> { + Self::parse_with_opts( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + fn parse_with_opts( + bytes: &[u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Self> { + let rva = dd.virtual_address as usize; + let offset = utils::find_offset(rva, sections, file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!( + "Cannot map ImageDebugDirectory rva {:#x} into offset", + rva + )) + })?; + let idd: Self = bytes.pread_with(offset, scroll::LE)?; + Ok(idd) + } +} + +pub const CODEVIEW_PDB70_MAGIC: u32 = 0x5344_5352; +pub const CODEVIEW_PDB20_MAGIC: u32 = 0x3031_424e; +pub const CODEVIEW_CV50_MAGIC: u32 = 0x3131_424e; +pub const CODEVIEW_CV41_MAGIC: u32 = 0x3930_424e; + +// 
http://llvm.org/doxygen/CVDebugRecord_8h_source.html +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct CodeviewPDB70DebugInfo<'a> { + pub codeview_signature: u32, + pub signature: [u8; 16], + pub age: u32, + pub filename: &'a [u8], +} + +impl<'a> CodeviewPDB70DebugInfo<'a> { + pub fn parse(bytes: &'a [u8], idd: &ImageDebugDirectory) -> error::Result<Option<Self>> { + Self::parse_with_opts(bytes, idd, &options::ParseOptions::default()) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + idd: &ImageDebugDirectory, + opts: &options::ParseOptions, + ) -> error::Result<Option<Self>> { + if idd.data_type != IMAGE_DEBUG_TYPE_CODEVIEW { + // not a codeview debug directory + // that's not an error, but it's not a CodeviewPDB70DebugInfo either + return Ok(None); + } + + // ImageDebugDirectory.pointer_to_raw_data stores a raw offset -- not a virtual offset -- which we can use directly + let mut offset: usize = match opts.resolve_rva { + true => idd.pointer_to_raw_data as usize, + false => idd.address_of_raw_data as usize, + }; + + // calculate how long the eventual filename will be, which doubles as a check of the record size + let filename_length = idd.size_of_data as isize - 24; + if filename_length < 0 { + // the record is too short to be plausible + return Err(error::Error::Malformed(format!( + "ImageDebugDirectory size of data seems wrong: {:?}", + idd.size_of_data + ))); + } + let filename_length = filename_length as usize; + + // check the codeview signature + let codeview_signature: u32 = bytes.gread_with(&mut offset, scroll::LE)?; + if codeview_signature != CODEVIEW_PDB70_MAGIC { + return Ok(None); + } + + // read the rest + let mut signature: [u8; 16] = [0; 16]; + signature.copy_from_slice(bytes.gread_with(&mut offset, 16)?); + let age: u32 = bytes.gread_with(&mut offset, scroll::LE)?; + if let Some(filename) = bytes.get(offset..offset + filename_length) { + Ok(Some(CodeviewPDB70DebugInfo { + codeview_signature, + signature, + age, + 
filename, + })) + } else { + Err(error::Error::Malformed(format!( + "ImageDebugDirectory seems corrupted: {:?}", + idd + ))) + } + } +} diff --git a/third_party/rust/goblin/src/pe/exception.rs b/third_party/rust/goblin/src/pe/exception.rs new file mode 100644 index 0000000000..d26854d3e9 --- /dev/null +++ b/third_party/rust/goblin/src/pe/exception.rs @@ -0,0 +1,1058 @@ +//! Exception handling and stack unwinding for x64. +//! +//! Exception information is exposed via the [`ExceptionData`] structure. If present in a PE file, +//! it contains a list of [`RuntimeFunction`] entries that can be used to get [`UnwindInfo`] for a +//! particular code location. +//! +//! Unwind information contains a list of unwind codes which specify the operations that are +//! necessary to restore registers (including the stack pointer RSP) when unwinding out of a +//! function. +//! +//! Depending on where the instruction pointer lies, there are three strategies to unwind: +//! +//! 1. If the RIP is within an epilog, then control is leaving the function, there can be no +//! exception handler associated with this exception for this function, and the effects of the +//! epilog must be continued to compute the context of the caller function. To determine if the +//! RIP is within an epilog, the code stream from RIP on is examined. If that code stream can be +//! matched to the trailing portion of a legitimate epilog, then it's in an epilog, and the +//! remaining portion of the epilog is simulated, with the context record updated as each +//! instruction is processed. After this, step 1 is repeated. +//! +//! 2. Case b) If the RIP lies within the prologue, then control has not entered the function, +//! there can be no exception handler associated with this exception for this function, and the +//! effects of the prolog must be undone to compute the context of the caller function. The RIP +//! is within the prolog if the distance from the function start to the RIP is less than or +//! 
equal to the prolog size encoded in the unwind info. The effects of the prolog are unwound +//! by scanning forward through the unwind codes array for the first entry with an offset less +//! than or equal to the offset of the RIP from the function start, then undoing the effect of +//! all remaining items in the unwind code array. Step 1 is then repeated. +//! +//! 3. If the RIP is not within a prolog or epilog and the function has an exception handler, then +//! the language-specific handler is called. The handler scans its data and calls filter +//! functions as appropriate. The language-specific handler can return that the exception was +//! handled or that the search is to be continued. It can also initiate an unwind directly. +//! +//! For more information, see [x64 exception handling]. +//! +//! [`ExceptionData`]: struct.ExceptionData.html +//! [`RuntimeFunction`]: struct.RuntimeFunction.html +//! [`UnwindInfo`]: struct.UnwindInfo.html +//! [x64 exception handling]: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2017 + +use core::cmp::Ordering; +use core::fmt; +use core::iter::FusedIterator; + +use scroll::ctx::TryFromCtx; +use scroll::{self, Pread, Pwrite}; + +use crate::error; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + +/// The function has an exception handler that should be called when looking for functions that need +/// to examine exceptions. +const UNW_FLAG_EHANDLER: u8 = 0x01; +/// The function has a termination handler that should be called when unwinding an exception. +const UNW_FLAG_UHANDLER: u8 = 0x02; +/// This unwind info structure is not the primary one for the procedure. Instead, the chained unwind +/// info entry is the contents of a previous `RUNTIME_FUNCTION` entry. If this flag is set, then the +/// `UNW_FLAG_EHANDLER` and `UNW_FLAG_UHANDLER` flags must be cleared. 
Also, the frame register and +/// fixed-stack allocation fields must have the same values as in the primary unwind info. +const UNW_FLAG_CHAININFO: u8 = 0x04; + +/// info == register number +const UWOP_PUSH_NONVOL: u8 = 0; +/// no info, alloc size in next 2 slots +const UWOP_ALLOC_LARGE: u8 = 1; +/// info == size of allocation / 8 - 1 +const UWOP_ALLOC_SMALL: u8 = 2; +/// no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 +const UWOP_SET_FPREG: u8 = 3; +/// info == register number, offset in next slot +const UWOP_SAVE_NONVOL: u8 = 4; +/// info == register number, offset in next 2 slots +const UWOP_SAVE_NONVOL_FAR: u8 = 5; +/// changes the structure of unwind codes to `struct Epilogue`. +/// (was UWOP_SAVE_XMM in version 1, but deprecated and removed) +const UWOP_EPILOG: u8 = 6; +/// reserved +/// (was UWOP_SAVE_XMM_FAR in version 1, but deprecated and removed) +const UWOP_SPARE_CODE: u8 = 7; +/// info == XMM reg number, offset in next slot +const UWOP_SAVE_XMM128: u8 = 8; +/// info == XMM reg number, offset in next 2 slots +const UWOP_SAVE_XMM128_FAR: u8 = 9; +/// info == 0: no error-code, 1: error-code +const UWOP_PUSH_MACHFRAME: u8 = 10; + +/// Size of `RuntimeFunction` entries. +const RUNTIME_FUNCTION_SIZE: usize = 12; +/// Size of unwind code slots. Codes take 1 - 3 slots. +const UNWIND_CODE_SIZE: usize = 2; + +/// An unwind entry for a range of a function. +/// +/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`]. +/// +/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Default, Pread, Pwrite)] +pub struct RuntimeFunction { + /// Function start address. + pub begin_address: u32, + /// Function end address. + pub end_address: u32, + /// Unwind info address. 
/// An x64 register used during unwinding.
///
/// - `0` - `15`: General purpose registers
/// - `17` - `32`: XMM registers
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Register(pub u8);

impl Register {
    /// Maps an XMM register number to its `Register` value (XMM registers start at `17`).
    fn xmm(number: u8) -> Self {
        Self(number + 17)
    }

    /// Returns the x64 register name, or an empty string for an unknown register number.
    pub fn name(self) -> &'static str {
        // Indexed by register number: 16 general purpose registers, `$rip`, 16 XMM registers.
        const NAMES: [&str; 33] = [
            "$rax", "$rcx", "$rdx", "$rbx", "$rsp", "$rbp", "$rsi", "$rdi", "$r8", "$r9", "$r10",
            "$r11", "$r12", "$r13", "$r14", "$r15", "$rip", "$xmm0", "$xmm1", "$xmm2", "$xmm3",
            "$xmm4", "$xmm5", "$xmm6", "$xmm7", "$xmm8", "$xmm9", "$xmm10", "$xmm11", "$xmm12",
            "$xmm13", "$xmm14", "$xmm15",
        ];

        NAMES.get(usize::from(self.0)).copied().unwrap_or("")
    }
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UnwindOperation {
    /// Push a nonvolatile integer register, decrementing `RSP` by 8.
    PushNonVolatile(Register),

    /// Allocate a fixed-size area on the stack. The payload is the size of the allocation in
    /// bytes.
    Alloc(u32),

    /// Establish the frame pointer register by setting the register to some offset of the current
    /// RSP. The use of an offset permits establishing a frame pointer that points to the middle of
    /// the fixed stack allocation, helping code density by allowing more accesses to use short
    /// instruction forms.
    SetFPRegister,

    /// Save a nonvolatile integer register on the stack using a MOV instead of a PUSH. This code is
    /// primarily used for shrink-wrapping, where a nonvolatile register is saved to the stack in a
    /// position that was previously allocated.
    SaveNonVolatile(Register, StackFrameOffset),

    /// Save the lower 64 bits of a nonvolatile XMM register on the stack.
    SaveXMM(Register, StackFrameOffset),

    /// Describes the function epilog.
    ///
    /// This operation has been introduced with unwind info version 2 and is not implemented yet:
    /// the variant carries no data.
    Epilog,

    /// Save all 128 bits of a nonvolatile XMM register on the stack.
    SaveXMM128(Register, StackFrameOffset),

    /// Push a machine frame. This is used to record the effect of a hardware interrupt or
    /// exception. Depending on the error flag, this frame has two different layouts.
    ///
    /// This unwind code always appears in a dummy prolog, which is never actually executed but
    /// instead appears before the real entry point of an interrupt routine, and exists only to
    /// provide a place to simulate the push of a machine frame. This operation records that
    /// simulation, which indicates the machine has conceptually done this:
    ///
    ///  1. Pop RIP return address from top of stack into `temp`
    ///  2. Push `$ss`, old `$rsp`, `$rflags`, `$cs`, `temp`
    ///  3. If error flag is `true`, push the error code
    ///
    /// Without an error code, the pushed frame occupies `40` bytes and is laid out as follows:
    ///
    ///  Offset   | Value
    ///  ---------|--------
    ///  RSP + 32 | `$ss`
    ///  RSP + 24 | old `$rsp`
    ///  RSP + 16 | `$rflags`
    ///  RSP + 8  | `$cs`
    ///  RSP + 0  | `$rip`
    ///
    /// With an error code, the pushed frame occupies `48` bytes and is laid out as follows:
    ///
    ///  Offset   | Value
    ///  ---------|--------
    ///  RSP + 40 | `$ss`
    ///  RSP + 32 | old `$rsp`
    ///  RSP + 24 | `$rflags`
    ///  RSP + 16 | `$cs`
    ///  RSP + 8  | `$rip`
    ///  RSP + 0  | error code
    ///
    /// The payload is the error flag: `true` when an error code was pushed.
    PushMachineFrame(bool),

    /// A reserved operation without effect.
    Noop,
}
/// Decodes a single unwind code, including any extra slots its operation occupies.
impl<'a> TryFromCtx<'a, UnwindOpContext> for UnwindCode {
    type Error = error::Error;

    /// Reads one unwind code from the start of `bytes`.
    ///
    /// The first byte is the prolog offset; the second byte packs the operation code (low
    /// nibble) and the operation info (high nibble). Depending on the operation, one or two
    /// further 16-bit slots are consumed as operand.
    ///
    /// Returns the decoded code together with the number of bytes consumed.
    ///
    /// # Errors
    ///
    /// Returns an error when `bytes` is too short for the operation, when the operation code is
    /// unknown, or when the operation info is invalid for `UWOP_ALLOC_LARGE` /
    /// `UWOP_PUSH_MACHFRAME`.
    #[inline]
    fn try_from_ctx(bytes: &'a [u8], ctx: UnwindOpContext) -> Result<(Self, usize), Self::Error> {
        // Running count of consumed bytes; doubles as the read cursor.
        let mut read = 0;
        let code_offset = bytes.gread_with::<u8>(&mut read, scroll::LE)?;
        let operation = bytes.gread_with::<u8>(&mut read, scroll::LE)?;

        let operation_code = operation & 0xf;
        let operation_info = operation >> 4;

        let operation = match operation_code {
            self::UWOP_PUSH_NONVOL => {
                let register = Register(operation_info);
                UnwindOperation::PushNonVolatile(register)
            }
            self::UWOP_ALLOC_LARGE => {
                // info 0: one slot holding size / 8; info 1: two slots holding the size itself.
                let offset = match operation_info {
                    0 => u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 8,
                    1 => bytes.gread_with::<u32>(&mut read, scroll::LE)?,
                    i => {
                        let msg = format!("invalid op info ({}) for UWOP_ALLOC_LARGE", i);
                        return Err(error::Error::Malformed(msg));
                    }
                };
                UnwindOperation::Alloc(offset)
            }
            self::UWOP_ALLOC_SMALL => {
                // info == size of allocation / 8 - 1
                let offset = u32::from(operation_info) * 8 + 8;
                UnwindOperation::Alloc(offset)
            }
            self::UWOP_SET_FPREG => UnwindOperation::SetFPRegister,
            self::UWOP_SAVE_NONVOL => {
                let register = Register(operation_info);
                // One slot holding the offset scaled down by 8.
                let offset = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 8;
                UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx))
            }
            self::UWOP_SAVE_NONVOL_FAR => {
                let register = Register(operation_info);
                // Two slots holding the unscaled offset.
                let offset = bytes.gread_with::<u32>(&mut read, scroll::LE)?;
                UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx))
            }
            self::UWOP_EPILOG => {
                // The following slot is consumed for every version; only version 1 interprets
                // it (as a 16-byte-scaled offset of the former UWOP_SAVE_XMM).
                let data = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 16;
                if ctx.version == 1 {
                    let register = Register::xmm(operation_info);
                    UnwindOperation::SaveXMM(register, StackFrameOffset::with_ctx(data, ctx))
                } else {
                    // TODO: See https://weekly-geekly.github.io/articles/322956/index.html
                    UnwindOperation::Epilog
                }
            }
            self::UWOP_SPARE_CODE => {
                // The following two slots are consumed for every version.
                let data = bytes.gread_with::<u32>(&mut read, scroll::LE)?;
                if ctx.version == 1 {
                    // NOTE(review): the constant's docs call version-1 code 7
                    // UWOP_SAVE_XMM_FAR, yet it is mapped to `SaveXMM128` rather than
                    // `SaveXMM` -- confirm this is intended.
                    let register = Register::xmm(operation_info);
                    UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(data, ctx))
                } else {
                    UnwindOperation::Noop
                }
            }
            self::UWOP_SAVE_XMM128 => {
                let register = Register::xmm(operation_info);
                // One slot holding the offset scaled down by 16.
                let offset = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 16;
                UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx))
            }
            self::UWOP_SAVE_XMM128_FAR => {
                let register = Register::xmm(operation_info);
                // Two slots holding the unscaled offset.
                let offset = bytes.gread_with::<u32>(&mut read, scroll::LE)?;
                UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx))
            }
            self::UWOP_PUSH_MACHFRAME => {
                // info == 0: no error-code, 1: error-code
                let is_error = match operation_info {
                    0 => false,
                    1 => true,
                    i => {
                        let msg = format!("invalid op info ({}) for UWOP_PUSH_MACHFRAME", i);
                        return Err(error::Error::Malformed(msg));
                    }
                };
                UnwindOperation::PushMachineFrame(is_error)
            }
            op => {
                let msg = format!("unknown unwind op code ({})", op);
                return Err(error::Error::Malformed(msg));
            }
        };

        let code = UnwindCode {
            code_offset,
            operation,
        };

        Ok((code, read))
    }
}
+/// +/// [`UnwindInfo`]: struct.UnwindInfo.html +#[derive(Clone, Debug)] +pub struct UnwindCodeIterator<'a> { + bytes: &'a [u8], + offset: usize, + context: UnwindOpContext, +} + +impl Iterator for UnwindCodeIterator<'_> { + type Item = error::Result<UnwindCode>; + + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.bytes.len() { + return None; + } + + Some(self.bytes.gread_with(&mut self.offset, self.context)) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let upper = (self.bytes.len() - self.offset) / UNWIND_CODE_SIZE; + // the largest codes take up three slots + let lower = (upper + 3 - (upper % 3)) / 3; + (lower, Some(upper)) + } +} + +impl FusedIterator for UnwindCodeIterator<'_> {} + +/// A language-specific handler that is called as part of the search for an exception handler or as +/// part of an unwind. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum UnwindHandler<'a> { + /// The image-relative address of an exception handler and its implementation-defined data. + ExceptionHandler(u32, &'a [u8]), + /// The image-relative address of a termination handler and its implementation-defined data. + TerminationHandler(u32, &'a [u8]), +} + +/// Unwind information for a function or portion of a function. +/// +/// The unwind info structure is used to record the effects a function has on the stack pointer and +/// where the nonvolatile registers are saved on the stack. The unwind codes can be enumerated with +/// [`unwind_codes`]. +/// +/// This unwind info might only be secondary information, and link to a [chained unwind handler]. +/// For unwinding, this link shall be followed until the root unwind info record has been resolved. +/// +/// [`unwind_codes`]: struct.UnwindInfo.html#method.unwind_codes +/// [chained unwind handler]: struct.UnwindInfo.html#structfield.chained_info +#[derive(Clone)] +pub struct UnwindInfo<'a> { + /// Version of this unwind info. + pub version: u8, + + /// Length of the function prolog in bytes. 
+ pub size_of_prolog: u8, + + /// The nonvolatile register used as the frame pointer of this function. + /// + /// If this register is non-zero, all stack frame offsets used in unwind operations are of type + /// `StackFrameOffset::FP`. When loading these offsets, they have to be based off the value of + /// this frame register instead of the conventional RSP. This allows the RSP to be modified. + pub frame_register: Register, + + /// Offset from RSP that is applied to the FP register when it is established. + /// + /// When loading offsets of type `StackFrameOffset::FP` from the stack, this offset has to be + /// subtracted before loading the value since the actual RSP was lower by that amount in the + /// prolog. + pub frame_register_offset: u32, + + /// A record pointing to chained unwind information. + /// + /// If chained unwind info is present, then this unwind info is a secondary one and the linked + /// unwind info contains primary information. Chained info is useful in two situations. First, + /// it is used for noncontiguous code segments. Second, this mechanism is sometimes used to + /// group volatile register saves. + /// + /// The referenced unwind info can itself specify chained unwind information, until it arrives + /// at the root unwind info. Generally, the entire chain should be considered when unwinding. + pub chained_info: Option<RuntimeFunction>, + + /// An exception or termination handler called as part of the unwind. + pub handler: Option<UnwindHandler<'a>>, + + /// A list of unwind codes, sorted descending by code offset. + code_bytes: &'a [u8], +} + +impl<'a> UnwindInfo<'a> { + /// Parses unwind information from the image at the given offset. + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<Self> { + // Read the version and flags fields, which are combined into a single byte. 
+ let version_flags: u8 = bytes.gread_with(&mut offset, scroll::LE)?; + let version = version_flags & 0b111; + let flags = version_flags >> 3; + + if version < 1 || version > 2 { + let msg = format!("unsupported unwind code version ({})", version); + return Err(error::Error::Malformed(msg)); + } + + let size_of_prolog = bytes.gread_with::<u8>(&mut offset, scroll::LE)?; + let count_of_codes = bytes.gread_with::<u8>(&mut offset, scroll::LE)?; + + // Parse the frame register and frame register offset values, that are combined into a + // single byte. + let frame_info = bytes.gread_with::<u8>(&mut offset, scroll::LE)?; + // If nonzero, then the function uses a frame pointer (FP), and this field is the number + // of the nonvolatile register used as the frame pointer. The zero register value does + // not need special casing since it will not be referenced by the unwind operations. + let frame_register = Register(frame_info & 0xf); + // The the scaled offset from RSP that is applied to the FP register when it's + // established. The actual FP register is set to RSP + 16 * this number, allowing + // offsets from 0 to 240. + let frame_register_offset = u32::from((frame_info >> 4) * 16); + + // An array of items that explains the effect of the prolog on the nonvolatile registers and + // RSP. Some unwind codes require more than one slot in the array. + let codes_size = count_of_codes as usize * UNWIND_CODE_SIZE; + let code_bytes = bytes.gread_with(&mut offset, codes_size)?; + + // For alignment purposes, the codes array always has an even number of entries, and the + // final entry is potentially unused. In that case, the array is one longer than indicated + // by the count of unwind codes field. + if count_of_codes % 2 != 0 { + offset += 2; + } + debug_assert!(offset % 4 == 0); + + let mut chained_info = None; + let mut handler = None; + + // If flag UNW_FLAG_CHAININFO is set then the UNWIND_INFO structure ends with three UWORDs. 
+ // These UWORDs represent the RUNTIME_FUNCTION information for the function of the chained + // unwind. + if flags & UNW_FLAG_CHAININFO != 0 { + chained_info = Some(bytes.gread_with(&mut offset, scroll::LE)?); + + // The relative address of the language-specific handler is present in the UNWIND_INFO + // whenever flags UNW_FLAG_EHANDLER or UNW_FLAG_UHANDLER are set. The language-specific + // handler is called as part of the search for an exception handler or as part of an unwind. + } else if flags & (UNW_FLAG_EHANDLER | UNW_FLAG_UHANDLER) != 0 { + let address = bytes.gread_with::<u32>(&mut offset, scroll::LE)?; + let data = &bytes[offset..]; + + handler = Some(if flags & UNW_FLAG_EHANDLER != 0 { + UnwindHandler::ExceptionHandler(address, data) + } else { + UnwindHandler::TerminationHandler(address, data) + }); + } + + Ok(UnwindInfo { + version, + size_of_prolog, + frame_register, + frame_register_offset, + chained_info, + handler, + code_bytes, + }) + } + + /// Returns an iterator over unwind codes in this unwind info. + /// + /// Unwind codes are iterated in descending `code_offset` order suitable for unwinding. If the + /// optional [`chained_info`] is present, codes of that unwind info should be interpreted + /// immediately afterwards. 
+ pub fn unwind_codes(&self) -> UnwindCodeIterator<'a> { + UnwindCodeIterator { + bytes: self.code_bytes, + offset: 0, + context: UnwindOpContext { + version: self.version, + frame_register: self.frame_register, + }, + } + } +} + +impl fmt::Debug for UnwindInfo<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let count_of_codes = self.code_bytes.len() / UNWIND_CODE_SIZE; + + f.debug_struct("UnwindInfo") + .field("version", &self.version) + .field("size_of_prolog", &self.size_of_prolog) + .field("frame_register", &self.frame_register) + .field("frame_register_offset", &self.frame_register_offset) + .field("count_of_codes", &count_of_codes) + .field("chained_info", &self.chained_info) + .field("handler", &self.handler) + .finish() + } +} + +impl<'a> IntoIterator for &'_ UnwindInfo<'a> { + type Item = error::Result<UnwindCode>; + type IntoIter = UnwindCodeIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.unwind_codes() + } +} + +/// Exception handling and stack unwind information for functions in the image. +pub struct ExceptionData<'a> { + bytes: &'a [u8], + offset: usize, + size: usize, + file_alignment: u32, +} + +impl<'a> ExceptionData<'a> { + /// Parses exception data from the image at the given offset. + pub fn parse( + bytes: &'a [u8], + directory: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Self> { + Self::parse_with_opts( + bytes, + directory, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + /// Parses exception data from the image at the given offset. 
+ pub fn parse_with_opts( + bytes: &'a [u8], + directory: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Self> { + let size = directory.size as usize; + + if size % RUNTIME_FUNCTION_SIZE != 0 { + return Err(error::Error::from(scroll::Error::BadInput { + size, + msg: "invalid exception directory table size", + })); + } + + let rva = directory.virtual_address as usize; + let offset = utils::find_offset(rva, sections, file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!("cannot map exception_rva ({:#x}) into offset", rva)) + })?; + + if offset % 4 != 0 { + return Err(error::Error::from(scroll::Error::BadOffset(offset))); + } + + Ok(ExceptionData { + bytes, + offset, + size, + file_alignment, + }) + } + + /// The number of function entries described by this exception data. + pub fn len(&self) -> usize { + self.size / RUNTIME_FUNCTION_SIZE + } + + /// Indicating whether there are functions in this entry. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Iterates all function entries in order of their code offset. + /// + /// To search for a function by relative instruction address, use [`find_function`]. To resolve + /// unwind information, use [`get_unwind_info`]. + /// + /// [`find_function`]: struct.ExceptionData.html#method.find_function + /// [`get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info + pub fn functions(&self) -> RuntimeFunctionIterator<'a> { + RuntimeFunctionIterator { + data: &self.bytes[self.offset..self.offset + self.size], + } + } + + /// Returns the function at the given index. + pub fn get_function(&self, index: usize) -> error::Result<RuntimeFunction> { + self.get_function_by_offset(self.offset + index * RUNTIME_FUNCTION_SIZE) + } + + /// Performs a binary search to find a function entry covering the given RVA relative to the + /// image. 
+ pub fn find_function(&self, rva: u32) -> error::Result<Option<RuntimeFunction>> { + // NB: Binary search implementation copied from std::slice::binary_search_by and adapted. + // Theoretically, there should be nothing that causes parsing runtime functions to fail and + // all access to the bytes buffer is guaranteed to be in range. However, since all other + // functions also return Results, this is much more ergonomic here. + + let mut size = self.len(); + if size == 0 { + return Ok(None); + } + + let mut base = 0; + while size > 1 { + let half = size / 2; + let mid = base + half; + let offset = self.offset + mid * RUNTIME_FUNCTION_SIZE; + let addr = self.bytes.pread_with::<u32>(offset, scroll::LE)?; + base = if addr > rva { base } else { mid }; + size -= half; + } + + let offset = self.offset + base * RUNTIME_FUNCTION_SIZE; + let addr = self.bytes.pread_with::<u32>(offset, scroll::LE)?; + let function = match addr.cmp(&rva) { + Ordering::Less | Ordering::Equal => self.get_function(base)?, + Ordering::Greater if base == 0 => return Ok(None), + Ordering::Greater => self.get_function(base - 1)?, + }; + + if function.end_address > rva { + Ok(Some(function)) + } else { + Ok(None) + } + } + + /// Resolves unwind information for the given function entry. + pub fn get_unwind_info( + &self, + function: RuntimeFunction, + sections: &[section_table::SectionTable], + ) -> error::Result<UnwindInfo<'a>> { + self.get_unwind_info_with_opts(function, sections, &options::ParseOptions::default()) + } + + /// Resolves unwind information for the given function entry. 
+ pub fn get_unwind_info_with_opts( + &self, + mut function: RuntimeFunction, + sections: &[section_table::SectionTable], + opts: &options::ParseOptions, + ) -> error::Result<UnwindInfo<'a>> { + while function.unwind_info_address % 2 != 0 { + let rva = (function.unwind_info_address & !1) as usize; + function = self.get_function_by_rva_with_opts(rva, sections, opts)?; + } + + let rva = function.unwind_info_address as usize; + let offset = + utils::find_offset(rva, sections, self.file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!("cannot map unwind rva ({:#x}) into offset", rva)) + })?; + + UnwindInfo::parse(self.bytes, offset) + } + + #[allow(dead_code)] + fn get_function_by_rva( + &self, + rva: usize, + sections: &[section_table::SectionTable], + ) -> error::Result<RuntimeFunction> { + self.get_function_by_rva_with_opts(rva, sections, &options::ParseOptions::default()) + } + + fn get_function_by_rva_with_opts( + &self, + rva: usize, + sections: &[section_table::SectionTable], + opts: &options::ParseOptions, + ) -> error::Result<RuntimeFunction> { + let offset = + utils::find_offset(rva, sections, self.file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!( + "cannot map exception rva ({:#x}) into offset", + rva + )) + })?; + + self.get_function_by_offset(offset) + } + + #[inline] + fn get_function_by_offset(&self, offset: usize) -> error::Result<RuntimeFunction> { + debug_assert!((offset - self.offset) % RUNTIME_FUNCTION_SIZE == 0); + debug_assert!(offset < self.offset + self.size); + + Ok(self.bytes.pread_with(offset, scroll::LE)?) 
+ } +} + +impl fmt::Debug for ExceptionData<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("ExceptionData") + .field("file_alignment", &self.file_alignment) + .field("offset", &format_args!("{:#x}", self.offset)) + .field("size", &format_args!("{:#x}", self.size)) + .field("len", &self.len()) + .finish() + } +} + +impl<'a> IntoIterator for &'_ ExceptionData<'a> { + type Item = error::Result<RuntimeFunction>; + type IntoIter = RuntimeFunctionIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.functions() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_size_of_runtime_function() { + assert_eq!( + std::mem::size_of::<RuntimeFunction>(), + RUNTIME_FUNCTION_SIZE + ); + } + + // Tests disabled until there is a solution for handling binary test data + // See https://github.com/m4b/goblin/issues/185 + + // macro_rules! microsoft_symbol { + // ($name:literal, $id:literal) => {{ + // use std::fs::File; + // use std::path::Path; + + // let path = Path::new(concat!("cache/", $name)); + // if !path.exists() { + // let url = format!( + // "https://msdl.microsoft.com/download/symbols/{}/{}/{}", + // $name, $id, $name + // ); + + // let mut response = reqwest::get(&url).expect(concat!("get ", $name)); + // let mut target = File::create(path).expect(concat!("create ", $name)); + // response + // .copy_to(&mut target) + // .expect(concat!("download ", $name)); + // } + + // std::fs::read(path).expect(concat!("open ", $name)) + // }}; + // } + + // lazy_static::lazy_static! 
{ + // static ref PE_DATA: Vec<u8> = microsoft_symbol!("WSHTCPIP.DLL", "4a5be0b77000"); + // } + + // #[test] + // fn test_parse() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // assert_eq!(exception_data.len(), 19); + // assert!(!exception_data.is_empty()); + // } + + // #[test] + // fn test_iter_functions() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // let functions: Vec<RuntimeFunction> = exception_data + // .functions() + // .map(|result| result.expect("parse runtime function")) + // .collect(); + + // assert_eq!(functions.len(), 19); + + // let expected = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // assert_eq!(functions[4], expected); + // } + + // #[test] + // fn test_get_function() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // let expected = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // assert_eq!( + // exception_data.get_function(4).expect("find function"), + // expected + // ); + // } + + // #[test] + // fn test_find_function() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // let expected = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // assert_eq!( + // exception_data.find_function(0x1400).expect("find function"), + // Some(expected) + // ); + // } + + // #[test] + // fn test_find_function_none() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // // 0x1d00 is the end address of the last function. 
+ + // assert_eq!( + // exception_data.find_function(0x1d00).expect("find function"), + // None + // ); + // } + + // #[test] + // fn test_get_unwind_info() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // // runtime function #0 directly refers to unwind info + // let rt_function = RuntimeFunction { + // begin_address: 0x1010, + // end_address: 0x1090, + // unwind_info_address: 0x25d8, + // }; + + // let unwind_info = exception_data + // .get_unwind_info(rt_function, &pe.sections) + // .expect("get unwind info"); + + // // Unwind codes just used to assert that the right unwind info was resolved + // let expected = &[4, 98]; + + // assert_eq!(unwind_info.code_bytes, expected); + // } + + // #[test] + // fn test_get_unwind_info_redirect() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // // runtime function #4 has a redirect (unwind_info_address & 1). 
+ // let rt_function = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // let unwind_info = exception_data + // .get_unwind_info(rt_function, &pe.sections) + // .expect("get unwind info"); + + // // Unwind codes just used to assert that the right unwind info was resolved + // let expected = &[ + // 28, 100, 15, 0, 28, 84, 14, 0, 28, 52, 12, 0, 28, 82, 24, 240, 22, 224, 20, 208, 18, + // 192, 16, 112, + // ]; + + // assert_eq!(unwind_info.code_bytes, expected); + // } + + #[test] + fn test_iter_unwind_codes() { + let unwind_info = UnwindInfo { + version: 1, + size_of_prolog: 4, + frame_register: Register(0), + frame_register_offset: 0, + chained_info: None, + handler: None, + code_bytes: &[4, 98], + }; + + let unwind_codes: Vec<UnwindCode> = unwind_info + .unwind_codes() + .map(|result| result.expect("parse unwind code")) + .collect(); + + assert_eq!(unwind_codes.len(), 1); + + let expected = UnwindCode { + code_offset: 4, + operation: UnwindOperation::Alloc(56), + }; + + assert_eq!(unwind_codes[0], expected); + } +} diff --git a/third_party/rust/goblin/src/pe/export.rs b/third_party/rust/goblin/src/pe/export.rs new file mode 100644 index 0000000000..f892c75276 --- /dev/null +++ b/third_party/rust/goblin/src/pe/export.rs @@ -0,0 +1,533 @@ +use alloc::vec::Vec; +use scroll::{Pread, Pwrite}; + +use log::debug; + +use crate::error; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite)] +pub struct ExportDirectoryTable { + pub export_flags: u32, + pub time_date_stamp: u32, + pub major_version: u16, + pub minor_version: u16, + pub name_rva: u32, + pub ordinal_base: u32, + pub address_table_entries: u32, + pub number_of_name_pointers: u32, + pub export_address_table_rva: u32, + pub name_pointer_rva: u32, + pub ordinal_table_rva: u32, +} + +pub const 
SIZEOF_EXPORT_DIRECTORY_TABLE: usize = 40; + +impl ExportDirectoryTable { + pub fn parse(bytes: &[u8], offset: usize) -> error::Result<Self> { + let res = bytes.pread_with(offset, scroll::LE)?; + Ok(res) + } +} + +#[derive(Debug)] +pub enum ExportAddressTableEntry { + ExportRVA(u32), + ForwarderRVA(u32), +} + +pub const SIZEOF_EXPORT_ADDRESS_TABLE_ENTRY: usize = 4; + +pub type ExportAddressTable = Vec<ExportAddressTableEntry>; + +/// Array of rvas into the export name table +/// +/// Export name is defined iff pointer table has pointer to the name +pub type ExportNamePointerTable = Vec<u32>; + +/// Array of indexes into the export address table. +/// +/// Should obey the formula `idx = ordinal - ordinalbase` +pub type ExportOrdinalTable = Vec<u16>; + +#[derive(Debug, Default)] +/// Export data contains the `dll` name which other libraries can import symbols by (two-level namespace), as well as other important indexing data allowing symbol lookups +pub struct ExportData<'a> { + pub name: Option<&'a str>, + pub export_directory_table: ExportDirectoryTable, + pub export_name_pointer_table: ExportNamePointerTable, + pub export_ordinal_table: ExportOrdinalTable, + pub export_address_table: ExportAddressTable, +} + +impl<'a> ExportData<'a> { + pub fn parse( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<ExportData<'a>> { + Self::parse_with_opts( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<ExportData<'a>> { + let export_rva = dd.virtual_address as usize; + let size = dd.size as usize; + debug!("export_rva {:#x} size {:#}", export_rva, size); + let export_offset = utils::find_offset_or( + export_rva, + sections, + 
file_alignment, + opts, + &format!("cannot map export_rva ({:#x}) into offset", export_rva), + )?; + let export_directory_table = + ExportDirectoryTable::parse(bytes, export_offset).map_err(|_| { + error::Error::Malformed(format!( + "cannot parse export_directory_table (offset {:#x})", + export_offset + )) + })?; + let number_of_name_pointers = export_directory_table.number_of_name_pointers as usize; + let address_table_entries = export_directory_table.address_table_entries as usize; + + if number_of_name_pointers > bytes.len() { + return Err(error::Error::BufferTooShort( + number_of_name_pointers, + "name pointers", + )); + } + if address_table_entries > bytes.len() { + return Err(error::Error::BufferTooShort( + address_table_entries, + "address table entries", + )); + } + + let export_name_pointer_table = utils::find_offset( + export_directory_table.name_pointer_rva as usize, + sections, + file_alignment, + opts, + ) + .map_or(vec![], |table_offset| { + let mut offset = table_offset; + let mut table: ExportNamePointerTable = Vec::with_capacity(number_of_name_pointers); + + for _ in 0..number_of_name_pointers { + if let Ok(name_rva) = bytes.gread_with(&mut offset, scroll::LE) { + table.push(name_rva); + } else { + break; + } + } + + table + }); + + let export_ordinal_table = utils::find_offset( + export_directory_table.ordinal_table_rva as usize, + sections, + file_alignment, + opts, + ) + .map_or(vec![], |table_offset| { + let mut offset = table_offset; + let mut table: ExportOrdinalTable = Vec::with_capacity(number_of_name_pointers); + + for _ in 0..number_of_name_pointers { + if let Ok(name_ordinal) = bytes.gread_with(&mut offset, scroll::LE) { + table.push(name_ordinal); + } else { + break; + } + } + + table + }); + + let export_address_table = utils::find_offset( + export_directory_table.export_address_table_rva as usize, + sections, + file_alignment, + opts, + ) + .map_or(vec![], |table_offset| { + let mut offset = table_offset; + let mut table: 
ExportAddressTable = Vec::with_capacity(address_table_entries); + let export_end = export_rva + size; + + for _ in 0..address_table_entries { + if let Ok(func_rva) = bytes.gread_with::<u32>(&mut offset, scroll::LE) { + if utils::is_in_range(func_rva as usize, export_rva, export_end) { + table.push(ExportAddressTableEntry::ForwarderRVA(func_rva)); + } else { + table.push(ExportAddressTableEntry::ExportRVA(func_rva)); + } + } else { + break; + } + } + + table + }); + + let name = utils::find_offset( + export_directory_table.name_rva as usize, + sections, + file_alignment, + opts, + ) + .and_then(|offset| bytes.pread(offset).ok()); + + Ok(ExportData { + name, + export_directory_table, + export_name_pointer_table, + export_ordinal_table, + export_address_table, + }) + } +} + +#[derive(Debug)] +/// PE binaries have two kinds of reexports, either specifying the dll's name, or the ordinal value of the dll +pub enum Reexport<'a> { + DLLName { export: &'a str, lib: &'a str }, + DLLOrdinal { ordinal: usize, lib: &'a str }, +} + +impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for Reexport<'a> { + type Error = crate::error::Error; + #[inline] + fn try_from_ctx(bytes: &'a [u8], _ctx: scroll::Endian) -> Result<(Self, usize), Self::Error> { + let reexport = bytes.pread::<&str>(0)?; + let reexport_len = reexport.len(); + debug!("reexport: {}", &reexport); + for o in 0..reexport_len { + let c: u8 = bytes.pread(o)?; + debug!("reexport offset: {:#x} char: {:#x}", o, c); + if c == b'.' 
{ + let dll: &'a str = bytes.pread_with(0, scroll::ctx::StrCtx::Length(o))?; + debug!("dll: {:?}", &dll); + if o + 1 == reexport_len { + break; + } + let len = reexport_len - o - 1; + let rest: &'a [u8] = bytes.pread_with(o + 1, len)?; + debug!("rest: {:?}", &rest); + if rest[0] == b'#' { + let ordinal = + rest.pread_with::<&str>(1, scroll::ctx::StrCtx::Length(len - 1))?; + let ordinal = ordinal.parse::<u32>().map_err(|_e| { + error::Error::Malformed(format!( + "Cannot parse reexport ordinal from {} bytes", + bytes.len() + )) + })?; + return Ok(( + Reexport::DLLOrdinal { + ordinal: ordinal as usize, + lib: dll, + }, + reexport_len + 1, + )); + } else { + let export = rest.pread_with::<&str>(0, scroll::ctx::StrCtx::Length(len))?; + return Ok((Reexport::DLLName { export, lib: dll }, reexport_len + 1)); + } + } + } + Err(error::Error::Malformed(format!( + "Reexport {:#} is malformed", + reexport + ))) + } +} + +impl<'a> Reexport<'a> { + pub fn parse(bytes: &'a [u8], offset: usize) -> crate::error::Result<Reexport<'a>> { + bytes.pread(offset) + } +} + +#[derive(Debug, Default)] +/// An exported symbol in this binary, contains synthetic data (name offset, etc., are computed) +pub struct Export<'a> { + pub name: Option<&'a str>, + pub offset: Option<usize>, + pub rva: usize, + pub size: usize, + pub reexport: Option<Reexport<'a>>, +} + +#[derive(Debug, Copy, Clone)] +struct ExportCtx<'a> { + pub ptr: u32, + pub idx: usize, + pub sections: &'a [section_table::SectionTable], + pub file_alignment: u32, + pub addresses: &'a ExportAddressTable, + pub ordinals: &'a ExportOrdinalTable, + pub opts: options::ParseOptions, +} + +impl<'a, 'b> scroll::ctx::TryFromCtx<'a, ExportCtx<'b>> for Export<'a> { + type Error = error::Error; + #[inline] + fn try_from_ctx( + bytes: &'a [u8], + ExportCtx { + ptr, + idx, + sections, + file_alignment, + addresses, + ordinals, + opts, + }: ExportCtx<'b>, + ) -> Result<(Self, usize), Self::Error> { + use self::ExportAddressTableEntry::*; + + let 
name = utils::find_offset(ptr as usize, sections, file_alignment, &opts) + .and_then(|offset| bytes.pread::<&str>(offset).ok()); + + if let Some(ordinal) = ordinals.get(idx) { + if let Some(rva) = addresses.get(*ordinal as usize) { + match *rva { + ExportRVA(rva) => { + let rva = rva as usize; + let offset = utils::find_offset(rva, sections, file_alignment, &opts); + Ok(( + Export { + name, + offset, + rva, + reexport: None, + size: 0, + }, + 0, + )) + } + + ForwarderRVA(rva) => { + let rva = rva as usize; + let offset = utils::find_offset_or( + rva, + sections, + file_alignment, + &opts, + &format!( + "cannot map RVA ({:#x}) of export ordinal {} into offset", + rva, ordinal + ), + )?; + let reexport = Reexport::parse(bytes, offset)?; + Ok(( + Export { + name, + offset: Some(offset), + rva, + reexport: Some(reexport), + size: 0, + }, + 0, + )) + } + } + } else { + Err(error::Error::Malformed(format!( + "cannot get RVA of export ordinal {}", + ordinal + ))) + } + } else { + Err(error::Error::Malformed(format!( + "cannot get ordinal of export name entry {}", + idx + ))) + } + } +} + +impl<'a> Export<'a> { + pub fn parse( + bytes: &'a [u8], + export_data: &ExportData, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Vec<Export<'a>>> { + Self::parse_with_opts( + bytes, + export_data, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + export_data: &ExportData, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Vec<Export<'a>>> { + let pointers = &export_data.export_name_pointer_table; + let addresses = &export_data.export_address_table; + let ordinals = &export_data.export_ordinal_table; + + let mut exports = Vec::with_capacity(pointers.len()); + for (idx, &ptr) in pointers.iter().enumerate() { + if let Ok(export) = bytes.pread_with( + 0, + ExportCtx { + ptr, + idx, + sections, + 
file_alignment, + addresses, + ordinals, + opts: *opts, + }, + ) { + exports.push(export); + } + } + + // TODO: sort + compute size + Ok(exports) + } +} + +#[cfg(test)] +mod tests { + use self::data_directories::*; + use super::*; + + static CORKAMI_POCS_PE_EXPORTSDATA_EXE: [u8; 0x400] = [ + 0x4d, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x50, 0x45, 0x00, 0x00, 0x4c, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x02, 0x01, 0x0b, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x60, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0xb0, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x80, 0x03, + 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x20, 0x2a, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, + 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x20, 0x61, 0x73, 0x20, 0x66, 0x61, 0x6b, 0x65, 0x20, + 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x10, 0x00, 0x00, + 0x40, 0x10, 0x00, 0x00, 
0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xa0, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x66, + 0x00, 0x6d, 0x73, 0x76, 0x63, 0x72, 0x74, 0x2e, 0x64, 0x6c, 0x6c, 0x00, 0x65, 0x78, 0x70, + 0x6f, 0x72, 0x74, 0x73, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x65, 0x78, 0x65, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x68, 0x14, 0x10, 0xf0, 0x10, 0xff, 0x15, 0x30, 0x10, 0x00, 0x10, 0x73, 0xc4, 0x04, + 0xc3, 0xbc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + #[test] + fn size_export_directory_table() { + assert_eq!( + ::std::mem::size_of::<ExportDirectoryTable>(), + SIZEOF_EXPORT_DIRECTORY_TABLE + ); + } + + #[test] + fn parse_export_table() { + let data_dirs = + DataDirectories::parse(&CORKAMI_POCS_PE_EXPORTSDATA_EXE[..], 16, &mut 0xb8).unwrap(); + let export_table = data_dirs.get_export_table().unwrap(); + + assert_eq!(export_table.virtual_address, 0x10b0); + assert_eq!(export_table.size, 0x0); + } + + #[test] + fn parse_export_directory() { + let data_dir = ExportDirectoryTable::parse(&CORKAMI_POCS_PE_EXPORTSDATA_EXE[..], 0x2b0); + assert!(data_dir.is_ok()); + + let data_dir = data_dir.unwrap(); + assert_eq!(data_dir.export_flags, 0x0); + assert_eq!(data_dir.time_date_stamp, 0x0); + assert_eq!(data_dir.major_version, 0x0); + assert_eq!(data_dir.minor_version, 0x0); + assert_eq!(data_dir.name_rva, 0x0); + assert_eq!(data_dir.ordinal_base, 0x0); + assert_eq!(data_dir.address_table_entries, 0x4); + assert_eq!(data_dir.number_of_name_pointers, 0x0); + assert_eq!(data_dir.export_address_table_rva, 0x10e0); + assert_eq!(data_dir.name_pointer_rva, 0x0); + assert_eq!(data_dir.ordinal_table_rva, 0x1100); + } +} diff --git a/third_party/rust/goblin/src/pe/header.rs b/third_party/rust/goblin/src/pe/header.rs new file mode 100644 index 0000000000..c1ded9dd9f --- /dev/null +++ b/third_party/rust/goblin/src/pe/header.rs @@ -0,0 +1,312 @@ +use crate::error; 
+use crate::pe::{optional_header, section_table, symbol}; +use crate::strtab; +use alloc::vec::Vec; +use log::debug; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// DOS header present in all PE binaries +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct DosHeader { + /// Magic number: 5a4d + pub signature: u16, + /// Pointer to PE header, always at offset 0x3c + pub pe_pointer: u32, +} + +pub const DOS_MAGIC: u16 = 0x5a4d; +pub const PE_POINTER_OFFSET: u32 = 0x3c; + +impl DosHeader { + pub fn parse(bytes: &[u8]) -> error::Result<Self> { + let signature = bytes.pread_with(0, scroll::LE).map_err(|_| { + error::Error::Malformed(format!("cannot parse DOS signature (offset {:#x})", 0)) + })?; + if signature != DOS_MAGIC { + return Err(error::Error::Malformed(format!( + "DOS header is malformed (signature {:#x})", + signature + ))); + } + let pe_pointer = bytes + .pread_with(PE_POINTER_OFFSET as usize, scroll::LE) + .map_err(|_| { + error::Error::Malformed(format!( + "cannot parse PE header pointer (offset {:#x})", + PE_POINTER_OFFSET + )) + })?; + let pe_signature: u32 = + bytes + .pread_with(pe_pointer as usize, scroll::LE) + .map_err(|_| { + error::Error::Malformed(format!( + "cannot parse PE header signature (offset {:#x})", + pe_pointer + )) + })?; + if pe_signature != PE_MAGIC { + return Err(error::Error::Malformed(format!( + "PE header is malformed (signature {:#x})", + pe_signature + ))); + } + Ok(DosHeader { + signature, + pe_pointer, + }) + } +} + +/// COFF Header +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct CoffHeader { + /// The machine type + pub machine: u16, + pub number_of_sections: u16, + pub time_date_stamp: u32, + pub pointer_to_symbol_table: u32, + pub number_of_symbol_table: u32, + pub size_of_optional_header: u16, + pub characteristics: u16, +} + +pub const SIZEOF_COFF_HEADER: usize = 20; +/// PE\0\0, little endian +pub const PE_MAGIC: u32 
= 0x0000_4550; +pub const SIZEOF_PE_MAGIC: usize = 4; +/// The contents of this field are assumed to be applicable to any machine type +pub const COFF_MACHINE_UNKNOWN: u16 = 0x0; +/// Matsushita AM33 +pub const COFF_MACHINE_AM33: u16 = 0x1d3; +/// x64 +pub const COFF_MACHINE_X86_64: u16 = 0x8664; +/// ARM little endian +pub const COFF_MACHINE_ARM: u16 = 0x1c0; +/// ARM64 little endian +pub const COFF_MACHINE_ARM64: u16 = 0xaa64; +/// ARM Thumb-2 little endian +pub const COFF_MACHINE_ARMNT: u16 = 0x1c4; +/// EFI byte code +pub const COFF_MACHINE_EBC: u16 = 0xebc; +/// Intel 386 or later processors and compatible processors +pub const COFF_MACHINE_X86: u16 = 0x14c; +/// Intel Itanium processor family +pub const COFF_MACHINE_IA64: u16 = 0x200; +/// Mitsubishi M32R little endian +pub const COFF_MACHINE_M32R: u16 = 0x9041; +/// MIPS16 +pub const COFF_MACHINE_MIPS16: u16 = 0x266; +/// MIPS with FPU +pub const COFF_MACHINE_MIPSFPU: u16 = 0x366; +/// MIPS16 with FPU +pub const COFF_MACHINE_MIPSFPU16: u16 = 0x466; +/// Power PC little endian +pub const COFF_MACHINE_POWERPC: u16 = 0x1f0; +/// Power PC with floating point support +pub const COFF_MACHINE_POWERPCFP: u16 = 0x1f1; +/// MIPS little endian +pub const COFF_MACHINE_R4000: u16 = 0x166; +/// RISC-V 32-bit address space +pub const COFF_MACHINE_RISCV32: u16 = 0x5032; +/// RISC-V 64-bit address space +pub const COFF_MACHINE_RISCV64: u16 = 0x5064; +/// RISC-V 128-bit address space +pub const COFF_MACHINE_RISCV128: u16 = 0x5128; +/// Hitachi SH3 +pub const COFF_MACHINE_SH3: u16 = 0x1a2; +/// Hitachi SH3 DSP +pub const COFF_MACHINE_SH3DSP: u16 = 0x1a3; +/// Hitachi SH4 +pub const COFF_MACHINE_SH4: u16 = 0x1a6; +/// Hitachi SH5 +pub const COFF_MACHINE_SH5: u16 = 0x1a8; +/// Thumb +pub const COFF_MACHINE_THUMB: u16 = 0x1c2; +/// MIPS little-endian WCE v2 +pub const COFF_MACHINE_WCEMIPSV2: u16 = 0x169; + +impl CoffHeader { + pub fn parse(bytes: &[u8], offset: &mut usize) -> error::Result<Self> { + Ok(bytes.gread_with(offset, 
scroll::LE)?) + } + + /// Parse the COFF section headers. + /// + /// For COFF, these immediately follow the COFF header. For PE, these immediately follow the + /// optional header. + pub fn sections( + &self, + bytes: &[u8], + offset: &mut usize, + ) -> error::Result<Vec<section_table::SectionTable>> { + let nsections = self.number_of_sections as usize; + + // a section table is at least 40 bytes + if nsections > bytes.len() / 40 { + return Err(error::Error::BufferTooShort(nsections, "sections")); + } + + let mut sections = Vec::with_capacity(nsections); + // Note that if we are handling a BigCoff, the size of the symbol will be different! + let string_table_offset = self.pointer_to_symbol_table as usize + + symbol::SymbolTable::size(self.number_of_symbol_table as usize); + for i in 0..nsections { + let section = + section_table::SectionTable::parse(bytes, offset, string_table_offset as usize)?; + debug!("({}) {:#?}", i, section); + sections.push(section); + } + Ok(sections) + } + + /// Return the COFF symbol table. + pub fn symbols<'a>(&self, bytes: &'a [u8]) -> error::Result<symbol::SymbolTable<'a>> { + let offset = self.pointer_to_symbol_table as usize; + let number = self.number_of_symbol_table as usize; + symbol::SymbolTable::parse(bytes, offset, number) + } + + /// Return the COFF string table. + pub fn strings<'a>(&self, bytes: &'a [u8]) -> error::Result<strtab::Strtab<'a>> { + let mut offset = self.pointer_to_symbol_table as usize + + symbol::SymbolTable::size(self.number_of_symbol_table as usize); + + let length_field_size = core::mem::size_of::<u32>(); + let length = bytes.pread_with::<u32>(offset, scroll::LE)? as usize - length_field_size; + + // The offset needs to be advanced in order to read the strings. + offset += length_field_size; + + Ok(strtab::Strtab::parse(bytes, offset, length, 0)?) 
+ } +} + +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct Header { + pub dos_header: DosHeader, + /// PE Magic: PE\0\0, little endian + pub signature: u32, + pub coff_header: CoffHeader, + pub optional_header: Option<optional_header::OptionalHeader>, +} + +impl Header { + pub fn parse(bytes: &[u8]) -> error::Result<Self> { + let dos_header = DosHeader::parse(&bytes)?; + let mut offset = dos_header.pe_pointer as usize; + let signature = bytes.gread_with(&mut offset, scroll::LE).map_err(|_| { + error::Error::Malformed(format!("cannot parse PE signature (offset {:#x})", offset)) + })?; + let coff_header = CoffHeader::parse(&bytes, &mut offset)?; + let optional_header = if coff_header.size_of_optional_header > 0 { + Some(bytes.pread::<optional_header::OptionalHeader>(offset)?) + } else { + None + }; + Ok(Header { + dos_header, + signature, + coff_header, + optional_header, + }) + } +} + +/// Convert machine to str representation +pub fn machine_to_str(machine: u16) -> &'static str { + match machine { + COFF_MACHINE_UNKNOWN => "UNKNOWN", + COFF_MACHINE_AM33 => "AM33", + COFF_MACHINE_X86_64 => "X86_64", + COFF_MACHINE_ARM => "ARM", + COFF_MACHINE_ARM64 => "ARM64", + COFF_MACHINE_ARMNT => "ARM_NT", + COFF_MACHINE_EBC => "EBC", + COFF_MACHINE_X86 => "X86", + COFF_MACHINE_IA64 => "IA64", + COFF_MACHINE_M32R => "M32R", + COFF_MACHINE_MIPS16 => "MIPS_16", + COFF_MACHINE_MIPSFPU => "MIPS_FPU", + COFF_MACHINE_MIPSFPU16 => "MIPS_FPU_16", + COFF_MACHINE_POWERPC => "POWERPC", + COFF_MACHINE_POWERPCFP => "POWERCFP", + COFF_MACHINE_R4000 => "R4000", + COFF_MACHINE_RISCV32 => "RISC-V_32", + COFF_MACHINE_RISCV64 => "RISC-V_64", + COFF_MACHINE_RISCV128 => "RISC-V_128", + COFF_MACHINE_SH3 => "SH3", + COFF_MACHINE_SH3DSP => "SH3DSP", + COFF_MACHINE_SH4 => "SH4", + COFF_MACHINE_SH5 => "SH5", + COFF_MACHINE_THUMB => "THUMB", + COFF_MACHINE_WCEMIPSV2 => "WCE_MIPS_V2", + _ => "COFF_UNKNOWN", + } +} + +#[cfg(test)] +mod tests { + use super::{machine_to_str, Header, 
COFF_MACHINE_X86, DOS_MAGIC, PE_MAGIC}; + + const CRSS_HEADER: [u8; 688] = [ + 0x4d, 0x5a, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, + 0x00, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd0, 0x00, 0x00, 0x00, 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, + 0x4c, 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, + 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, 0x20, + 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x0d, 0x0d, 0x0a, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0x4a, 0xc3, 0xeb, 0xee, 0x2b, 0xad, + 0xb8, 0xee, 0x2b, 0xad, 0xb8, 0xee, 0x2b, 0xad, 0xb8, 0xee, 0x2b, 0xac, 0xb8, 0xfe, 0x2b, + 0xad, 0xb8, 0x33, 0xd4, 0x66, 0xb8, 0xeb, 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x63, 0xb8, 0xea, + 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x7a, 0xb8, 0xed, 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x64, 0xb8, + 0xef, 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x61, 0xb8, 0xef, 0x2b, 0xad, 0xb8, 0x52, 0x69, 0x63, + 0x68, 0xee, 0x2b, 0xad, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x45, + 0x00, 0x00, 0x4c, 0x01, 0x05, 0x00, 0xd9, 0x8f, 0x15, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xe0, 0x00, 0x02, 0x01, 0x0b, 0x01, 0x0b, 0x00, 0x00, 0x08, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x06, 0x00, 0x03, 0x00, 0x06, 0x00, 0x03, 0x00, 0x06, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0xe4, 0xab, 0x00, 0x00, + 0x01, 0x00, 0x40, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x10, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x1a, 0x00, 0x00, 0xb8, 0x22, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x38, 0x00, 0x00, + 0x00, 0x10, 0x10, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x68, 0x10, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x24, + 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x60, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xc0, 0x2e, 0x69, 0x64, 0x61, + 0x74, 0x61, 0x00, 0x00, 0xf8, 0x01, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x2e, 0x72, 0x73, 0x72, 0x63, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, + 0x42, 0x2e, 0x72, 0x65, 0x6c, 0x6f, 0x63, 0x00, 0x00, 0x86, 0x01, 0x00, 0x00, 0x00, 0x50, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 
/// Common interface over the raw import lookup table entry widths.
///
/// This file implements it for `u32` and `u64`; the method notes below describe what those
/// two implementations do.
pub trait Bitfield<'a>:
    Into<u64>
    + PartialEq
    + Eq
    + LowerHex
    + Debug
    + TryFromCtx<'a, scroll::Endian, Error = scroll::Error>
{
    /// Whether the import-by-ordinal flag (`IMPORT_BY_ORDINAL_32` / `IMPORT_BY_ORDINAL_64`)
    /// is set on this entry.
    fn is_ordinal(&self) -> bool;
    /// The low 16 bits of the entry, interpreted as an ordinal number.
    fn to_ordinal(&self) -> u16;
    /// The entry masked with `IMPORT_RVA_MASK_32` / `IMPORT_RVA_MASK_64`, as an RVA.
    fn to_rva(&self) -> u32;
    /// Size in bytes of one entry (4 for `u32`, 8 for `u64`).
    fn size_of() -> usize;
    /// Whether the entry equals 0; the table parsers in this file stop at such an entry.
    fn is_zero(&self) -> bool;
}
8 + } + fn is_zero(&self) -> bool { + *self == 0 + } +} + +impl<'a> Bitfield<'a> for u32 { + fn is_ordinal(&self) -> bool { + self & IMPORT_BY_ORDINAL_32 == IMPORT_BY_ORDINAL_32 + } + fn to_ordinal(&self) -> u16 { + (0xffff & self) as u16 + } + fn to_rva(&self) -> u32 { + (self & IMPORT_RVA_MASK_32) as u32 + } + fn size_of() -> usize { + 4 + } + fn is_zero(&self) -> bool { + *self == 0 + } +} + +#[derive(Debug, Clone)] +pub struct HintNameTableEntry<'a> { + pub hint: u16, + pub name: &'a str, +} + +impl<'a> HintNameTableEntry<'a> { + fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<Self> { + let offset = &mut offset; + let hint = bytes.gread_with(offset, scroll::LE)?; + let name = bytes.pread::<&'a str>(*offset)?; + Ok(HintNameTableEntry { hint, name }) + } +} + +#[derive(Debug, Clone)] +pub enum SyntheticImportLookupTableEntry<'a> { + OrdinalNumber(u16), + HintNameTableRVA((u32, HintNameTableEntry<'a>)), // [u8; 31] bitfield :/ +} + +pub type ImportLookupTable<'a> = Vec<SyntheticImportLookupTableEntry<'a>>; + +impl<'a> SyntheticImportLookupTableEntry<'a> { + pub fn parse<T: Bitfield<'a>>( + bytes: &'a [u8], + offset: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<ImportLookupTable<'a>> { + Self::parse_with_opts::<T>( + bytes, + offset, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts<T: Bitfield<'a>>( + bytes: &'a [u8], + mut offset: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<ImportLookupTable<'a>> { + let le = scroll::LE; + let offset = &mut offset; + let mut table = Vec::new(); + loop { + let bitfield: T = bytes.gread_with(offset, le)?; + if bitfield.is_zero() { + debug!("imports done"); + break; + } else { + let entry = { + debug!("bitfield {:#x}", bitfield); + use self::SyntheticImportLookupTableEntry::*; + if bitfield.is_ordinal() { + let ordinal = 
bitfield.to_ordinal(); + debug!("importing by ordinal {:#x}", ordinal); + OrdinalNumber(ordinal) + } else { + let rva = bitfield.to_rva(); + let hentry = { + debug!("searching for RVA {:#x}", rva); + if let Some(offset) = + utils::find_offset(rva as usize, sections, file_alignment, opts) + { + debug!("offset {:#x}", offset); + HintNameTableEntry::parse(bytes, offset)? + } else { + warn!("Entry {} has bad RVA: {:#x}", table.len(), rva); + continue; + } + }; + HintNameTableRVA((rva, hentry)) + } + }; + table.push(entry); + } + } + Ok(table) + } +} + +// get until entry is 0 +pub type ImportAddressTable = Vec<u64>; + +#[repr(C)] +#[derive(Debug, Pread, Pwrite, SizeWith)] +pub struct ImportDirectoryEntry { + pub import_lookup_table_rva: u32, + pub time_date_stamp: u32, + pub forwarder_chain: u32, + pub name_rva: u32, + pub import_address_table_rva: u32, +} + +pub const SIZEOF_IMPORT_DIRECTORY_ENTRY: usize = 20; + +impl ImportDirectoryEntry { + pub fn is_null(&self) -> bool { + (self.import_lookup_table_rva == 0) + && (self.time_date_stamp == 0) + && (self.forwarder_chain == 0) + && (self.name_rva == 0) + && (self.import_address_table_rva == 0) + } +} + +#[derive(Debug)] +pub struct SyntheticImportDirectoryEntry<'a> { + pub import_directory_entry: ImportDirectoryEntry, + /// Computed + pub name: &'a str, + /// The import lookup table is a vector of either ordinals, or RVAs + import names + pub import_lookup_table: Option<ImportLookupTable<'a>>, + /// Computed + pub import_address_table: ImportAddressTable, +} + +impl<'a> SyntheticImportDirectoryEntry<'a> { + pub fn parse<T: Bitfield<'a>>( + bytes: &'a [u8], + import_directory_entry: ImportDirectoryEntry, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<SyntheticImportDirectoryEntry<'a>> { + Self::parse_with_opts::<T>( + bytes, + import_directory_entry, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts<T: Bitfield<'a>>( + bytes: 
&'a [u8], + import_directory_entry: ImportDirectoryEntry, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<SyntheticImportDirectoryEntry<'a>> { + const LE: scroll::Endian = scroll::LE; + let name_rva = import_directory_entry.name_rva; + let name = utils::try_name(bytes, name_rva as usize, sections, file_alignment, opts)?; + let import_lookup_table = { + let import_lookup_table_rva = import_directory_entry.import_lookup_table_rva; + let import_address_table_rva = import_directory_entry.import_address_table_rva; + if let Some(import_lookup_table_offset) = utils::find_offset( + import_lookup_table_rva as usize, + sections, + file_alignment, + opts, + ) { + debug!("Synthesizing lookup table imports for {} lib, with import lookup table rva: {:#x}", name, import_lookup_table_rva); + let import_lookup_table = SyntheticImportLookupTableEntry::parse_with_opts::<T>( + bytes, + import_lookup_table_offset, + sections, + file_alignment, + opts, + )?; + debug!( + "Successfully synthesized import lookup table entry from lookup table: {:#?}", + import_lookup_table + ); + Some(import_lookup_table) + } else if let Some(import_address_table_offset) = utils::find_offset( + import_address_table_rva as usize, + sections, + file_alignment, + opts, + ) { + debug!("Synthesizing lookup table imports for {} lib, with import address table rva: {:#x}", name, import_lookup_table_rva); + let import_address_table = SyntheticImportLookupTableEntry::parse_with_opts::<T>( + bytes, + import_address_table_offset, + sections, + file_alignment, + opts, + )?; + debug!( + "Successfully synthesized import lookup table entry from IAT: {:#?}", + import_address_table + ); + Some(import_address_table) + } else { + None + } + }; + + let import_address_table_offset = &mut utils::find_offset( + import_directory_entry.import_address_table_rva as usize, + sections, + file_alignment, + opts, + ) + .ok_or_else(|| { + 
error::Error::Malformed(format!( + "Cannot map import_address_table_rva {:#x} into offset for {}", + import_directory_entry.import_address_table_rva, name + )) + })?; + let mut import_address_table = Vec::new(); + loop { + let import_address = bytes + .gread_with::<T>(import_address_table_offset, LE)? + .into(); + if import_address == 0 { + break; + } else { + import_address_table.push(import_address); + } + } + Ok(SyntheticImportDirectoryEntry { + import_directory_entry, + name, + import_lookup_table, + import_address_table, + }) + } +} + +#[derive(Debug)] +/// Contains a list of synthesized import data for this binary, e.g., which symbols from which libraries it is importing from +pub struct ImportData<'a> { + pub import_data: Vec<SyntheticImportDirectoryEntry<'a>>, +} + +impl<'a> ImportData<'a> { + pub fn parse<T: Bitfield<'a>>( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<ImportData<'a>> { + Self::parse_with_opts::<T>( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts<T: Bitfield<'a>>( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<ImportData<'a>> { + let import_directory_table_rva = dd.virtual_address as usize; + debug!( + "import_directory_table_rva {:#x}", + import_directory_table_rva + ); + let offset = + &mut utils::find_offset(import_directory_table_rva, sections, file_alignment, opts) + .ok_or_else(|| { + error::Error::Malformed(format!( + "Cannot create ImportData; cannot map import_directory_table_rva {:#x} into offset", + import_directory_table_rva + )) + })?; + debug!("import data offset {:#x}", offset); + let mut import_data = Vec::new(); + loop { + let import_directory_entry: ImportDirectoryEntry = + bytes.gread_with(offset, scroll::LE)?; + 
debug!("{:#?}", import_directory_entry); + if import_directory_entry.is_null() { + break; + } else { + let entry = SyntheticImportDirectoryEntry::parse_with_opts::<T>( + bytes, + import_directory_entry, + sections, + file_alignment, + opts, + )?; + debug!("entry {:#?}", entry); + import_data.push(entry); + } + } + debug!("finished ImportData"); + Ok(ImportData { import_data }) + } +} + +#[derive(Debug)] +/// A synthesized symbol import, the name is pre-indexed, and the binary offset is computed, as well as which dll it belongs to +pub struct Import<'a> { + pub name: Cow<'a, str>, + pub dll: &'a str, + pub ordinal: u16, + pub offset: usize, + pub rva: usize, + pub size: usize, +} + +impl<'a> Import<'a> { + pub fn parse<T: Bitfield<'a>>( + _bytes: &'a [u8], + import_data: &ImportData<'a>, + _sections: &[section_table::SectionTable], + ) -> error::Result<Vec<Import<'a>>> { + let mut imports = Vec::new(); + for data in &import_data.import_data { + if let Some(ref import_lookup_table) = data.import_lookup_table { + let dll = data.name; + let import_base = data.import_directory_entry.import_address_table_rva as usize; + debug!("Getting imports from {}", &dll); + for (i, entry) in import_lookup_table.iter().enumerate() { + let offset = import_base + (i * T::size_of()); + use self::SyntheticImportLookupTableEntry::*; + let (rva, name, ordinal) = match *entry { + HintNameTableRVA((rva, ref hint_entry)) => { + // if hint_entry.name = "" && hint_entry.hint = 0 { + // println!("<PE.Import> warning hint/name table rva from {} without hint {:#x}", dll, rva); + // } + (rva, Cow::Borrowed(hint_entry.name), hint_entry.hint) + } + OrdinalNumber(ordinal) => { + let name = format!("ORDINAL {}", ordinal); + (0x0, Cow::Owned(name), ordinal) + } + }; + let import = Import { + name, + ordinal, + dll, + size: T::size_of(), + offset, + rva: rva as usize, + }; + imports.push(import); + } + } + } + Ok(imports) + } +} diff --git a/third_party/rust/goblin/src/pe/mod.rs 
b/third_party/rust/goblin/src/pe/mod.rs new file mode 100644 index 0000000000..5a7337630a --- /dev/null +++ b/third_party/rust/goblin/src/pe/mod.rs @@ -0,0 +1,448 @@ +//! A PE32 and PE32+ parser +//! + +// TODO: panics with unwrap on None for apisetschema.dll, fhuxgraphics.dll and some others + +use alloc::vec::Vec; + +pub mod authenticode; +pub mod certificate_table; +pub mod characteristic; +pub mod data_directories; +pub mod debug; +pub mod exception; +pub mod export; +pub mod header; +pub mod import; +pub mod optional_header; +pub mod options; +pub mod relocation; +pub mod section_table; +pub mod symbol; +pub mod utils; + +use crate::container; +use crate::error; +use crate::strtab; + +use log::debug; + +#[derive(Debug)] +/// An analyzed PE32/PE32+ binary +pub struct PE<'a> { + /// Underlying bytes + bytes: &'a [u8], + authenticode_excluded_sections: Option<authenticode::ExcludedSections>, + /// The PE header + pub header: header::Header, + /// A list of the sections in this PE binary + pub sections: Vec<section_table::SectionTable>, + /// The size of the binary + pub size: usize, + /// The name of this `dll`, if it has one + pub name: Option<&'a str>, + /// Whether this is a `dll` or not + pub is_lib: bool, + /// Whether the binary is 64-bit (PE32+) + pub is_64: bool, + /// the entry point of the binary + pub entry: usize, + /// The binary's RVA, or image base - useful for computing virtual addreses + pub image_base: usize, + /// Data about any exported symbols in this binary (e.g., if it's a `dll`) + pub export_data: Option<export::ExportData<'a>>, + /// Data for any imported symbols, and from which `dll`, etc., in this binary + pub import_data: Option<import::ImportData<'a>>, + /// The list of exported symbols in this binary, contains synthetic information for easier analysis + pub exports: Vec<export::Export<'a>>, + /// The list symbols imported by this binary from other `dll`s + pub imports: Vec<import::Import<'a>>, + /// The list of libraries which this 
binary imports symbols from + pub libraries: Vec<&'a str>, + /// Debug information, if any, contained in the PE header + pub debug_data: Option<debug::DebugData<'a>>, + /// Exception handling and stack unwind information, if any, contained in the PE header + pub exception_data: Option<exception::ExceptionData<'a>>, + /// Certificates present, if any, described by the Certificate Table + pub certificates: certificate_table::CertificateDirectoryTable<'a>, +} + +impl<'a> PE<'a> { + /// Reads a PE binary from the underlying `bytes` + pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + Self::parse_with_opts(bytes, &options::ParseOptions::default()) + } + + /// Reads a PE binary from the underlying `bytes` + pub fn parse_with_opts(bytes: &'a [u8], opts: &options::ParseOptions) -> error::Result<Self> { + let header = header::Header::parse(bytes)?; + let mut authenticode_excluded_sections = None; + + debug!("{:#?}", header); + let optional_header_offset = header.dos_header.pe_pointer as usize + + header::SIZEOF_PE_MAGIC + + header::SIZEOF_COFF_HEADER; + let offset = + &mut (optional_header_offset + header.coff_header.size_of_optional_header as usize); + + let sections = header.coff_header.sections(bytes, offset)?; + let is_lib = characteristic::is_dll(header.coff_header.characteristics); + let mut entry = 0; + let mut image_base = 0; + let mut exports = vec![]; + let mut export_data = None; + let mut name = None; + let mut imports = vec![]; + let mut import_data = None; + let mut libraries = vec![]; + let mut debug_data = None; + let mut exception_data = None; + let mut certificates = Default::default(); + let mut is_64 = false; + if let Some(optional_header) = header.optional_header { + // Sections we are assembling through the parsing, eventually, it will be passed + // to the authenticode_sections attribute of `PE`. 
+ let (checksum, datadir_entry_certtable) = match optional_header.standard_fields.magic { + optional_header::MAGIC_32 => { + let standard_field_offset = + optional_header_offset + optional_header::SIZEOF_STANDARD_FIELDS_32; + let checksum_field_offset = + standard_field_offset + optional_header::OFFSET_WINDOWS_FIELDS_32_CHECKSUM; + ( + checksum_field_offset..checksum_field_offset + 4, + optional_header_offset + 128..optional_header_offset + 136, + ) + } + optional_header::MAGIC_64 => { + let standard_field_offset = + optional_header_offset + optional_header::SIZEOF_STANDARD_FIELDS_64; + let checksum_field_offset = + standard_field_offset + optional_header::OFFSET_WINDOWS_FIELDS_64_CHECKSUM; + + ( + checksum_field_offset..checksum_field_offset + 4, + optional_header_offset + 144..optional_header_offset + 152, + ) + } + magic => { + return Err(error::Error::Malformed(format!( + "Unsupported header magic ({:#x})", + magic + ))) + } + }; + + entry = optional_header.standard_fields.address_of_entry_point as usize; + image_base = optional_header.windows_fields.image_base as usize; + is_64 = optional_header.container()? == container::Container::Big; + debug!( + "entry {:#x} image_base {:#x} is_64: {}", + entry, image_base, is_64 + ); + let file_alignment = optional_header.windows_fields.file_alignment; + if let Some(export_table) = *optional_header.data_directories.get_export_table() { + if let Ok(ed) = export::ExportData::parse_with_opts( + bytes, + export_table, + &sections, + file_alignment, + opts, + ) { + debug!("export data {:#?}", ed); + exports = export::Export::parse_with_opts( + bytes, + &ed, + &sections, + file_alignment, + opts, + )?; + name = ed.name; + debug!("name: {:#?}", name); + export_data = Some(ed); + } + } + debug!("exports: {:#?}", exports); + if let Some(import_table) = *optional_header.data_directories.get_import_table() { + let id = if is_64 { + import::ImportData::parse_with_opts::<u64>( + bytes, + import_table, + &sections, + file_alignment, + opts, + )? 
+ } else { + import::ImportData::parse_with_opts::<u32>( + bytes, + import_table, + &sections, + file_alignment, + opts, + )? + }; + debug!("import data {:#?}", id); + if is_64 { + imports = import::Import::parse::<u64>(bytes, &id, &sections)? + } else { + imports = import::Import::parse::<u32>(bytes, &id, &sections)? + } + libraries = id + .import_data + .iter() + .map(|data| data.name) + .collect::<Vec<&'a str>>(); + libraries.sort(); + libraries.dedup(); + import_data = Some(id); + } + debug!("imports: {:#?}", imports); + if let Some(debug_table) = *optional_header.data_directories.get_debug_table() { + debug_data = Some(debug::DebugData::parse_with_opts( + bytes, + debug_table, + &sections, + file_alignment, + opts, + )?); + } + + if header.coff_header.machine == header::COFF_MACHINE_X86_64 { + // currently only x86_64 is supported + debug!("exception data: {:#?}", exception_data); + if let Some(exception_table) = + *optional_header.data_directories.get_exception_table() + { + exception_data = Some(exception::ExceptionData::parse_with_opts( + bytes, + exception_table, + &sections, + file_alignment, + opts, + )?); + } + } + + let certtable = if let Some(certificate_table) = + *optional_header.data_directories.get_certificate_table() + { + certificates = certificate_table::enumerate_certificates( + bytes, + certificate_table.virtual_address, + certificate_table.size, + )?; + + let start = certificate_table.virtual_address as usize; + let end = start + certificate_table.size as usize; + Some(start..end) + } else { + None + }; + + authenticode_excluded_sections = Some(authenticode::ExcludedSections::new( + checksum, + datadir_entry_certtable, + certtable, + )); + } + Ok(PE { + bytes, + authenticode_excluded_sections, + header, + sections, + size: 0, + name, + is_lib, + is_64, + entry, + image_base, + export_data, + import_data, + exports, + imports, + libraries, + debug_data, + exception_data, + certificates, + }) + } +} + +/// An analyzed COFF object +#[derive(Debug)] +pub struct 
Coff<'a> { + /// The COFF header + pub header: header::CoffHeader, + /// A list of the sections in this COFF binary + pub sections: Vec<section_table::SectionTable>, + /// The COFF symbol table. + pub symbols: symbol::SymbolTable<'a>, + /// The string table. + pub strings: strtab::Strtab<'a>, +} + +impl<'a> Coff<'a> { + /// Reads a COFF object from the underlying `bytes` + pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + let offset = &mut 0; + let header = header::CoffHeader::parse(bytes, offset)?; + debug!("{:#?}", header); + // TODO: maybe parse optional header, but it isn't present for Windows. + *offset += header.size_of_optional_header as usize; + let sections = header.sections(bytes, offset)?; + let symbols = header.symbols(bytes)?; + let strings = header.strings(bytes)?; + Ok(Coff { + header, + sections, + symbols, + strings, + }) + } +} + +#[cfg(test)] +mod tests { + use super::Coff; + use super::PE; + + static INVALID_DOS_SIGNATURE: [u8; 512] = [ + 0x3D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, + 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, + 0x4C, 0xCD, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, + 0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, + 0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x31, 0xE2, 0xB1, 0x67, 0x50, 0x8C, + 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x88, 0xE3, 0x6D, 0x50, + 0x8C, 0xE2, 0x3C, 0x38, 0x8F, 0xE3, 0x62, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x89, 0xE3, 0xE0, + 0x50, 
0x8C, 0xE2, 0xAC, 0x3F, 0x89, 0xE3, 0x42, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x88, 0xE3, + 0x77, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x8F, 0xE3, 0x6E, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x8D, + 0xE3, 0x64, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8D, 0xE2, 0x3F, 0x50, 0x8C, 0xE2, 0xE1, 0x20, + 0x85, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0xE1, 0x20, 0x73, 0xE2, 0x66, 0x50, 0x8C, 0xE2, 0xE1, + 0x20, 0x8E, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0x52, 0x69, 0x63, 0x68, 0x67, 0x50, 0x8C, 0xE2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x50, 0x45, 0x00, 0x00, 0x64, 0x86, 0x07, 0x00, 0x5F, 0x41, 0xFC, 0x5E, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x22, 0x00, 0x0B, 0x02, 0x0E, 0x1A, 0x00, + 0xFC, 0x00, 0x00, 0x00, 0xD6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x14, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0xE0, + 0x68, 0x02, 0x00, 0x03, 0x00, 0x60, 0x81, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xA3, 0x01, 0x00, 0x28, + 0x00, 0x00, 0x00, 0x00, 0xF0, 0x01, 0x00, 0xE0, 0x01, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, + 0x60, 0x0F, 0x00, 0x00, 0x00, 0xC4, 0x01, 0x00, 0xF8, 0x46, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x54, 0x06, 0x00, 0x00, 0xF0, 0x91, 0x01, 0x00, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x92, 0x01, 0x00, 0x30, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 
0x48, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + + static INVALID_PE_SIGNATURE: [u8; 512] = [ + 0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, + 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, + 0x4C, 0xCD, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, + 0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, + 0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x31, 0xE2, 0xB1, 0x67, 0x50, 0x8C, + 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x88, 0xE3, 0x6D, 0x50, + 0x8C, 0xE2, 0x3C, 0x38, 0x8F, 0xE3, 0x62, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x89, 0xE3, 0xE0, + 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x89, 0xE3, 0x42, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x88, 0xE3, + 0x77, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x8F, 0xE3, 0x6E, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x8D, + 0xE3, 0x64, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8D, 0xE2, 0x3F, 0x50, 0x8C, 0xE2, 0xE1, 0x20, + 0x85, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0xE1, 0x20, 0x73, 0xE2, 0x66, 0x50, 0x8C, 0xE2, 0xE1, + 0x20, 0x8E, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0x52, 0x69, 0x63, 0x68, 0x67, 0x50, 0x8C, 0xE2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x50, 0x05, 0x00, 0x00, 0x64, 0x86, 0x07, 0x00, 0x5F, 0x41, 0xFC, 0x5E, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x22, 0x00, 0x0B, 0x02, 0x0E, 0x1A, 0x00, + 0xFC, 0x00, 0x00, 0x00, 0xD6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 
0x14, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0xE0, + 0x68, 0x02, 0x00, 0x03, 0x00, 0x60, 0x81, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xA3, 0x01, 0x00, 0x28, + 0x00, 0x00, 0x00, 0x00, 0xF0, 0x01, 0x00, 0xE0, 0x01, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, + 0x60, 0x0F, 0x00, 0x00, 0x00, 0xC4, 0x01, 0x00, 0xF8, 0x46, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x54, 0x06, 0x00, 0x00, 0xF0, 0x91, 0x01, 0x00, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x92, 0x01, 0x00, 0x30, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x48, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + + // The assembler program used to generate this string is as follows: + // + // bits 64 + // default rel + // segment .text + // global main + // extern ExitProcess + // main: + // xor rax, rax + // call ExitProcess + // + // + // The code can be compiled using nasm (https://nasm.us) with the command below: + // nasm -f win64 <filename>.asm -o <filename>.obj + static COFF_FILE_SINGLE_STRING_IN_STRING_TABLE: [u8; 220] = [ + 0x64, 0x86, 0x1, 0x0, 0xb5, 0x39, 0x91, 0x62, 0x4e, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x3c, 0x0, 0x0, 0x0, 0x44, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x20, 0x0, 0x50, 0x60, 0x48, 0x31, 0xc0, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x4, + 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x4, 0x0, 0x2e, 0x66, 0x69, 0x6c, 0x65, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xfe, 0xff, 0x0, 0x0, 0x67, 0x1, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, + 0x73, 0x2e, 0x61, 0x73, 0x6d, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2e, 0x74, 0x65, 0x78, + 0x74, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x3, 0x1, 0x8, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2e, 0x61, 0x62, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x6d, 0x61, + 0x69, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x10, + 0x0, 0x0, 0x0, 0x45, 0x78, 0x69, 0x74, 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x0, + ]; + + #[test] + fn string_table_excludes_length() { + let coff = Coff::parse(&&COFF_FILE_SINGLE_STRING_IN_STRING_TABLE[..]).unwrap(); + let string_table = coff.strings.to_vec().unwrap(); + + assert!(string_table == vec!["ExitProcess"]); + } + + #[test] + fn symbol_name_excludes_length() { + let coff = Coff::parse(&COFF_FILE_SINGLE_STRING_IN_STRING_TABLE).unwrap(); + let strings = coff.strings; + let symbols = coff + .symbols + .iter() + .filter(|(_, name, _)| name.is_none()) + .map(|(_, _, sym)| sym.name(&strings).unwrap().to_owned()) + .collect::<Vec<_>>(); + assert_eq!(symbols, vec!["ExitProcess"]) + } + + #[test] + fn invalid_dos_header() { + if let Ok(_) = PE::parse(&INVALID_DOS_SIGNATURE) { + panic!("must not parse PE with invalid DOS header"); + } + } + + #[test] + fn invalid_pe_header() { + if let Ok(_) = PE::parse(&INVALID_PE_SIGNATURE) { + panic!("must not parse PE with invalid PE header"); + } + } +} diff --git a/third_party/rust/goblin/src/pe/optional_header.rs b/third_party/rust/goblin/src/pe/optional_header.rs new 
file mode 100644 index 0000000000..b1d3192764 --- /dev/null +++ b/third_party/rust/goblin/src/pe/optional_header.rs @@ -0,0 +1,323 @@ +use crate::container; +use crate::error; + +use crate::pe::data_directories; + +use scroll::{ctx, Endian, LE}; +use scroll::{Pread, Pwrite, SizeWith}; + +/// standard COFF fields +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct StandardFields32 { + pub magic: u16, + pub major_linker_version: u8, + pub minor_linker_version: u8, + pub size_of_code: u32, + pub size_of_initialized_data: u32, + pub size_of_uninitialized_data: u32, + pub address_of_entry_point: u32, + pub base_of_code: u32, + /// absent in 64-bit PE32+ + pub base_of_data: u32, +} + +pub const SIZEOF_STANDARD_FIELDS_32: usize = 28; + +/// standard 64-bit COFF fields +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct StandardFields64 { + pub magic: u16, + pub major_linker_version: u8, + pub minor_linker_version: u8, + pub size_of_code: u32, + pub size_of_initialized_data: u32, + pub size_of_uninitialized_data: u32, + pub address_of_entry_point: u32, + pub base_of_code: u32, +} + +pub const SIZEOF_STANDARD_FIELDS_64: usize = 24; + +/// Unified 32/64-bit COFF fields +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct StandardFields { + pub magic: u16, + pub major_linker_version: u8, + pub minor_linker_version: u8, + pub size_of_code: u64, + pub size_of_initialized_data: u64, + pub size_of_uninitialized_data: u64, + pub address_of_entry_point: u64, + pub base_of_code: u64, + /// absent in 64-bit PE32+ + pub base_of_data: u32, +} + +impl From<StandardFields32> for StandardFields { + fn from(fields: StandardFields32) -> Self { + StandardFields { + magic: fields.magic, + major_linker_version: fields.major_linker_version, + minor_linker_version: fields.minor_linker_version, + size_of_code: u64::from(fields.size_of_code), + size_of_initialized_data: 
u64::from(fields.size_of_initialized_data), + size_of_uninitialized_data: u64::from(fields.size_of_uninitialized_data), + address_of_entry_point: u64::from(fields.address_of_entry_point), + base_of_code: u64::from(fields.base_of_code), + base_of_data: fields.base_of_data, + } + } +} + +impl From<StandardFields64> for StandardFields { + fn from(fields: StandardFields64) -> Self { + StandardFields { + magic: fields.magic, + major_linker_version: fields.major_linker_version, + minor_linker_version: fields.minor_linker_version, + size_of_code: u64::from(fields.size_of_code), + size_of_initialized_data: u64::from(fields.size_of_initialized_data), + size_of_uninitialized_data: u64::from(fields.size_of_uninitialized_data), + address_of_entry_point: u64::from(fields.address_of_entry_point), + base_of_code: u64::from(fields.base_of_code), + base_of_data: 0, + } + } +} + +/// Standard fields magic number for 32-bit binary +pub const MAGIC_32: u16 = 0x10b; +/// Standard fields magic number for 64-bit binary +pub const MAGIC_64: u16 = 0x20b; + +/// Windows specific fields +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct WindowsFields32 { + pub image_base: u32, + pub section_alignment: u32, + pub file_alignment: u32, + pub major_operating_system_version: u16, + pub minor_operating_system_version: u16, + pub major_image_version: u16, + pub minor_image_version: u16, + pub major_subsystem_version: u16, + pub minor_subsystem_version: u16, + pub win32_version_value: u32, + pub size_of_image: u32, + pub size_of_headers: u32, + pub check_sum: u32, + pub subsystem: u16, + pub dll_characteristics: u16, + pub size_of_stack_reserve: u32, + pub size_of_stack_commit: u32, + pub size_of_heap_reserve: u32, + pub size_of_heap_commit: u32, + pub loader_flags: u32, + pub number_of_rva_and_sizes: u32, +} + +pub const SIZEOF_WINDOWS_FIELDS_32: usize = 68; +/// Offset of the `check_sum` field in [`WindowsFields32`] +pub const 
OFFSET_WINDOWS_FIELDS_32_CHECKSUM: usize = 36; + +/// 64-bit Windows specific fields +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct WindowsFields64 { + pub image_base: u64, + pub section_alignment: u32, + pub file_alignment: u32, + pub major_operating_system_version: u16, + pub minor_operating_system_version: u16, + pub major_image_version: u16, + pub minor_image_version: u16, + pub major_subsystem_version: u16, + pub minor_subsystem_version: u16, + pub win32_version_value: u32, + pub size_of_image: u32, + pub size_of_headers: u32, + pub check_sum: u32, + pub subsystem: u16, + pub dll_characteristics: u16, + pub size_of_stack_reserve: u64, + pub size_of_stack_commit: u64, + pub size_of_heap_reserve: u64, + pub size_of_heap_commit: u64, + pub loader_flags: u32, + pub number_of_rva_and_sizes: u32, +} + +pub const SIZEOF_WINDOWS_FIELDS_64: usize = 88; +/// Offset of the `check_sum` field in [`WindowsFields64`] +pub const OFFSET_WINDOWS_FIELDS_64_CHECKSUM: usize = 40; + +// /// Generic 32/64-bit Windows specific fields +// #[derive(Debug, PartialEq, Copy, Clone, Default)] +// pub struct WindowsFields { +// pub image_base: u64, +// pub section_alignment: u32, +// pub file_alignment: u32, +// pub major_operating_system_version: u16, +// pub minor_operating_system_version: u16, +// pub major_image_version: u16, +// pub minor_image_version: u16, +// pub major_subsystem_version: u16, +// pub minor_subsystem_version: u16, +// pub win32_version_value: u32, +// pub size_of_image: u32, +// pub size_of_headers: u32, +// pub check_sum: u32, +// pub subsystem: u16, +// pub dll_characteristics: u16, +// pub size_of_stack_reserve: u64, +// pub size_of_stack_commit: u64, +// pub size_of_heap_reserve: u64, +// pub size_of_heap_commit: u64, +// pub loader_flags: u32, +// pub number_of_rva_and_sizes: u32, +// } + +impl From<WindowsFields32> for WindowsFields { + fn from(windows: WindowsFields32) -> Self { + WindowsFields { + 
image_base: u64::from(windows.image_base), + section_alignment: windows.section_alignment, + file_alignment: windows.file_alignment, + major_operating_system_version: windows.major_operating_system_version, + minor_operating_system_version: windows.minor_operating_system_version, + major_image_version: windows.major_image_version, + minor_image_version: windows.minor_image_version, + major_subsystem_version: windows.major_subsystem_version, + minor_subsystem_version: windows.minor_subsystem_version, + win32_version_value: windows.win32_version_value, + size_of_image: windows.size_of_image, + size_of_headers: windows.size_of_headers, + check_sum: windows.check_sum, + subsystem: windows.subsystem, + dll_characteristics: windows.dll_characteristics, + size_of_stack_reserve: u64::from(windows.size_of_stack_reserve), + size_of_stack_commit: u64::from(windows.size_of_stack_commit), + size_of_heap_reserve: u64::from(windows.size_of_heap_reserve), + size_of_heap_commit: u64::from(windows.size_of_heap_commit), + loader_flags: windows.loader_flags, + number_of_rva_and_sizes: windows.number_of_rva_and_sizes, + } + } +} + +// impl From<WindowsFields32> for WindowsFields { +// fn from(windows: WindowsFields32) -> Self { +// WindowsFields { +// image_base: windows.image_base, +// section_alignment: windows.section_alignment, +// file_alignment: windows.file_alignment, +// major_operating_system_version: windows.major_operating_system_version, +// minor_operating_system_version: windows.minor_operating_system_version, +// major_image_version: windows.major_image_version, +// minor_image_version: windows.minor_image_version, +// major_subsystem_version: windows.major_subsystem_version, +// minor_subsystem_version: windows.minor_subsystem_version, +// win32_version_value: windows.win32_version_value, +// size_of_image: windows.size_of_image, +// size_of_headers: windows.size_of_headers, +// check_sum: windows.check_sum, +// subsystem: windows.subsystem, +// dll_characteristics: 
windows.dll_characteristics, +// size_of_stack_reserve: windows.size_of_stack_reserve, +// size_of_stack_commit: windows.size_of_stack_commit, +// size_of_heap_reserve: windows.size_of_heap_reserve, +// size_of_heap_commit: windows.size_of_heap_commit, +// loader_flags: windows.loader_flags, +// number_of_rva_and_sizes: windows.number_of_rva_and_sizes, +// } +// } +// } + +pub type WindowsFields = WindowsFields64; + +#[derive(Debug, PartialEq, Copy, Clone)] +pub struct OptionalHeader { + pub standard_fields: StandardFields, + pub windows_fields: WindowsFields, + pub data_directories: data_directories::DataDirectories, +} + +impl OptionalHeader { + pub fn container(&self) -> error::Result<container::Container> { + match self.standard_fields.magic { + MAGIC_32 => Ok(container::Container::Little), + MAGIC_64 => Ok(container::Container::Big), + magic => Err(error::Error::BadMagic(u64::from(magic))), + } + } +} + +impl<'a> ctx::TryFromCtx<'a, Endian> for OptionalHeader { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], _: Endian) -> error::Result<(Self, usize)> { + let magic = bytes.pread_with::<u16>(0, LE)?; + let offset = &mut 0; + let (standard_fields, windows_fields): (StandardFields, WindowsFields) = match magic { + MAGIC_32 => { + let standard_fields = bytes.gread_with::<StandardFields32>(offset, LE)?.into(); + let windows_fields = bytes.gread_with::<WindowsFields32>(offset, LE)?.into(); + (standard_fields, windows_fields) + } + MAGIC_64 => { + let standard_fields = bytes.gread_with::<StandardFields64>(offset, LE)?.into(); + let windows_fields = bytes.gread_with::<WindowsFields64>(offset, LE)?; + (standard_fields, windows_fields) + } + _ => return Err(error::Error::BadMagic(u64::from(magic))), + }; + let data_directories = data_directories::DataDirectories::parse( + &bytes, + windows_fields.number_of_rva_and_sizes as usize, + offset, + )?; + Ok(( + OptionalHeader { + standard_fields, + windows_fields, + data_directories, + }, + 0, + )) // 
TODO: FIXME + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn sizeof_standards32() { + assert_eq!( + ::std::mem::size_of::<StandardFields32>(), + SIZEOF_STANDARD_FIELDS_32 + ); + } + #[test] + fn sizeof_windows32() { + assert_eq!( + ::std::mem::size_of::<WindowsFields32>(), + SIZEOF_WINDOWS_FIELDS_32 + ); + } + #[test] + fn sizeof_standards64() { + assert_eq!( + ::std::mem::size_of::<StandardFields64>(), + SIZEOF_STANDARD_FIELDS_64 + ); + } + #[test] + fn sizeof_windows64() { + assert_eq!( + ::std::mem::size_of::<WindowsFields64>(), + SIZEOF_WINDOWS_FIELDS_64 + ); + } +} diff --git a/third_party/rust/goblin/src/pe/options.rs b/third_party/rust/goblin/src/pe/options.rs new file mode 100644 index 0000000000..5fea632f75 --- /dev/null +++ b/third_party/rust/goblin/src/pe/options.rs @@ -0,0 +1,13 @@ +/// Parsing Options structure for the PE parser +#[derive(Debug, Copy, Clone)] +pub struct ParseOptions { + /// Whether the parser should resolve rvas or not. Default: true + pub resolve_rva: bool, +} + +impl ParseOptions { + /// Returns a parse options structure with default values + pub fn default() -> Self { + ParseOptions { resolve_rva: true } + } +} diff --git a/third_party/rust/goblin/src/pe/relocation.rs b/third_party/rust/goblin/src/pe/relocation.rs new file mode 100644 index 0000000000..ab8398ce73 --- /dev/null +++ b/third_party/rust/goblin/src/pe/relocation.rs @@ -0,0 +1,133 @@ +use crate::error; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// Size of a single COFF relocation. +pub const COFF_RELOCATION_SIZE: usize = 10; + +// x86 relocations. + +/// The relocation is ignored. +pub const IMAGE_REL_I386_ABSOLUTE: u16 = 0x0000; +/// Not supported. +pub const IMAGE_REL_I386_DIR16: u16 = 0x0001; +/// Not supported. +pub const IMAGE_REL_I386_REL16: u16 = 0x0002; +/// The target's 32-bit VA. +pub const IMAGE_REL_I386_DIR32: u16 = 0x0006; +/// The target's 32-bit RVA. +pub const IMAGE_REL_I386_DIR32NB: u16 = 0x0007; +/// Not supported. 
+pub const IMAGE_REL_I386_SEG12: u16 = 0x0009; +/// The 16-bit section index of the section that contains the target. +/// +/// This is used to support debugging information. +pub const IMAGE_REL_I386_SECTION: u16 = 0x000A; +/// The 32-bit offset of the target from the beginning of its section. +/// +/// This is used to support debugging information and static thread local storage. +pub const IMAGE_REL_I386_SECREL: u16 = 0x000B; +/// The CLR token. +pub const IMAGE_REL_I386_TOKEN: u16 = 0x000C; +/// A 7-bit offset from the base of the section that contains the target. +pub const IMAGE_REL_I386_SECREL7: u16 = 0x000D; +/// The 32-bit relative displacement to the target. +/// +/// This supports the x86 relative branch and call instructions. +pub const IMAGE_REL_I386_REL32: u16 = 0x0014; + +// x86-64 relocations. + +/// The relocation is ignored. +pub const IMAGE_REL_AMD64_ABSOLUTE: u16 = 0x0000; +/// The 64-bit VA of the relocation target. +pub const IMAGE_REL_AMD64_ADDR64: u16 = 0x0001; +/// The 32-bit VA of the relocation target. +pub const IMAGE_REL_AMD64_ADDR32: u16 = 0x0002; +/// The 32-bit address without an image base (RVA). +pub const IMAGE_REL_AMD64_ADDR32NB: u16 = 0x0003; +/// The 32-bit relative address from the byte following the relocation. +pub const IMAGE_REL_AMD64_REL32: u16 = 0x0004; +/// The 32-bit address relative to byte distance 1 from the relocation. +pub const IMAGE_REL_AMD64_REL32_1: u16 = 0x0005; +/// The 32-bit address relative to byte distance 2 from the relocation. +pub const IMAGE_REL_AMD64_REL32_2: u16 = 0x0006; +/// The 32-bit address relative to byte distance 3 from the relocation. +pub const IMAGE_REL_AMD64_REL32_3: u16 = 0x0007; +/// The 32-bit address relative to byte distance 4 from the relocation. +pub const IMAGE_REL_AMD64_REL32_4: u16 = 0x0008; +/// The 32-bit address relative to byte distance 5 from the relocation. 
+pub const IMAGE_REL_AMD64_REL32_5: u16 = 0x0009; +/// The 16-bit section index of the section that contains the target. +/// +/// This is used to support debugging information. +pub const IMAGE_REL_AMD64_SECTION: u16 = 0x000A; +/// The 32-bit offset of the target from the beginning of its section. +/// +/// This is used to support debugging information and static thread local storage. +pub const IMAGE_REL_AMD64_SECREL: u16 = 0x000B; +/// A 7-bit unsigned offset from the base of the section that contains the target. +pub const IMAGE_REL_AMD64_SECREL7: u16 = 0x000C; +/// CLR tokens. +pub const IMAGE_REL_AMD64_TOKEN: u16 = 0x000D; +/// A 32-bit signed span-dependent value emitted into the object. +pub const IMAGE_REL_AMD64_SREL32: u16 = 0x000E; +/// A pair that must immediately follow every span-dependent value. +pub const IMAGE_REL_AMD64_PAIR: u16 = 0x000F; +/// A 32-bit signed span-dependent value that is applied at link time. +pub const IMAGE_REL_AMD64_SSPAN32: u16 = 0x0010; + +/// A COFF relocation. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Relocation { + /// The address of the item to which relocation is applied. + /// + /// This is the offset from the beginning of the section, plus the + /// value of the section's `virtual_address` field. + pub virtual_address: u32, + /// A zero-based index into the symbol table. + /// + /// This symbol gives the address that is to be used for the relocation. If the specified + /// symbol has section storage class, then the symbol's address is the address with the + /// first section of the same name. + pub symbol_table_index: u32, + /// A value that indicates the kind of relocation that should be performed. + /// + /// Valid relocation types depend on machine type. + pub typ: u16, +} + +/// An iterator for COFF relocations. 
+#[derive(Default)] +pub struct Relocations<'a> { + offset: usize, + relocations: &'a [u8], +} + +impl<'a> Relocations<'a> { + /// Parse a COFF relocation table at the given offset. + /// + /// The offset and number of relocations should be from the COFF section header. + pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<Relocations<'a>> { + let relocations = bytes.pread_with(offset, number * COFF_RELOCATION_SIZE)?; + Ok(Relocations { + offset: 0, + relocations, + }) + } +} + +impl<'a> Iterator for Relocations<'a> { + type Item = Relocation; + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.relocations.len() { + None + } else { + Some( + self.relocations + .gread_with(&mut self.offset, scroll::LE) + .unwrap(), + ) + } + } +} diff --git a/third_party/rust/goblin/src/pe/section_table.rs b/third_party/rust/goblin/src/pe/section_table.rs new file mode 100644 index 0000000000..4491827f16 --- /dev/null +++ b/third_party/rust/goblin/src/pe/section_table.rs @@ -0,0 +1,279 @@ +use crate::error::{self, Error}; +use crate::pe::relocation; +use alloc::string::{String, ToString}; +use scroll::{ctx, Pread, Pwrite}; + +#[repr(C)] +#[derive(Debug, PartialEq, Clone, Default)] +pub struct SectionTable { + pub name: [u8; 8], + pub real_name: Option<String>, + pub virtual_size: u32, + pub virtual_address: u32, + pub size_of_raw_data: u32, + pub pointer_to_raw_data: u32, + pub pointer_to_relocations: u32, + pub pointer_to_linenumbers: u32, + pub number_of_relocations: u16, + pub number_of_linenumbers: u16, + pub characteristics: u32, +} + +pub const SIZEOF_SECTION_TABLE: usize = 8 * 5; + +// Based on https://github.com/llvm-mirror/llvm/blob/af7b1832a03ab6486c42a40d21695b2c03b2d8a3/lib/Object/COFFObjectFile.cpp#L70 +// Decodes a string table entry in base 64 (//AAAAAA). Expects string without +// prefixed slashes. 
+fn base64_decode_string_entry(s: &str) -> Result<usize, ()> { + assert!(s.len() <= 6, "String too long, possible overflow."); + + let mut val = 0; + for c in s.bytes() { + let v = if b'A' <= c && c <= b'Z' { + // 00..=25 + c - b'A' + } else if b'a' <= c && c <= b'z' { + // 26..=51 + c - b'a' + 26 + } else if b'0' <= c && c <= b'9' { + // 52..=61 + c - b'0' + 52 + } else if c == b'+' { + // 62 + 62 + } else if c == b'/' { + // 63 + 63 + } else { + return Err(()); + }; + val = val * 64 + v as usize; + } + Ok(val) +} + +impl SectionTable { + pub fn parse( + bytes: &[u8], + offset: &mut usize, + string_table_offset: usize, + ) -> error::Result<Self> { + let mut table = SectionTable::default(); + let mut name = [0u8; 8]; + name.copy_from_slice(bytes.gread_with(offset, 8)?); + + table.name = name; + table.virtual_size = bytes.gread_with(offset, scroll::LE)?; + table.virtual_address = bytes.gread_with(offset, scroll::LE)?; + table.size_of_raw_data = bytes.gread_with(offset, scroll::LE)?; + table.pointer_to_raw_data = bytes.gread_with(offset, scroll::LE)?; + table.pointer_to_relocations = bytes.gread_with(offset, scroll::LE)?; + table.pointer_to_linenumbers = bytes.gread_with(offset, scroll::LE)?; + table.number_of_relocations = bytes.gread_with(offset, scroll::LE)?; + table.number_of_linenumbers = bytes.gread_with(offset, scroll::LE)?; + table.characteristics = bytes.gread_with(offset, scroll::LE)?; + + if let Some(idx) = table.name_offset()? 
{ + table.real_name = Some(bytes.pread::<&str>(string_table_offset + idx)?.to_string()); + } + Ok(table) + } + + pub fn name_offset(&self) -> error::Result<Option<usize>> { + // Based on https://github.com/llvm-mirror/llvm/blob/af7b1832a03ab6486c42a40d21695b2c03b2d8a3/lib/Object/COFFObjectFile.cpp#L1054 + if self.name[0] == b'/' { + let idx: usize = if self.name[1] == b'/' { + let b64idx = self.name.pread::<&str>(2)?; + base64_decode_string_entry(b64idx).map_err(|_| { + Error::Malformed(format!( + "Invalid indirect section name //{}: base64 decoding failed", + b64idx + )) + })? + } else { + let name = self.name.pread::<&str>(1)?; + name.parse().map_err(|err| { + Error::Malformed(format!("Invalid indirect section name /{}: {}", name, err)) + })? + }; + Ok(Some(idx)) + } else { + Ok(None) + } + } + + #[allow(clippy::useless_let_if_seq)] + pub fn set_name_offset(&mut self, mut idx: usize) -> error::Result<()> { + if idx <= 9_999_999 { + // 10^7 - 1 + // write!(&mut self.name[1..], "{}", idx) without using io::Write. + // We write into a temporary since we calculate digits starting at the right. 
+ let mut name = [0; 7]; + let mut len = 0; + if idx == 0 { + name[6] = b'0'; + len = 1; + } else { + while idx != 0 { + let rem = (idx % 10) as u8; + idx /= 10; + name[6 - len] = b'0' + rem; + len += 1; + } + } + self.name = [0; 8]; + self.name[0] = b'/'; + self.name[1..][..len].copy_from_slice(&name[7 - len..]); + Ok(()) + } else if idx as u64 <= 0xfff_fff_fff { + // 64^6 - 1 + self.name[0] = b'/'; + self.name[1] = b'/'; + for i in 0..6 { + let rem = (idx % 64) as u8; + idx /= 64; + let c = match rem { + 0..=25 => b'A' + rem, + 26..=51 => b'a' + rem - 26, + 52..=61 => b'0' + rem - 52, + 62 => b'+', + 63 => b'/', + _ => unreachable!(), + }; + self.name[7 - i] = c; + } + Ok(()) + } else { + Err(Error::Malformed(format!( + "Invalid section name offset: {}", + idx + ))) + } + } + + pub fn name(&self) -> error::Result<&str> { + match self.real_name.as_ref() { + Some(s) => Ok(s), + None => Ok(self.name.pread(0)?), + } + } + + pub fn relocations<'a>(&self, bytes: &'a [u8]) -> error::Result<relocation::Relocations<'a>> { + let offset = self.pointer_to_relocations as usize; + let number = self.number_of_relocations as usize; + relocation::Relocations::parse(bytes, offset, number) + } +} + +impl ctx::SizeWith<scroll::Endian> for SectionTable { + fn size_with(_ctx: &scroll::Endian) -> usize { + SIZEOF_SECTION_TABLE + } +} + +impl ctx::TryIntoCtx<scroll::Endian> for SectionTable { + type Error = error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) -> Result<usize, Self::Error> { + let offset = &mut 0; + bytes.gwrite(&self.name[..], offset)?; + bytes.gwrite_with(self.virtual_size, offset, ctx)?; + bytes.gwrite_with(self.virtual_address, offset, ctx)?; + bytes.gwrite_with(self.size_of_raw_data, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_raw_data, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_relocations, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_linenumbers, offset, ctx)?; + bytes.gwrite_with(self.number_of_relocations, offset, 
ctx)?; + bytes.gwrite_with(self.number_of_linenumbers, offset, ctx)?; + bytes.gwrite_with(self.characteristics, offset, ctx)?; + Ok(SIZEOF_SECTION_TABLE) + } +} + +impl ctx::IntoCtx<scroll::Endian> for SectionTable { + fn into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +/// The section should not be padded to the next boundary. This flag is obsolete and is replaced +/// by `IMAGE_SCN_ALIGN_1BYTES`. This is valid only for object files. +pub const IMAGE_SCN_TYPE_NO_PAD: u32 = 0x0000_0008; +/// The section contains executable code. +pub const IMAGE_SCN_CNT_CODE: u32 = 0x0000_0020; +/// The section contains initialized data. +pub const IMAGE_SCN_CNT_INITIALIZED_DATA: u32 = 0x0000_0040; +/// The section contains uninitialized data. +pub const IMAGE_SCN_CNT_UNINITIALIZED_DATA: u32 = 0x0000_0080; +pub const IMAGE_SCN_LNK_OTHER: u32 = 0x0000_0100; +/// The section contains comments or other information. The .drectve section has this type. +/// This is valid for object files only. +pub const IMAGE_SCN_LNK_INFO: u32 = 0x0000_0200; +/// The section will not become part of the image. This is valid only for object files. +pub const IMAGE_SCN_LNK_REMOVE: u32 = 0x0000_0800; +/// The section contains COMDAT data. This is valid only for object files. +pub const IMAGE_SCN_LNK_COMDAT: u32 = 0x0000_1000; +/// The section contains data referenced through the global pointer (GP). 
pub const IMAGE_SCN_GPREL: u32 = 0x0000_8000;
// NOTE: `IMAGE_SCN_MEM_PURGEABLE` and `IMAGE_SCN_MEM_16BIT` have the same value, so the two
// cannot be told apart when testing a `characteristics` word.
pub const IMAGE_SCN_MEM_PURGEABLE: u32 = 0x0002_0000;
pub const IMAGE_SCN_MEM_16BIT: u32 = 0x0002_0000;
pub const IMAGE_SCN_MEM_LOCKED: u32 = 0x0004_0000;
pub const IMAGE_SCN_MEM_PRELOAD: u32 = 0x0008_0000;

// Alignment encodings. These are consecutive values (0x1..=0xE) of the 4-bit field selected
// by `IMAGE_SCN_ALIGN_MASK`, not independent bit flags: several of them share bits, so compare
// `characteristics & IMAGE_SCN_ALIGN_MASK` against a constant for equality instead of
// testing individual bits.
pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x0010_0000;
pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x0020_0000;
pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x0030_0000;
pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x0040_0000;
pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x0050_0000;
pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x0060_0000;
pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x0070_0000;
pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x0080_0000;
pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x0090_0000;
pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0x00A0_0000;
pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0x00B0_0000;
pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0x00C0_0000;
pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0x00D0_0000;
pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0x00E0_0000;
/// Mask selecting the alignment field that holds one of the `IMAGE_SCN_ALIGN_*` values.
pub const IMAGE_SCN_ALIGN_MASK: u32 = 0x00F0_0000;

/// The section contains extended relocations.
pub const IMAGE_SCN_LNK_NRELOC_OVFL: u32 = 0x0100_0000;
/// The section can be discarded as needed.
pub const IMAGE_SCN_MEM_DISCARDABLE: u32 = 0x0200_0000;
/// The section cannot be cached.
pub const IMAGE_SCN_MEM_NOT_CACHED: u32 = 0x0400_0000;
/// The section is not pageable.
pub const IMAGE_SCN_MEM_NOT_PAGED: u32 = 0x0800_0000;
/// The section can be shared in memory.
pub const IMAGE_SCN_MEM_SHARED: u32 = 0x1000_0000;
/// The section can be executed as code.
pub const IMAGE_SCN_MEM_EXECUTE: u32 = 0x2000_0000;
/// The section can be read.
pub const IMAGE_SCN_MEM_READ: u32 = 0x4000_0000;
/// The section can be written to.
+pub const IMAGE_SCN_MEM_WRITE: u32 = 0x8000_0000; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn set_name_offset() { + let mut section = SectionTable::default(); + for &(offset, name) in [ + (0usize, b"/0\0\0\0\0\0\0"), + (1, b"/1\0\0\0\0\0\0"), + (9_999_999, b"/9999999"), + (10_000_000, b"//AAmJaA"), + #[cfg(target_pointer_width = "64")] + (0xfff_fff_fff, b"////////"), + ] + .iter() + { + section.set_name_offset(offset).unwrap(); + assert_eq!(§ion.name, name); + assert_eq!(section.name_offset().unwrap(), Some(offset)); + } + #[cfg(target_pointer_width = "64")] + assert!(section.set_name_offset(0x1_000_000_000).is_err()); + } +} diff --git a/third_party/rust/goblin/src/pe/symbol.rs b/third_party/rust/goblin/src/pe/symbol.rs new file mode 100644 index 0000000000..b2ced808a1 --- /dev/null +++ b/third_party/rust/goblin/src/pe/symbol.rs @@ -0,0 +1,516 @@ +use crate::error; +use crate::strtab; +use alloc::vec::Vec; +use core::fmt::{self, Debug}; +use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// Size of a single symbol in the COFF Symbol Table. +pub const COFF_SYMBOL_SIZE: usize = 18; + +// Values for `Symbol::section_number`. + +/// The symbol record is not yet assigned a section. A `value` of zero +/// indicates that a reference to an external symbol is defined elsewhere. +/// A `value` of non-zero is a common symbol with a size that is specified by the `value`. +pub const IMAGE_SYM_UNDEFINED: i16 = 0; +/// The symbol has an absolute (non-relocatable) `value` and is not an address. +pub const IMAGE_SYM_ABSOLUTE: i16 = -1; +/// The symbol provides general type or debugging information but does not +/// correspond to a section. +pub const IMAGE_SYM_DEBUG: i16 = -2; + +// Base types for `Symbol::typ`. + +/// No type information or unknown base type. 
Microsoft tools use this setting +pub const IMAGE_SYM_TYPE_NULL: u16 = 0; +/// No valid type; used with void pointers and functions +pub const IMAGE_SYM_TYPE_VOID: u16 = 1; +/// A character (signed byte) +pub const IMAGE_SYM_TYPE_CHAR: u16 = 2; +/// A 2-byte signed integer +pub const IMAGE_SYM_TYPE_SHORT: u16 = 3; +/// A natural integer type (normally 4 bytes in Windows) +pub const IMAGE_SYM_TYPE_INT: u16 = 4; +/// A 4-byte signed integer +pub const IMAGE_SYM_TYPE_LONG: u16 = 5; +/// A 4-byte floating-point number +pub const IMAGE_SYM_TYPE_FLOAT: u16 = 6; +/// An 8-byte floating-point number +pub const IMAGE_SYM_TYPE_DOUBLE: u16 = 7; +/// A structure +pub const IMAGE_SYM_TYPE_STRUCT: u16 = 8; +/// A union +pub const IMAGE_SYM_TYPE_UNION: u16 = 9; +/// An enumerated type +pub const IMAGE_SYM_TYPE_ENUM: u16 = 10; +/// A member of enumeration (a specific value) +pub const IMAGE_SYM_TYPE_MOE: u16 = 11; +/// A byte; unsigned 1-byte integer +pub const IMAGE_SYM_TYPE_BYTE: u16 = 12; +/// A word; unsigned 2-byte integer +pub const IMAGE_SYM_TYPE_WORD: u16 = 13; +/// An unsigned integer of natural size (normally, 4 bytes) +pub const IMAGE_SYM_TYPE_UINT: u16 = 14; +/// An unsigned 4-byte integer +pub const IMAGE_SYM_TYPE_DWORD: u16 = 15; + +// Derived types for `Symbol::typ`. + +/// No derived type; the symbol is a simple scalar variable. +pub const IMAGE_SYM_DTYPE_NULL: u16 = 0; +/// The symbol is a pointer to base type. +pub const IMAGE_SYM_DTYPE_POINTER: u16 = 1; +/// The symbol is a function that returns a base type. +pub const IMAGE_SYM_DTYPE_FUNCTION: u16 = 2; +/// The symbol is an array of base type. +pub const IMAGE_SYM_DTYPE_ARRAY: u16 = 3; + +pub const IMAGE_SYM_TYPE_MASK: u16 = 0xf; +pub const IMAGE_SYM_DTYPE_SHIFT: usize = 4; + +// Values for `Symbol::storage_class`. + +/// A special symbol that represents the end of function, for debugging purposes. +pub const IMAGE_SYM_CLASS_END_OF_FUNCTION: u8 = 0xff; +/// No assigned storage class. 
+pub const IMAGE_SYM_CLASS_NULL: u8 = 0; +/// The automatic (stack) variable. +/// +/// The `value` field specifies the stack frame offset. +pub const IMAGE_SYM_CLASS_AUTOMATIC: u8 = 1; +/// A value that Microsoft tools use for external symbols. +/// +/// The `value` field indicates the size if the section number is +/// `IMAGE_SYM_UNDEFINED` (0). If the section number is not zero, +/// then the `value` field specifies the offset within the section. +pub const IMAGE_SYM_CLASS_EXTERNAL: u8 = 2; +/// A static symbol. +/// +/// The 'value' field specifies the offset of the symbol within the section. +/// If the `value` field is zero, then the symbol represents a section name. +pub const IMAGE_SYM_CLASS_STATIC: u8 = 3; +/// A register variable. +/// +/// The `value` field specifies the register number. +pub const IMAGE_SYM_CLASS_REGISTER: u8 = 4; +/// A symbol that is defined externally. +pub const IMAGE_SYM_CLASS_EXTERNAL_DEF: u8 = 5; +/// A code label that is defined within the module. +/// +/// The `value` field specifies the offset of the symbol within the section. +pub const IMAGE_SYM_CLASS_LABEL: u8 = 6; +/// A reference to a code label that is not defined. +pub const IMAGE_SYM_CLASS_UNDEFINED_LABEL: u8 = 7; +/// The structure member. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_STRUCT: u8 = 8; +/// A formal argument (parameter) of a function. +/// +/// The `value` field specifies the n th argument. +pub const IMAGE_SYM_CLASS_ARGUMENT: u8 = 9; +/// The structure tag-name entry. +pub const IMAGE_SYM_CLASS_STRUCT_TAG: u8 = 10; +/// A union member. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_UNION: u8 = 11; +/// The Union tag-name entry. +pub const IMAGE_SYM_CLASS_UNION_TAG: u8 = 12; +/// A Typedef entry. +pub const IMAGE_SYM_CLASS_TYPE_DEFINITION: u8 = 13; +/// A static data declaration. 
+pub const IMAGE_SYM_CLASS_UNDEFINED_STATIC: u8 = 14; +/// An enumerated type tagname entry. +pub const IMAGE_SYM_CLASS_ENUM_TAG: u8 = 15; +/// A member of an enumeration. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_ENUM: u8 = 16; +/// A register parameter. +pub const IMAGE_SYM_CLASS_REGISTER_PARAM: u8 = 17; +/// A bit-field reference. +/// +/// The `value` field specifies the n th bit in the bit field. +pub const IMAGE_SYM_CLASS_BIT_FIELD: u8 = 18; +/// A .bb (beginning of block) or .eb (end of block) record. +/// +/// The `value` field is the relocatable address of the code location. +pub const IMAGE_SYM_CLASS_BLOCK: u8 = 100; +/// A value that Microsoft tools use for symbol records that define the extent of a function. +/// +/// Records may be begin function (.bf ), end function ( .ef ), and lines in function ( .lf ). +/// For .lf records, the `value` field gives the number of source lines in the function. +/// For .ef records, the `value` field gives the size of the function code. +pub const IMAGE_SYM_CLASS_FUNCTION: u8 = 101; +/// An end-of-structure entry. +pub const IMAGE_SYM_CLASS_END_OF_STRUCT: u8 = 102; +/// The source-file symbol record. +/// +/// The symbol is followed by auxiliary records that name the file. +pub const IMAGE_SYM_CLASS_FILE: u8 = 103; +/// A definition of a section (Microsoft tools use STATIC storage class instead). +pub const IMAGE_SYM_CLASS_SECTION: u8 = 104; +/// A weak external. +pub const IMAGE_SYM_CLASS_WEAK_EXTERNAL: u8 = 105; +/// A CLR token symbol. +/// +/// The name is an ASCII string that consists of the hexadecimal value of the token. +pub const IMAGE_SYM_CLASS_CLR_TOKEN: u8 = 107; + +/// A COFF symbol. +/// +/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`]. 
+/// +/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Symbol { + /// The name of the symbol. + /// + /// An array of 8 bytes is used if the name is not more than 8 bytes long. + /// This array is padded with nulls on the right if the name is less than 8 bytes long. + /// + /// For longer names, the first 4 bytes are all zeros, and the second 4 bytes + /// are an offset into the string table. + pub name: [u8; 8], + /// The value that is associated with the symbol. + /// + /// The interpretation of this field depends on `section_number` and + /// `storage_class`. A typical meaning is the relocatable address. + pub value: u32, + /// A one-based index into the section table. Zero and negative values have special meanings. + pub section_number: i16, + /// A number that represents type. + /// + /// Microsoft tools set this field to 0x20 (function) or 0x0 (not a function). + pub typ: u16, + /// An enumerated value that represents storage class. + pub storage_class: u8, + /// The number of auxiliary symbol table entries that follow this record. + /// + /// Each auxiliary record is the same size as a standard symbol-table record (18 bytes), + /// but rather than define a new symbol, the auxiliary record gives additional information + /// on the last symbol defined. + pub number_of_aux_symbols: u8, +} + +impl Symbol { + /// Parse the symbol at the given offset. + /// + /// If the symbol has an inline name, then also returns a reference to the name's + /// location in `bytes`. + pub fn parse<'a>(bytes: &'a [u8], offset: usize) -> error::Result<(Option<&'a str>, Symbol)> { + let symbol = bytes.pread::<Symbol>(offset)?; + let name = if symbol.name[0] != 0 { + bytes + .pread_with(offset, ctx::StrCtx::DelimiterUntil(0, 8)) + .ok() + } else { + None + }; + Ok((name, symbol)) + } + + /// Returns the symbol name. 
+ /// + /// This may be a reference to an inline name in the symbol, or to + /// a strtab entry. + pub fn name<'a>(&'a self, strtab: &'a strtab::Strtab) -> error::Result<&'a str> { + if let Some(offset) = self.name_offset() { + strtab.get_at(offset as usize).ok_or_else(|| { + error::Error::Malformed(format!("Invalid Symbol name offset {:#x}", offset)) + }) + } else { + Ok(self.name.pread(0)?) + } + } + + /// Return the strtab offset of the symbol name. + /// + /// Returns `None` if the name is inline. + pub fn name_offset(&self) -> Option<u32> { + // Symbol offset starts at the strtable's length, so let's adjust it + let length_field_size = core::mem::size_of::<u32>() as u32; + + if self.name[0] == 0 { + self.name + .pread_with(4, scroll::LE) + .ok() + .map(|offset: u32| offset - length_field_size) + } else { + None + } + } + + /// Set the strtab offset of the symbol name. + pub fn set_name_offset(&mut self, offset: u32) { + self.name[..4].copy_from_slice(&[0; 4]); + self.name.pwrite_with(offset, 4, scroll::LE).unwrap(); + } + + /// Return the base type of the symbol. + /// + /// This type uses the `IMAGE_SYM_TYPE_*` definitions. + pub fn base_type(&self) -> u16 { + self.typ & IMAGE_SYM_TYPE_MASK + } + + /// Return the derived type of the symbol. + /// + /// This type uses the `IMAGE_SYM_DTYPE_*` definitions. + pub fn derived_type(&self) -> u16 { + self.typ >> IMAGE_SYM_DTYPE_SHIFT + } + + /// Return true for function definitions. + /// + /// These symbols use `AuxFunctionDefinition` for auxiliary symbol records. + pub fn is_function_definition(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_EXTERNAL + && self.derived_type() == IMAGE_SYM_DTYPE_FUNCTION + && self.section_number > 0 + } + + /// Return true for weak external symbols. + /// + /// These symbols use `AuxWeakExternal` for auxiliary symbol records. + pub fn is_weak_external(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_WEAK_EXTERNAL + } + + /// Return true for file symbol records. 
+ /// + /// The auxiliary records contain the name of the source code file. + pub fn is_file(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_FILE + } + + /// Return true for section definitions. + /// + /// These symbols use `AuxSectionDefinition` for auxiliary symbol records. + pub fn is_section_definition(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_STATIC && self.number_of_aux_symbols > 0 + } +} + +/// Auxiliary symbol record for function definitions. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxFunctionDefinition { + /// The symbol-table index of the corresponding `.bf` (begin function) symbol record. + pub tag_index: u32, + /// The size of the executable code for the function itself. + /// + /// If the function is in its own section, the `size_of_raw_data` in the section header + /// is greater or equal to this field, depending on alignment considerations. + pub total_size: u32, + /// The file offset of the first COFF line-number entry for the function, + /// or zero if none exists. + pub pointer_to_line_number: u32, + /// The symbol-table index of the record for the next function. + /// + /// If the function is the last in the symbol table, this field is set to zero. + pub pointer_to_next_function: u32, + /// Unused padding. + pub unused: [u8; 2], +} + +/// Auxiliary symbol record for symbols with storage class `IMAGE_SYM_CLASS_FUNCTION`. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxBeginAndEndFunction { + /// Unused padding. + pub unused1: [u8; 4], + /// The actual ordinal line number within the source file, corresponding + /// to the `.bf` or `.ef` record. + pub line_number: u16, + /// Unused padding. + pub unused2: [u8; 6], + /// The symbol-table index of the next `.bf` symbol record. + /// + /// If the function is the last in the symbol table, this field is set to zero. 
+ /// It is not used for `.ef` records. + pub pointer_to_next_function: u32, + /// Unused padding. + pub unused3: [u8; 2], +} + +// Values for the `characteristics` field of `AuxWeakExternal`. + +/// Indicates that no library search for the symbol should be performed. +pub const IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY: u32 = 1; +/// Indicates that a library search for the symbol should be performed. +pub const IMAGE_WEAK_EXTERN_SEARCH_LIBRARY: u32 = 2; +/// Indicates that the symbol is an alias for the symbol given by the `tag_index` field. +pub const IMAGE_WEAK_EXTERN_SEARCH_ALIAS: u32 = 3; + +/// Auxiliary symbol record for weak external symbols. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxWeakExternal { + /// The symbol-table index of the symbol to be linked if an external definition is not found. + pub tag_index: u32, + /// Flags that control how the symbol should be linked. + pub characteristics: u32, + /// Unused padding. + pub unused: [u8; 10], +} + +// Values for the `selection` field of `AuxSectionDefinition`. + +/// If this symbol is already defined, the linker issues a "multiply defined symbol" error. +pub const IMAGE_COMDAT_SELECT_NODUPLICATES: u8 = 1; +/// Any section that defines the same COMDAT symbol can be linked; the rest are removed. +pub const IMAGE_COMDAT_SELECT_ANY: u8 = 2; +/// The linker chooses an arbitrary section among the definitions for this symbol. +/// +/// If all definitions are not the same size, a "multiply defined symbol" error is issued. +pub const IMAGE_COMDAT_SELECT_SAME_SIZE: u8 = 3; +/// The linker chooses an arbitrary section among the definitions for this symbol. +/// +/// If all definitions do not match exactly, a "multiply defined symbol" error is issued. +pub const IMAGE_COMDAT_SELECT_EXACT_MATCH: u8 = 4; +/// The section is linked if a certain other COMDAT section is linked. 
+/// +/// This other section is indicated by the `number` field of the auxiliary symbol record +/// for the section definition. This setting is useful for definitions that have components +/// in multiple sections (for example, code in one and data in another), but where all must +/// be linked or discarded as a set. The other section with which this section is associated +/// must be a COMDAT section; it cannot be another associative COMDAT section (that is, the +/// other section cannot have `IMAGE_COMDAT_SELECT_ASSOCIATIVE` set). +pub const IMAGE_COMDAT_SELECT_ASSOCIATIVE: u8 = 5; +/// The linker chooses the largest definition from among all of the definitions for this symbol. +/// +/// If multiple definitions have this size, the choice between them is arbitrary. +pub const IMAGE_COMDAT_SELECT_LARGEST: u8 = 6; + +/// Auxiliary symbol record for section definitions. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxSectionDefinition { + /// The size of section data; the same as `size_of_raw_data` in the section header. + pub length: u32, + /// The number of relocation entries for the section. + pub number_of_relocations: u16, + /// The number of line-number entries for the section. + pub number_of_line_numbers: u16, + /// The checksum for communal data. + /// + /// It is applicable if the `IMAGE_SCN_LNK_COMDAT` flag is set in the section header. + pub checksum: u32, + /// One-based index into the section table for the associated section. + /// + /// This is used when the `selection` field is `IMAGE_COMDAT_SELECT_ASSOCIATIVE`. + pub number: u16, + /// The COMDAT selection number. + /// + /// This is applicable if the section is a COMDAT section. + pub selection: u8, + /// Unused padding. + pub unused: [u8; 3], +} + +/// A COFF symbol table. +pub struct SymbolTable<'a> { + symbols: &'a [u8], +} + +impl<'a> SymbolTable<'a> { + /// Parse a COFF symbol table at the given offset. 
+ /// + /// The offset and number of symbols should be from the COFF header. + pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<SymbolTable<'a>> { + let symbols = bytes.pread_with(offset, Self::size(number))?; + Ok(SymbolTable { symbols }) + } + + /// Get the size in bytes of the symbol table. + pub fn size(number: usize) -> usize { + number * COFF_SYMBOL_SIZE + } + + /// Get the symbol at the given index. + /// + /// If the symbol has an inline name, then also returns a reference to the name's + /// location in `bytes`. + pub fn get(&self, index: usize) -> Option<(Option<&'a str>, Symbol)> { + let offset = index * COFF_SYMBOL_SIZE; + Symbol::parse(self.symbols, offset).ok() + } + + /// Get the auxiliary symbol record for a function definition. + pub fn aux_function_definition(&self, index: usize) -> Option<AuxFunctionDefinition> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the auxiliary symbol record for a `.bf` or `.ef` symbol record. + pub fn aux_begin_and_end_function(&self, index: usize) -> Option<AuxBeginAndEndFunction> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the auxiliary symbol record for a weak external. + pub fn aux_weak_external(&self, index: usize) -> Option<AuxWeakExternal> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the file name from the auxiliary symbol record for a file symbol record. + pub fn aux_file(&self, index: usize, number: usize) -> Option<&'a str> { + let offset = index * COFF_SYMBOL_SIZE; + let length = number * COFF_SYMBOL_SIZE; + self.symbols + .pread_with(offset, ctx::StrCtx::DelimiterUntil(0, length)) + .ok() + } + + /// Get the auxiliary symbol record for a section definition. 
+ pub fn aux_section_definition(&self, index: usize) -> Option<AuxSectionDefinition> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Return an iterator for the COFF symbols. + /// + /// This iterator skips over auxiliary symbol records. + pub fn iter(&self) -> SymbolIterator<'a> { + SymbolIterator { + index: 0, + symbols: self.symbols, + } + } +} + +impl<'a> Debug for SymbolTable<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("SymbolTable") + .field("symbols", &self.iter().collect::<Vec<_>>()) + .finish() + } +} + +/// An iterator for COFF symbols. +/// +/// This iterator skips over auxiliary symbol records. +#[derive(Default)] +pub struct SymbolIterator<'a> { + index: usize, + symbols: &'a [u8], +} + +impl<'a> Iterator for SymbolIterator<'a> { + type Item = (usize, Option<&'a str>, Symbol); + fn next(&mut self) -> Option<Self::Item> { + let offset = self.index * COFF_SYMBOL_SIZE; + if offset >= self.symbols.len() { + None + } else { + let index = self.index; + let (name, symbol) = Symbol::parse(self.symbols, offset).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + Some((index, name, symbol)) + } + } +} diff --git a/third_party/rust/goblin/src/pe/utils.rs b/third_party/rust/goblin/src/pe/utils.rs new file mode 100644 index 0000000000..07a320d57b --- /dev/null +++ b/third_party/rust/goblin/src/pe/utils.rs @@ -0,0 +1,180 @@ +use crate::error; +use alloc::string::ToString; +use scroll::Pread; + +use super::options; +use super::section_table; + +use crate::pe::data_directories::DataDirectory; +use core::cmp; + +use log::debug; + +pub fn is_in_range(rva: usize, r1: usize, r2: usize) -> bool { + r1 <= rva && rva < r2 +} + +// reference: Peter Ferrie. Reliable algorithm to extract overlay of a PE. 
https://bit.ly/2vBX2bR +#[inline] +fn aligned_pointer_to_raw_data(pointer_to_raw_data: usize) -> usize { + const PHYSICAL_ALIGN: usize = 0x1ff; + pointer_to_raw_data & !PHYSICAL_ALIGN +} + +#[inline] +fn section_read_size(section: §ion_table::SectionTable, file_alignment: u32) -> usize { + fn round_size(size: usize) -> usize { + const PAGE_MASK: usize = 0xfff; + (size + PAGE_MASK) & !PAGE_MASK + } + + // Paraphrased from https://reverseengineering.stackexchange.com/a/4326 (by Peter Ferrie). + // + // Handles the corner cases such as mis-aligned pointers (round down) and sizes (round up) + // Further rounding corner cases: + // - the physical pointer should be rounded down to a multiple of 512, regardless of the value in the header + // - the read size is rounded up by using a combination of the file alignment and 4kb + // - the virtual size is always rounded up to a multiple of 4kb, regardless of the value in the header. + // + // Reference C implementation: + // + // long pointerToRaw = section.get(POINTER_TO_RAW_DATA); + // long alignedpointerToRaw = pointerToRaw & ~0x1ff; + // long sizeOfRaw = section.get(SIZE_OF_RAW_DATA); + // long readsize = ((pointerToRaw + sizeOfRaw) + filealign - 1) & ~(filealign - 1)) - alignedpointerToRaw; + // readsize = min(readsize, (sizeOfRaw + 0xfff) & ~0xfff); + // long virtsize = section.get(VIRTUAL_SIZE); + // + // if (virtsize) + // { + // readsize = min(readsize, (virtsize + 0xfff) & ~0xfff); + // } + + let file_alignment = file_alignment as usize; + let size_of_raw_data = section.size_of_raw_data as usize; + let virtual_size = section.virtual_size as usize; + let read_size = { + let read_size = + ((section.pointer_to_raw_data as usize + size_of_raw_data + file_alignment - 1) + & !(file_alignment - 1)) + - aligned_pointer_to_raw_data(section.pointer_to_raw_data as usize); + cmp::min(read_size, round_size(size_of_raw_data)) + }; + + if virtual_size == 0 { + read_size + } else { + cmp::min(read_size, round_size(virtual_size)) + } 
}

/// Translates `rva` into a file offset using `section`'s raw-data pointer.
///
/// The caller is expected to have checked (see `is_in_section`) that `rva` is not
/// below the section's `virtual_address`; otherwise the subtraction underflows.
fn rva2offset(rva: usize, section: &section_table::SectionTable) -> usize {
    (rva - section.virtual_address as usize)
        + aligned_pointer_to_raw_data(section.pointer_to_raw_data as usize)
}

/// Asks `is_in_range` whether `rva` falls between the section's `virtual_address`
/// and that address plus `section_read_size(section, file_alignment)`.
fn is_in_section(rva: usize, section: &section_table::SectionTable, file_alignment: u32) -> bool {
    let section_rva = section.virtual_address as usize;
    // Both operands are derived from the file being parsed, so saturate instead of
    // overflowing on targets where `usize` is narrow.
    let section_end = section_rva.saturating_add(section_read_size(section, file_alignment));
    is_in_range(rva, section_rva, section_end)
}

/// Maps `rva` to a file offset by searching `sections` for the first one containing it.
///
/// * When `opts.resolve_rva` is `false`, `rva` is returned unchanged.
/// * When `file_alignment` is zero or not a power of two, `None` is returned.
/// * When no section contains `rva`, `None` is returned.
pub fn find_offset(
    rva: usize,
    sections: &[section_table::SectionTable],
    file_alignment: u32,
    opts: &options::ParseOptions,
) -> Option<usize> {
    if !opts.resolve_rva {
        return Some(rva);
    }
    // `is_power_of_two` is `false` for 0, so this rejects both zero and
    // non-power-of-two alignments.
    if !file_alignment.is_power_of_two() {
        return None;
    }
    for (i, section) in sections.iter().enumerate() {
        debug!(
            "Checking {} for {:#x} ∈ {:#x}..{:#x}",
            section.name().unwrap_or(""),
            rva,
            section.virtual_address,
            // Widened so that a malformed header cannot overflow the sum while logging.
            section.virtual_address as u64 + section.virtual_size as u64
        );
        if is_in_section(rva, section, file_alignment) {
            let offset = rva2offset(rva, section);
            debug!(
                "Found in section {}({}), remapped into offset {:#x}",
                section.name().unwrap_or(""),
                i,
                offset
            );
            return Some(offset);
        }
    }
    None
}

/// Like [`find_offset`], but turns a failed lookup into `Error::Malformed(msg)`.
pub fn find_offset_or(
    rva: usize,
    sections: &[section_table::SectionTable],
    file_alignment: u32,
    opts: &options::ParseOptions,
    msg: &str,
) -> error::Result<usize> {
    find_offset(rva, sections, file_alignment, opts)
        .ok_or_else(|| error::Error::Malformed(msg.to_string()))
}

/// Reads a `&str` from `bytes` at the file offset that `rva` maps to.
///
/// # Errors
/// Returns `Error::Malformed` when `rva` cannot be mapped by [`find_offset`], and
/// propagates the read error when the string cannot be read at the resolved offset.
pub fn try_name<'a>(
    bytes: &'a [u8],
    rva: usize,
    sections: &[section_table::SectionTable],
    file_alignment: u32,
    opts: &options::ParseOptions,
) -> error::Result<&'a str> {
    match find_offset(rva, sections, file_alignment, opts) {
        Some(offset) => Ok(bytes.pread::<&str>(offset)?),
        None => Err(error::Error::Malformed(format!(
            "Cannot find name from rva {:#x} in sections: {:?}",
            rva, sections
        ))),
    }
}

/// Reads a `T` from the location described by `directory`, using the default
/// `ParseOptions`. See [`get_data_with_opts`].
pub fn get_data<'a, T>(
    bytes: &'a [u8],
    sections: &[section_table::SectionTable],
    directory: DataDirectory,
    file_alignment: u32,
) -> error::Result<T>
where
    T: scroll::ctx::TryFromCtx<'a, scroll::Endian, Error = scroll::Error>,
{
    get_data_with_opts(
        bytes,
        sections,
        directory,
        file_alignment,
        &options::ParseOptions::default(),
    )
}

/// Reads a little-endian `T` from `bytes` at the file offset that
/// `directory.virtual_address` maps to.
///
/// # Errors
/// Returns `Error::Malformed` when the address cannot be mapped by [`find_offset`],
/// and propagates the read error when `T` cannot be read at the resolved offset.
pub fn get_data_with_opts<'a, T>(
    bytes: &'a [u8],
    sections: &[section_table::SectionTable],
    directory: DataDirectory,
    file_alignment: u32,
    opts: &options::ParseOptions,
) -> error::Result<T>
where
    T: scroll::ctx::TryFromCtx<'a, scroll::Endian, Error = scroll::Error>,
{
    let rva = directory.virtual_address as usize;
    let offset = find_offset(rva, sections, file_alignment, opts).ok_or_else(|| {
        // Say what failed instead of reporting only a bare number.
        error::Error::Malformed(format!(
            "Cannot map data directory rva {:#x} into offset",
            directory.virtual_address
        ))
    })?;
    let result: T = bytes.pread_with(offset, scroll::LE)?;
    Ok(result)
}

// diff --git a/third_party/rust/goblin/src/strtab.rs b/third_party/rust/goblin/src/strtab.rs
// new file mode 100644
// index 0000000000..dc7b8080f0
// --- /dev/null
// +++ b/third_party/rust/goblin/src/strtab.rs
// @@ -0,0 +1,264 @@

//! A byte-offset based string table.
//! Commonly used in ELF binaries, Unix archives, and even PE binaries.

use core::fmt;
use core::ops::Index;
use core::str;
use scroll::{ctx, Pread};
if_alloc! {
    use crate::error;
    use alloc::vec::Vec;
}

/// A common string table format which is indexed by byte offsets (and not
/// member index). Constructed using [`parse`](#method.parse)
/// with your choice of delimiter. Please be careful.
pub struct Strtab<'a> {
    /// Delimiter context used when reading a string out of `bytes`.
    delim: ctx::StrCtx,
    /// The backing bytes of the table.
    bytes: &'a [u8],
    /// `(start offset, string)` pairs collected by `parse`; empty when the table was not preparsed.
    #[cfg(feature = "alloc")]
    strings: Vec<(usize, &'a str)>,
}

/// Reads one `delim`-terminated `&str` from `bytes` starting at `offset`.
#[inline(always)]
fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&str> {
    bytes.pread_with::<&str>(offset, delim)
}

impl<'a> Strtab<'a> {
    /// Creates a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
    ///
    /// NB: this does *not* preparse the string table, which can have non-optimal access patterns.
    /// See https://github.com/m4b/goblin/pull/275#issue-660364025
    pub fn new(bytes: &'a [u8], delim: u8) -> Self {
        Self::from_slice_unparsed(bytes, 0, bytes.len(), delim)
    }

    /// Creates a `Strtab` directly without bounds check and without parsing it.
    ///
    /// This is potentially unsafe and should only be used if `feature = "alloc"` is disabled.
    ///
    /// # Panics
    /// Panics if `offset..offset + len` is not a valid range within `bytes`.
    pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self {
        Self {
            delim: ctx::StrCtx::Delimiter(delim),
            bytes: &bytes[offset..offset + len],
            #[cfg(feature = "alloc")]
            strings: Vec::new(),
        }
    }
    /// Gets a str reference from the backing bytes starting at byte `offset`.
    ///
    /// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8.
    /// Use this method if the `Strtab` was created using `from_slice_unparsed()`.
    pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
        if offset >= self.bytes.len() {
            None
        } else {
            Some(get_str(offset, self.bytes, self.delim).unwrap())
        }
    }
    #[cfg(feature = "alloc")]
    /// Parses a `Strtab` from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter.
    ///
    /// Errors if bytes are invalid UTF-8.
    /// Requires `feature = "alloc"`
    pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result<Self> {
        // Validate the requested window up front so the slicing in
        // `from_slice_unparsed` cannot panic.
        let (end, overflow) = offset.overflowing_add(len);
        if overflow || end > bytes.len() {
            return Err(error::Error::Malformed(format!(
                "Strtable size ({}) + offset ({}) is out of bounds for {} #bytes. Overflowed: {}",
                len,
                offset,
                bytes.len(),
                overflow
            )));
        }
        let mut result = Self::from_slice_unparsed(bytes, offset, len, delim);
        let mut i = 0;
        while i < result.bytes.len() {
            let string = get_str(i, result.bytes, result.delim)?;
            result.strings.push((i, string));
            // Step over the string and the delimiter that follows it.
            i += string.len() + 1;
        }
        Ok(result)
    }
    #[cfg(feature = "alloc")]
    /// Parses a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries.
    ///
    /// Requires `feature = "alloc"`
    pub fn new_preparsed(bytes: &'a [u8], delim: u8) -> error::Result<Self> {
        Self::parse(bytes, 0, bytes.len(), delim)
    }
    #[cfg(feature = "alloc")]
    /// Converts the string table to a vector of parsed strings.
    ///
    /// Note: This method is used to check the parsed contents of `strtab`.
    /// If you want to get the correct contents of `strtab` as `Vec`, use the following example.
    ///
    /// # Examples
    /// ```rust
    /// use goblin::error::Error;
    ///
    /// pub fn show_shdr_strtab(bytes: &[u8]) -> Result<(), Error> {
    ///     let elf = goblin::elf::Elf::parse(&bytes)?;
    ///
    ///     for section in elf.section_headers {
    ///         println!("{}", elf.shdr_strtab.get_at(section.sh_name).unwrap_or(""));
    ///     }
    ///
    ///     Ok(())
    /// }
    /// ```
    ///
    /// Requires `feature = "alloc"`
    pub fn to_vec(&self) -> error::Result<Vec<&'a str>> {
        // Fallback in case `Strtab` was created using `from_slice_unparsed()`.
        if self.strings.is_empty() {
            let mut result = Vec::new();
            let mut i = 0;
            while i < self.bytes.len() {
                let string = get_str(i, self.bytes, self.delim)?;
                result.push(string);
                i += string.len() + 1;
            }
            return Ok(result);
        }
        Ok(self.strings.iter().map(|&(_key, value)| value).collect())
    }
    #[cfg(feature = "alloc")]
    /// Safely gets a str reference from the parsed table starting at byte `offset`.
    ///
    /// If the index is out of bounds, `None` is returned.
    /// Requires `feature = "alloc"`
    pub fn get_at(&self, offset: usize) -> Option<&'a str> {
        match self
            .strings
            .binary_search_by_key(&offset, |&(key, _value)| key)
        {
            // `offset` is exactly the start of a parsed string.
            Ok(index) => Some(self.strings[index].1),
            Err(index) => {
                if index == 0 {
                    return None;
                }
                // `offset` falls after the start of the preceding entry: return that
                // entry's suffix. `str::get` yields `None` when the position is past
                // the end of the string or not on a character boundary.
                let (string_begin_offset, entire_string) = self.strings[index - 1];
                entire_string.get(offset - string_begin_offset..)
            }
        }
    }
    #[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")]
    /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter
    ///
    /// # Safety
    /// This function creates a `Strtab` directly from a raw pointer and size
    pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> {
        Self::from_slice_unparsed(core::slice::from_raw_parts(ptr, len), 0, len, delim)
    }
    #[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")]
    #[cfg(feature = "alloc")]
    /// Parses a str reference from the parsed table starting at byte `offset`.
    ///
    /// If the index is out of bounds, `None` is returned.
    /// Requires `feature = "alloc"`
    pub fn get(&self, offset: usize) -> Option<error::Result<&'a str>> {
        if offset >= self.bytes.len() {
            None
        } else {
            Some(get_str(offset, self.bytes, self.delim).map_err(core::convert::Into::into))
        }
    }
}

impl<'a> fmt::Debug for Strtab<'a> {
    /// Formats the delimiter and the backing bytes (as the result of a UTF-8 conversion).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("Strtab")
            .field("delim", &self.delim)
            .field("bytes", &str::from_utf8(self.bytes))
            .finish()
    }
}

impl<'a> Default for Strtab<'a> {
    /// Creates an empty table with the default `StrCtx` delimiter.
    fn default() -> Self {
        Self {
            delim: ctx::StrCtx::default(),
            bytes: &[],
            #[cfg(feature = "alloc")]
            strings: Vec::new(),
        }
    }
}

impl<'a> Index<usize> for Strtab<'a> {
    type Output = str;
    /// Gets a str reference starting at byte `offset`.
    /// **NB**: this will panic if the underlying bytes are not valid utf8, or the offset is invalid
    #[inline(always)]
    fn index(&self, offset: usize) -> &Self::Output {
        // This can't delegate to get() because get() requires #[cfg(feature = "alloc")]
        // It's also slightly less useful than get() because the lifetime -- specified by the Index
        // trait -- matches &self, even though we could return &'a instead
        get_str(offset, self.bytes, self.delim).unwrap()
    }
}

#[test]
fn as_vec_no_final_null() {
    let strtab = Strtab::new_preparsed(b"\0printf\0memmove\0busta", 0x0).unwrap();
    let vec = strtab.to_vec().unwrap();
    assert_eq!(vec.len(), 4);
    assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
}

#[test]
fn as_vec_no_first_null_no_final_null() {
    let strtab = Strtab::new_preparsed(b"printf\0memmove\0busta", 0x0).unwrap();
    let vec = strtab.to_vec().unwrap();
    assert_eq!(vec.len(), 3);
    assert_eq!(vec, vec!["printf", "memmove", "busta"]);
}

#[test]
fn to_vec_final_null() {
    let strtab = Strtab::new_preparsed(b"\0printf\0memmove\0busta\0", 0x0).unwrap();
    let vec = strtab.to_vec().unwrap();
    assert_eq!(vec.len(), 4);
    assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
}

#[test]
fn to_vec_newline_delim() {
    let strtab = Strtab::new_preparsed(b"\nprintf\nmemmove\nbusta\n", b'\n').unwrap();
    let vec = strtab.to_vec().unwrap();
    assert_eq!(vec.len(), 4);
    assert_eq!(vec, vec!["", "printf", "memmove", "busta"]);
}

#[test]
fn parse_utf8() {
    assert!(match Strtab::new_preparsed(&[0x80, 0x80], b'\n') {
        Err(error::Error::Scroll(scroll::Error::BadInput {
            size: 2,
            msg: "invalid utf8",
        })) => true,
        _ => false,
    });
    assert!(
        match Strtab::new_preparsed(&[0xC6, 0x92, 0x6F, 0x6F], b'\n') {
            Ok(_) => true,
            _ => false,
        }
    );
}

#[test]
fn get_at_utf8() {
    let strtab = Strtab::new_preparsed("\nƒoo\nmemmove\n🅱️usta\n".as_bytes(), b'\n').unwrap();
    assert_eq!(strtab.get_at(0), Some(""));
    assert_eq!(strtab.get_at(5), Some(""));
    assert_eq!(strtab.get_at(6), Some("memmove"));
    assert_eq!(strtab.get_at(14), Some("\u{1f171}\u{fe0f}usta"));
    assert_eq!(strtab.get_at(16), None);
    assert_eq!(strtab.get_at(18), Some("\u{fe0f}usta"));
    assert_eq!(strtab.get_at(21), Some("usta"));
    assert_eq!(strtab.get_at(25), Some(""));
    assert_eq!(strtab.get_at(26), None);
}