diff options
Diffstat (limited to 'third_party/rust/goblin/src/pe')
-rw-r--r-- | third_party/rust/goblin/src/pe/authenticode.rs | 116 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/certificate_table.rs | 164 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/characteristic.rs | 99 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/data_directories.rs | 106 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/debug.rs | 186 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/exception.rs | 1058 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/export.rs | 533 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/header.rs | 312 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/import.rs | 417 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/mod.rs | 448 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/optional_header.rs | 323 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/options.rs | 13 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/relocation.rs | 133 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/section_table.rs | 279 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/symbol.rs | 516 | ||||
-rw-r--r-- | third_party/rust/goblin/src/pe/utils.rs | 180 |
16 files changed, 4883 insertions, 0 deletions
diff --git a/third_party/rust/goblin/src/pe/authenticode.rs b/third_party/rust/goblin/src/pe/authenticode.rs new file mode 100644 index 0000000000..e16b7997cd --- /dev/null +++ b/third_party/rust/goblin/src/pe/authenticode.rs @@ -0,0 +1,116 @@
// Reference:
//   https://learn.microsoft.com/en-us/windows-hardware/drivers/install/authenticode
//   https://download.microsoft.com/download/9/c/5/9c5b2167-8017-4bae-9fde-d599bac8184a/Authenticode_PE.docx

// Authenticode works by omitting sections of the PE binary from the digest.
// Those sections are:
//   - checksum
//   - data directory entry for certtable
//   - certtable

use core::ops::Range;

use super::PE;

impl PE<'_> {
    /// [`authenticode_ranges`] returns an iterator over the byte ranges of the binary that are
    /// relevant for the signature, i.e. everything that lies outside the excluded sections.
    pub fn authenticode_ranges(&self) -> ExcludedSectionsIter<'_> {
        ExcludedSectionsIter {
            pe: self,
            state: IterState::default(),
        }
    }
}

/// [`ExcludedSections`] records the byte ranges of the binary that are expected to be
/// left out of the authenticode computation.
#[derive(Debug, Clone, Default)]
pub(super) struct ExcludedSections {
    checksum: Range<usize>,
    datadir_entry_certtable: Range<usize>,
    certtable: Option<Range<usize>>,
}

impl ExcludedSections {
    /// Bundles the three excluded ranges; `certtable` is `None` when there is no
    /// certificate table range to exclude.
    pub(super) fn new(
        checksum: Range<usize>,
        datadir_entry_certtable: Range<usize>,
        certtable: Option<Range<usize>>,
    ) -> Self {
        Self {
            checksum,
            datadir_entry_certtable,
            certtable,
        }
    }
}

/// Iterator returned by [`PE::authenticode_ranges`]; yields the slices of the binary that sit
/// between (and around) the excluded sections.
pub struct ExcludedSectionsIter<'s> {
    pe: &'s PE<'s>,
    state: IterState,
}

/// Position of [`ExcludedSectionsIter`] within the binary. The `usize` payload is the offset at
/// which the next yielded slice starts.
#[derive(Debug, PartialEq)]
enum IterState {
    Initial,
    DatadirEntry(usize),
    CertTable(usize),
    Final(usize),
    Done,
}

impl Default for IterState {
    fn default() -> Self {
        Self::Initial
    }
}

impl<'s> Iterator for ExcludedSectionsIter<'s> {
    type Item = &'s [u8];

    fn next(&mut self) -> Option<Self::Item> {
        let bytes = &self.pe.bytes;

        let sections = match self.pe.authenticode_excluded_sections.as_ref() {
            Some(sections) => sections,
            None => {
                // Nothing is excluded: yield the whole binary exactly once.
                return match self.state {
                    IterState::Initial => {
                        self.state = IterState::Done;
                        Some(bytes)
                    }
                    _ => {
                        self.state = IterState::Done;
                        None
                    }
                };
            }
        };

        match self.state {
            // Everything before the checksum.
            IterState::Initial => {
                self.state = IterState::DatadirEntry(sections.checksum.end);
                Some(&bytes[..sections.checksum.start])
            }
            // From the end of the checksum up to the certtable data directory entry.
            IterState::DatadirEntry(start) => {
                self.state = IterState::CertTable(sections.datadir_entry_certtable.end);
                Some(&bytes[start..sections.datadir_entry_certtable.start])
            }
            // From the end of the data directory entry up to the certtable, or straight to
            // the end of the binary when no certtable range is recorded.
            IterState::CertTable(start) => match sections.certtable.as_ref() {
                Some(certtable) => {
                    self.state = IterState::Final(certtable.end);
                    Some(&bytes[start..certtable.start])
                }
                None => {
                    self.state = IterState::Done;
                    Some(&bytes[start..])
                }
            },
            // Whatever follows the certtable.
            IterState::Final(start) => {
                self.state = IterState::Done;
                Some(&bytes[start..])
            }
            IterState::Done => None,
        }
    }
}
diff --git a/third_party/rust/goblin/src/pe/certificate_table.rs
b/third_party/rust/goblin/src/pe/certificate_table.rs new file mode 100644 index 0000000000..353a6c70ce --- /dev/null +++ b/third_party/rust/goblin/src/pe/certificate_table.rs @@ -0,0 +1,164 @@ +/// Implements parsing of pe32's Attribute Certificate Table +/// See reference: +/// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-attribute-certificate-table-image-only +/// https://learn.microsoft.com/en-us/windows/win32/api/wintrust/ns-wintrust-win_certificate +use crate::error; +use scroll::Pread; + +use alloc::string::ToString; +use alloc::vec::Vec; + +#[repr(u16)] +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum AttributeCertificateRevision { + /// WIN_CERT_REVISION_1_0 + Revision1_0 = 0x0100, + /// WIN_CERT_REVISION_2_0 + Revision2_0 = 0x0200, +} + +impl TryFrom<u16> for AttributeCertificateRevision { + type Error = error::Error; + + fn try_from(value: u16) -> Result<Self, Self::Error> { + Ok(match value { + x if x == AttributeCertificateRevision::Revision1_0 as u16 => { + AttributeCertificateRevision::Revision1_0 + } + x if x == AttributeCertificateRevision::Revision2_0 as u16 => { + AttributeCertificateRevision::Revision2_0 + } + _ => { + return Err(error::Error::Malformed( + "Invalid certificate attribute revision".to_string(), + )) + } + }) + } +} + +#[repr(u16)] +#[derive(Debug)] +pub enum AttributeCertificateType { + /// WIN_CERT_TYPE_X509 + X509 = 0x0001, + /// WIN_CERT_TYPE_PKCS_SIGNED_DATA + PkcsSignedData = 0x0002, + /// WIN_CERT_TYPE_RESERVED_1 + Reserved1 = 0x0003, + /// WIN_CERT_TYPE_TS_STACK_SIGNED + TsStackSigned = 0x0004, +} + +impl TryFrom<u16> for AttributeCertificateType { + type Error = error::Error; + + fn try_from(value: u16) -> Result<Self, Self::Error> { + Ok(match value { + x if x == AttributeCertificateType::X509 as u16 => AttributeCertificateType::X509, + x if x == AttributeCertificateType::PkcsSignedData as u16 => { + AttributeCertificateType::PkcsSignedData + } + x if x == AttributeCertificateType::Reserved1 as 
u16 => { + AttributeCertificateType::Reserved1 + } + x if x == AttributeCertificateType::TsStackSigned as u16 => { + AttributeCertificateType::TsStackSigned + } + _ => { + return Err(error::Error::Malformed( + "Invalid attribute certificate type".to_string(), + )) + } + }) + } +} + +#[derive(Clone, Pread)] +struct AttributeCertificateHeader { + /// dwLength + length: u32, + revision: u16, + certificate_type: u16, +} + +const CERTIFICATE_DATA_OFFSET: u32 = 8; +#[derive(Debug)] +pub struct AttributeCertificate<'a> { + pub length: u32, + pub revision: AttributeCertificateRevision, + pub certificate_type: AttributeCertificateType, + pub certificate: &'a [u8], +} + +impl<'a> AttributeCertificate<'a> { + pub fn parse( + bytes: &'a [u8], + current_offset: &mut usize, + ) -> Result<AttributeCertificate<'a>, error::Error> { + // `current_offset` is moved sizeof(AttributeCertificateHeader) = 8 bytes further. + let header: AttributeCertificateHeader = bytes.gread_with(current_offset, scroll::LE)?; + let cert_size = usize::try_from(header.length.saturating_sub(CERTIFICATE_DATA_OFFSET)) + .map_err(|_err| { + error::Error::Malformed( + "Attribute certificate size do not fit in usize".to_string(), + ) + })?; + + if let Some(bytes) = bytes.get(*current_offset..(*current_offset + cert_size)) { + let attr = Self { + length: header.length, + revision: header.revision.try_into()?, + certificate_type: header.certificate_type.try_into()?, + certificate: bytes, + }; + // Moving past the certificate data. + // Prevent the current_offset to wrap and ensure current_offset is strictly increasing. + *current_offset = current_offset.saturating_add(cert_size); + // Round to the next 8-bytes. + *current_offset = (*current_offset + 7) & !7; + Ok(attr) + } else { + Err(error::Error::Malformed(format!( + "Unable to extract certificate. 
Probably cert_size:{} is malformed", + cert_size + ))) + } + } +} + +pub type CertificateDirectoryTable<'a> = Vec<AttributeCertificate<'a>>; +pub(crate) fn enumerate_certificates( + bytes: &[u8], + table_virtual_address: u32, + table_size: u32, +) -> Result<CertificateDirectoryTable, error::Error> { + let table_start_offset = usize::try_from(table_virtual_address).map_err(|_err| { + error::Error::Malformed("Certificate table RVA do not fit in a usize".to_string()) + })?; + // Here, we do not want wrapping semantics as it means that a too big table size or table start + // offset will provide table_end_offset such that table_end_offset < table_start_offset, which + // is not desirable at all. + let table_end_offset = + table_start_offset.saturating_add(usize::try_from(table_size).map_err(|_err| { + error::Error::Malformed("Certificate table size do not fit in a usize".to_string()) + })?); + let mut current_offset = table_start_offset; + let mut attrs = vec![]; + + // End offset cannot be further than the binary we have at hand. + if table_end_offset > bytes.len() { + return Err(error::Error::Malformed( + "End of attribute certificates table is after the end of the PE binary".to_string(), + )); + } + + // This is guaranteed to terminate, either by a malformed error being returned + // or because current_offset >= table_end_offset by virtue of current_offset being strictly + // increasing through `AttributeCertificate::parse`. 
+ while current_offset < table_end_offset { + attrs.push(AttributeCertificate::parse(bytes, &mut current_offset)?); + } + + Ok(attrs) +} diff --git a/third_party/rust/goblin/src/pe/characteristic.rs b/third_party/rust/goblin/src/pe/characteristic.rs new file mode 100644 index 0000000000..f5c1fe4e5d --- /dev/null +++ b/third_party/rust/goblin/src/pe/characteristic.rs @@ -0,0 +1,99 @@ +/* +type characteristic = + | IMAGE_FILE_RELOCS_STRIPPED + | IMAGE_FILE_EXECUTABLE_IMAGE + | IMAGE_FILE_LINE_NUMS_STRIPPED + | IMAGE_FILE_LOCAL_SYMS_STRIPPED + | IMAGE_FILE_AGGRESSIVE_WS_TRIM + | IMAGE_FILE_LARGE_ADDRESS_AWARE + | RESERVED + | IMAGE_FILE_BYTES_REVERSED_LO + | IMAGE_FILE_32BIT_MACHINE + | IMAGE_FILE_DEBUG_STRIPPED + | IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP + | IMAGE_FILE_NET_RUN_FROM_SWAP + | IMAGE_FILE_SYSTEM + | IMAGE_FILE_DLL + | IMAGE_FILE_UP_SYSTEM_ONLY + | IMAGE_FILE_BYTES_REVERSED_HI + | UNKNOWN of int + +let get_characteristic = + function + | 0x0001 -> IMAGE_FILE_RELOCS_STRIPPED + | 0x0002 -> IMAGE_FILE_EXECUTABLE_IMAGE + | 0x0004 -> IMAGE_FILE_LINE_NUMS_STRIPPED + | 0x0008 -> IMAGE_FILE_LOCAL_SYMS_STRIPPED + | 0x0010 -> IMAGE_FILE_AGGRESSIVE_WS_TRIM + | 0x0020 -> IMAGE_FILE_LARGE_ADDRESS_AWARE + | 0x0040 -> RESERVED + | 0x0080 -> IMAGE_FILE_BYTES_REVERSED_LO + | 0x0100 -> IMAGE_FILE_32BIT_MACHINE + | 0x0200 -> IMAGE_FILE_DEBUG_STRIPPED + | 0x0400 -> IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP + | 0x0800 -> IMAGE_FILE_NET_RUN_FROM_SWAP + | 0x1000 -> IMAGE_FILE_SYSTEM + | 0x2000 -> IMAGE_FILE_DLL + | 0x4000 -> IMAGE_FILE_UP_SYSTEM_ONLY + | 0x8000 -> IMAGE_FILE_BYTES_REVERSED_HI + | x -> UNKNOWN x + +let characteristic_to_string = + function + | IMAGE_FILE_RELOCS_STRIPPED -> "IMAGE_FILE_RELOCS_STRIPPED" + | IMAGE_FILE_EXECUTABLE_IMAGE -> "IMAGE_FILE_EXECUTABLE_IMAGE" + | IMAGE_FILE_LINE_NUMS_STRIPPED -> "IMAGE_FILE_LINE_NUMS_STRIPPED" + | IMAGE_FILE_LOCAL_SYMS_STRIPPED -> "IMAGE_FILE_LOCAL_SYMS_STRIPPED" + | IMAGE_FILE_AGGRESSIVE_WS_TRIM -> "IMAGE_FILE_AGGRESSIVE_WS_TRIM" + | 
IMAGE_FILE_LARGE_ADDRESS_AWARE -> "IMAGE_FILE_LARGE_ADDRESS_AWARE" + | RESERVED -> "RESERVED" + | IMAGE_FILE_BYTES_REVERSED_LO -> "IMAGE_FILE_BYTES_REVERSED_LO" + | IMAGE_FILE_32BIT_MACHINE -> "IMAGE_FILE_32BIT_MACHINE" + | IMAGE_FILE_DEBUG_STRIPPED -> "IMAGE_FILE_DEBUG_STRIPPED" + | IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP -> "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP" + | IMAGE_FILE_NET_RUN_FROM_SWAP -> "IMAGE_FILE_NET_RUN_FROM_SWAP" + | IMAGE_FILE_SYSTEM -> "IMAGE_FILE_SYSTEM" + | IMAGE_FILE_DLL -> "IMAGE_FILE_DLL" + | IMAGE_FILE_UP_SYSTEM_ONLY -> "IMAGE_FILE_UP_SYSTEM_ONLY" + | IMAGE_FILE_BYTES_REVERSED_HI -> "IMAGE_FILE_BYTES_REVERSED_HI" + | UNKNOWN x -> Printf.sprintf "UNKNOWN_CHARACTERISTIC 0x%x" x + +let is_dll characteristics = + let characteristic = characteristic_to_int IMAGE_FILE_DLL in + characteristics land characteristic = characteristic + +let has characteristic characteristics = + let characteristic = characteristic_to_int characteristic in + characteristics land characteristic = characteristic + +(* TODO: this is a mad hack *) +let show_type characteristics = + if (has IMAGE_FILE_DLL characteristics) then "DLL" + else if (has IMAGE_FILE_EXECUTABLE_IMAGE characteristics) then "EXE" + else "MANY" (* print all *) + */ + +pub const IMAGE_FILE_RELOCS_STRIPPED: u16 = 0x0001; +pub const IMAGE_FILE_EXECUTABLE_IMAGE: u16 = 0x0002; +pub const IMAGE_FILE_LINE_NUMS_STRIPPED: u16 = 0x0004; +pub const IMAGE_FILE_LOCAL_SYMS_STRIPPED: u16 = 0x0008; +pub const IMAGE_FILE_AGGRESSIVE_WS_TRIM: u16 = 0x0010; +pub const IMAGE_FILE_LARGE_ADDRESS_AWARE: u16 = 0x0020; +pub const RESERVED: u16 = 0x0040; +pub const IMAGE_FILE_BYTES_REVERSED_LO: u16 = 0x0080; +pub const IMAGE_FILE_32BIT_MACHINE: u16 = 0x0100; +pub const IMAGE_FILE_DEBUG_STRIPPED: u16 = 0x0200; +pub const IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP: u16 = 0x0400; +pub const IMAGE_FILE_NET_RUN_FROM_SWAP: u16 = 0x0800; +pub const IMAGE_FILE_SYSTEM: u16 = 0x1000; +pub const IMAGE_FILE_DLL: u16 = 0x2000; +pub const 
IMAGE_FILE_UP_SYSTEM_ONLY: u16 = 0x4000; +pub const IMAGE_FILE_BYTES_REVERSED_HI: u16 = 0x8000; + +pub fn is_dll(characteristics: u16) -> bool { + characteristics & IMAGE_FILE_DLL == IMAGE_FILE_DLL +} + +pub fn is_exe(characteristics: u16) -> bool { + characteristics & IMAGE_FILE_EXECUTABLE_IMAGE == IMAGE_FILE_EXECUTABLE_IMAGE +} diff --git a/third_party/rust/goblin/src/pe/data_directories.rs b/third_party/rust/goblin/src/pe/data_directories.rs new file mode 100644 index 0000000000..265e4e27f7 --- /dev/null +++ b/third_party/rust/goblin/src/pe/data_directories.rs @@ -0,0 +1,106 @@ +use crate::error; +use scroll::{Pread, Pwrite, SizeWith}; + +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct DataDirectory { + pub virtual_address: u32, + pub size: u32, +} + +pub const SIZEOF_DATA_DIRECTORY: usize = 8; +const NUM_DATA_DIRECTORIES: usize = 16; + +impl DataDirectory { + pub fn parse(bytes: &[u8], offset: &mut usize) -> error::Result<Self> { + let dd = bytes.gread_with(offset, scroll::LE)?; + Ok(dd) + } +} + +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct DataDirectories { + pub data_directories: [Option<DataDirectory>; NUM_DATA_DIRECTORIES], +} + +impl DataDirectories { + pub fn parse(bytes: &[u8], count: usize, offset: &mut usize) -> error::Result<Self> { + let mut data_directories = [None; NUM_DATA_DIRECTORIES]; + if count > NUM_DATA_DIRECTORIES { + return Err(error::Error::Malformed(format!( + "data directory count ({}) is greater than maximum number of data directories ({})", + count, NUM_DATA_DIRECTORIES + ))); + } + for dir in data_directories.iter_mut().take(count) { + let dd = DataDirectory::parse(bytes, offset)?; + let dd = if dd.virtual_address == 0 && dd.size == 0 { + None + } else { + Some(dd) + }; + *dir = dd; + } + Ok(DataDirectories { data_directories }) + } + pub fn get_export_table(&self) -> &Option<DataDirectory> { + let idx = 0; + &self.data_directories[idx] + } + pub fn 
get_import_table(&self) -> &Option<DataDirectory> { + let idx = 1; + &self.data_directories[idx] + } + pub fn get_resource_table(&self) -> &Option<DataDirectory> { + let idx = 2; + &self.data_directories[idx] + } + pub fn get_exception_table(&self) -> &Option<DataDirectory> { + let idx = 3; + &self.data_directories[idx] + } + pub fn get_certificate_table(&self) -> &Option<DataDirectory> { + let idx = 4; + &self.data_directories[idx] + } + pub fn get_base_relocation_table(&self) -> &Option<DataDirectory> { + let idx = 5; + &self.data_directories[idx] + } + pub fn get_debug_table(&self) -> &Option<DataDirectory> { + let idx = 6; + &self.data_directories[idx] + } + pub fn get_architecture(&self) -> &Option<DataDirectory> { + let idx = 7; + &self.data_directories[idx] + } + pub fn get_global_ptr(&self) -> &Option<DataDirectory> { + let idx = 8; + &self.data_directories[idx] + } + pub fn get_tls_table(&self) -> &Option<DataDirectory> { + let idx = 9; + &self.data_directories[idx] + } + pub fn get_load_config_table(&self) -> &Option<DataDirectory> { + let idx = 10; + &self.data_directories[idx] + } + pub fn get_bound_import_table(&self) -> &Option<DataDirectory> { + let idx = 11; + &self.data_directories[idx] + } + pub fn get_import_address_table(&self) -> &Option<DataDirectory> { + let idx = 12; + &self.data_directories[idx] + } + pub fn get_delay_import_descriptor(&self) -> &Option<DataDirectory> { + let idx = 13; + &self.data_directories[idx] + } + pub fn get_clr_runtime_header(&self) -> &Option<DataDirectory> { + let idx = 14; + &self.data_directories[idx] + } +} diff --git a/third_party/rust/goblin/src/pe/debug.rs b/third_party/rust/goblin/src/pe/debug.rs new file mode 100644 index 0000000000..311bc632cb --- /dev/null +++ b/third_party/rust/goblin/src/pe/debug.rs @@ -0,0 +1,186 @@ +use crate::error; +use scroll::{Pread, Pwrite, SizeWith}; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + 
+#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct DebugData<'a> { + pub image_debug_directory: ImageDebugDirectory, + pub codeview_pdb70_debug_info: Option<CodeviewPDB70DebugInfo<'a>>, +} + +impl<'a> DebugData<'a> { + pub fn parse( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Self> { + Self::parse_with_opts( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Self> { + let image_debug_directory = + ImageDebugDirectory::parse_with_opts(bytes, dd, sections, file_alignment, opts)?; + let codeview_pdb70_debug_info = + CodeviewPDB70DebugInfo::parse_with_opts(bytes, &image_debug_directory, opts)?; + + Ok(DebugData { + image_debug_directory, + codeview_pdb70_debug_info, + }) + } + + /// Return this executable's debugging GUID, suitable for matching against a PDB file. 
+ pub fn guid(&self) -> Option<[u8; 16]> { + self.codeview_pdb70_debug_info.map(|pdb70| pdb70.signature) + } +} + +// https://msdn.microsoft.com/en-us/library/windows/desktop/ms680307(v=vs.85).aspx +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)] +pub struct ImageDebugDirectory { + pub characteristics: u32, + pub time_date_stamp: u32, + pub major_version: u16, + pub minor_version: u16, + pub data_type: u32, + pub size_of_data: u32, + pub address_of_raw_data: u32, + pub pointer_to_raw_data: u32, +} + +pub const IMAGE_DEBUG_TYPE_UNKNOWN: u32 = 0; +pub const IMAGE_DEBUG_TYPE_COFF: u32 = 1; +pub const IMAGE_DEBUG_TYPE_CODEVIEW: u32 = 2; +pub const IMAGE_DEBUG_TYPE_FPO: u32 = 3; +pub const IMAGE_DEBUG_TYPE_MISC: u32 = 4; +pub const IMAGE_DEBUG_TYPE_EXCEPTION: u32 = 5; +pub const IMAGE_DEBUG_TYPE_FIXUP: u32 = 6; +pub const IMAGE_DEBUG_TYPE_BORLAND: u32 = 9; + +impl ImageDebugDirectory { + #[allow(unused)] + fn parse( + bytes: &[u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Self> { + Self::parse_with_opts( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + fn parse_with_opts( + bytes: &[u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Self> { + let rva = dd.virtual_address as usize; + let offset = utils::find_offset(rva, sections, file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!( + "Cannot map ImageDebugDirectory rva {:#x} into offset", + rva + )) + })?; + let idd: Self = bytes.pread_with(offset, scroll::LE)?; + Ok(idd) + } +} + +pub const CODEVIEW_PDB70_MAGIC: u32 = 0x5344_5352; +pub const CODEVIEW_PDB20_MAGIC: u32 = 0x3031_424e; +pub const CODEVIEW_CV50_MAGIC: u32 = 0x3131_424e; +pub const CODEVIEW_CV41_MAGIC: u32 = 0x3930_424e; + +// 
http://llvm.org/doxygen/CVDebugRecord_8h_source.html +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct CodeviewPDB70DebugInfo<'a> { + pub codeview_signature: u32, + pub signature: [u8; 16], + pub age: u32, + pub filename: &'a [u8], +} + +impl<'a> CodeviewPDB70DebugInfo<'a> { + pub fn parse(bytes: &'a [u8], idd: &ImageDebugDirectory) -> error::Result<Option<Self>> { + Self::parse_with_opts(bytes, idd, &options::ParseOptions::default()) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + idd: &ImageDebugDirectory, + opts: &options::ParseOptions, + ) -> error::Result<Option<Self>> { + if idd.data_type != IMAGE_DEBUG_TYPE_CODEVIEW { + // not a codeview debug directory + // that's not an error, but it's not a CodeviewPDB70DebugInfo either + return Ok(None); + } + + // ImageDebugDirectory.pointer_to_raw_data stores a raw offset -- not a virtual offset -- which we can use directly + let mut offset: usize = match opts.resolve_rva { + true => idd.pointer_to_raw_data as usize, + false => idd.address_of_raw_data as usize, + }; + + // calculate how long the eventual filename will be, which doubles as a check of the record size + let filename_length = idd.size_of_data as isize - 24; + if filename_length < 0 { + // the record is too short to be plausible + return Err(error::Error::Malformed(format!( + "ImageDebugDirectory size of data seems wrong: {:?}", + idd.size_of_data + ))); + } + let filename_length = filename_length as usize; + + // check the codeview signature + let codeview_signature: u32 = bytes.gread_with(&mut offset, scroll::LE)?; + if codeview_signature != CODEVIEW_PDB70_MAGIC { + return Ok(None); + } + + // read the rest + let mut signature: [u8; 16] = [0; 16]; + signature.copy_from_slice(bytes.gread_with(&mut offset, 16)?); + let age: u32 = bytes.gread_with(&mut offset, scroll::LE)?; + if let Some(filename) = bytes.get(offset..offset + filename_length) { + Ok(Some(CodeviewPDB70DebugInfo { + codeview_signature, + signature, + age, + 
filename, + })) + } else { + Err(error::Error::Malformed(format!( + "ImageDebugDirectory seems corrupted: {:?}", + idd + ))) + } + } +} diff --git a/third_party/rust/goblin/src/pe/exception.rs b/third_party/rust/goblin/src/pe/exception.rs new file mode 100644 index 0000000000..d26854d3e9 --- /dev/null +++ b/third_party/rust/goblin/src/pe/exception.rs @@ -0,0 +1,1058 @@ +//! Exception handling and stack unwinding for x64. +//! +//! Exception information is exposed via the [`ExceptionData`] structure. If present in a PE file, +//! it contains a list of [`RuntimeFunction`] entries that can be used to get [`UnwindInfo`] for a +//! particular code location. +//! +//! Unwind information contains a list of unwind codes which specify the operations that are +//! necessary to restore registers (including the stack pointer RSP) when unwinding out of a +//! function. +//! +//! Depending on where the instruction pointer lies, there are three strategies to unwind: +//! +//! 1. If the RIP is within an epilog, then control is leaving the function, there can be no +//! exception handler associated with this exception for this function, and the effects of the +//! epilog must be continued to compute the context of the caller function. To determine if the +//! RIP is within an epilog, the code stream from RIP on is examined. If that code stream can be +//! matched to the trailing portion of a legitimate epilog, then it's in an epilog, and the +//! remaining portion of the epilog is simulated, with the context record updated as each +//! instruction is processed. After this, step 1 is repeated. +//! +//! 2. Case b) If the RIP lies within the prologue, then control has not entered the function, +//! there can be no exception handler associated with this exception for this function, and the +//! effects of the prolog must be undone to compute the context of the caller function. The RIP +//! is within the prolog if the distance from the function start to the RIP is less than or +//! 
equal to the prolog size encoded in the unwind info. The effects of the prolog are unwound +//! by scanning forward through the unwind codes array for the first entry with an offset less +//! than or equal to the offset of the RIP from the function start, then undoing the effect of +//! all remaining items in the unwind code array. Step 1 is then repeated. +//! +//! 3. If the RIP is not within a prolog or epilog and the function has an exception handler, then +//! the language-specific handler is called. The handler scans its data and calls filter +//! functions as appropriate. The language-specific handler can return that the exception was +//! handled or that the search is to be continued. It can also initiate an unwind directly. +//! +//! For more information, see [x64 exception handling]. +//! +//! [`ExceptionData`]: struct.ExceptionData.html +//! [`RuntimeFunction`]: struct.RuntimeFunction.html +//! [`UnwindInfo`]: struct.UnwindInfo.html +//! [x64 exception handling]: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2017 + +use core::cmp::Ordering; +use core::fmt; +use core::iter::FusedIterator; + +use scroll::ctx::TryFromCtx; +use scroll::{self, Pread, Pwrite}; + +use crate::error; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + +/// The function has an exception handler that should be called when looking for functions that need +/// to examine exceptions. +const UNW_FLAG_EHANDLER: u8 = 0x01; +/// The function has a termination handler that should be called when unwinding an exception. +const UNW_FLAG_UHANDLER: u8 = 0x02; +/// This unwind info structure is not the primary one for the procedure. Instead, the chained unwind +/// info entry is the contents of a previous `RUNTIME_FUNCTION` entry. If this flag is set, then the +/// `UNW_FLAG_EHANDLER` and `UNW_FLAG_UHANDLER` flags must be cleared. 
Also, the frame register and +/// fixed-stack allocation fields must have the same values as in the primary unwind info. +const UNW_FLAG_CHAININFO: u8 = 0x04; + +/// info == register number +const UWOP_PUSH_NONVOL: u8 = 0; +/// no info, alloc size in next 2 slots +const UWOP_ALLOC_LARGE: u8 = 1; +/// info == size of allocation / 8 - 1 +const UWOP_ALLOC_SMALL: u8 = 2; +/// no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 +const UWOP_SET_FPREG: u8 = 3; +/// info == register number, offset in next slot +const UWOP_SAVE_NONVOL: u8 = 4; +/// info == register number, offset in next 2 slots +const UWOP_SAVE_NONVOL_FAR: u8 = 5; +/// changes the structure of unwind codes to `struct Epilogue`. +/// (was UWOP_SAVE_XMM in version 1, but deprecated and removed) +const UWOP_EPILOG: u8 = 6; +/// reserved +/// (was UWOP_SAVE_XMM_FAR in version 1, but deprecated and removed) +const UWOP_SPARE_CODE: u8 = 7; +/// info == XMM reg number, offset in next slot +const UWOP_SAVE_XMM128: u8 = 8; +/// info == XMM reg number, offset in next 2 slots +const UWOP_SAVE_XMM128_FAR: u8 = 9; +/// info == 0: no error-code, 1: error-code +const UWOP_PUSH_MACHFRAME: u8 = 10; + +/// Size of `RuntimeFunction` entries. +const RUNTIME_FUNCTION_SIZE: usize = 12; +/// Size of unwind code slots. Codes take 1 - 3 slots. +const UNWIND_CODE_SIZE: usize = 2; + +/// An unwind entry for a range of a function. +/// +/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`]. +/// +/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info +#[repr(C)] +#[derive(Copy, Clone, PartialEq, Default, Pread, Pwrite)] +pub struct RuntimeFunction { + /// Function start address. + pub begin_address: u32, + /// Function end address. + pub end_address: u32, + /// Unwind info address. 
+ pub unwind_info_address: u32, +} + +impl fmt::Debug for RuntimeFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("RuntimeFunction") + .field("begin_address", &format_args!("{:#x}", self.begin_address)) + .field("end_address", &format_args!("{:#x}", self.end_address)) + .field( + "unwind_info_address", + &format_args!("{:#x}", self.unwind_info_address), + ) + .finish() + } +} + +/// Iterator over runtime function entries in [`ExceptionData`](struct.ExceptionData.html). +#[derive(Debug)] +pub struct RuntimeFunctionIterator<'a> { + data: &'a [u8], +} + +impl Iterator for RuntimeFunctionIterator<'_> { + type Item = error::Result<RuntimeFunction>; + + fn next(&mut self) -> Option<Self::Item> { + if self.data.is_empty() { + return None; + } + + Some(match self.data.pread_with(0, scroll::LE) { + Ok(func) => { + self.data = &self.data[RUNTIME_FUNCTION_SIZE..]; + Ok(func) + } + Err(error) => { + self.data = &[]; + Err(error.into()) + } + }) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let len = self.data.len() / RUNTIME_FUNCTION_SIZE; + (len, Some(len)) + } +} + +impl FusedIterator for RuntimeFunctionIterator<'_> {} +impl ExactSizeIterator for RuntimeFunctionIterator<'_> {} + +/// An x64 register used during unwinding. +/// +/// - `0` - `15`: General purpose registers +/// - `17` - `32`: XMM registers +#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct Register(pub u8); + +impl Register { + fn xmm(number: u8) -> Self { + Register(number + 17) + } + + /// Returns the x64 register name. 
+ pub fn name(self) -> &'static str { + match self.0 { + 0 => "$rax", + 1 => "$rcx", + 2 => "$rdx", + 3 => "$rbx", + 4 => "$rsp", + 5 => "$rbp", + 6 => "$rsi", + 7 => "$rdi", + 8 => "$r8", + 9 => "$r9", + 10 => "$r10", + 11 => "$r11", + 12 => "$r12", + 13 => "$r13", + 14 => "$r14", + 15 => "$r15", + 16 => "$rip", + 17 => "$xmm0", + 18 => "$xmm1", + 19 => "$xmm2", + 20 => "$xmm3", + 21 => "$xmm4", + 22 => "$xmm5", + 23 => "$xmm6", + 24 => "$xmm7", + 25 => "$xmm8", + 26 => "$xmm9", + 27 => "$xmm10", + 28 => "$xmm11", + 29 => "$xmm12", + 30 => "$xmm13", + 31 => "$xmm14", + 32 => "$xmm15", + _ => "", + } + } +} + +/// An unsigned offset to a value in the local stack frame. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum StackFrameOffset { + /// Offset from the current RSP, that is, the lowest address of the fixed stack allocation. + /// + /// To restore this register, read the value at the given offset from the RSP. + RSP(u32), + + /// Offset from the value of the frame pointer register. + /// + /// To restore this register, read the value at the given offset from the FP register, reduced + /// by the `frame_register_offset` value specified in the `UnwindInfo` structure. By definition, + /// the frame pointer register is any register other than RAX (`0`). + FP(u32), +} + +impl StackFrameOffset { + fn with_ctx(offset: u32, ctx: UnwindOpContext) -> Self { + match ctx.frame_register { + Register(0) => StackFrameOffset::RSP(offset), + Register(_) => StackFrameOffset::FP(offset), + } + } +} + +impl fmt::Display for Register { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.name()) + } +} + +/// An unwind operation corresponding to code in the function prolog. +/// +/// Unwind operations can be used to reverse the effects of the function prolog and restore register +/// values of parent stack frames that have been saved to the stack. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum UnwindOperation { + /// Push a nonvolatile integer register, decrementing `RSP` by 8. + PushNonVolatile(Register), + + /// Allocate a fixed-size area on the stack. + Alloc(u32), + + /// Establish the frame pointer register by setting the register to some offset of the current + /// RSP. The use of an offset permits establishing a frame pointer that points to the middle of + /// the fixed stack allocation, helping code density by allowing more accesses to use short + /// instruction forms. + SetFPRegister, + + /// Save a nonvolatile integer register on the stack using a MOV instead of a PUSH. This code is + /// primarily used for shrink-wrapping, where a nonvolatile register is saved to the stack in a + /// position that was previously allocated. + SaveNonVolatile(Register, StackFrameOffset), + + /// Save the lower 64 bits of a nonvolatile XMM register on the stack. + SaveXMM(Register, StackFrameOffset), + + /// Describes the function epilog. + /// + /// This operation has been introduced with unwind info version 2 and is not implemented yet. + Epilog, + + /// Save all 128 bits of a nonvolatile XMM register on the stack. + SaveXMM128(Register, StackFrameOffset), + + /// Push a machine frame. This is used to record the effect of a hardware interrupt or + /// exception. Depending on the error flag, this frame has two different layouts. + /// + /// This unwind code always appears in a dummy prolog, which is never actually executed but + /// instead appears before the real entry point of an interrupt routine, and exists only to + /// provide a place to simulate the push of a machine frame. This operation records that + /// simulation, which indicates the machine has conceptually done this: + /// + /// 1. Pop RIP return address from top of stack into `temp` + /// 2. `$ss`, Push old `$rsp`, `$rflags`, `$cs`, `temp` + /// 3. 
If error flag is `true`, push the error code + /// + /// Without an error code, RSP was incremented by `40` and the following was frame pushed: + /// + /// Offset | Value + /// ---------|-------- + /// RSP + 32 | `$ss` + /// RSP + 24 | old `$rsp` + /// RSP + 16 | `$rflags` + /// RSP + 8 | `$cs` + /// RSP + 0 | `$rip` + /// + /// With an error code, RSP was incremented by `48` and the following was frame pushed: + /// + /// Offset | Value + /// ---------|-------- + /// RSP + 40 | `$ss` + /// RSP + 32 | old `$rsp` + /// RSP + 24 | `$rflags` + /// RSP + 16 | `$cs` + /// RSP + 8 | `$rip` + /// RSP + 0 | error code + PushMachineFrame(bool), + + /// A reserved operation without effect. + Noop, +} + +/// Context used to parse unwind operation. +#[derive(Clone, Copy, Debug, PartialEq)] +struct UnwindOpContext { + /// Version of the unwind info. + version: u8, + + /// The nonvolatile register used as the frame pointer of this function. + /// + /// If this register is non-zero, all stack frame offsets used in unwind operations are of type + /// `StackFrameOffset::FP`. When loading these offsets, they have to be based off the value of + /// this frame register instead of the conventional RSP. This allows the RSP to be modified. + frame_register: Register, +} + +/// An unwind operation that is executed at a particular place in the function prolog. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct UnwindCode { + /// Offset of the corresponding instruction in the function prolog. + /// + /// To be precise, this is the offset from the beginning of the prolog of the end of the + /// instruction that performs this operation, plus 1 (that is, the offset of the start of the + /// next instruction). + /// + /// Unwind codes are ordered by this offset in reverse order, suitable for unwinding. + pub code_offset: u8, + + /// The operation that was performed by the code in the prolog. 
+ pub operation: UnwindOperation, +} + +impl<'a> TryFromCtx<'a, UnwindOpContext> for UnwindCode { + type Error = error::Error; + #[inline] + fn try_from_ctx(bytes: &'a [u8], ctx: UnwindOpContext) -> Result<(Self, usize), Self::Error> { + let mut read = 0; + let code_offset = bytes.gread_with::<u8>(&mut read, scroll::LE)?; + let operation = bytes.gread_with::<u8>(&mut read, scroll::LE)?; + + let operation_code = operation & 0xf; + let operation_info = operation >> 4; + + let operation = match operation_code { + self::UWOP_PUSH_NONVOL => { + let register = Register(operation_info); + UnwindOperation::PushNonVolatile(register) + } + self::UWOP_ALLOC_LARGE => { + let offset = match operation_info { + 0 => u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 8, + 1 => bytes.gread_with::<u32>(&mut read, scroll::LE)?, + i => { + let msg = format!("invalid op info ({}) for UWOP_ALLOC_LARGE", i); + return Err(error::Error::Malformed(msg)); + } + }; + UnwindOperation::Alloc(offset) + } + self::UWOP_ALLOC_SMALL => { + let offset = u32::from(operation_info) * 8 + 8; + UnwindOperation::Alloc(offset) + } + self::UWOP_SET_FPREG => UnwindOperation::SetFPRegister, + self::UWOP_SAVE_NONVOL => { + let register = Register(operation_info); + let offset = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 8; + UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_SAVE_NONVOL_FAR => { + let register = Register(operation_info); + let offset = bytes.gread_with::<u32>(&mut read, scroll::LE)?; + UnwindOperation::SaveNonVolatile(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_EPILOG => { + let data = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) 
* 16; + if ctx.version == 1 { + let register = Register::xmm(operation_info); + UnwindOperation::SaveXMM(register, StackFrameOffset::with_ctx(data, ctx)) + } else { + // TODO: See https://weekly-geekly.github.io/articles/322956/index.html + UnwindOperation::Epilog + } + } + self::UWOP_SPARE_CODE => { + let data = bytes.gread_with::<u32>(&mut read, scroll::LE)?; + if ctx.version == 1 { + let register = Register::xmm(operation_info); + UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(data, ctx)) + } else { + UnwindOperation::Noop + } + } + self::UWOP_SAVE_XMM128 => { + let register = Register::xmm(operation_info); + let offset = u32::from(bytes.gread_with::<u16>(&mut read, scroll::LE)?) * 16; + UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_SAVE_XMM128_FAR => { + let register = Register::xmm(operation_info); + let offset = bytes.gread_with::<u32>(&mut read, scroll::LE)?; + UnwindOperation::SaveXMM128(register, StackFrameOffset::with_ctx(offset, ctx)) + } + self::UWOP_PUSH_MACHFRAME => { + let is_error = match operation_info { + 0 => false, + 1 => true, + i => { + let msg = format!("invalid op info ({}) for UWOP_PUSH_MACHFRAME", i); + return Err(error::Error::Malformed(msg)); + } + }; + UnwindOperation::PushMachineFrame(is_error) + } + op => { + let msg = format!("unknown unwind op code ({})", op); + return Err(error::Error::Malformed(msg)); + } + }; + + let code = UnwindCode { + code_offset, + operation, + }; + + Ok((code, read)) + } +} + +/// An iterator over unwind codes for a function or part of a function, returned from +/// [`UnwindInfo`]. 
+/// +/// [`UnwindInfo`]: struct.UnwindInfo.html +#[derive(Clone, Debug)] +pub struct UnwindCodeIterator<'a> { + bytes: &'a [u8], + offset: usize, + context: UnwindOpContext, +} + +impl Iterator for UnwindCodeIterator<'_> { + type Item = error::Result<UnwindCode>; + + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.bytes.len() { + return None; + } + + Some(self.bytes.gread_with(&mut self.offset, self.context)) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let upper = (self.bytes.len() - self.offset) / UNWIND_CODE_SIZE; + // the largest codes take up three slots + let lower = (upper + 3 - (upper % 3)) / 3; + (lower, Some(upper)) + } +} + +impl FusedIterator for UnwindCodeIterator<'_> {} + +/// A language-specific handler that is called as part of the search for an exception handler or as +/// part of an unwind. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum UnwindHandler<'a> { + /// The image-relative address of an exception handler and its implementation-defined data. + ExceptionHandler(u32, &'a [u8]), + /// The image-relative address of a termination handler and its implementation-defined data. + TerminationHandler(u32, &'a [u8]), +} + +/// Unwind information for a function or portion of a function. +/// +/// The unwind info structure is used to record the effects a function has on the stack pointer and +/// where the nonvolatile registers are saved on the stack. The unwind codes can be enumerated with +/// [`unwind_codes`]. +/// +/// This unwind info might only be secondary information, and link to a [chained unwind handler]. +/// For unwinding, this link shall be followed until the root unwind info record has been resolved. +/// +/// [`unwind_codes`]: struct.UnwindInfo.html#method.unwind_codes +/// [chained unwind handler]: struct.UnwindInfo.html#structfield.chained_info +#[derive(Clone)] +pub struct UnwindInfo<'a> { + /// Version of this unwind info. + pub version: u8, + + /// Length of the function prolog in bytes. 
+ pub size_of_prolog: u8, + + /// The nonvolatile register used as the frame pointer of this function. + /// + /// If this register is non-zero, all stack frame offsets used in unwind operations are of type + /// `StackFrameOffset::FP`. When loading these offsets, they have to be based off the value of + /// this frame register instead of the conventional RSP. This allows the RSP to be modified. + pub frame_register: Register, + + /// Offset from RSP that is applied to the FP register when it is established. + /// + /// When loading offsets of type `StackFrameOffset::FP` from the stack, this offset has to be + /// subtracted before loading the value since the actual RSP was lower by that amount in the + /// prolog. + pub frame_register_offset: u32, + + /// A record pointing to chained unwind information. + /// + /// If chained unwind info is present, then this unwind info is a secondary one and the linked + /// unwind info contains primary information. Chained info is useful in two situations. First, + /// it is used for noncontiguous code segments. Second, this mechanism is sometimes used to + /// group volatile register saves. + /// + /// The referenced unwind info can itself specify chained unwind information, until it arrives + /// at the root unwind info. Generally, the entire chain should be considered when unwinding. + pub chained_info: Option<RuntimeFunction>, + + /// An exception or termination handler called as part of the unwind. + pub handler: Option<UnwindHandler<'a>>, + + /// A list of unwind codes, sorted descending by code offset. + code_bytes: &'a [u8], +} + +impl<'a> UnwindInfo<'a> { + /// Parses unwind information from the image at the given offset. + pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<Self> { + // Read the version and flags fields, which are combined into a single byte. 
+ let version_flags: u8 = bytes.gread_with(&mut offset, scroll::LE)?; + let version = version_flags & 0b111; + let flags = version_flags >> 3; + + if version < 1 || version > 2 { + let msg = format!("unsupported unwind code version ({})", version); + return Err(error::Error::Malformed(msg)); + } + + let size_of_prolog = bytes.gread_with::<u8>(&mut offset, scroll::LE)?; + let count_of_codes = bytes.gread_with::<u8>(&mut offset, scroll::LE)?; + + // Parse the frame register and frame register offset values, that are combined into a + // single byte. + let frame_info = bytes.gread_with::<u8>(&mut offset, scroll::LE)?; + // If nonzero, then the function uses a frame pointer (FP), and this field is the number + // of the nonvolatile register used as the frame pointer. The zero register value does + // not need special casing since it will not be referenced by the unwind operations. + let frame_register = Register(frame_info & 0xf); + // The the scaled offset from RSP that is applied to the FP register when it's + // established. The actual FP register is set to RSP + 16 * this number, allowing + // offsets from 0 to 240. + let frame_register_offset = u32::from((frame_info >> 4) * 16); + + // An array of items that explains the effect of the prolog on the nonvolatile registers and + // RSP. Some unwind codes require more than one slot in the array. + let codes_size = count_of_codes as usize * UNWIND_CODE_SIZE; + let code_bytes = bytes.gread_with(&mut offset, codes_size)?; + + // For alignment purposes, the codes array always has an even number of entries, and the + // final entry is potentially unused. In that case, the array is one longer than indicated + // by the count of unwind codes field. + if count_of_codes % 2 != 0 { + offset += 2; + } + debug_assert!(offset % 4 == 0); + + let mut chained_info = None; + let mut handler = None; + + // If flag UNW_FLAG_CHAININFO is set then the UNWIND_INFO structure ends with three UWORDs. 
+ // These UWORDs represent the RUNTIME_FUNCTION information for the function of the chained + // unwind. + if flags & UNW_FLAG_CHAININFO != 0 { + chained_info = Some(bytes.gread_with(&mut offset, scroll::LE)?); + + // The relative address of the language-specific handler is present in the UNWIND_INFO + // whenever flags UNW_FLAG_EHANDLER or UNW_FLAG_UHANDLER are set. The language-specific + // handler is called as part of the search for an exception handler or as part of an unwind. + } else if flags & (UNW_FLAG_EHANDLER | UNW_FLAG_UHANDLER) != 0 { + let address = bytes.gread_with::<u32>(&mut offset, scroll::LE)?; + let data = &bytes[offset..]; + + handler = Some(if flags & UNW_FLAG_EHANDLER != 0 { + UnwindHandler::ExceptionHandler(address, data) + } else { + UnwindHandler::TerminationHandler(address, data) + }); + } + + Ok(UnwindInfo { + version, + size_of_prolog, + frame_register, + frame_register_offset, + chained_info, + handler, + code_bytes, + }) + } + + /// Returns an iterator over unwind codes in this unwind info. + /// + /// Unwind codes are iterated in descending `code_offset` order suitable for unwinding. If the + /// optional [`chained_info`] is present, codes of that unwind info should be interpreted + /// immediately afterwards. 
+ pub fn unwind_codes(&self) -> UnwindCodeIterator<'a> { + UnwindCodeIterator { + bytes: self.code_bytes, + offset: 0, + context: UnwindOpContext { + version: self.version, + frame_register: self.frame_register, + }, + } + } +} + +impl fmt::Debug for UnwindInfo<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let count_of_codes = self.code_bytes.len() / UNWIND_CODE_SIZE; + + f.debug_struct("UnwindInfo") + .field("version", &self.version) + .field("size_of_prolog", &self.size_of_prolog) + .field("frame_register", &self.frame_register) + .field("frame_register_offset", &self.frame_register_offset) + .field("count_of_codes", &count_of_codes) + .field("chained_info", &self.chained_info) + .field("handler", &self.handler) + .finish() + } +} + +impl<'a> IntoIterator for &'_ UnwindInfo<'a> { + type Item = error::Result<UnwindCode>; + type IntoIter = UnwindCodeIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.unwind_codes() + } +} + +/// Exception handling and stack unwind information for functions in the image. +pub struct ExceptionData<'a> { + bytes: &'a [u8], + offset: usize, + size: usize, + file_alignment: u32, +} + +impl<'a> ExceptionData<'a> { + /// Parses exception data from the image at the given offset. + pub fn parse( + bytes: &'a [u8], + directory: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Self> { + Self::parse_with_opts( + bytes, + directory, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + /// Parses exception data from the image at the given offset. 
+ pub fn parse_with_opts( + bytes: &'a [u8], + directory: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Self> { + let size = directory.size as usize; + + if size % RUNTIME_FUNCTION_SIZE != 0 { + return Err(error::Error::from(scroll::Error::BadInput { + size, + msg: "invalid exception directory table size", + })); + } + + let rva = directory.virtual_address as usize; + let offset = utils::find_offset(rva, sections, file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!("cannot map exception_rva ({:#x}) into offset", rva)) + })?; + + if offset % 4 != 0 { + return Err(error::Error::from(scroll::Error::BadOffset(offset))); + } + + Ok(ExceptionData { + bytes, + offset, + size, + file_alignment, + }) + } + + /// The number of function entries described by this exception data. + pub fn len(&self) -> usize { + self.size / RUNTIME_FUNCTION_SIZE + } + + /// Indicating whether there are functions in this entry. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Iterates all function entries in order of their code offset. + /// + /// To search for a function by relative instruction address, use [`find_function`]. To resolve + /// unwind information, use [`get_unwind_info`]. + /// + /// [`find_function`]: struct.ExceptionData.html#method.find_function + /// [`get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info + pub fn functions(&self) -> RuntimeFunctionIterator<'a> { + RuntimeFunctionIterator { + data: &self.bytes[self.offset..self.offset + self.size], + } + } + + /// Returns the function at the given index. + pub fn get_function(&self, index: usize) -> error::Result<RuntimeFunction> { + self.get_function_by_offset(self.offset + index * RUNTIME_FUNCTION_SIZE) + } + + /// Performs a binary search to find a function entry covering the given RVA relative to the + /// image. 
+ pub fn find_function(&self, rva: u32) -> error::Result<Option<RuntimeFunction>> { + // NB: Binary search implementation copied from std::slice::binary_search_by and adapted. + // Theoretically, there should be nothing that causes parsing runtime functions to fail and + // all access to the bytes buffer is guaranteed to be in range. However, since all other + // functions also return Results, this is much more ergonomic here. + + let mut size = self.len(); + if size == 0 { + return Ok(None); + } + + let mut base = 0; + while size > 1 { + let half = size / 2; + let mid = base + half; + let offset = self.offset + mid * RUNTIME_FUNCTION_SIZE; + let addr = self.bytes.pread_with::<u32>(offset, scroll::LE)?; + base = if addr > rva { base } else { mid }; + size -= half; + } + + let offset = self.offset + base * RUNTIME_FUNCTION_SIZE; + let addr = self.bytes.pread_with::<u32>(offset, scroll::LE)?; + let function = match addr.cmp(&rva) { + Ordering::Less | Ordering::Equal => self.get_function(base)?, + Ordering::Greater if base == 0 => return Ok(None), + Ordering::Greater => self.get_function(base - 1)?, + }; + + if function.end_address > rva { + Ok(Some(function)) + } else { + Ok(None) + } + } + + /// Resolves unwind information for the given function entry. + pub fn get_unwind_info( + &self, + function: RuntimeFunction, + sections: &[section_table::SectionTable], + ) -> error::Result<UnwindInfo<'a>> { + self.get_unwind_info_with_opts(function, sections, &options::ParseOptions::default()) + } + + /// Resolves unwind information for the given function entry. 
+ pub fn get_unwind_info_with_opts( + &self, + mut function: RuntimeFunction, + sections: &[section_table::SectionTable], + opts: &options::ParseOptions, + ) -> error::Result<UnwindInfo<'a>> { + while function.unwind_info_address % 2 != 0 { + let rva = (function.unwind_info_address & !1) as usize; + function = self.get_function_by_rva_with_opts(rva, sections, opts)?; + } + + let rva = function.unwind_info_address as usize; + let offset = + utils::find_offset(rva, sections, self.file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!("cannot map unwind rva ({:#x}) into offset", rva)) + })?; + + UnwindInfo::parse(self.bytes, offset) + } + + #[allow(dead_code)] + fn get_function_by_rva( + &self, + rva: usize, + sections: &[section_table::SectionTable], + ) -> error::Result<RuntimeFunction> { + self.get_function_by_rva_with_opts(rva, sections, &options::ParseOptions::default()) + } + + fn get_function_by_rva_with_opts( + &self, + rva: usize, + sections: &[section_table::SectionTable], + opts: &options::ParseOptions, + ) -> error::Result<RuntimeFunction> { + let offset = + utils::find_offset(rva, sections, self.file_alignment, opts).ok_or_else(|| { + error::Error::Malformed(format!( + "cannot map exception rva ({:#x}) into offset", + rva + )) + })?; + + self.get_function_by_offset(offset) + } + + #[inline] + fn get_function_by_offset(&self, offset: usize) -> error::Result<RuntimeFunction> { + debug_assert!((offset - self.offset) % RUNTIME_FUNCTION_SIZE == 0); + debug_assert!(offset < self.offset + self.size); + + Ok(self.bytes.pread_with(offset, scroll::LE)?) 
+ } +} + +impl fmt::Debug for ExceptionData<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("ExceptionData") + .field("file_alignment", &self.file_alignment) + .field("offset", &format_args!("{:#x}", self.offset)) + .field("size", &format_args!("{:#x}", self.size)) + .field("len", &self.len()) + .finish() + } +} + +impl<'a> IntoIterator for &'_ ExceptionData<'a> { + type Item = error::Result<RuntimeFunction>; + type IntoIter = RuntimeFunctionIterator<'a>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.functions() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_size_of_runtime_function() { + assert_eq!( + std::mem::size_of::<RuntimeFunction>(), + RUNTIME_FUNCTION_SIZE + ); + } + + // Tests disabled until there is a solution for handling binary test data + // See https://github.com/m4b/goblin/issues/185 + + // macro_rules! microsoft_symbol { + // ($name:literal, $id:literal) => {{ + // use std::fs::File; + // use std::path::Path; + + // let path = Path::new(concat!("cache/", $name)); + // if !path.exists() { + // let url = format!( + // "https://msdl.microsoft.com/download/symbols/{}/{}/{}", + // $name, $id, $name + // ); + + // let mut response = reqwest::get(&url).expect(concat!("get ", $name)); + // let mut target = File::create(path).expect(concat!("create ", $name)); + // response + // .copy_to(&mut target) + // .expect(concat!("download ", $name)); + // } + + // std::fs::read(path).expect(concat!("open ", $name)) + // }}; + // } + + // lazy_static::lazy_static! 
{ + // static ref PE_DATA: Vec<u8> = microsoft_symbol!("WSHTCPIP.DLL", "4a5be0b77000"); + // } + + // #[test] + // fn test_parse() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // assert_eq!(exception_data.len(), 19); + // assert!(!exception_data.is_empty()); + // } + + // #[test] + // fn test_iter_functions() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // let functions: Vec<RuntimeFunction> = exception_data + // .functions() + // .map(|result| result.expect("parse runtime function")) + // .collect(); + + // assert_eq!(functions.len(), 19); + + // let expected = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // assert_eq!(functions[4], expected); + // } + + // #[test] + // fn test_get_function() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // let expected = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // assert_eq!( + // exception_data.get_function(4).expect("find function"), + // expected + // ); + // } + + // #[test] + // fn test_find_function() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // let expected = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // assert_eq!( + // exception_data.find_function(0x1400).expect("find function"), + // Some(expected) + // ); + // } + + // #[test] + // fn test_find_function_none() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // // 0x1d00 is the end address of the last function. 
+ + // assert_eq!( + // exception_data.find_function(0x1d00).expect("find function"), + // None + // ); + // } + + // #[test] + // fn test_get_unwind_info() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // // runtime function #0 directly refers to unwind info + // let rt_function = RuntimeFunction { + // begin_address: 0x1010, + // end_address: 0x1090, + // unwind_info_address: 0x25d8, + // }; + + // let unwind_info = exception_data + // .get_unwind_info(rt_function, &pe.sections) + // .expect("get unwind info"); + + // // Unwind codes just used to assert that the right unwind info was resolved + // let expected = &[4, 98]; + + // assert_eq!(unwind_info.code_bytes, expected); + // } + + // #[test] + // fn test_get_unwind_info_redirect() { + // let pe = PE::parse(&PE_DATA).expect("parse PE"); + // let exception_data = pe.exception_data.expect("get exception data"); + + // // runtime function #4 has a redirect (unwind_info_address & 1). 
+ // let rt_function = RuntimeFunction { + // begin_address: 0x1355, + // end_address: 0x1420, + // unwind_info_address: 0x4019, + // }; + + // let unwind_info = exception_data + // .get_unwind_info(rt_function, &pe.sections) + // .expect("get unwind info"); + + // // Unwind codes just used to assert that the right unwind info was resolved + // let expected = &[ + // 28, 100, 15, 0, 28, 84, 14, 0, 28, 52, 12, 0, 28, 82, 24, 240, 22, 224, 20, 208, 18, + // 192, 16, 112, + // ]; + + // assert_eq!(unwind_info.code_bytes, expected); + // } + + #[test] + fn test_iter_unwind_codes() { + let unwind_info = UnwindInfo { + version: 1, + size_of_prolog: 4, + frame_register: Register(0), + frame_register_offset: 0, + chained_info: None, + handler: None, + code_bytes: &[4, 98], + }; + + let unwind_codes: Vec<UnwindCode> = unwind_info + .unwind_codes() + .map(|result| result.expect("parse unwind code")) + .collect(); + + assert_eq!(unwind_codes.len(), 1); + + let expected = UnwindCode { + code_offset: 4, + operation: UnwindOperation::Alloc(56), + }; + + assert_eq!(unwind_codes[0], expected); + } +} diff --git a/third_party/rust/goblin/src/pe/export.rs b/third_party/rust/goblin/src/pe/export.rs new file mode 100644 index 0000000000..f892c75276 --- /dev/null +++ b/third_party/rust/goblin/src/pe/export.rs @@ -0,0 +1,533 @@ +use alloc::vec::Vec; +use scroll::{Pread, Pwrite}; + +use log::debug; + +use crate::error; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite)] +pub struct ExportDirectoryTable { + pub export_flags: u32, + pub time_date_stamp: u32, + pub major_version: u16, + pub minor_version: u16, + pub name_rva: u32, + pub ordinal_base: u32, + pub address_table_entries: u32, + pub number_of_name_pointers: u32, + pub export_address_table_rva: u32, + pub name_pointer_rva: u32, + pub ordinal_table_rva: u32, +} + +pub const 
SIZEOF_EXPORT_DIRECTORY_TABLE: usize = 40; + +impl ExportDirectoryTable { + pub fn parse(bytes: &[u8], offset: usize) -> error::Result<Self> { + let res = bytes.pread_with(offset, scroll::LE)?; + Ok(res) + } +} + +#[derive(Debug)] +pub enum ExportAddressTableEntry { + ExportRVA(u32), + ForwarderRVA(u32), +} + +pub const SIZEOF_EXPORT_ADDRESS_TABLE_ENTRY: usize = 4; + +pub type ExportAddressTable = Vec<ExportAddressTableEntry>; + +/// Array of rvas into the export name table +/// +/// Export name is defined iff pointer table has pointer to the name +pub type ExportNamePointerTable = Vec<u32>; + +/// Array of indexes into the export address table. +/// +/// Should obey the formula `idx = ordinal - ordinalbase` +pub type ExportOrdinalTable = Vec<u16>; + +#[derive(Debug, Default)] +/// Export data contains the `dll` name which other libraries can import symbols by (two-level namespace), as well as other important indexing data allowing symbol lookups +pub struct ExportData<'a> { + pub name: Option<&'a str>, + pub export_directory_table: ExportDirectoryTable, + pub export_name_pointer_table: ExportNamePointerTable, + pub export_ordinal_table: ExportOrdinalTable, + pub export_address_table: ExportAddressTable, +} + +impl<'a> ExportData<'a> { + pub fn parse( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<ExportData<'a>> { + Self::parse_with_opts( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<ExportData<'a>> { + let export_rva = dd.virtual_address as usize; + let size = dd.size as usize; + debug!("export_rva {:#x} size {:#}", export_rva, size); + let export_offset = utils::find_offset_or( + export_rva, + sections, + 
file_alignment, + opts, + &format!("cannot map export_rva ({:#x}) into offset", export_rva), + )?; + let export_directory_table = + ExportDirectoryTable::parse(bytes, export_offset).map_err(|_| { + error::Error::Malformed(format!( + "cannot parse export_directory_table (offset {:#x})", + export_offset + )) + })?; + let number_of_name_pointers = export_directory_table.number_of_name_pointers as usize; + let address_table_entries = export_directory_table.address_table_entries as usize; + + if number_of_name_pointers > bytes.len() { + return Err(error::Error::BufferTooShort( + number_of_name_pointers, + "name pointers", + )); + } + if address_table_entries > bytes.len() { + return Err(error::Error::BufferTooShort( + address_table_entries, + "address table entries", + )); + } + + let export_name_pointer_table = utils::find_offset( + export_directory_table.name_pointer_rva as usize, + sections, + file_alignment, + opts, + ) + .map_or(vec![], |table_offset| { + let mut offset = table_offset; + let mut table: ExportNamePointerTable = Vec::with_capacity(number_of_name_pointers); + + for _ in 0..number_of_name_pointers { + if let Ok(name_rva) = bytes.gread_with(&mut offset, scroll::LE) { + table.push(name_rva); + } else { + break; + } + } + + table + }); + + let export_ordinal_table = utils::find_offset( + export_directory_table.ordinal_table_rva as usize, + sections, + file_alignment, + opts, + ) + .map_or(vec![], |table_offset| { + let mut offset = table_offset; + let mut table: ExportOrdinalTable = Vec::with_capacity(number_of_name_pointers); + + for _ in 0..number_of_name_pointers { + if let Ok(name_ordinal) = bytes.gread_with(&mut offset, scroll::LE) { + table.push(name_ordinal); + } else { + break; + } + } + + table + }); + + let export_address_table = utils::find_offset( + export_directory_table.export_address_table_rva as usize, + sections, + file_alignment, + opts, + ) + .map_or(vec![], |table_offset| { + let mut offset = table_offset; + let mut table: 
ExportAddressTable = Vec::with_capacity(address_table_entries); + let export_end = export_rva + size; + + for _ in 0..address_table_entries { + if let Ok(func_rva) = bytes.gread_with::<u32>(&mut offset, scroll::LE) { + if utils::is_in_range(func_rva as usize, export_rva, export_end) { + table.push(ExportAddressTableEntry::ForwarderRVA(func_rva)); + } else { + table.push(ExportAddressTableEntry::ExportRVA(func_rva)); + } + } else { + break; + } + } + + table + }); + + let name = utils::find_offset( + export_directory_table.name_rva as usize, + sections, + file_alignment, + opts, + ) + .and_then(|offset| bytes.pread(offset).ok()); + + Ok(ExportData { + name, + export_directory_table, + export_name_pointer_table, + export_ordinal_table, + export_address_table, + }) + } +} + +#[derive(Debug)] +/// PE binaries have two kinds of reexports, either specifying the dll's name, or the ordinal value of the dll +pub enum Reexport<'a> { + DLLName { export: &'a str, lib: &'a str }, + DLLOrdinal { ordinal: usize, lib: &'a str }, +} + +impl<'a> scroll::ctx::TryFromCtx<'a, scroll::Endian> for Reexport<'a> { + type Error = crate::error::Error; + #[inline] + fn try_from_ctx(bytes: &'a [u8], _ctx: scroll::Endian) -> Result<(Self, usize), Self::Error> { + let reexport = bytes.pread::<&str>(0)?; + let reexport_len = reexport.len(); + debug!("reexport: {}", &reexport); + for o in 0..reexport_len { + let c: u8 = bytes.pread(o)?; + debug!("reexport offset: {:#x} char: {:#x}", o, c); + if c == b'.' 
{ + let dll: &'a str = bytes.pread_with(0, scroll::ctx::StrCtx::Length(o))?; + debug!("dll: {:?}", &dll); + if o + 1 == reexport_len { + break; + } + let len = reexport_len - o - 1; + let rest: &'a [u8] = bytes.pread_with(o + 1, len)?; + debug!("rest: {:?}", &rest); + if rest[0] == b'#' { + let ordinal = + rest.pread_with::<&str>(1, scroll::ctx::StrCtx::Length(len - 1))?; + let ordinal = ordinal.parse::<u32>().map_err(|_e| { + error::Error::Malformed(format!( + "Cannot parse reexport ordinal from {} bytes", + bytes.len() + )) + })?; + return Ok(( + Reexport::DLLOrdinal { + ordinal: ordinal as usize, + lib: dll, + }, + reexport_len + 1, + )); + } else { + let export = rest.pread_with::<&str>(0, scroll::ctx::StrCtx::Length(len))?; + return Ok((Reexport::DLLName { export, lib: dll }, reexport_len + 1)); + } + } + } + Err(error::Error::Malformed(format!( + "Reexport {:#} is malformed", + reexport + ))) + } +} + +impl<'a> Reexport<'a> { + pub fn parse(bytes: &'a [u8], offset: usize) -> crate::error::Result<Reexport<'a>> { + bytes.pread(offset) + } +} + +#[derive(Debug, Default)] +/// An exported symbol in this binary, contains synthetic data (name offset, etc., are computed) +pub struct Export<'a> { + pub name: Option<&'a str>, + pub offset: Option<usize>, + pub rva: usize, + pub size: usize, + pub reexport: Option<Reexport<'a>>, +} + +#[derive(Debug, Copy, Clone)] +struct ExportCtx<'a> { + pub ptr: u32, + pub idx: usize, + pub sections: &'a [section_table::SectionTable], + pub file_alignment: u32, + pub addresses: &'a ExportAddressTable, + pub ordinals: &'a ExportOrdinalTable, + pub opts: options::ParseOptions, +} + +impl<'a, 'b> scroll::ctx::TryFromCtx<'a, ExportCtx<'b>> for Export<'a> { + type Error = error::Error; + #[inline] + fn try_from_ctx( + bytes: &'a [u8], + ExportCtx { + ptr, + idx, + sections, + file_alignment, + addresses, + ordinals, + opts, + }: ExportCtx<'b>, + ) -> Result<(Self, usize), Self::Error> { + use self::ExportAddressTableEntry::*; + + let 
name = utils::find_offset(ptr as usize, sections, file_alignment, &opts) + .and_then(|offset| bytes.pread::<&str>(offset).ok()); + + if let Some(ordinal) = ordinals.get(idx) { + if let Some(rva) = addresses.get(*ordinal as usize) { + match *rva { + ExportRVA(rva) => { + let rva = rva as usize; + let offset = utils::find_offset(rva, sections, file_alignment, &opts); + Ok(( + Export { + name, + offset, + rva, + reexport: None, + size: 0, + }, + 0, + )) + } + + ForwarderRVA(rva) => { + let rva = rva as usize; + let offset = utils::find_offset_or( + rva, + sections, + file_alignment, + &opts, + &format!( + "cannot map RVA ({:#x}) of export ordinal {} into offset", + rva, ordinal + ), + )?; + let reexport = Reexport::parse(bytes, offset)?; + Ok(( + Export { + name, + offset: Some(offset), + rva, + reexport: Some(reexport), + size: 0, + }, + 0, + )) + } + } + } else { + Err(error::Error::Malformed(format!( + "cannot get RVA of export ordinal {}", + ordinal + ))) + } + } else { + Err(error::Error::Malformed(format!( + "cannot get ordinal of export name entry {}", + idx + ))) + } + } +} + +impl<'a> Export<'a> { + pub fn parse( + bytes: &'a [u8], + export_data: &ExportData, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<Vec<Export<'a>>> { + Self::parse_with_opts( + bytes, + export_data, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts( + bytes: &'a [u8], + export_data: &ExportData, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<Vec<Export<'a>>> { + let pointers = &export_data.export_name_pointer_table; + let addresses = &export_data.export_address_table; + let ordinals = &export_data.export_ordinal_table; + + let mut exports = Vec::with_capacity(pointers.len()); + for (idx, &ptr) in pointers.iter().enumerate() { + if let Ok(export) = bytes.pread_with( + 0, + ExportCtx { + ptr, + idx, + sections, + 
file_alignment, + addresses, + ordinals, + opts: *opts, + }, + ) { + exports.push(export); + } + } + + // TODO: sort + compute size + Ok(exports) + } +} + +#[cfg(test)] +mod tests { + use self::data_directories::*; + use super::*; + + static CORKAMI_POCS_PE_EXPORTSDATA_EXE: [u8; 0x400] = [ + 0x4d, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x50, 0x45, 0x00, 0x00, 0x4c, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x02, 0x01, 0x0b, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x60, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0xb0, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x80, 0x03, + 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x20, 0x2a, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, + 0x73, 0x74, 0x6f, 0x72, 0x65, 0x64, 0x20, 0x61, 0x73, 0x20, 0x66, 0x61, 0x6b, 0x65, 0x20, + 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x95, 0x10, 0x00, 0x00, + 0x40, 0x10, 0x00, 0x00, 
0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xa0, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x72, 0x69, 0x6e, 0x74, 0x66, + 0x00, 0x6d, 0x73, 0x76, 0x63, 0x72, 0x74, 0x2e, 0x64, 0x6c, 0x6c, 0x00, 0x65, 0x78, 0x70, + 0x6f, 0x72, 0x74, 0x73, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x65, 0x78, 0x65, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x68, 0x14, 0x10, 0xf0, 0x10, 0xff, 0x15, 0x30, 0x10, 0x00, 0x10, 0x73, 0xc4, 0x04, + 0xc3, 0xbc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + + #[test] + fn size_export_directory_table() { + assert_eq!( + ::std::mem::size_of::<ExportDirectoryTable>(), + SIZEOF_EXPORT_DIRECTORY_TABLE + ); + } + + #[test] + fn parse_export_table() { + let data_dirs = + DataDirectories::parse(&CORKAMI_POCS_PE_EXPORTSDATA_EXE[..], 16, &mut 0xb8).unwrap(); + let export_table = data_dirs.get_export_table().unwrap(); + + assert_eq!(export_table.virtual_address, 0x10b0); + assert_eq!(export_table.size, 0x0); + } + + #[test] + fn parse_export_directory() { + let data_dir = ExportDirectoryTable::parse(&CORKAMI_POCS_PE_EXPORTSDATA_EXE[..], 0x2b0); + assert!(data_dir.is_ok()); + + let data_dir = data_dir.unwrap(); + assert_eq!(data_dir.export_flags, 0x0); + assert_eq!(data_dir.time_date_stamp, 0x0); + assert_eq!(data_dir.major_version, 0x0); + assert_eq!(data_dir.minor_version, 0x0); + assert_eq!(data_dir.name_rva, 0x0); + assert_eq!(data_dir.ordinal_base, 0x0); + assert_eq!(data_dir.address_table_entries, 0x4); + assert_eq!(data_dir.number_of_name_pointers, 0x0); + assert_eq!(data_dir.export_address_table_rva, 0x10e0); + assert_eq!(data_dir.name_pointer_rva, 0x0); + assert_eq!(data_dir.ordinal_table_rva, 0x1100); + } +} diff --git a/third_party/rust/goblin/src/pe/header.rs b/third_party/rust/goblin/src/pe/header.rs new file mode 100644 index 0000000000..c1ded9dd9f --- /dev/null +++ b/third_party/rust/goblin/src/pe/header.rs @@ -0,0 +1,312 @@ +use crate::error; 
+use crate::pe::{optional_header, section_table, symbol}; +use crate::strtab; +use alloc::vec::Vec; +use log::debug; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// DOS header present in all PE binaries +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct DosHeader { + /// Magic number: 5a4d + pub signature: u16, + /// Pointer to PE header, always at offset 0x3c + pub pe_pointer: u32, +} + +pub const DOS_MAGIC: u16 = 0x5a4d; +pub const PE_POINTER_OFFSET: u32 = 0x3c; + +impl DosHeader { + pub fn parse(bytes: &[u8]) -> error::Result<Self> { + let signature = bytes.pread_with(0, scroll::LE).map_err(|_| { + error::Error::Malformed(format!("cannot parse DOS signature (offset {:#x})", 0)) + })?; + if signature != DOS_MAGIC { + return Err(error::Error::Malformed(format!( + "DOS header is malformed (signature {:#x})", + signature + ))); + } + let pe_pointer = bytes + .pread_with(PE_POINTER_OFFSET as usize, scroll::LE) + .map_err(|_| { + error::Error::Malformed(format!( + "cannot parse PE header pointer (offset {:#x})", + PE_POINTER_OFFSET + )) + })?; + let pe_signature: u32 = + bytes + .pread_with(pe_pointer as usize, scroll::LE) + .map_err(|_| { + error::Error::Malformed(format!( + "cannot parse PE header signature (offset {:#x})", + pe_pointer + )) + })?; + if pe_signature != PE_MAGIC { + return Err(error::Error::Malformed(format!( + "PE header is malformed (signature {:#x})", + pe_signature + ))); + } + Ok(DosHeader { + signature, + pe_pointer, + }) + } +} + +/// COFF Header +#[repr(C)] +#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct CoffHeader { + /// The machine type + pub machine: u16, + pub number_of_sections: u16, + pub time_date_stamp: u32, + pub pointer_to_symbol_table: u32, + pub number_of_symbol_table: u32, + pub size_of_optional_header: u16, + pub characteristics: u16, +} + +pub const SIZEOF_COFF_HEADER: usize = 20; +/// PE\0\0, little endian +pub const PE_MAGIC: u32 
= 0x0000_4550; +pub const SIZEOF_PE_MAGIC: usize = 4; +/// The contents of this field are assumed to be applicable to any machine type +pub const COFF_MACHINE_UNKNOWN: u16 = 0x0; +/// Matsushita AM33 +pub const COFF_MACHINE_AM33: u16 = 0x1d3; +/// x64 +pub const COFF_MACHINE_X86_64: u16 = 0x8664; +/// ARM little endian +pub const COFF_MACHINE_ARM: u16 = 0x1c0; +/// ARM64 little endian +pub const COFF_MACHINE_ARM64: u16 = 0xaa64; +/// ARM Thumb-2 little endian +pub const COFF_MACHINE_ARMNT: u16 = 0x1c4; +/// EFI byte code +pub const COFF_MACHINE_EBC: u16 = 0xebc; +/// Intel 386 or later processors and compatible processors +pub const COFF_MACHINE_X86: u16 = 0x14c; +/// Intel Itanium processor family +pub const COFF_MACHINE_IA64: u16 = 0x200; +/// Mitsubishi M32R little endian +pub const COFF_MACHINE_M32R: u16 = 0x9041; +/// MIPS16 +pub const COFF_MACHINE_MIPS16: u16 = 0x266; +/// MIPS with FPU +pub const COFF_MACHINE_MIPSFPU: u16 = 0x366; +/// MIPS16 with FPU +pub const COFF_MACHINE_MIPSFPU16: u16 = 0x466; +/// Power PC little endian +pub const COFF_MACHINE_POWERPC: u16 = 0x1f0; +/// Power PC with floating point support +pub const COFF_MACHINE_POWERPCFP: u16 = 0x1f1; +/// MIPS little endian +pub const COFF_MACHINE_R4000: u16 = 0x166; +/// RISC-V 32-bit address space +pub const COFF_MACHINE_RISCV32: u16 = 0x5032; +/// RISC-V 64-bit address space +pub const COFF_MACHINE_RISCV64: u16 = 0x5064; +/// RISC-V 128-bit address space +pub const COFF_MACHINE_RISCV128: u16 = 0x5128; +/// Hitachi SH3 +pub const COFF_MACHINE_SH3: u16 = 0x1a2; +/// Hitachi SH3 DSP +pub const COFF_MACHINE_SH3DSP: u16 = 0x1a3; +/// Hitachi SH4 +pub const COFF_MACHINE_SH4: u16 = 0x1a6; +/// Hitachi SH5 +pub const COFF_MACHINE_SH5: u16 = 0x1a8; +/// Thumb +pub const COFF_MACHINE_THUMB: u16 = 0x1c2; +/// MIPS little-endian WCE v2 +pub const COFF_MACHINE_WCEMIPSV2: u16 = 0x169; + +impl CoffHeader { + pub fn parse(bytes: &[u8], offset: &mut usize) -> error::Result<Self> { + Ok(bytes.gread_with(offset, 
scroll::LE)?) + } + + /// Parse the COFF section headers. + /// + /// For COFF, these immediately follow the COFF header. For PE, these immediately follow the + /// optional header. + pub fn sections( + &self, + bytes: &[u8], + offset: &mut usize, + ) -> error::Result<Vec<section_table::SectionTable>> { + let nsections = self.number_of_sections as usize; + + // a section table is at least 40 bytes + if nsections > bytes.len() / 40 { + return Err(error::Error::BufferTooShort(nsections, "sections")); + } + + let mut sections = Vec::with_capacity(nsections); + // Note that if we are handling a BigCoff, the size of the symbol will be different! + let string_table_offset = self.pointer_to_symbol_table as usize + + symbol::SymbolTable::size(self.number_of_symbol_table as usize); + for i in 0..nsections { + let section = + section_table::SectionTable::parse(bytes, offset, string_table_offset as usize)?; + debug!("({}) {:#?}", i, section); + sections.push(section); + } + Ok(sections) + } + + /// Return the COFF symbol table. + pub fn symbols<'a>(&self, bytes: &'a [u8]) -> error::Result<symbol::SymbolTable<'a>> { + let offset = self.pointer_to_symbol_table as usize; + let number = self.number_of_symbol_table as usize; + symbol::SymbolTable::parse(bytes, offset, number) + } + + /// Return the COFF string table. + pub fn strings<'a>(&self, bytes: &'a [u8]) -> error::Result<strtab::Strtab<'a>> { + let mut offset = self.pointer_to_symbol_table as usize + + symbol::SymbolTable::size(self.number_of_symbol_table as usize); + + let length_field_size = core::mem::size_of::<u32>(); + let length = bytes.pread_with::<u32>(offset, scroll::LE)? as usize - length_field_size; + + // The offset needs to be advanced in order to read the strings. + offset += length_field_size; + + Ok(strtab::Strtab::parse(bytes, offset, length, 0)?) 
+ } +} + +#[derive(Debug, PartialEq, Copy, Clone, Default)] +pub struct Header { + pub dos_header: DosHeader, + /// PE Magic: PE\0\0, little endian + pub signature: u32, + pub coff_header: CoffHeader, + pub optional_header: Option<optional_header::OptionalHeader>, +} + +impl Header { + pub fn parse(bytes: &[u8]) -> error::Result<Self> { + let dos_header = DosHeader::parse(&bytes)?; + let mut offset = dos_header.pe_pointer as usize; + let signature = bytes.gread_with(&mut offset, scroll::LE).map_err(|_| { + error::Error::Malformed(format!("cannot parse PE signature (offset {:#x})", offset)) + })?; + let coff_header = CoffHeader::parse(&bytes, &mut offset)?; + let optional_header = if coff_header.size_of_optional_header > 0 { + Some(bytes.pread::<optional_header::OptionalHeader>(offset)?) + } else { + None + }; + Ok(Header { + dos_header, + signature, + coff_header, + optional_header, + }) + } +} + +/// Convert machine to str representation +pub fn machine_to_str(machine: u16) -> &'static str { + match machine { + COFF_MACHINE_UNKNOWN => "UNKNOWN", + COFF_MACHINE_AM33 => "AM33", + COFF_MACHINE_X86_64 => "X86_64", + COFF_MACHINE_ARM => "ARM", + COFF_MACHINE_ARM64 => "ARM64", + COFF_MACHINE_ARMNT => "ARM_NT", + COFF_MACHINE_EBC => "EBC", + COFF_MACHINE_X86 => "X86", + COFF_MACHINE_IA64 => "IA64", + COFF_MACHINE_M32R => "M32R", + COFF_MACHINE_MIPS16 => "MIPS_16", + COFF_MACHINE_MIPSFPU => "MIPS_FPU", + COFF_MACHINE_MIPSFPU16 => "MIPS_FPU_16", + COFF_MACHINE_POWERPC => "POWERPC", + COFF_MACHINE_POWERPCFP => "POWERCFP", + COFF_MACHINE_R4000 => "R4000", + COFF_MACHINE_RISCV32 => "RISC-V_32", + COFF_MACHINE_RISCV64 => "RISC-V_64", + COFF_MACHINE_RISCV128 => "RISC-V_128", + COFF_MACHINE_SH3 => "SH3", + COFF_MACHINE_SH3DSP => "SH3DSP", + COFF_MACHINE_SH4 => "SH4", + COFF_MACHINE_SH5 => "SH5", + COFF_MACHINE_THUMB => "THUMB", + COFF_MACHINE_WCEMIPSV2 => "WCE_MIPS_V2", + _ => "COFF_UNKNOWN", + } +} + +#[cfg(test)] +mod tests { + use super::{machine_to_str, Header, 
COFF_MACHINE_X86, DOS_MAGIC, PE_MAGIC}; + + const CRSS_HEADER: [u8; 688] = [ + 0x4d, 0x5a, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, + 0x00, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd0, 0x00, 0x00, 0x00, 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, + 0x4c, 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, + 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, 0x20, + 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x0d, 0x0d, 0x0a, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0x4a, 0xc3, 0xeb, 0xee, 0x2b, 0xad, + 0xb8, 0xee, 0x2b, 0xad, 0xb8, 0xee, 0x2b, 0xad, 0xb8, 0xee, 0x2b, 0xac, 0xb8, 0xfe, 0x2b, + 0xad, 0xb8, 0x33, 0xd4, 0x66, 0xb8, 0xeb, 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x63, 0xb8, 0xea, + 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x7a, 0xb8, 0xed, 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x64, 0xb8, + 0xef, 0x2b, 0xad, 0xb8, 0x33, 0xd4, 0x61, 0xb8, 0xef, 0x2b, 0xad, 0xb8, 0x52, 0x69, 0x63, + 0x68, 0xee, 0x2b, 0xad, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x45, + 0x00, 0x00, 0x4c, 0x01, 0x05, 0x00, 0xd9, 0x8f, 0x15, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xe0, 0x00, 0x02, 0x01, 0x0b, 0x01, 0x0b, 0x00, 0x00, 0x08, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x06, 0x00, 0x03, 0x00, 0x06, 0x00, 0x03, 0x00, 0x06, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0xe4, 0xab, 0x00, 0x00, + 0x01, 0x00, 0x40, 0x05, 0x00, 0x00, 0x04, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 
0x00, 0x10, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x1a, 0x00, 0x00, 0xb8, 0x22, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x38, 0x00, 0x00, + 0x00, 0x10, 0x10, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x68, 0x10, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x24, + 0x06, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, + 0x60, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xc0, 0x2e, 0x69, 0x64, 0x61, + 0x74, 0x61, 0x00, 0x00, 0xf8, 0x01, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x40, 0x00, 0x00, 0x40, 0x2e, 0x72, 0x73, 0x72, 0x63, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, + 0x42, 0x2e, 0x72, 0x65, 0x6c, 0x6f, 0x63, 0x00, 0x00, 0x86, 0x01, 0x00, 0x00, 0x00, 0x50, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + + #[test] + fn crss_header() { + let header = Header::parse(&&CRSS_HEADER[..]).unwrap(); + assert!(header.dos_header.signature == DOS_MAGIC); + assert!(header.signature == PE_MAGIC); + assert!(header.coff_header.machine == COFF_MACHINE_X86); + assert!(machine_to_str(header.coff_header.machine) == "X86"); + println!("header: {:?}", &header); + } +} diff --git a/third_party/rust/goblin/src/pe/import.rs b/third_party/rust/goblin/src/pe/import.rs new file mode 100644 index 0000000000..0284fc3276 --- /dev/null +++ b/third_party/rust/goblin/src/pe/import.rs @@ -0,0 +1,417 @@ +use alloc::borrow::Cow; +use alloc::vec::Vec; +use core::fmt::{Debug, LowerHex}; + +use crate::error; +use scroll::ctx::TryFromCtx; +use scroll::{Pread, Pwrite, SizeWith}; + +use crate::pe::data_directories; +use crate::pe::options; +use crate::pe::section_table; +use crate::pe::utils; + +use log::{debug, warn}; + +pub const IMPORT_BY_ORDINAL_32: u32 = 0x8000_0000; +pub const IMPORT_BY_ORDINAL_64: u64 = 0x8000_0000_0000_0000; +pub const IMPORT_RVA_MASK_32: u32 = 0x7fff_ffff; +pub const IMPORT_RVA_MASK_64: u64 = 0x0000_0000_7fff_ffff; + +pub trait Bitfield<'a>: + Into<u64> + + PartialEq + + Eq + + LowerHex + + Debug + + TryFromCtx<'a, scroll::Endian, Error = scroll::Error> +{ + fn is_ordinal(&self) -> bool; + fn to_ordinal(&self) -> u16; + fn to_rva(&self) -> u32; + fn size_of() -> usize; + fn is_zero(&self) -> bool; +} + +impl<'a> Bitfield<'a> for u64 { + fn is_ordinal(&self) -> bool { + self & IMPORT_BY_ORDINAL_64 == IMPORT_BY_ORDINAL_64 + } + fn to_ordinal(&self) -> u16 { + (0xffff & self) as u16 + } + fn to_rva(&self) -> u32 { + (self & IMPORT_RVA_MASK_64) as u32 + } + fn size_of() -> usize { + 
8 + } + fn is_zero(&self) -> bool { + *self == 0 + } +} + +impl<'a> Bitfield<'a> for u32 { + fn is_ordinal(&self) -> bool { + self & IMPORT_BY_ORDINAL_32 == IMPORT_BY_ORDINAL_32 + } + fn to_ordinal(&self) -> u16 { + (0xffff & self) as u16 + } + fn to_rva(&self) -> u32 { + (self & IMPORT_RVA_MASK_32) as u32 + } + fn size_of() -> usize { + 4 + } + fn is_zero(&self) -> bool { + *self == 0 + } +} + +#[derive(Debug, Clone)] +pub struct HintNameTableEntry<'a> { + pub hint: u16, + pub name: &'a str, +} + +impl<'a> HintNameTableEntry<'a> { + fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<Self> { + let offset = &mut offset; + let hint = bytes.gread_with(offset, scroll::LE)?; + let name = bytes.pread::<&'a str>(*offset)?; + Ok(HintNameTableEntry { hint, name }) + } +} + +#[derive(Debug, Clone)] +pub enum SyntheticImportLookupTableEntry<'a> { + OrdinalNumber(u16), + HintNameTableRVA((u32, HintNameTableEntry<'a>)), // [u8; 31] bitfield :/ +} + +pub type ImportLookupTable<'a> = Vec<SyntheticImportLookupTableEntry<'a>>; + +impl<'a> SyntheticImportLookupTableEntry<'a> { + pub fn parse<T: Bitfield<'a>>( + bytes: &'a [u8], + offset: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<ImportLookupTable<'a>> { + Self::parse_with_opts::<T>( + bytes, + offset, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts<T: Bitfield<'a>>( + bytes: &'a [u8], + mut offset: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<ImportLookupTable<'a>> { + let le = scroll::LE; + let offset = &mut offset; + let mut table = Vec::new(); + loop { + let bitfield: T = bytes.gread_with(offset, le)?; + if bitfield.is_zero() { + debug!("imports done"); + break; + } else { + let entry = { + debug!("bitfield {:#x}", bitfield); + use self::SyntheticImportLookupTableEntry::*; + if bitfield.is_ordinal() { + let ordinal = 
bitfield.to_ordinal(); + debug!("importing by ordinal {:#x}", ordinal); + OrdinalNumber(ordinal) + } else { + let rva = bitfield.to_rva(); + let hentry = { + debug!("searching for RVA {:#x}", rva); + if let Some(offset) = + utils::find_offset(rva as usize, sections, file_alignment, opts) + { + debug!("offset {:#x}", offset); + HintNameTableEntry::parse(bytes, offset)? + } else { + warn!("Entry {} has bad RVA: {:#x}", table.len(), rva); + continue; + } + }; + HintNameTableRVA((rva, hentry)) + } + }; + table.push(entry); + } + } + Ok(table) + } +} + +// get until entry is 0 +pub type ImportAddressTable = Vec<u64>; + +#[repr(C)] +#[derive(Debug, Pread, Pwrite, SizeWith)] +pub struct ImportDirectoryEntry { + pub import_lookup_table_rva: u32, + pub time_date_stamp: u32, + pub forwarder_chain: u32, + pub name_rva: u32, + pub import_address_table_rva: u32, +} + +pub const SIZEOF_IMPORT_DIRECTORY_ENTRY: usize = 20; + +impl ImportDirectoryEntry { + pub fn is_null(&self) -> bool { + (self.import_lookup_table_rva == 0) + && (self.time_date_stamp == 0) + && (self.forwarder_chain == 0) + && (self.name_rva == 0) + && (self.import_address_table_rva == 0) + } +} + +#[derive(Debug)] +pub struct SyntheticImportDirectoryEntry<'a> { + pub import_directory_entry: ImportDirectoryEntry, + /// Computed + pub name: &'a str, + /// The import lookup table is a vector of either ordinals, or RVAs + import names + pub import_lookup_table: Option<ImportLookupTable<'a>>, + /// Computed + pub import_address_table: ImportAddressTable, +} + +impl<'a> SyntheticImportDirectoryEntry<'a> { + pub fn parse<T: Bitfield<'a>>( + bytes: &'a [u8], + import_directory_entry: ImportDirectoryEntry, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<SyntheticImportDirectoryEntry<'a>> { + Self::parse_with_opts::<T>( + bytes, + import_directory_entry, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts<T: Bitfield<'a>>( + bytes: 
&'a [u8], + import_directory_entry: ImportDirectoryEntry, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<SyntheticImportDirectoryEntry<'a>> { + const LE: scroll::Endian = scroll::LE; + let name_rva = import_directory_entry.name_rva; + let name = utils::try_name(bytes, name_rva as usize, sections, file_alignment, opts)?; + let import_lookup_table = { + let import_lookup_table_rva = import_directory_entry.import_lookup_table_rva; + let import_address_table_rva = import_directory_entry.import_address_table_rva; + if let Some(import_lookup_table_offset) = utils::find_offset( + import_lookup_table_rva as usize, + sections, + file_alignment, + opts, + ) { + debug!("Synthesizing lookup table imports for {} lib, with import lookup table rva: {:#x}", name, import_lookup_table_rva); + let import_lookup_table = SyntheticImportLookupTableEntry::parse_with_opts::<T>( + bytes, + import_lookup_table_offset, + sections, + file_alignment, + opts, + )?; + debug!( + "Successfully synthesized import lookup table entry from lookup table: {:#?}", + import_lookup_table + ); + Some(import_lookup_table) + } else if let Some(import_address_table_offset) = utils::find_offset( + import_address_table_rva as usize, + sections, + file_alignment, + opts, + ) { + debug!("Synthesizing lookup table imports for {} lib, with import address table rva: {:#x}", name, import_lookup_table_rva); + let import_address_table = SyntheticImportLookupTableEntry::parse_with_opts::<T>( + bytes, + import_address_table_offset, + sections, + file_alignment, + opts, + )?; + debug!( + "Successfully synthesized import lookup table entry from IAT: {:#?}", + import_address_table + ); + Some(import_address_table) + } else { + None + } + }; + + let import_address_table_offset = &mut utils::find_offset( + import_directory_entry.import_address_table_rva as usize, + sections, + file_alignment, + opts, + ) + .ok_or_else(|| { + 
error::Error::Malformed(format!( + "Cannot map import_address_table_rva {:#x} into offset for {}", + import_directory_entry.import_address_table_rva, name + )) + })?; + let mut import_address_table = Vec::new(); + loop { + let import_address = bytes + .gread_with::<T>(import_address_table_offset, LE)? + .into(); + if import_address == 0 { + break; + } else { + import_address_table.push(import_address); + } + } + Ok(SyntheticImportDirectoryEntry { + import_directory_entry, + name, + import_lookup_table, + import_address_table, + }) + } +} + +#[derive(Debug)] +/// Contains a list of synthesized import data for this binary, e.g., which symbols from which libraries it is importing from +pub struct ImportData<'a> { + pub import_data: Vec<SyntheticImportDirectoryEntry<'a>>, +} + +impl<'a> ImportData<'a> { + pub fn parse<T: Bitfield<'a>>( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + ) -> error::Result<ImportData<'a>> { + Self::parse_with_opts::<T>( + bytes, + dd, + sections, + file_alignment, + &options::ParseOptions::default(), + ) + } + + pub fn parse_with_opts<T: Bitfield<'a>>( + bytes: &'a [u8], + dd: data_directories::DataDirectory, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + ) -> error::Result<ImportData<'a>> { + let import_directory_table_rva = dd.virtual_address as usize; + debug!( + "import_directory_table_rva {:#x}", + import_directory_table_rva + ); + let offset = + &mut utils::find_offset(import_directory_table_rva, sections, file_alignment, opts) + .ok_or_else(|| { + error::Error::Malformed(format!( + "Cannot create ImportData; cannot map import_directory_table_rva {:#x} into offset", + import_directory_table_rva + )) + })?; + debug!("import data offset {:#x}", offset); + let mut import_data = Vec::new(); + loop { + let import_directory_entry: ImportDirectoryEntry = + bytes.gread_with(offset, scroll::LE)?; + 
debug!("{:#?}", import_directory_entry); + if import_directory_entry.is_null() { + break; + } else { + let entry = SyntheticImportDirectoryEntry::parse_with_opts::<T>( + bytes, + import_directory_entry, + sections, + file_alignment, + opts, + )?; + debug!("entry {:#?}", entry); + import_data.push(entry); + } + } + debug!("finished ImportData"); + Ok(ImportData { import_data }) + } +} + +#[derive(Debug)] +/// A synthesized symbol import, the name is pre-indexed, and the binary offset is computed, as well as which dll it belongs to +pub struct Import<'a> { + pub name: Cow<'a, str>, + pub dll: &'a str, + pub ordinal: u16, + pub offset: usize, + pub rva: usize, + pub size: usize, +} + +impl<'a> Import<'a> { + pub fn parse<T: Bitfield<'a>>( + _bytes: &'a [u8], + import_data: &ImportData<'a>, + _sections: &[section_table::SectionTable], + ) -> error::Result<Vec<Import<'a>>> { + let mut imports = Vec::new(); + for data in &import_data.import_data { + if let Some(ref import_lookup_table) = data.import_lookup_table { + let dll = data.name; + let import_base = data.import_directory_entry.import_address_table_rva as usize; + debug!("Getting imports from {}", &dll); + for (i, entry) in import_lookup_table.iter().enumerate() { + let offset = import_base + (i * T::size_of()); + use self::SyntheticImportLookupTableEntry::*; + let (rva, name, ordinal) = match *entry { + HintNameTableRVA((rva, ref hint_entry)) => { + // if hint_entry.name = "" && hint_entry.hint = 0 { + // println!("<PE.Import> warning hint/name table rva from {} without hint {:#x}", dll, rva); + // } + (rva, Cow::Borrowed(hint_entry.name), hint_entry.hint) + } + OrdinalNumber(ordinal) => { + let name = format!("ORDINAL {}", ordinal); + (0x0, Cow::Owned(name), ordinal) + } + }; + let import = Import { + name, + ordinal, + dll, + size: T::size_of(), + offset, + rva: rva as usize, + }; + imports.push(import); + } + } + } + Ok(imports) + } +} diff --git a/third_party/rust/goblin/src/pe/mod.rs 
b/third_party/rust/goblin/src/pe/mod.rs new file mode 100644 index 0000000000..5a7337630a --- /dev/null +++ b/third_party/rust/goblin/src/pe/mod.rs @@ -0,0 +1,448 @@ +//! A PE32 and PE32+ parser +//! + +// TODO: panics with unwrap on None for apisetschema.dll, fhuxgraphics.dll and some others + +use alloc::vec::Vec; + +pub mod authenticode; +pub mod certificate_table; +pub mod characteristic; +pub mod data_directories; +pub mod debug; +pub mod exception; +pub mod export; +pub mod header; +pub mod import; +pub mod optional_header; +pub mod options; +pub mod relocation; +pub mod section_table; +pub mod symbol; +pub mod utils; + +use crate::container; +use crate::error; +use crate::strtab; + +use log::debug; + +#[derive(Debug)] +/// An analyzed PE32/PE32+ binary +pub struct PE<'a> { + /// Underlying bytes + bytes: &'a [u8], + authenticode_excluded_sections: Option<authenticode::ExcludedSections>, + /// The PE header + pub header: header::Header, + /// A list of the sections in this PE binary + pub sections: Vec<section_table::SectionTable>, + /// The size of the binary + pub size: usize, + /// The name of this `dll`, if it has one + pub name: Option<&'a str>, + /// Whether this is a `dll` or not + pub is_lib: bool, + /// Whether the binary is 64-bit (PE32+) + pub is_64: bool, + /// the entry point of the binary + pub entry: usize, + /// The binary's RVA, or image base - useful for computing virtual addreses + pub image_base: usize, + /// Data about any exported symbols in this binary (e.g., if it's a `dll`) + pub export_data: Option<export::ExportData<'a>>, + /// Data for any imported symbols, and from which `dll`, etc., in this binary + pub import_data: Option<import::ImportData<'a>>, + /// The list of exported symbols in this binary, contains synthetic information for easier analysis + pub exports: Vec<export::Export<'a>>, + /// The list symbols imported by this binary from other `dll`s + pub imports: Vec<import::Import<'a>>, + /// The list of libraries which this 
binary imports symbols from + pub libraries: Vec<&'a str>, + /// Debug information, if any, contained in the PE header + pub debug_data: Option<debug::DebugData<'a>>, + /// Exception handling and stack unwind information, if any, contained in the PE header + pub exception_data: Option<exception::ExceptionData<'a>>, + /// Certificates present, if any, described by the Certificate Table + pub certificates: certificate_table::CertificateDirectoryTable<'a>, +} + +impl<'a> PE<'a> { + /// Reads a PE binary from the underlying `bytes` + pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + Self::parse_with_opts(bytes, &options::ParseOptions::default()) + } + + /// Reads a PE binary from the underlying `bytes` + pub fn parse_with_opts(bytes: &'a [u8], opts: &options::ParseOptions) -> error::Result<Self> { + let header = header::Header::parse(bytes)?; + let mut authenticode_excluded_sections = None; + + debug!("{:#?}", header); + let optional_header_offset = header.dos_header.pe_pointer as usize + + header::SIZEOF_PE_MAGIC + + header::SIZEOF_COFF_HEADER; + let offset = + &mut (optional_header_offset + header.coff_header.size_of_optional_header as usize); + + let sections = header.coff_header.sections(bytes, offset)?; + let is_lib = characteristic::is_dll(header.coff_header.characteristics); + let mut entry = 0; + let mut image_base = 0; + let mut exports = vec![]; + let mut export_data = None; + let mut name = None; + let mut imports = vec![]; + let mut import_data = None; + let mut libraries = vec![]; + let mut debug_data = None; + let mut exception_data = None; + let mut certificates = Default::default(); + let mut is_64 = false; + if let Some(optional_header) = header.optional_header { + // Sections we are assembling through the parsing, eventually, it will be passed + // to the authenticode_sections attribute of `PE`. 
+ let (checksum, datadir_entry_certtable) = match optional_header.standard_fields.magic { + optional_header::MAGIC_32 => { + let standard_field_offset = + optional_header_offset + optional_header::SIZEOF_STANDARD_FIELDS_32; + let checksum_field_offset = + standard_field_offset + optional_header::OFFSET_WINDOWS_FIELDS_32_CHECKSUM; + ( + checksum_field_offset..checksum_field_offset + 4, + optional_header_offset + 128..optional_header_offset + 136, + ) + } + optional_header::MAGIC_64 => { + let standard_field_offset = + optional_header_offset + optional_header::SIZEOF_STANDARD_FIELDS_64; + let checksum_field_offset = + standard_field_offset + optional_header::OFFSET_WINDOWS_FIELDS_64_CHECKSUM; + + ( + checksum_field_offset..checksum_field_offset + 4, + optional_header_offset + 144..optional_header_offset + 152, + ) + } + magic => { + return Err(error::Error::Malformed(format!( + "Unsupported header magic ({:#x})", + magic + ))) + } + }; + + entry = optional_header.standard_fields.address_of_entry_point as usize; + image_base = optional_header.windows_fields.image_base as usize; + is_64 = optional_header.container()? == container::Container::Big; + debug!( + "entry {:#x} image_base {:#x} is_64: {}", + entry, image_base, is_64 + ); + let file_alignment = optional_header.windows_fields.file_alignment; + if let Some(export_table) = *optional_header.data_directories.get_export_table() { + if let Ok(ed) = export::ExportData::parse_with_opts( + bytes, + export_table, + §ions, + file_alignment, + opts, + ) { + debug!("export data {:#?}", ed); + exports = export::Export::parse_with_opts( + bytes, + &ed, + §ions, + file_alignment, + opts, + )?; + name = ed.name; + debug!("name: {:#?}", name); + export_data = Some(ed); + } + } + debug!("exports: {:#?}", exports); + if let Some(import_table) = *optional_header.data_directories.get_import_table() { + let id = if is_64 { + import::ImportData::parse_with_opts::<u64>( + bytes, + import_table, + §ions, + file_alignment, + opts, + )? 
+ } else { + import::ImportData::parse_with_opts::<u32>( + bytes, + import_table, + §ions, + file_alignment, + opts, + )? + }; + debug!("import data {:#?}", id); + if is_64 { + imports = import::Import::parse::<u64>(bytes, &id, §ions)? + } else { + imports = import::Import::parse::<u32>(bytes, &id, §ions)? + } + libraries = id + .import_data + .iter() + .map(|data| data.name) + .collect::<Vec<&'a str>>(); + libraries.sort(); + libraries.dedup(); + import_data = Some(id); + } + debug!("imports: {:#?}", imports); + if let Some(debug_table) = *optional_header.data_directories.get_debug_table() { + debug_data = Some(debug::DebugData::parse_with_opts( + bytes, + debug_table, + §ions, + file_alignment, + opts, + )?); + } + + if header.coff_header.machine == header::COFF_MACHINE_X86_64 { + // currently only x86_64 is supported + debug!("exception data: {:#?}", exception_data); + if let Some(exception_table) = + *optional_header.data_directories.get_exception_table() + { + exception_data = Some(exception::ExceptionData::parse_with_opts( + bytes, + exception_table, + §ions, + file_alignment, + opts, + )?); + } + } + + let certtable = if let Some(certificate_table) = + *optional_header.data_directories.get_certificate_table() + { + certificates = certificate_table::enumerate_certificates( + bytes, + certificate_table.virtual_address, + certificate_table.size, + )?; + + let start = certificate_table.virtual_address as usize; + let end = start + certificate_table.size as usize; + Some(start..end) + } else { + None + }; + + authenticode_excluded_sections = Some(authenticode::ExcludedSections::new( + checksum, + datadir_entry_certtable, + certtable, + )); + } + Ok(PE { + bytes, + authenticode_excluded_sections, + header, + sections, + size: 0, + name, + is_lib, + is_64, + entry, + image_base, + export_data, + import_data, + exports, + imports, + libraries, + debug_data, + exception_data, + certificates, + }) + } +} + +/// An analyzed COFF object +#[derive(Debug)] +pub struct 
Coff<'a> { + /// The COFF header + pub header: header::CoffHeader, + /// A list of the sections in this COFF binary + pub sections: Vec<section_table::SectionTable>, + /// The COFF symbol table. + pub symbols: symbol::SymbolTable<'a>, + /// The string table. + pub strings: strtab::Strtab<'a>, +} + +impl<'a> Coff<'a> { + /// Reads a COFF object from the underlying `bytes` + pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { + let offset = &mut 0; + let header = header::CoffHeader::parse(bytes, offset)?; + debug!("{:#?}", header); + // TODO: maybe parse optional header, but it isn't present for Windows. + *offset += header.size_of_optional_header as usize; + let sections = header.sections(bytes, offset)?; + let symbols = header.symbols(bytes)?; + let strings = header.strings(bytes)?; + Ok(Coff { + header, + sections, + symbols, + strings, + }) + } +} + +#[cfg(test)] +mod tests { + use super::Coff; + use super::PE; + + static INVALID_DOS_SIGNATURE: [u8; 512] = [ + 0x3D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, + 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, + 0x4C, 0xCD, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, + 0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, + 0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x31, 0xE2, 0xB1, 0x67, 0x50, 0x8C, + 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x88, 0xE3, 0x6D, 0x50, + 0x8C, 0xE2, 0x3C, 0x38, 0x8F, 0xE3, 0x62, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x89, 0xE3, 0xE0, + 0x50, 
0x8C, 0xE2, 0xAC, 0x3F, 0x89, 0xE3, 0x42, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x88, 0xE3, + 0x77, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x8F, 0xE3, 0x6E, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x8D, + 0xE3, 0x64, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8D, 0xE2, 0x3F, 0x50, 0x8C, 0xE2, 0xE1, 0x20, + 0x85, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0xE1, 0x20, 0x73, 0xE2, 0x66, 0x50, 0x8C, 0xE2, 0xE1, + 0x20, 0x8E, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0x52, 0x69, 0x63, 0x68, 0x67, 0x50, 0x8C, 0xE2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x50, 0x45, 0x00, 0x00, 0x64, 0x86, 0x07, 0x00, 0x5F, 0x41, 0xFC, 0x5E, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x22, 0x00, 0x0B, 0x02, 0x0E, 0x1A, 0x00, + 0xFC, 0x00, 0x00, 0x00, 0xD6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 0x14, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0xE0, + 0x68, 0x02, 0x00, 0x03, 0x00, 0x60, 0x81, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xA3, 0x01, 0x00, 0x28, + 0x00, 0x00, 0x00, 0x00, 0xF0, 0x01, 0x00, 0xE0, 0x01, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, + 0x60, 0x0F, 0x00, 0x00, 0x00, 0xC4, 0x01, 0x00, 0xF8, 0x46, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x54, 0x06, 0x00, 0x00, 0xF0, 0x91, 0x01, 0x00, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x92, 0x01, 0x00, 0x30, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 
0x48, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + + static INVALID_PE_SIGNATURE: [u8; 512] = [ + 0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, + 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, + 0x4C, 0xCD, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, + 0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, + 0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x31, 0xE2, 0xB1, 0x67, 0x50, 0x8C, + 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x88, 0xE3, 0x6D, 0x50, + 0x8C, 0xE2, 0x3C, 0x38, 0x8F, 0xE3, 0x62, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x89, 0xE3, 0xE0, + 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x89, 0xE3, 0x42, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x88, 0xE3, + 0x77, 0x50, 0x8C, 0xE2, 0xAC, 0x3F, 0x8F, 0xE3, 0x6E, 0x50, 0x8C, 0xE2, 0x3C, 0x38, 0x8D, + 0xE3, 0x64, 0x50, 0x8C, 0xE2, 0x67, 0x50, 0x8D, 0xE2, 0x3F, 0x50, 0x8C, 0xE2, 0xE1, 0x20, + 0x85, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0xE1, 0x20, 0x73, 0xE2, 0x66, 0x50, 0x8C, 0xE2, 0xE1, + 0x20, 0x8E, 0xE3, 0x66, 0x50, 0x8C, 0xE2, 0x52, 0x69, 0x63, 0x68, 0x67, 0x50, 0x8C, 0xE2, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x50, 0x05, 0x00, 0x00, 0x64, 0x86, 0x07, 0x00, 0x5F, 0x41, 0xFC, 0x5E, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00, 0x22, 0x00, 0x0B, 0x02, 0x0E, 0x1A, 0x00, + 0xFC, 0x00, 0x00, 0x00, 0xD6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE4, 
0x14, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x02, 0x00, 0x00, 0x04, 0x00, 0x00, 0xE0, + 0x68, 0x02, 0x00, 0x03, 0x00, 0x60, 0x81, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xA3, 0x01, 0x00, 0x28, + 0x00, 0x00, 0x00, 0x00, 0xF0, 0x01, 0x00, 0xE0, 0x01, 0x00, 0x00, 0x00, 0xD0, 0x01, 0x00, + 0x60, 0x0F, 0x00, 0x00, 0x00, 0xC4, 0x01, 0x00, 0xF8, 0x46, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x00, 0x54, 0x06, 0x00, 0x00, 0xF0, 0x91, 0x01, 0x00, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x92, 0x01, 0x00, 0x30, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, 0x48, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + + // The assembler program used to generate this string is as follows: + // + // bits 64 + // default rel + // segment .text + // global main + // extern ExitProcess + // main: + // xor rax, rax + // call ExitProcess + // + // + // The code can be compiled using nasm (https://nasm.us) with the command below: + // nasm -f win64 <filename>.asm -o <filename>.obj + static COFF_FILE_SINGLE_STRING_IN_STRING_TABLE: [u8; 220] = [ + 0x64, 0x86, 0x1, 0x0, 0xb5, 0x39, 0x91, 0x62, 0x4e, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x3c, 0x0, 0x0, 0x0, 0x44, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x20, 0x0, 0x50, 0x60, 0x48, 0x31, 0xc0, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x4, + 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x4, 0x0, 0x2e, 0x66, 0x69, 0x6c, 0x65, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0xfe, 0xff, 0x0, 0x0, 0x67, 0x1, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, + 0x73, 0x2e, 0x61, 0x73, 0x6d, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2e, 0x74, 0x65, 0x78, + 0x74, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x3, 0x1, 0x8, 0x0, 0x0, 0x0, + 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2e, 0x61, 0x62, + 0x73, 0x6f, 0x6c, 0x75, 0x74, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, 0x6d, 0x61, + 0x69, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x2, 0x0, 0x10, + 0x0, 0x0, 0x0, 0x45, 0x78, 0x69, 0x74, 0x50, 0x72, 0x6f, 0x63, 0x65, 0x73, 0x73, 0x0, + ]; + + #[test] + fn string_table_excludes_length() { + let coff = Coff::parse(&&COFF_FILE_SINGLE_STRING_IN_STRING_TABLE[..]).unwrap(); + let string_table = coff.strings.to_vec().unwrap(); + + assert!(string_table == vec!["ExitProcess"]); + } + + #[test] + fn symbol_name_excludes_length() { + let coff = Coff::parse(&COFF_FILE_SINGLE_STRING_IN_STRING_TABLE).unwrap(); + let strings = coff.strings; + let symbols = coff + .symbols + .iter() + .filter(|(_, name, _)| name.is_none()) + .map(|(_, _, sym)| sym.name(&strings).unwrap().to_owned()) + .collect::<Vec<_>>(); + assert_eq!(symbols, vec!["ExitProcess"]) + } + + #[test] + fn invalid_dos_header() { + if let Ok(_) = PE::parse(&INVALID_DOS_SIGNATURE) { + panic!("must not parse PE with invalid DOS header"); + } + } + + #[test] + fn invalid_pe_header() { + if let Ok(_) = PE::parse(&INVALID_PE_SIGNATURE) { + panic!("must not parse PE with invalid PE header"); + } + } +} diff --git a/third_party/rust/goblin/src/pe/optional_header.rs b/third_party/rust/goblin/src/pe/optional_header.rs new 
use crate::container;
use crate::error;

use crate::pe::data_directories;

use scroll::{ctx, Endian, LE};
use scroll::{Pread, Pwrite, SizeWith};

/// standard COFF fields
///
/// This is the 32-bit layout: unlike [`StandardFields64`] it ends with
/// `base_of_data`.
#[repr(C)]
#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)]
pub struct StandardFields32 {
    pub magic: u16,
    pub major_linker_version: u8,
    pub minor_linker_version: u8,
    pub size_of_code: u32,
    pub size_of_initialized_data: u32,
    pub size_of_uninitialized_data: u32,
    pub address_of_entry_point: u32,
    pub base_of_code: u32,
    /// absent in 64-bit PE32+
    pub base_of_data: u32,
}

/// Size in bytes of [`StandardFields32`]: 2 + 1 + 1 + 6 * 4.
pub const SIZEOF_STANDARD_FIELDS_32: usize = 28;

/// standard 64-bit COFF fields
///
/// Same fields as [`StandardFields32`] minus `base_of_data`.
#[repr(C)]
#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)]
pub struct StandardFields64 {
    pub magic: u16,
    pub major_linker_version: u8,
    pub minor_linker_version: u8,
    pub size_of_code: u32,
    pub size_of_initialized_data: u32,
    pub size_of_uninitialized_data: u32,
    pub address_of_entry_point: u32,
    pub base_of_code: u32,
}

/// Size in bytes of [`StandardFields64`]: 2 + 1 + 1 + 5 * 4.
pub const SIZEOF_STANDARD_FIELDS_64: usize = 24;

/// Unified 32/64-bit COFF fields
///
/// Built from either concrete layout through the `From` impls below; the
/// 32-bit sizes and addresses are widened to `u64`.
#[derive(Debug, PartialEq, Copy, Clone, Default)]
pub struct StandardFields {
    pub magic: u16,
    pub major_linker_version: u8,
    pub minor_linker_version: u8,
    pub size_of_code: u64,
    pub size_of_initialized_data: u64,
    pub size_of_uninitialized_data: u64,
    pub address_of_entry_point: u64,
    pub base_of_code: u64,
    /// absent in 64-bit PE32+
    pub base_of_data: u32,
}

/// Widens the 32-bit standard fields; `base_of_data` is carried over as is.
impl From<StandardFields32> for StandardFields {
    fn from(fields: StandardFields32) -> Self {
        StandardFields {
            magic: fields.magic,
            major_linker_version: fields.major_linker_version,
            minor_linker_version: fields.minor_linker_version,
            size_of_code: u64::from(fields.size_of_code),
            size_of_initialized_data: u64::from(fields.size_of_initialized_data),
            size_of_uninitialized_data: u64::from(fields.size_of_uninitialized_data),
            address_of_entry_point: u64::from(fields.address_of_entry_point),
            base_of_code: u64::from(fields.base_of_code),
            base_of_data: fields.base_of_data,
        }
    }
}

/// Widens the 64-bit standard fields; `base_of_data` does not exist in that
/// layout and is set to 0.
impl From<StandardFields64> for StandardFields {
    fn from(fields: StandardFields64) -> Self {
        StandardFields {
            magic: fields.magic,
            major_linker_version: fields.major_linker_version,
            minor_linker_version: fields.minor_linker_version,
            size_of_code: u64::from(fields.size_of_code),
            size_of_initialized_data: u64::from(fields.size_of_initialized_data),
            size_of_uninitialized_data: u64::from(fields.size_of_uninitialized_data),
            address_of_entry_point: u64::from(fields.address_of_entry_point),
            base_of_code: u64::from(fields.base_of_code),
            base_of_data: 0,
        }
    }
}

/// Standard fields magic number for 32-bit binary
pub const MAGIC_32: u16 = 0x10b;
/// Standard fields magic number for 64-bit binary
pub const MAGIC_64: u16 = 0x20b;

/// Windows specific fields
///
/// 32-bit layout: the image base and the stack/heap sizes are `u32`.
#[repr(C)]
#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)]
pub struct WindowsFields32 {
    pub image_base: u32,
    pub section_alignment: u32,
    pub file_alignment: u32,
    pub major_operating_system_version: u16,
    pub minor_operating_system_version: u16,
    pub major_image_version: u16,
    pub minor_image_version: u16,
    pub major_subsystem_version: u16,
    pub minor_subsystem_version: u16,
    pub win32_version_value: u32,
    pub size_of_image: u32,
    pub size_of_headers: u32,
    pub check_sum: u32,
    pub subsystem: u16,
    pub dll_characteristics: u16,
    pub size_of_stack_reserve: u32,
    pub size_of_stack_commit: u32,
    pub size_of_heap_reserve: u32,
    pub size_of_heap_commit: u32,
    pub loader_flags: u32,
    pub number_of_rva_and_sizes: u32,
}

/// Size in bytes of [`WindowsFields32`]: 13 `u32` fields + 8 `u16` fields.
pub const SIZEOF_WINDOWS_FIELDS_32: usize = 68;
/// Offset of the `check_sum` field in [`WindowsFields32`]
// 3 * 4 + 6 * 2 + 3 * 4 bytes precede it.
pub const OFFSET_WINDOWS_FIELDS_32_CHECKSUM: usize = 36;

/// 64-bit Windows specific fields
///
/// Same fields as [`WindowsFields32`], with the image base and the stack/heap
/// sizes widened to `u64`.
#[repr(C)]
#[derive(Debug, PartialEq, Copy, Clone, Default, Pread, Pwrite, SizeWith)]
pub struct WindowsFields64 {
    pub image_base: u64,
    pub section_alignment: u32,
    pub file_alignment: u32,
    pub major_operating_system_version: u16,
    pub minor_operating_system_version: u16,
    pub major_image_version: u16,
    pub minor_image_version: u16,
    pub major_subsystem_version: u16,
    pub minor_subsystem_version: u16,
    pub win32_version_value: u32,
    pub size_of_image: u32,
    pub size_of_headers: u32,
    pub check_sum: u32,
    pub subsystem: u16,
    pub dll_characteristics: u16,
    pub size_of_stack_reserve: u64,
    pub size_of_stack_commit: u64,
    pub size_of_heap_reserve: u64,
    pub size_of_heap_commit: u64,
    pub loader_flags: u32,
    pub number_of_rva_and_sizes: u32,
}

/// Size in bytes of [`WindowsFields64`]: 5 `u64` + 8 `u32` + 8 `u16` fields.
pub const SIZEOF_WINDOWS_FIELDS_64: usize = 88;
/// Offset of the `check_sum` field in [`WindowsFields64`]
// Four bytes further than in the 32-bit layout because `image_base` is a `u64`.
pub const OFFSET_WINDOWS_FIELDS_64_CHECKSUM: usize = 40;

// /// Generic 32/64-bit Windows specific fields
// #[derive(Debug, PartialEq, Copy, Clone, Default)]
// pub struct WindowsFields {
//     pub image_base: u64,
//     pub section_alignment: u32,
//     pub file_alignment: u32,
//     pub major_operating_system_version: u16,
//     pub minor_operating_system_version: u16,
//     pub major_image_version: u16,
//     pub minor_image_version: u16,
//     pub major_subsystem_version: u16,
//     pub minor_subsystem_version: u16,
//     pub win32_version_value: u32,
//     pub size_of_image: u32,
//     pub size_of_headers: u32,
//     pub check_sum: u32,
//     pub subsystem: u16,
//     pub dll_characteristics: u16,
//     pub size_of_stack_reserve: u64,
//     pub size_of_stack_commit: u64,
//     pub size_of_heap_reserve: u64,
//     pub size_of_heap_commit: u64,
//     pub loader_flags: u32,
//     pub number_of_rva_and_sizes: u32,
// }

/// Widens the 32-bit Windows fields: `image_base` and the four stack/heap
/// sizes become `u64`, every other field is copied unchanged.
impl From<WindowsFields32> for WindowsFields {
    fn from(windows: WindowsFields32) -> Self {
        WindowsFields {
            image_base: u64::from(windows.image_base),
            section_alignment: windows.section_alignment,
            file_alignment: windows.file_alignment,
            major_operating_system_version: windows.major_operating_system_version,
            minor_operating_system_version: windows.minor_operating_system_version,
            major_image_version: windows.major_image_version,
            minor_image_version: windows.minor_image_version,
            major_subsystem_version: windows.major_subsystem_version,
            minor_subsystem_version: windows.minor_subsystem_version,
            win32_version_value: windows.win32_version_value,
            size_of_image: windows.size_of_image,
            size_of_headers: windows.size_of_headers,
            check_sum: windows.check_sum,
            subsystem: windows.subsystem,
            dll_characteristics: windows.dll_characteristics,
            size_of_stack_reserve: u64::from(windows.size_of_stack_reserve),
            size_of_stack_commit: u64::from(windows.size_of_stack_commit),
            size_of_heap_reserve: u64::from(windows.size_of_heap_reserve),
            size_of_heap_commit: u64::from(windows.size_of_heap_commit),
            loader_flags: windows.loader_flags,
            number_of_rva_and_sizes: windows.number_of_rva_and_sizes,
        }
    }
}
windows.dll_characteristics, +// size_of_stack_reserve: windows.size_of_stack_reserve, +// size_of_stack_commit: windows.size_of_stack_commit, +// size_of_heap_reserve: windows.size_of_heap_reserve, +// size_of_heap_commit: windows.size_of_heap_commit, +// loader_flags: windows.loader_flags, +// number_of_rva_and_sizes: windows.number_of_rva_and_sizes, +// } +// } +// } + +pub type WindowsFields = WindowsFields64; + +#[derive(Debug, PartialEq, Copy, Clone)] +pub struct OptionalHeader { + pub standard_fields: StandardFields, + pub windows_fields: WindowsFields, + pub data_directories: data_directories::DataDirectories, +} + +impl OptionalHeader { + pub fn container(&self) -> error::Result<container::Container> { + match self.standard_fields.magic { + MAGIC_32 => Ok(container::Container::Little), + MAGIC_64 => Ok(container::Container::Big), + magic => Err(error::Error::BadMagic(u64::from(magic))), + } + } +} + +impl<'a> ctx::TryFromCtx<'a, Endian> for OptionalHeader { + type Error = crate::error::Error; + fn try_from_ctx(bytes: &'a [u8], _: Endian) -> error::Result<(Self, usize)> { + let magic = bytes.pread_with::<u16>(0, LE)?; + let offset = &mut 0; + let (standard_fields, windows_fields): (StandardFields, WindowsFields) = match magic { + MAGIC_32 => { + let standard_fields = bytes.gread_with::<StandardFields32>(offset, LE)?.into(); + let windows_fields = bytes.gread_with::<WindowsFields32>(offset, LE)?.into(); + (standard_fields, windows_fields) + } + MAGIC_64 => { + let standard_fields = bytes.gread_with::<StandardFields64>(offset, LE)?.into(); + let windows_fields = bytes.gread_with::<WindowsFields64>(offset, LE)?; + (standard_fields, windows_fields) + } + _ => return Err(error::Error::BadMagic(u64::from(magic))), + }; + let data_directories = data_directories::DataDirectories::parse( + &bytes, + windows_fields.number_of_rva_and_sizes as usize, + offset, + )?; + Ok(( + OptionalHeader { + standard_fields, + windows_fields, + data_directories, + }, + 0, + )) // 
TODO: FIXME + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn sizeof_standards32() { + assert_eq!( + ::std::mem::size_of::<StandardFields32>(), + SIZEOF_STANDARD_FIELDS_32 + ); + } + #[test] + fn sizeof_windows32() { + assert_eq!( + ::std::mem::size_of::<WindowsFields32>(), + SIZEOF_WINDOWS_FIELDS_32 + ); + } + #[test] + fn sizeof_standards64() { + assert_eq!( + ::std::mem::size_of::<StandardFields64>(), + SIZEOF_STANDARD_FIELDS_64 + ); + } + #[test] + fn sizeof_windows64() { + assert_eq!( + ::std::mem::size_of::<WindowsFields64>(), + SIZEOF_WINDOWS_FIELDS_64 + ); + } +} diff --git a/third_party/rust/goblin/src/pe/options.rs b/third_party/rust/goblin/src/pe/options.rs new file mode 100644 index 0000000000..5fea632f75 --- /dev/null +++ b/third_party/rust/goblin/src/pe/options.rs @@ -0,0 +1,13 @@ +/// Parsing Options structure for the PE parser +#[derive(Debug, Copy, Clone)] +pub struct ParseOptions { + /// Wether the parser should resolve rvas or not. Default: true + pub resolve_rva: bool, +} + +impl ParseOptions { + /// Returns a parse options structure with default values + pub fn default() -> Self { + ParseOptions { resolve_rva: true } + } +} diff --git a/third_party/rust/goblin/src/pe/relocation.rs b/third_party/rust/goblin/src/pe/relocation.rs new file mode 100644 index 0000000000..ab8398ce73 --- /dev/null +++ b/third_party/rust/goblin/src/pe/relocation.rs @@ -0,0 +1,133 @@ +use crate::error; +use scroll::{IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// Size of a single COFF relocation. +pub const COFF_RELOCATION_SIZE: usize = 10; + +// x86 relocations. + +/// The relocation is ignored. +pub const IMAGE_REL_I386_ABSOLUTE: u16 = 0x0000; +/// Not supported. +pub const IMAGE_REL_I386_DIR16: u16 = 0x0001; +/// Not supported. +pub const IMAGE_REL_I386_REL16: u16 = 0x0002; +/// The target's 32-bit VA. +pub const IMAGE_REL_I386_DIR32: u16 = 0x0006; +/// The target's 32-bit RVA. +pub const IMAGE_REL_I386_DIR32NB: u16 = 0x0007; +/// Not supported. 
/// Not supported.
pub const IMAGE_REL_I386_SEG12: u16 = 0x0009;
/// The 16-bit section index of the section that contains the target.
///
/// This is used to support debugging information.
pub const IMAGE_REL_I386_SECTION: u16 = 0x000A;
/// The 32-bit offset of the target from the beginning of its section.
///
/// This is used to support debugging information and static thread local storage.
pub const IMAGE_REL_I386_SECREL: u16 = 0x000B;
/// The CLR token.
pub const IMAGE_REL_I386_TOKEN: u16 = 0x000C;
/// A 7-bit offset from the base of the section that contains the target.
pub const IMAGE_REL_I386_SECREL7: u16 = 0x000D;
/// The 32-bit relative displacement to the target.
///
/// This supports the x86 relative branch and call instructions.
pub const IMAGE_REL_I386_REL32: u16 = 0x0014;

// x86-64 relocations.
//
// These values overlap numerically with the x86 ones above; which family a
// `Relocation::typ` belongs to depends on the machine type.

/// The relocation is ignored.
pub const IMAGE_REL_AMD64_ABSOLUTE: u16 = 0x0000;
/// The 64-bit VA of the relocation target.
pub const IMAGE_REL_AMD64_ADDR64: u16 = 0x0001;
/// The 32-bit VA of the relocation target.
pub const IMAGE_REL_AMD64_ADDR32: u16 = 0x0002;
/// The 32-bit address without an image base (RVA).
pub const IMAGE_REL_AMD64_ADDR32NB: u16 = 0x0003;
/// The 32-bit relative address from the byte following the relocation.
pub const IMAGE_REL_AMD64_REL32: u16 = 0x0004;
/// The 32-bit address relative to byte distance 1 from the relocation.
pub const IMAGE_REL_AMD64_REL32_1: u16 = 0x0005;
/// The 32-bit address relative to byte distance 2 from the relocation.
pub const IMAGE_REL_AMD64_REL32_2: u16 = 0x0006;
/// The 32-bit address relative to byte distance 3 from the relocation.
pub const IMAGE_REL_AMD64_REL32_3: u16 = 0x0007;
/// The 32-bit address relative to byte distance 4 from the relocation.
pub const IMAGE_REL_AMD64_REL32_4: u16 = 0x0008;
/// The 32-bit address relative to byte distance 5 from the relocation.
pub const IMAGE_REL_AMD64_REL32_5: u16 = 0x0009;
/// The 16-bit section index of the section that contains the target.
///
/// This is used to support debugging information.
pub const IMAGE_REL_AMD64_SECTION: u16 = 0x000A;
/// The 32-bit offset of the target from the beginning of its section.
///
/// This is used to support debugging information and static thread local storage.
pub const IMAGE_REL_AMD64_SECREL: u16 = 0x000B;
/// A 7-bit unsigned offset from the base of the section that contains the target.
pub const IMAGE_REL_AMD64_SECREL7: u16 = 0x000C;
/// CLR tokens.
pub const IMAGE_REL_AMD64_TOKEN: u16 = 0x000D;
/// A 32-bit signed span-dependent value emitted into the object.
pub const IMAGE_REL_AMD64_SREL32: u16 = 0x000E;
/// A pair that must immediately follow every span-dependent value.
pub const IMAGE_REL_AMD64_PAIR: u16 = 0x000F;
/// A 32-bit signed span-dependent value that is applied at link time.
pub const IMAGE_REL_AMD64_SSPAN32: u16 = 0x0010;

/// A COFF relocation.
///
/// The fields add up to 4 + 4 + 2 = 10 bytes, the value of
/// `COFF_RELOCATION_SIZE`.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)]
pub struct Relocation {
    /// The address of the item to which relocation is applied.
    ///
    /// This is the offset from the beginning of the section, plus the
    /// value of the section's `virtual_address` field.
    pub virtual_address: u32,
    /// A zero-based index into the symbol table.
    ///
    /// This symbol gives the address that is to be used for the relocation. If the specified
    /// symbol has section storage class, then the symbol's address is the address with the
    /// first section of the same name.
    pub symbol_table_index: u32,
    /// A value that indicates the kind of relocation that should be performed.
    ///
    /// Valid relocation types depend on machine type.
    pub typ: u16,
}
+#[derive(Default)] +pub struct Relocations<'a> { + offset: usize, + relocations: &'a [u8], +} + +impl<'a> Relocations<'a> { + /// Parse a COFF relocation table at the given offset. + /// + /// The offset and number of relocations should be from the COFF section header. + pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<Relocations<'a>> { + let relocations = bytes.pread_with(offset, number * COFF_RELOCATION_SIZE)?; + Ok(Relocations { + offset: 0, + relocations, + }) + } +} + +impl<'a> Iterator for Relocations<'a> { + type Item = Relocation; + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.relocations.len() { + None + } else { + Some( + self.relocations + .gread_with(&mut self.offset, scroll::LE) + .unwrap(), + ) + } + } +} diff --git a/third_party/rust/goblin/src/pe/section_table.rs b/third_party/rust/goblin/src/pe/section_table.rs new file mode 100644 index 0000000000..4491827f16 --- /dev/null +++ b/third_party/rust/goblin/src/pe/section_table.rs @@ -0,0 +1,279 @@ +use crate::error::{self, Error}; +use crate::pe::relocation; +use alloc::string::{String, ToString}; +use scroll::{ctx, Pread, Pwrite}; + +#[repr(C)] +#[derive(Debug, PartialEq, Clone, Default)] +pub struct SectionTable { + pub name: [u8; 8], + pub real_name: Option<String>, + pub virtual_size: u32, + pub virtual_address: u32, + pub size_of_raw_data: u32, + pub pointer_to_raw_data: u32, + pub pointer_to_relocations: u32, + pub pointer_to_linenumbers: u32, + pub number_of_relocations: u16, + pub number_of_linenumbers: u16, + pub characteristics: u32, +} + +pub const SIZEOF_SECTION_TABLE: usize = 8 * 5; + +// Based on https://github.com/llvm-mirror/llvm/blob/af7b1832a03ab6486c42a40d21695b2c03b2d8a3/lib/Object/COFFObjectFile.cpp#L70 +// Decodes a string table entry in base 64 (//AAAAAA). Expects string without +// prefixed slashes. 
+fn base64_decode_string_entry(s: &str) -> Result<usize, ()> { + assert!(s.len() <= 6, "String too long, possible overflow."); + + let mut val = 0; + for c in s.bytes() { + let v = if b'A' <= c && c <= b'Z' { + // 00..=25 + c - b'A' + } else if b'a' <= c && c <= b'z' { + // 26..=51 + c - b'a' + 26 + } else if b'0' <= c && c <= b'9' { + // 52..=61 + c - b'0' + 52 + } else if c == b'+' { + // 62 + 62 + } else if c == b'/' { + // 63 + 63 + } else { + return Err(()); + }; + val = val * 64 + v as usize; + } + Ok(val) +} + +impl SectionTable { + pub fn parse( + bytes: &[u8], + offset: &mut usize, + string_table_offset: usize, + ) -> error::Result<Self> { + let mut table = SectionTable::default(); + let mut name = [0u8; 8]; + name.copy_from_slice(bytes.gread_with(offset, 8)?); + + table.name = name; + table.virtual_size = bytes.gread_with(offset, scroll::LE)?; + table.virtual_address = bytes.gread_with(offset, scroll::LE)?; + table.size_of_raw_data = bytes.gread_with(offset, scroll::LE)?; + table.pointer_to_raw_data = bytes.gread_with(offset, scroll::LE)?; + table.pointer_to_relocations = bytes.gread_with(offset, scroll::LE)?; + table.pointer_to_linenumbers = bytes.gread_with(offset, scroll::LE)?; + table.number_of_relocations = bytes.gread_with(offset, scroll::LE)?; + table.number_of_linenumbers = bytes.gread_with(offset, scroll::LE)?; + table.characteristics = bytes.gread_with(offset, scroll::LE)?; + + if let Some(idx) = table.name_offset()? 
{ + table.real_name = Some(bytes.pread::<&str>(string_table_offset + idx)?.to_string()); + } + Ok(table) + } + + pub fn name_offset(&self) -> error::Result<Option<usize>> { + // Based on https://github.com/llvm-mirror/llvm/blob/af7b1832a03ab6486c42a40d21695b2c03b2d8a3/lib/Object/COFFObjectFile.cpp#L1054 + if self.name[0] == b'/' { + let idx: usize = if self.name[1] == b'/' { + let b64idx = self.name.pread::<&str>(2)?; + base64_decode_string_entry(b64idx).map_err(|_| { + Error::Malformed(format!( + "Invalid indirect section name //{}: base64 decoding failed", + b64idx + )) + })? + } else { + let name = self.name.pread::<&str>(1)?; + name.parse().map_err(|err| { + Error::Malformed(format!("Invalid indirect section name /{}: {}", name, err)) + })? + }; + Ok(Some(idx)) + } else { + Ok(None) + } + } + + #[allow(clippy::useless_let_if_seq)] + pub fn set_name_offset(&mut self, mut idx: usize) -> error::Result<()> { + if idx <= 9_999_999 { + // 10^7 - 1 + // write!(&mut self.name[1..], "{}", idx) without using io::Write. + // We write into a temporary since we calculate digits starting at the right. 
+ let mut name = [0; 7]; + let mut len = 0; + if idx == 0 { + name[6] = b'0'; + len = 1; + } else { + while idx != 0 { + let rem = (idx % 10) as u8; + idx /= 10; + name[6 - len] = b'0' + rem; + len += 1; + } + } + self.name = [0; 8]; + self.name[0] = b'/'; + self.name[1..][..len].copy_from_slice(&name[7 - len..]); + Ok(()) + } else if idx as u64 <= 0xfff_fff_fff { + // 64^6 - 1 + self.name[0] = b'/'; + self.name[1] = b'/'; + for i in 0..6 { + let rem = (idx % 64) as u8; + idx /= 64; + let c = match rem { + 0..=25 => b'A' + rem, + 26..=51 => b'a' + rem - 26, + 52..=61 => b'0' + rem - 52, + 62 => b'+', + 63 => b'/', + _ => unreachable!(), + }; + self.name[7 - i] = c; + } + Ok(()) + } else { + Err(Error::Malformed(format!( + "Invalid section name offset: {}", + idx + ))) + } + } + + pub fn name(&self) -> error::Result<&str> { + match self.real_name.as_ref() { + Some(s) => Ok(s), + None => Ok(self.name.pread(0)?), + } + } + + pub fn relocations<'a>(&self, bytes: &'a [u8]) -> error::Result<relocation::Relocations<'a>> { + let offset = self.pointer_to_relocations as usize; + let number = self.number_of_relocations as usize; + relocation::Relocations::parse(bytes, offset, number) + } +} + +impl ctx::SizeWith<scroll::Endian> for SectionTable { + fn size_with(_ctx: &scroll::Endian) -> usize { + SIZEOF_SECTION_TABLE + } +} + +impl ctx::TryIntoCtx<scroll::Endian> for SectionTable { + type Error = error::Error; + fn try_into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) -> Result<usize, Self::Error> { + let offset = &mut 0; + bytes.gwrite(&self.name[..], offset)?; + bytes.gwrite_with(self.virtual_size, offset, ctx)?; + bytes.gwrite_with(self.virtual_address, offset, ctx)?; + bytes.gwrite_with(self.size_of_raw_data, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_raw_data, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_relocations, offset, ctx)?; + bytes.gwrite_with(self.pointer_to_linenumbers, offset, ctx)?; + bytes.gwrite_with(self.number_of_relocations, offset, 
ctx)?; + bytes.gwrite_with(self.number_of_linenumbers, offset, ctx)?; + bytes.gwrite_with(self.characteristics, offset, ctx)?; + Ok(SIZEOF_SECTION_TABLE) + } +} + +impl ctx::IntoCtx<scroll::Endian> for SectionTable { + fn into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) { + bytes.pwrite_with(self, 0, ctx).unwrap(); + } +} + +/// The section should not be padded to the next boundary. This flag is obsolete and is replaced +/// by `IMAGE_SCN_ALIGN_1BYTES`. This is valid only for object files. +pub const IMAGE_SCN_TYPE_NO_PAD: u32 = 0x0000_0008; +/// The section contains executable code. +pub const IMAGE_SCN_CNT_CODE: u32 = 0x0000_0020; +/// The section contains initialized data. +pub const IMAGE_SCN_CNT_INITIALIZED_DATA: u32 = 0x0000_0040; +/// The section contains uninitialized data. +pub const IMAGE_SCN_CNT_UNINITIALIZED_DATA: u32 = 0x0000_0080; +pub const IMAGE_SCN_LNK_OTHER: u32 = 0x0000_0100; +/// The section contains comments or other information. The .drectve section has this type. +/// This is valid for object files only. +pub const IMAGE_SCN_LNK_INFO: u32 = 0x0000_0200; +/// The section will not become part of the image. This is valid only for object files. +pub const IMAGE_SCN_LNK_REMOVE: u32 = 0x0000_0800; +/// The section contains COMDAT data. This is valid only for object files. +pub const IMAGE_SCN_LNK_COMDAT: u32 = 0x0000_1000; +/// The section contains data referenced through the global pointer (GP). 
+pub const IMAGE_SCN_GPREL: u32 = 0x0000_8000; +pub const IMAGE_SCN_MEM_PURGEABLE: u32 = 0x0002_0000; +pub const IMAGE_SCN_MEM_16BIT: u32 = 0x0002_0000; +pub const IMAGE_SCN_MEM_LOCKED: u32 = 0x0004_0000; +pub const IMAGE_SCN_MEM_PRELOAD: u32 = 0x0008_0000; + +pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x0010_0000; +pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x0020_0000; +pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x0030_0000; +pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x0040_0000; +pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x0050_0000; +pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x0060_0000; +pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x0070_0000; +pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x0080_0000; +pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x0090_0000; +pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0x00A0_0000; +pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0x00B0_0000; +pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0x00C0_0000; +pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0x00D0_0000; +pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0x00E0_0000; +pub const IMAGE_SCN_ALIGN_MASK: u32 = 0x00F0_0000; + +/// The section contains extended relocations. +pub const IMAGE_SCN_LNK_NRELOC_OVFL: u32 = 0x0100_0000; +/// The section can be discarded as needed. +pub const IMAGE_SCN_MEM_DISCARDABLE: u32 = 0x0200_0000; +/// The section cannot be cached. +pub const IMAGE_SCN_MEM_NOT_CACHED: u32 = 0x0400_0000; +/// The section is not pageable. +pub const IMAGE_SCN_MEM_NOT_PAGED: u32 = 0x0800_0000; +/// The section can be shared in memory. +pub const IMAGE_SCN_MEM_SHARED: u32 = 0x1000_0000; +/// The section can be executed as code. +pub const IMAGE_SCN_MEM_EXECUTE: u32 = 0x2000_0000; +/// The section can be read. +pub const IMAGE_SCN_MEM_READ: u32 = 0x4000_0000; +/// The section can be written to. 
+pub const IMAGE_SCN_MEM_WRITE: u32 = 0x8000_0000; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn set_name_offset() { + let mut section = SectionTable::default(); + for &(offset, name) in [ + (0usize, b"/0\0\0\0\0\0\0"), + (1, b"/1\0\0\0\0\0\0"), + (9_999_999, b"/9999999"), + (10_000_000, b"//AAmJaA"), + #[cfg(target_pointer_width = "64")] + (0xfff_fff_fff, b"////////"), + ] + .iter() + { + section.set_name_offset(offset).unwrap(); + assert_eq!(§ion.name, name); + assert_eq!(section.name_offset().unwrap(), Some(offset)); + } + #[cfg(target_pointer_width = "64")] + assert!(section.set_name_offset(0x1_000_000_000).is_err()); + } +} diff --git a/third_party/rust/goblin/src/pe/symbol.rs b/third_party/rust/goblin/src/pe/symbol.rs new file mode 100644 index 0000000000..b2ced808a1 --- /dev/null +++ b/third_party/rust/goblin/src/pe/symbol.rs @@ -0,0 +1,516 @@ +use crate::error; +use crate::strtab; +use alloc::vec::Vec; +use core::fmt::{self, Debug}; +use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith}; + +/// Size of a single symbol in the COFF Symbol Table. +pub const COFF_SYMBOL_SIZE: usize = 18; + +// Values for `Symbol::section_number`. + +/// The symbol record is not yet assigned a section. A `value` of zero +/// indicates that a reference to an external symbol is defined elsewhere. +/// A `value` of non-zero is a common symbol with a size that is specified by the `value`. +pub const IMAGE_SYM_UNDEFINED: i16 = 0; +/// The symbol has an absolute (non-relocatable) `value` and is not an address. +pub const IMAGE_SYM_ABSOLUTE: i16 = -1; +/// The symbol provides general type or debugging information but does not +/// correspond to a section. +pub const IMAGE_SYM_DEBUG: i16 = -2; + +// Base types for `Symbol::typ`. + +/// No type information or unknown base type. 
Microsoft tools use this setting +pub const IMAGE_SYM_TYPE_NULL: u16 = 0; +/// No valid type; used with void pointers and functions +pub const IMAGE_SYM_TYPE_VOID: u16 = 1; +/// A character (signed byte) +pub const IMAGE_SYM_TYPE_CHAR: u16 = 2; +/// A 2-byte signed integer +pub const IMAGE_SYM_TYPE_SHORT: u16 = 3; +/// A natural integer type (normally 4 bytes in Windows) +pub const IMAGE_SYM_TYPE_INT: u16 = 4; +/// A 4-byte signed integer +pub const IMAGE_SYM_TYPE_LONG: u16 = 5; +/// A 4-byte floating-point number +pub const IMAGE_SYM_TYPE_FLOAT: u16 = 6; +/// An 8-byte floating-point number +pub const IMAGE_SYM_TYPE_DOUBLE: u16 = 7; +/// A structure +pub const IMAGE_SYM_TYPE_STRUCT: u16 = 8; +/// A union +pub const IMAGE_SYM_TYPE_UNION: u16 = 9; +/// An enumerated type +pub const IMAGE_SYM_TYPE_ENUM: u16 = 10; +/// A member of enumeration (a specific value) +pub const IMAGE_SYM_TYPE_MOE: u16 = 11; +/// A byte; unsigned 1-byte integer +pub const IMAGE_SYM_TYPE_BYTE: u16 = 12; +/// A word; unsigned 2-byte integer +pub const IMAGE_SYM_TYPE_WORD: u16 = 13; +/// An unsigned integer of natural size (normally, 4 bytes) +pub const IMAGE_SYM_TYPE_UINT: u16 = 14; +/// An unsigned 4-byte integer +pub const IMAGE_SYM_TYPE_DWORD: u16 = 15; + +// Derived types for `Symbol::typ`. + +/// No derived type; the symbol is a simple scalar variable. +pub const IMAGE_SYM_DTYPE_NULL: u16 = 0; +/// The symbol is a pointer to base type. +pub const IMAGE_SYM_DTYPE_POINTER: u16 = 1; +/// The symbol is a function that returns a base type. +pub const IMAGE_SYM_DTYPE_FUNCTION: u16 = 2; +/// The symbol is an array of base type. +pub const IMAGE_SYM_DTYPE_ARRAY: u16 = 3; + +pub const IMAGE_SYM_TYPE_MASK: u16 = 0xf; +pub const IMAGE_SYM_DTYPE_SHIFT: usize = 4; + +// Values for `Symbol::storage_class`. + +/// A special symbol that represents the end of function, for debugging purposes. +pub const IMAGE_SYM_CLASS_END_OF_FUNCTION: u8 = 0xff; +/// No assigned storage class. 
+pub const IMAGE_SYM_CLASS_NULL: u8 = 0; +/// The automatic (stack) variable. +/// +/// The `value` field specifies the stack frame offset. +pub const IMAGE_SYM_CLASS_AUTOMATIC: u8 = 1; +/// A value that Microsoft tools use for external symbols. +/// +/// The `value` field indicates the size if the section number is +/// `IMAGE_SYM_UNDEFINED` (0). If the section number is not zero, +/// then the `value` field specifies the offset within the section. +pub const IMAGE_SYM_CLASS_EXTERNAL: u8 = 2; +/// A static symbol. +/// +/// The 'value' field specifies the offset of the symbol within the section. +/// If the `value` field is zero, then the symbol represents a section name. +pub const IMAGE_SYM_CLASS_STATIC: u8 = 3; +/// A register variable. +/// +/// The `value` field specifies the register number. +pub const IMAGE_SYM_CLASS_REGISTER: u8 = 4; +/// A symbol that is defined externally. +pub const IMAGE_SYM_CLASS_EXTERNAL_DEF: u8 = 5; +/// A code label that is defined within the module. +/// +/// The `value` field specifies the offset of the symbol within the section. +pub const IMAGE_SYM_CLASS_LABEL: u8 = 6; +/// A reference to a code label that is not defined. +pub const IMAGE_SYM_CLASS_UNDEFINED_LABEL: u8 = 7; +/// The structure member. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_STRUCT: u8 = 8; +/// A formal argument (parameter) of a function. +/// +/// The `value` field specifies the n th argument. +pub const IMAGE_SYM_CLASS_ARGUMENT: u8 = 9; +/// The structure tag-name entry. +pub const IMAGE_SYM_CLASS_STRUCT_TAG: u8 = 10; +/// A union member. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_UNION: u8 = 11; +/// The Union tag-name entry. +pub const IMAGE_SYM_CLASS_UNION_TAG: u8 = 12; +/// A Typedef entry. +pub const IMAGE_SYM_CLASS_TYPE_DEFINITION: u8 = 13; +/// A static data declaration. 
+pub const IMAGE_SYM_CLASS_UNDEFINED_STATIC: u8 = 14; +/// An enumerated type tagname entry. +pub const IMAGE_SYM_CLASS_ENUM_TAG: u8 = 15; +/// A member of an enumeration. +/// +/// The `value` field specifies the n th member. +pub const IMAGE_SYM_CLASS_MEMBER_OF_ENUM: u8 = 16; +/// A register parameter. +pub const IMAGE_SYM_CLASS_REGISTER_PARAM: u8 = 17; +/// A bit-field reference. +/// +/// The `value` field specifies the n th bit in the bit field. +pub const IMAGE_SYM_CLASS_BIT_FIELD: u8 = 18; +/// A .bb (beginning of block) or .eb (end of block) record. +/// +/// The `value` field is the relocatable address of the code location. +pub const IMAGE_SYM_CLASS_BLOCK: u8 = 100; +/// A value that Microsoft tools use for symbol records that define the extent of a function. +/// +/// Records may be begin function (.bf ), end function ( .ef ), and lines in function ( .lf ). +/// For .lf records, the `value` field gives the number of source lines in the function. +/// For .ef records, the `value` field gives the size of the function code. +pub const IMAGE_SYM_CLASS_FUNCTION: u8 = 101; +/// An end-of-structure entry. +pub const IMAGE_SYM_CLASS_END_OF_STRUCT: u8 = 102; +/// The source-file symbol record. +/// +/// The symbol is followed by auxiliary records that name the file. +pub const IMAGE_SYM_CLASS_FILE: u8 = 103; +/// A definition of a section (Microsoft tools use STATIC storage class instead). +pub const IMAGE_SYM_CLASS_SECTION: u8 = 104; +/// A weak external. +pub const IMAGE_SYM_CLASS_WEAK_EXTERNAL: u8 = 105; +/// A CLR token symbol. +/// +/// The name is an ASCII string that consists of the hexadecimal value of the token. +pub const IMAGE_SYM_CLASS_CLR_TOKEN: u8 = 107; + +/// A COFF symbol. +/// +/// Unwind information for this function can be loaded with [`ExceptionData::get_unwind_info`]. 
+/// +/// [`ExceptionData::get_unwind_info`]: struct.ExceptionData.html#method.get_unwind_info +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct Symbol { + /// The name of the symbol. + /// + /// An array of 8 bytes is used if the name is not more than 8 bytes long. + /// This array is padded with nulls on the right if the name is less than 8 bytes long. + /// + /// For longer names, the first 4 bytes are all zeros, and the second 4 bytes + /// are an offset into the string table. + pub name: [u8; 8], + /// The value that is associated with the symbol. + /// + /// The interpretation of this field depends on `section_number` and + /// `storage_class`. A typical meaning is the relocatable address. + pub value: u32, + /// A one-based index into the section table. Zero and negative values have special meanings. + pub section_number: i16, + /// A number that represents type. + /// + /// Microsoft tools set this field to 0x20 (function) or 0x0 (not a function). + pub typ: u16, + /// An enumerated value that represents storage class. + pub storage_class: u8, + /// The number of auxiliary symbol table entries that follow this record. + /// + /// Each auxiliary record is the same size as a standard symbol-table record (18 bytes), + /// but rather than define a new symbol, the auxiliary record gives additional information + /// on the last symbol defined. + pub number_of_aux_symbols: u8, +} + +impl Symbol { + /// Parse the symbol at the given offset. + /// + /// If the symbol has an inline name, then also returns a reference to the name's + /// location in `bytes`. + pub fn parse<'a>(bytes: &'a [u8], offset: usize) -> error::Result<(Option<&'a str>, Symbol)> { + let symbol = bytes.pread::<Symbol>(offset)?; + let name = if symbol.name[0] != 0 { + bytes + .pread_with(offset, ctx::StrCtx::DelimiterUntil(0, 8)) + .ok() + } else { + None + }; + Ok((name, symbol)) + } + + /// Returns the symbol name. 
+ /// + /// This may be a reference to an inline name in the symbol, or to + /// a strtab entry. + pub fn name<'a>(&'a self, strtab: &'a strtab::Strtab) -> error::Result<&'a str> { + if let Some(offset) = self.name_offset() { + strtab.get_at(offset as usize).ok_or_else(|| { + error::Error::Malformed(format!("Invalid Symbol name offset {:#x}", offset)) + }) + } else { + Ok(self.name.pread(0)?) + } + } + + /// Return the strtab offset of the symbol name. + /// + /// Returns `None` if the name is inline. + pub fn name_offset(&self) -> Option<u32> { + // Symbol offset starts at the strtable's length, so let's adjust it + let length_field_size = core::mem::size_of::<u32>() as u32; + + if self.name[0] == 0 { + self.name + .pread_with(4, scroll::LE) + .ok() + .map(|offset: u32| offset - length_field_size) + } else { + None + } + } + + /// Set the strtab offset of the symbol name. + pub fn set_name_offset(&mut self, offset: u32) { + self.name[..4].copy_from_slice(&[0; 4]); + self.name.pwrite_with(offset, 4, scroll::LE).unwrap(); + } + + /// Return the base type of the symbol. + /// + /// This type uses the `IMAGE_SYM_TYPE_*` definitions. + pub fn base_type(&self) -> u16 { + self.typ & IMAGE_SYM_TYPE_MASK + } + + /// Return the derived type of the symbol. + /// + /// This type uses the `IMAGE_SYM_DTYPE_*` definitions. + pub fn derived_type(&self) -> u16 { + self.typ >> IMAGE_SYM_DTYPE_SHIFT + } + + /// Return true for function definitions. + /// + /// These symbols use `AuxFunctionDefinition` for auxiliary symbol records. + pub fn is_function_definition(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_EXTERNAL + && self.derived_type() == IMAGE_SYM_DTYPE_FUNCTION + && self.section_number > 0 + } + + /// Return true for weak external symbols. + /// + /// These symbols use `AuxWeakExternal` for auxiliary symbol records. + pub fn is_weak_external(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_WEAK_EXTERNAL + } + + /// Return true for file symbol records. 
+ /// + /// The auxiliary records contain the name of the source code file. + pub fn is_file(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_FILE + } + + /// Return true for section definitions. + /// + /// These symbols use `AuxSectionDefinition` for auxiliary symbol records. + pub fn is_section_definition(&self) -> bool { + self.storage_class == IMAGE_SYM_CLASS_STATIC && self.number_of_aux_symbols > 0 + } +} + +/// Auxiliary symbol record for function definitions. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxFunctionDefinition { + /// The symbol-table index of the corresponding `.bf` (begin function) symbol record. + pub tag_index: u32, + /// The size of the executable code for the function itself. + /// + /// If the function is in its own section, the `size_of_raw_data` in the section header + /// is greater or equal to this field, depending on alignment considerations. + pub total_size: u32, + /// The file offset of the first COFF line-number entry for the function, + /// or zero if none exists. + pub pointer_to_line_number: u32, + /// The symbol-table index of the record for the next function. + /// + /// If the function is the last in the symbol table, this field is set to zero. + pub pointer_to_next_function: u32, + /// Unused padding. + pub unused: [u8; 2], +} + +/// Auxiliary symbol record for symbols with storage class `IMAGE_SYM_CLASS_FUNCTION`. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxBeginAndEndFunction { + /// Unused padding. + pub unused1: [u8; 4], + /// The actual ordinal line number within the source file, corresponding + /// to the `.bf` or `.ef` record. + pub line_number: u16, + /// Unused padding. + pub unused2: [u8; 6], + /// The symbol-table index of the next `.bf` symbol record. + /// + /// If the function is the last in the symbol table, this field is set to zero. 
+ /// It is not used for `.ef` records. + pub pointer_to_next_function: u32, + /// Unused padding. + pub unused3: [u8; 2], +} + +// Values for the `characteristics` field of `AuxWeakExternal`. + +/// Indicates that no library search for the symbol should be performed. +pub const IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY: u32 = 1; +/// Indicates that a library search for the symbol should be performed. +pub const IMAGE_WEAK_EXTERN_SEARCH_LIBRARY: u32 = 2; +/// Indicates that the symbol is an alias for the symbol given by the `tag_index` field. +pub const IMAGE_WEAK_EXTERN_SEARCH_ALIAS: u32 = 3; + +/// Auxiliary symbol record for weak external symbols. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxWeakExternal { + /// The symbol-table index of the symbol to be linked if an external definition is not found. + pub tag_index: u32, + /// Flags that control how the symbol should be linked. + pub characteristics: u32, + /// Unused padding. + pub unused: [u8; 10], +} + +// Values for the `selection` field of `AuxSectionDefinition`. + +/// If this symbol is already defined, the linker issues a "multiply defined symbol" error. +pub const IMAGE_COMDAT_SELECT_NODUPLICATES: u8 = 1; +/// Any section that defines the same COMDAT symbol can be linked; the rest are removed. +pub const IMAGE_COMDAT_SELECT_ANY: u8 = 2; +/// The linker chooses an arbitrary section among the definitions for this symbol. +/// +/// If all definitions are not the same size, a "multiply defined symbol" error is issued. +pub const IMAGE_COMDAT_SELECT_SAME_SIZE: u8 = 3; +/// The linker chooses an arbitrary section among the definitions for this symbol. +/// +/// If all definitions do not match exactly, a "multiply defined symbol" error is issued. +pub const IMAGE_COMDAT_SELECT_EXACT_MATCH: u8 = 4; +/// The section is linked if a certain other COMDAT section is linked. 
+/// +/// This other section is indicated by the `number` field of the auxiliary symbol record +/// for the section definition. This setting is useful for definitions that have components +/// in multiple sections (for example, code in one and data in another), but where all must +/// be linked or discarded as a set. The other section with which this section is associated +/// must be a COMDAT section; it cannot be another associative COMDAT section (that is, the +/// other section cannot have `IMAGE_COMDAT_SELECT_ASSOCIATIVE` set). +pub const IMAGE_COMDAT_SELECT_ASSOCIATIVE: u8 = 5; +/// The linker chooses the largest definition from among all of the definitions for this symbol. +/// +/// If multiple definitions have this size, the choice between them is arbitrary. +pub const IMAGE_COMDAT_SELECT_LARGEST: u8 = 6; + +/// Auxiliary symbol record for section definitions. +#[repr(C)] +#[derive(Debug, Copy, Clone, PartialEq, Default, Pread, Pwrite, IOread, IOwrite, SizeWith)] +pub struct AuxSectionDefinition { + /// The size of section data; the same as `size_of_raw_data` in the section header. + pub length: u32, + /// The number of relocation entries for the section. + pub number_of_relocations: u16, + /// The number of line-number entries for the section. + pub number_of_line_numbers: u16, + /// The checksum for communal data. + /// + /// It is applicable if the `IMAGE_SCN_LNK_COMDAT` flag is set in the section header. + pub checksum: u32, + /// One-based index into the section table for the associated section. + /// + /// This is used when the `selection` field is `IMAGE_COMDAT_SELECT_ASSOCIATIVE`. + pub number: u16, + /// The COMDAT selection number. + /// + /// This is applicable if the section is a COMDAT section. + pub selection: u8, + /// Unused padding. + pub unused: [u8; 3], +} + +/// A COFF symbol table. +pub struct SymbolTable<'a> { + symbols: &'a [u8], +} + +impl<'a> SymbolTable<'a> { + /// Parse a COFF symbol table at the given offset. 
+ /// + /// The offset and number of symbols should be from the COFF header. + pub fn parse(bytes: &'a [u8], offset: usize, number: usize) -> error::Result<SymbolTable<'a>> { + let symbols = bytes.pread_with(offset, Self::size(number))?; + Ok(SymbolTable { symbols }) + } + + /// Get the size in bytes of the symbol table. + pub fn size(number: usize) -> usize { + number * COFF_SYMBOL_SIZE + } + + /// Get the symbol at the given index. + /// + /// If the symbol has an inline name, then also returns a reference to the name's + /// location in `bytes`. + pub fn get(&self, index: usize) -> Option<(Option<&'a str>, Symbol)> { + let offset = index * COFF_SYMBOL_SIZE; + Symbol::parse(self.symbols, offset).ok() + } + + /// Get the auxiliary symbol record for a function definition. + pub fn aux_function_definition(&self, index: usize) -> Option<AuxFunctionDefinition> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the auxiliary symbol record for a `.bf` or `.ef` symbol record. + pub fn aux_begin_and_end_function(&self, index: usize) -> Option<AuxBeginAndEndFunction> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the auxiliary symbol record for a weak external. + pub fn aux_weak_external(&self, index: usize) -> Option<AuxWeakExternal> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Get the file name from the auxiliary symbol record for a file symbol record. + pub fn aux_file(&self, index: usize, number: usize) -> Option<&'a str> { + let offset = index * COFF_SYMBOL_SIZE; + let length = number * COFF_SYMBOL_SIZE; + self.symbols + .pread_with(offset, ctx::StrCtx::DelimiterUntil(0, length)) + .ok() + } + + /// Get the auxiliary symbol record for a section definition. 
+ pub fn aux_section_definition(&self, index: usize) -> Option<AuxSectionDefinition> { + let offset = index * COFF_SYMBOL_SIZE; + self.symbols.pread(offset).ok() + } + + /// Return an iterator for the COFF symbols. + /// + /// This iterator skips over auxiliary symbol records. + pub fn iter(&self) -> SymbolIterator<'a> { + SymbolIterator { + index: 0, + symbols: self.symbols, + } + } +} + +impl<'a> Debug for SymbolTable<'a> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("SymbolTable") + .field("symbols", &self.iter().collect::<Vec<_>>()) + .finish() + } +} + +/// An iterator for COFF symbols. +/// +/// This iterator skips over auxiliary symbol records. +#[derive(Default)] +pub struct SymbolIterator<'a> { + index: usize, + symbols: &'a [u8], +} + +impl<'a> Iterator for SymbolIterator<'a> { + type Item = (usize, Option<&'a str>, Symbol); + fn next(&mut self) -> Option<Self::Item> { + let offset = self.index * COFF_SYMBOL_SIZE; + if offset >= self.symbols.len() { + None + } else { + let index = self.index; + let (name, symbol) = Symbol::parse(self.symbols, offset).ok()?; + self.index += 1 + symbol.number_of_aux_symbols as usize; + Some((index, name, symbol)) + } + } +} diff --git a/third_party/rust/goblin/src/pe/utils.rs b/third_party/rust/goblin/src/pe/utils.rs new file mode 100644 index 0000000000..07a320d57b --- /dev/null +++ b/third_party/rust/goblin/src/pe/utils.rs @@ -0,0 +1,180 @@ +use crate::error; +use alloc::string::ToString; +use scroll::Pread; + +use super::options; +use super::section_table; + +use crate::pe::data_directories::DataDirectory; +use core::cmp; + +use log::debug; + +pub fn is_in_range(rva: usize, r1: usize, r2: usize) -> bool { + r1 <= rva && rva < r2 +} + +// reference: Peter Ferrie. Reliable algorithm to extract overlay of a PE. 
https://bit.ly/2vBX2bR +#[inline] +fn aligned_pointer_to_raw_data(pointer_to_raw_data: usize) -> usize { + const PHYSICAL_ALIGN: usize = 0x1ff; + pointer_to_raw_data & !PHYSICAL_ALIGN +} + +#[inline] +fn section_read_size(section: §ion_table::SectionTable, file_alignment: u32) -> usize { + fn round_size(size: usize) -> usize { + const PAGE_MASK: usize = 0xfff; + (size + PAGE_MASK) & !PAGE_MASK + } + + // Paraphrased from https://reverseengineering.stackexchange.com/a/4326 (by Peter Ferrie). + // + // Handles the corner cases such as mis-aligned pointers (round down) and sizes (round up) + // Further rounding corner cases: + // - the physical pointer should be rounded down to a multiple of 512, regardless of the value in the header + // - the read size is rounded up by using a combination of the file alignment and 4kb + // - the virtual size is always rounded up to a multiple of 4kb, regardless of the value in the header. + // + // Reference C implementation: + // + // long pointerToRaw = section.get(POINTER_TO_RAW_DATA); + // long alignedpointerToRaw = pointerToRaw & ~0x1ff; + // long sizeOfRaw = section.get(SIZE_OF_RAW_DATA); + // long readsize = ((pointerToRaw + sizeOfRaw) + filealign - 1) & ~(filealign - 1)) - alignedpointerToRaw; + // readsize = min(readsize, (sizeOfRaw + 0xfff) & ~0xfff); + // long virtsize = section.get(VIRTUAL_SIZE); + // + // if (virtsize) + // { + // readsize = min(readsize, (virtsize + 0xfff) & ~0xfff); + // } + + let file_alignment = file_alignment as usize; + let size_of_raw_data = section.size_of_raw_data as usize; + let virtual_size = section.virtual_size as usize; + let read_size = { + let read_size = + ((section.pointer_to_raw_data as usize + size_of_raw_data + file_alignment - 1) + & !(file_alignment - 1)) + - aligned_pointer_to_raw_data(section.pointer_to_raw_data as usize); + cmp::min(read_size, round_size(size_of_raw_data)) + }; + + if virtual_size == 0 { + read_size + } else { + cmp::min(read_size, round_size(virtual_size)) + } 
+} + +fn rva2offset(rva: usize, section: §ion_table::SectionTable) -> usize { + (rva - section.virtual_address as usize) + + aligned_pointer_to_raw_data(section.pointer_to_raw_data as usize) +} + +fn is_in_section(rva: usize, section: §ion_table::SectionTable, file_alignment: u32) -> bool { + let section_rva = section.virtual_address as usize; + is_in_range( + rva, + section_rva, + section_rva + section_read_size(section, file_alignment), + ) +} + +pub fn find_offset( + rva: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, +) -> Option<usize> { + if opts.resolve_rva { + if file_alignment == 0 || file_alignment & (file_alignment - 1) != 0 { + return None; + } + for (i, section) in sections.iter().enumerate() { + debug!( + "Checking {} for {:#x} ∈ {:#x}..{:#x}", + section.name().unwrap_or(""), + rva, + section.virtual_address, + section.virtual_address + section.virtual_size + ); + if is_in_section(rva, §ion, file_alignment) { + let offset = rva2offset(rva, §ion); + debug!( + "Found in section {}({}), remapped into offset {:#x}", + section.name().unwrap_or(""), + i, + offset + ); + return Some(offset); + } + } + None + } else { + Some(rva) + } +} + +pub fn find_offset_or( + rva: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, + msg: &str, +) -> error::Result<usize> { + find_offset(rva, sections, file_alignment, opts) + .ok_or_else(|| error::Error::Malformed(msg.to_string())) +} + +pub fn try_name<'a>( + bytes: &'a [u8], + rva: usize, + sections: &[section_table::SectionTable], + file_alignment: u32, + opts: &options::ParseOptions, +) -> error::Result<&'a str> { + match find_offset(rva, sections, file_alignment, opts) { + Some(offset) => Ok(bytes.pread::<&str>(offset)?), + None => Err(error::Error::Malformed(format!( + "Cannot find name from rva {:#x} in sections: {:?}", + rva, sections + ))), + } +} + +pub fn get_data<'a, T>( + bytes: &'a [u8], + 
sections: &[section_table::SectionTable], + directory: DataDirectory, + file_alignment: u32, +) -> error::Result<T> +where + T: scroll::ctx::TryFromCtx<'a, scroll::Endian, Error = scroll::Error>, +{ + get_data_with_opts( + bytes, + sections, + directory, + file_alignment, + &options::ParseOptions::default(), + ) +} + +pub fn get_data_with_opts<'a, T>( + bytes: &'a [u8], + sections: &[section_table::SectionTable], + directory: DataDirectory, + file_alignment: u32, + opts: &options::ParseOptions, +) -> error::Result<T> +where + T: scroll::ctx::TryFromCtx<'a, scroll::Endian, Error = scroll::Error>, +{ + let rva = directory.virtual_address as usize; + let offset = find_offset(rva, sections, file_alignment, opts) + .ok_or_else(|| error::Error::Malformed(directory.virtual_address.to_string()))?; + let result: T = bytes.pread_with(offset, scroll::LE)?; + Ok(result) +} |