diff options
Diffstat (limited to 'third_party/rust/object/src/write')
-rw-r--r-- | third_party/rust/object/src/write/coff.rs | 711 | ||||
-rw-r--r-- | third_party/rust/object/src/write/elf/mod.rs | 9 | ||||
-rw-r--r-- | third_party/rust/object/src/write/elf/object.rs | 808 | ||||
-rw-r--r-- | third_party/rust/object/src/write/elf/writer.rs | 1955 | ||||
-rw-r--r-- | third_party/rust/object/src/write/macho.rs | 886 | ||||
-rw-r--r-- | third_party/rust/object/src/write/mod.rs | 918 | ||||
-rw-r--r-- | third_party/rust/object/src/write/pe.rs | 847 | ||||
-rw-r--r-- | third_party/rust/object/src/write/string.rs | 159 | ||||
-rw-r--r-- | third_party/rust/object/src/write/util.rs | 210 |
9 files changed, 6503 insertions, 0 deletions
diff --git a/third_party/rust/object/src/write/coff.rs b/third_party/rust/object/src/write/coff.rs new file mode 100644 index 0000000000..c7489d3d6e --- /dev/null +++ b/third_party/rust/object/src/write/coff.rs @@ -0,0 +1,711 @@ +use alloc::vec::Vec; +use core::mem; + +use crate::endian::{LittleEndian as LE, U16Bytes, U32Bytes, U16, U32}; +use crate::pe as coff; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + offset: usize, + str_id: Option<StringId>, + reloc_offset: usize, + selection: u8, + associative_section: u16, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, + aux_count: u8, +} + +/// Internal format to use for the `.drectve` section containing linker +/// directives for symbol exports. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CoffExportStyle { + /// MSVC format supported by link.exe and LLD. + Msvc, + /// Gnu format supported by GNU LD and LLD. + Gnu, +} + +impl<'a> Object<'a> { + pub(crate) fn coff_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rdata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + // TLS sections are data sections with a special name. + StandardSection::Tls => (&[], &b".tls$"[..], SectionKind::Data), + StandardSection::UninitializedTls => { + // Unsupported section. + (&[], &[], SectionKind::UninitializedTls) + } + StandardSection::TlsVariables => { + // Unsupported section. 
+ (&[], &[], SectionKind::TlsVariables) + } + StandardSection::Common => { + // Unsupported section. + (&[], &[], SectionKind::Common) + } + } + } + + pub(crate) fn coff_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + let mut name = section.to_vec(); + name.push(b'$'); + name.extend_from_slice(value); + name + } + + pub(crate) fn coff_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + if relocation.kind == RelocationKind::GotRelative { + // Use a stub symbol for the relocation instead. + // This isn't really a GOT, but it's a similar purpose. + // TODO: need to handle DLL imports differently? + relocation.kind = RelocationKind::Relative; + relocation.symbol = self.coff_add_stub_symbol(relocation.symbol); + } else if relocation.kind == RelocationKind::PltRelative { + // Windows doesn't need a separate relocation type for + // references to functions in import libraries. + // For convenience, treat this the same as Relative. + relocation.kind = RelocationKind::Relative; + } + + let constant = match self.architecture { + Architecture::I386 | Architecture::Arm | Architecture::Aarch64 => match relocation.kind + { + RelocationKind::Relative => { + // IMAGE_REL_I386_REL32, IMAGE_REL_ARM_REL32, IMAGE_REL_ARM64_REL32 + relocation.addend + 4 + } + _ => relocation.addend, + }, + Architecture::X86_64 => match relocation.kind { + RelocationKind::Relative => { + // IMAGE_REL_AMD64_REL32 through to IMAGE_REL_AMD64_REL32_5 + if relocation.addend <= -4 && relocation.addend >= -9 { + 0 + } else { + relocation.addend + 4 + } + } + _ => relocation.addend, + }, + _ => unimplemented!(), + }; + relocation.addend -= constant; + constant + } + + fn coff_add_stub_symbol(&mut self, symbol_id: SymbolId) -> SymbolId { + if let Some(stub_id) = self.stub_symbols.get(&symbol_id) { + return *stub_id; + } + let stub_size = self.architecture.address_size().unwrap().bytes(); + + let name = b".rdata$.refptr".to_vec(); + let section_id = 
self.add_section(Vec::new(), name, SectionKind::ReadOnlyData); + let section = self.section_mut(section_id); + section.set_data(vec![0; stub_size as usize], u64::from(stub_size)); + section.relocations = vec![Relocation { + offset: 0, + size: stub_size * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: symbol_id, + addend: 0, + }]; + + let mut name = b".refptr.".to_vec(); + name.extend_from_slice(&self.symbol(symbol_id).name); + let stub_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: u64::from(stub_size), + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Section(section_id), + flags: SymbolFlags::None, + }); + self.stub_symbols.insert(symbol_id, stub_id); + + stub_id + } + + /// Appends linker directives to the `.drectve` section to tell the linker + /// to export all symbols with `SymbolScope::Dynamic`. + /// + /// This must be called after all symbols have been defined. + pub fn add_coff_exports(&mut self, style: CoffExportStyle) { + assert_eq!(self.format, BinaryFormat::Coff); + + let mut directives = vec![]; + for symbol in &self.symbols { + if symbol.scope == SymbolScope::Dynamic { + match style { + CoffExportStyle::Msvc => directives.extend(b" /EXPORT:\""), + CoffExportStyle::Gnu => directives.extend(b" -export:\""), + } + directives.extend(&symbol.name); + directives.extend(b"\""); + if symbol.kind != SymbolKind::Text { + match style { + CoffExportStyle::Msvc => directives.extend(b",DATA"), + CoffExportStyle::Gnu => directives.extend(b",data"), + } + } + } + } + let drectve = self.add_section(vec![], b".drectve".to_vec(), SectionKind::Linker); + self.append_section_data(drectve, &directives, 1); + } + + pub(crate) fn coff_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + let mut strtab = StringTable::default(); + + // COFF header. 
+ offset += mem::size_of::<coff::ImageFileHeader>(); + + // Section headers. + offset += self.sections.len() * mem::size_of::<coff::ImageSectionHeader>(); + + // Calculate size of section data and add section strings to strtab. + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + for (index, section) in self.sections.iter().enumerate() { + if section.name.len() > 8 { + section_offsets[index].str_id = Some(strtab.add(&section.name)); + } + + let len = section.data.len(); + if len != 0 { + // TODO: not sure what alignment is required here, but this seems to match LLVM + offset = align(offset, 4); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = 0; + } + + // Calculate size of relocations. + let mut count = section.relocations.len(); + if count != 0 { + section_offsets[index].reloc_offset = offset; + if count > 0xffff { + count += 1; + } + offset += count * mem::size_of::<coff::ImageRelocation>(); + } + } + + // Set COMDAT flags. 
+ for comdat in &self.comdats { + let symbol = &self.symbols[comdat.symbol.0]; + let comdat_section = match symbol.section { + SymbolSection::Section(id) => id.0, + _ => { + return Err(Error(format!( + "unsupported COMDAT symbol `{}` section {:?}", + symbol.name().unwrap_or(""), + symbol.section + ))); + } + }; + section_offsets[comdat_section].selection = match comdat.kind { + ComdatKind::NoDuplicates => coff::IMAGE_COMDAT_SELECT_NODUPLICATES, + ComdatKind::Any => coff::IMAGE_COMDAT_SELECT_ANY, + ComdatKind::SameSize => coff::IMAGE_COMDAT_SELECT_SAME_SIZE, + ComdatKind::ExactMatch => coff::IMAGE_COMDAT_SELECT_EXACT_MATCH, + ComdatKind::Largest => coff::IMAGE_COMDAT_SELECT_LARGEST, + ComdatKind::Newest => coff::IMAGE_COMDAT_SELECT_NEWEST, + ComdatKind::Unknown => { + return Err(Error(format!( + "unsupported COMDAT symbol `{}` kind {:?}", + symbol.name().unwrap_or(""), + comdat.kind + ))); + } + }; + for id in &comdat.sections { + let section = &self.sections[id.0]; + if section.symbol.is_none() { + return Err(Error(format!( + "missing symbol for COMDAT section `{}`", + section.name().unwrap_or(""), + ))); + } + if id.0 != comdat_section { + section_offsets[id.0].selection = coff::IMAGE_COMDAT_SELECT_ASSOCIATIVE; + section_offsets[id.0].associative_section = comdat_section as u16 + 1; + } + } + } + + // Calculate size of symbols and add symbol strings to strtab. + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut symtab_count = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + match symbol.kind { + SymbolKind::File => { + // Name goes in auxilary symbol records. + let aux_count = (symbol.name.len() + coff::IMAGE_SIZEOF_SYMBOL - 1) + / coff::IMAGE_SIZEOF_SYMBOL; + symbol_offsets[index].aux_count = aux_count as u8; + symtab_count += aux_count; + // Don't add name to strtab. 
+ continue; + } + SymbolKind::Section => { + symbol_offsets[index].aux_count = 1; + symtab_count += 1; + } + _ => {} + } + if symbol.name.len() > 8 { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_offset = offset; + let symtab_len = symtab_count * coff::IMAGE_SIZEOF_SYMBOL; + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // First 4 bytes of strtab are the length. + strtab.write(4, &mut strtab_data); + let strtab_len = strtab_data.len() + 4; + offset += strtab_len; + + // Start writing. + buffer + .reserve(offset) + .map_err(|_| Error(String::from("Cannot allocate buffer")))?; + + // Write file header. + let header = coff::ImageFileHeader { + machine: U16::new( + LE, + match self.architecture { + Architecture::Arm => coff::IMAGE_FILE_MACHINE_ARMNT, + Architecture::Aarch64 => coff::IMAGE_FILE_MACHINE_ARM64, + Architecture::I386 => coff::IMAGE_FILE_MACHINE_I386, + Architecture::X86_64 => coff::IMAGE_FILE_MACHINE_AMD64, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }, + ), + number_of_sections: U16::new(LE, self.sections.len() as u16), + time_date_stamp: U32::default(), + pointer_to_symbol_table: U32::new(LE, symtab_offset as u32), + number_of_symbols: U32::new(LE, symtab_count as u32), + size_of_optional_header: U16::default(), + characteristics: match self.flags { + FileFlags::Coff { characteristics } => U16::new(LE, characteristics), + _ => U16::default(), + }, + }; + buffer.write(&header); + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + let mut characteristics = if let SectionFlags::Coff { + characteristics, .. 
+ } = section.flags + { + characteristics + } else { + match section.kind { + SectionKind::Text => { + coff::IMAGE_SCN_CNT_CODE + | coff::IMAGE_SCN_MEM_EXECUTE + | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Data => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::UninitializedData => { + coff::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::ReadOnlyData | SectionKind::ReadOnlyString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Debug | SectionKind::Other | SectionKind::OtherString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_DISCARDABLE + } + SectionKind::Linker => coff::IMAGE_SCN_LNK_INFO | coff::IMAGE_SCN_LNK_REMOVE, + SectionKind::Common + | SectionKind::Tls + | SectionKind::UninitializedTls + | SectionKind::TlsVariables + | SectionKind::Note + | SectionKind::Unknown + | SectionKind::Metadata + | SectionKind::Elf(_) => { + return Err(Error(format!( + "unimplemented section `{}` kind {:?}", + section.name().unwrap_or(""), + section.kind + ))); + } + } + }; + if section_offsets[index].selection != 0 { + characteristics |= coff::IMAGE_SCN_LNK_COMDAT; + }; + if section.relocations.len() > 0xffff { + characteristics |= coff::IMAGE_SCN_LNK_NRELOC_OVFL; + } + characteristics |= match section.align { + 1 => coff::IMAGE_SCN_ALIGN_1BYTES, + 2 => coff::IMAGE_SCN_ALIGN_2BYTES, + 4 => coff::IMAGE_SCN_ALIGN_4BYTES, + 8 => coff::IMAGE_SCN_ALIGN_8BYTES, + 16 => coff::IMAGE_SCN_ALIGN_16BYTES, + 32 => coff::IMAGE_SCN_ALIGN_32BYTES, + 64 => coff::IMAGE_SCN_ALIGN_64BYTES, + 128 => coff::IMAGE_SCN_ALIGN_128BYTES, + 256 => coff::IMAGE_SCN_ALIGN_256BYTES, + 512 => coff::IMAGE_SCN_ALIGN_512BYTES, + 1024 => coff::IMAGE_SCN_ALIGN_1024BYTES, + 2048 => coff::IMAGE_SCN_ALIGN_2048BYTES, + 4096 => coff::IMAGE_SCN_ALIGN_4096BYTES, + 8192 => 
+ coff::IMAGE_SCN_ALIGN_8192BYTES, + _ => { + return Err(Error(format!( + "unimplemented section `{}` align {}", + section.name().unwrap_or(""), + section.align + ))); + } + }; + let mut coff_section = coff::ImageSectionHeader { + name: [0; 8], + virtual_size: U32::default(), + virtual_address: U32::default(), + size_of_raw_data: U32::new(LE, section.size as u32), + pointer_to_raw_data: U32::new(LE, section_offsets[index].offset as u32), + pointer_to_relocations: U32::new(LE, section_offsets[index].reloc_offset as u32), + pointer_to_linenumbers: U32::default(), + number_of_relocations: if section.relocations.len() > 0xffff { + U16::new(LE, 0xffff) + } else { + U16::new(LE, section.relocations.len() as u16) + }, + number_of_linenumbers: U16::default(), + characteristics: U32::new(LE, characteristics), + }; + if section.name.len() <= 8 { + coff_section.name[..section.name.len()].copy_from_slice(&section.name); + } else { + let mut str_offset = strtab.get_offset(section_offsets[index].str_id.unwrap()); + if str_offset <= 9_999_999 { + let mut name = [0; 7]; + let mut len = 0; + if str_offset == 0 { + name[6] = b'0'; + len = 1; + } else { + while str_offset != 0 { + let rem = (str_offset % 10) as u8; + str_offset /= 10; + name[6 - len] = b'0' + rem; + len += 1; + } + } + coff_section.name = [0; 8]; + coff_section.name[0] = b'/'; + coff_section.name[1..][..len].copy_from_slice(&name[7 - len..]); + } else if str_offset as u64 <= 0xf_ffff_ffff { + coff_section.name[0] = b'/'; + coff_section.name[1] = b'/'; + for i in 0..6 { + let rem = (str_offset % 64) as u8; + str_offset /= 64; + let c = match rem { + 0..=25 => b'A' + rem, + 26..=51 => b'a' + rem - 26, + 52..=61 => b'0' + rem - 52, + 62 => b'+', + 63 => b'/', + _ => unreachable!(), + }; + coff_section.name[7 - i] = c; + } + } else { + return Err(Error(format!("invalid section name offset {}", str_offset))); + } + } + buffer.write(&coff_section); + } + + // Write section data and relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(buffer, 4); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.write_bytes(&section.data); + } + + if !section.relocations.is_empty() { + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + if section.relocations.len() > 0xffff { + let coff_relocation = coff::ImageRelocation { + virtual_address: U32Bytes::new(LE, section.relocations.len() as u32 + 1), + symbol_table_index: U32Bytes::new(LE, 0), + typ: U16Bytes::new(LE, 0), + }; + buffer.write(&coff_relocation); + } + for reloc in &section.relocations { + //assert!(reloc.implicit_addend); + let typ = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 16, 0) => coff::IMAGE_REL_I386_DIR16, + (RelocationKind::Relative, 16, 0) => coff::IMAGE_REL_I386_REL16, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_I386_DIR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_I386_DIR32NB, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_I386_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_I386_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_I386_SECREL7, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_I386_REL32, + (RelocationKind::Coff(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::X86_64 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_AMD64_ADDR64, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32NB, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_AMD64_REL32, + (RelocationKind::Relative, 32, -5) => coff::IMAGE_REL_AMD64_REL32_1, + (RelocationKind::Relative, 32, -6) => coff::IMAGE_REL_AMD64_REL32_2, + 
(RelocationKind::Relative, 32, -7) => coff::IMAGE_REL_AMD64_REL32_3, + (RelocationKind::Relative, 32, -8) => coff::IMAGE_REL_AMD64_REL32_4, + (RelocationKind::Relative, 32, -9) => coff::IMAGE_REL_AMD64_REL32_5, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_AMD64_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_AMD64_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_AMD64_SECREL7, + (RelocationKind::Coff(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Arm => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_ARM_ADDR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_ARM_ADDR32NB, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_ARM_REL32, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_ARM_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_ARM_SECREL, + (RelocationKind::Coff(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Aarch64 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_ARM64_ADDR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_ARM64_ADDR32NB, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_ARM64_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_ARM64_SECREL, + (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_ARM64_ADDR64, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_ARM64_REL32, + (RelocationKind::Coff(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }; + let coff_relocation = coff::ImageRelocation { + virtual_address: U32Bytes::new(LE, reloc.offset as u32), + symbol_table_index: U32Bytes::new( + LE, 
+ symbol_offsets[reloc.symbol.0].index as u32, + ), + typ: U16Bytes::new(LE, typ), + }; + buffer.write(&coff_relocation); + } + } + } + + // Write symbols. + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + let mut name = &symbol.name[..]; + let section_number = match symbol.section { + SymbolSection::None => { + debug_assert_eq!(symbol.kind, SymbolKind::File); + coff::IMAGE_SYM_DEBUG + } + SymbolSection::Undefined => coff::IMAGE_SYM_UNDEFINED, + SymbolSection::Absolute => coff::IMAGE_SYM_ABSOLUTE, + SymbolSection::Common => coff::IMAGE_SYM_UNDEFINED, + SymbolSection::Section(id) => id.0 as u16 + 1, + }; + let typ = if symbol.kind == SymbolKind::Text { + coff::IMAGE_SYM_DTYPE_FUNCTION << coff::IMAGE_SYM_DTYPE_SHIFT + } else { + coff::IMAGE_SYM_TYPE_NULL + }; + let storage_class = match symbol.kind { + SymbolKind::File => { + // Name goes in auxilary symbol records. + name = b".file"; + coff::IMAGE_SYM_CLASS_FILE + } + SymbolKind::Section => coff::IMAGE_SYM_CLASS_STATIC, + SymbolKind::Label => coff::IMAGE_SYM_CLASS_LABEL, + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => { + match symbol.section { + SymbolSection::None => { + return Err(Error(format!( + "missing section for symbol `{}`", + symbol.name().unwrap_or("") + ))); + } + SymbolSection::Undefined | SymbolSection::Common => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + SymbolSection::Absolute | SymbolSection::Section(_) => { + match symbol.scope { + // TODO: does this need aux symbol records too? 
+ _ if symbol.weak => coff::IMAGE_SYM_CLASS_WEAK_EXTERNAL, + SymbolScope::Unknown => { + return Err(Error(format!( + "unimplemented symbol `{}` scope {:?}", + symbol.name().unwrap_or(""), + symbol.scope + ))); + } + SymbolScope::Compilation => coff::IMAGE_SYM_CLASS_STATIC, + SymbolScope::Linkage | SymbolScope::Dynamic => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + } + } + } + } + SymbolKind::Unknown | SymbolKind::Null => { + return Err(Error(format!( + "unimplemented symbol `{}` kind {:?}", + symbol.name().unwrap_or(""), + symbol.kind + ))); + } + }; + let number_of_aux_symbols = symbol_offsets[index].aux_count; + let value = if symbol.section == SymbolSection::Common { + symbol.size as u32 + } else { + symbol.value as u32 + }; + let mut coff_symbol = coff::ImageSymbol { + name: [0; 8], + value: U32Bytes::new(LE, value), + section_number: U16Bytes::new(LE, section_number as u16), + typ: U16Bytes::new(LE, typ), + storage_class, + number_of_aux_symbols, + }; + if name.len() <= 8 { + coff_symbol.name[..name.len()].copy_from_slice(name); + } else { + let str_offset = strtab.get_offset(symbol_offsets[index].str_id.unwrap()); + coff_symbol.name[4..8].copy_from_slice(&u32::to_le_bytes(str_offset as u32)); + } + buffer.write(&coff_symbol); + + // Write auxiliary symbols. 
+ match symbol.kind { + SymbolKind::File => { + let aux_len = number_of_aux_symbols as usize * coff::IMAGE_SIZEOF_SYMBOL; + debug_assert!(aux_len >= symbol.name.len()); + let old_len = buffer.len(); + buffer.write_bytes(&symbol.name); + buffer.resize(old_len + aux_len); + } + SymbolKind::Section => { + debug_assert_eq!(number_of_aux_symbols, 1); + let section_index = symbol.section.id().unwrap().0; + let section = &self.sections[section_index]; + let aux = coff::ImageAuxSymbolSection { + length: U32Bytes::new(LE, section.size as u32), + number_of_relocations: if section.relocations.len() > 0xffff { + U16Bytes::new(LE, 0xffff) + } else { + U16Bytes::new(LE, section.relocations.len() as u16) + }, + number_of_linenumbers: U16Bytes::default(), + check_sum: U32Bytes::new(LE, checksum(section.data())), + number: U16Bytes::new( + LE, + section_offsets[section_index].associative_section, + ), + selection: section_offsets[section_index].selection, + reserved: 0, + // TODO: bigobj + high_number: U16Bytes::default(), + }; + buffer.write(&aux); + } + _ => { + debug_assert_eq!(number_of_aux_symbols, 0); + } + } + } + + // Write strtab section. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.write_bytes(&u32::to_le_bytes(strtab_len as u32)); + buffer.write_bytes(&strtab_data); + + debug_assert_eq!(offset, buffer.len()); + + Ok(()) + } +} + +// JamCRC +fn checksum(data: &[u8]) -> u32 { + let mut hasher = crc32fast::Hasher::new_with_initial(0xffff_ffff); + hasher.update(data); + !hasher.finalize() +} diff --git a/third_party/rust/object/src/write/elf/mod.rs b/third_party/rust/object/src/write/elf/mod.rs new file mode 100644 index 0000000000..3a4f3716e8 --- /dev/null +++ b/third_party/rust/object/src/write/elf/mod.rs @@ -0,0 +1,9 @@ +//! Support for writing ELF files. +//! +//! Provides [`Writer`] for low level writing of ELF files. +//! This is also used to provide ELF support for [`write::Object`](crate::write::Object). 
+ +mod object; + +mod writer; +pub use writer::*; diff --git a/third_party/rust/object/src/write/elf/object.rs b/third_party/rust/object/src/write/elf/object.rs new file mode 100644 index 0000000000..6ea6592ac7 --- /dev/null +++ b/third_party/rust/object/src/write/elf/object.rs @@ -0,0 +1,808 @@ +use alloc::vec::Vec; + +use crate::elf; +use crate::write::elf::writer::*; +use crate::write::string::StringId; +use crate::write::*; +use crate::AddressSize; + +#[derive(Clone, Copy)] +struct ComdatOffsets { + offset: usize, + str_id: StringId, +} + +#[derive(Clone, Copy)] +struct SectionOffsets { + index: SectionIndex, + offset: usize, + str_id: StringId, + reloc_offset: usize, + reloc_str_id: Option<StringId>, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: SymbolIndex, + str_id: Option<StringId>, +} + +impl<'a> Object<'a> { + pub(crate) fn elf_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData | StandardSection::ReadOnlyString => { + (&[], &b".rodata"[..], SectionKind::ReadOnlyData) + } + StandardSection::ReadOnlyDataWithRel => (&[], b".data.rel.ro", SectionKind::Data), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + StandardSection::Tls => (&[], &b".tdata"[..], SectionKind::Tls), + StandardSection::UninitializedTls => { + (&[], &b".tbss"[..], SectionKind::UninitializedTls) + } + StandardSection::TlsVariables => { + // Unsupported section. + (&[], &[], SectionKind::TlsVariables) + } + StandardSection::Common => { + // Unsupported section. 
+ (&[], &[], SectionKind::Common) + } + } + } + + pub(crate) fn elf_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + let mut name = section.to_vec(); + name.push(b'.'); + name.extend_from_slice(value); + name + } + + fn elf_has_relocation_addend(&self) -> Result<bool> { + Ok(match self.architecture { + Architecture::Aarch64 => true, + Architecture::Arm => false, + Architecture::Avr => true, + Architecture::Bpf => false, + Architecture::I386 => false, + Architecture::X86_64 => true, + Architecture::X86_64_X32 => true, + Architecture::Hexagon => true, + Architecture::LoongArch64 => true, + Architecture::Mips => false, + Architecture::Mips64 => true, + Architecture::Msp430 => true, + Architecture::PowerPc => true, + Architecture::PowerPc64 => true, + Architecture::Riscv64 => true, + Architecture::Riscv32 => true, + Architecture::S390x => true, + Architecture::Sparc64 => true, + Architecture::Xtensa => true, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }) + } + + pub(crate) fn elf_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> Result<i64> { + // Return true if we should use a section symbol to avoid preemption. + fn want_section_symbol(relocation: &Relocation, symbol: &Symbol) -> bool { + if symbol.scope != SymbolScope::Dynamic { + // Only dynamic symbols can be preemptible. + return false; + } + match symbol.kind { + SymbolKind::Text | SymbolKind::Data => {} + _ => return false, + } + match relocation.kind { + // Anything using GOT or PLT is preemptible. + // We also require that `Other` relocations must already be correct. + RelocationKind::Got + | RelocationKind::GotRelative + | RelocationKind::GotBaseRelative + | RelocationKind::PltRelative + | RelocationKind::Elf(_) => return false, + // Absolute relocations are preemptible for non-local data. + // TODO: not sure if this rule is exactly correct + // This rule was added to handle global data references in debuginfo. 
+ // Maybe this should be a new relocation kind so that the caller can decide. + RelocationKind::Absolute => { + if symbol.kind == SymbolKind::Data { + return false; + } + } + _ => {} + } + true + } + + // Use section symbols for relocations where required to avoid preemption. + // Otherwise, the linker will fail with: + // relocation R_X86_64_PC32 against symbol `SomeSymbolName' can not be used when + // making a shared object; recompile with -fPIC + let symbol = &self.symbols[relocation.symbol.0]; + if want_section_symbol(relocation, symbol) { + if let Some(section) = symbol.section.id() { + relocation.addend += symbol.value as i64; + relocation.symbol = self.section_symbol(section); + } + } + + // Determine whether the addend is stored in the relocation or the data. + if self.elf_has_relocation_addend()? { + Ok(0) + } else { + let constant = relocation.addend; + relocation.addend = 0; + Ok(constant) + } + } + + pub(crate) fn elf_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { + // Create reloc section header names so we can reference them. + let is_rela = self.elf_has_relocation_addend()?; + let reloc_names: Vec<_> = self + .sections + .iter() + .map(|section| { + let mut reloc_name = Vec::with_capacity( + if is_rela { ".rela".len() } else { ".rel".len() } + section.name.len(), + ); + if !section.relocations.is_empty() { + reloc_name.extend_from_slice(if is_rela { + &b".rela"[..] + } else { + &b".rel"[..] + }); + reloc_name.extend_from_slice(&section.name); + } + reloc_name + }) + .collect(); + + // Start calculating offsets of everything. + let is_64 = match self.architecture.address_size().unwrap() { + AddressSize::U8 | AddressSize::U16 | AddressSize::U32 => false, + AddressSize::U64 => true, + }; + let mut writer = Writer::new(self.endian, is_64, buffer); + writer.reserve_file_header(); + + // Calculate size of section data. 
+ let mut comdat_offsets = Vec::with_capacity(self.comdats.len()); + for comdat in &self.comdats { + if comdat.kind != ComdatKind::Any { + return Err(Error(format!( + "unsupported COMDAT symbol `{}` kind {:?}", + self.symbols[comdat.symbol.0].name().unwrap_or(""), + comdat.kind + ))); + } + + writer.reserve_section_index(); + let offset = writer.reserve_comdat(comdat.sections.len()); + let str_id = writer.add_section_name(b".group"); + comdat_offsets.push(ComdatOffsets { offset, str_id }); + } + let mut section_offsets = Vec::with_capacity(self.sections.len()); + for (section, reloc_name) in self.sections.iter().zip(reloc_names.iter()) { + let index = writer.reserve_section_index(); + let offset = writer.reserve(section.data.len(), section.align as usize); + let str_id = writer.add_section_name(&section.name); + let mut reloc_str_id = None; + if !section.relocations.is_empty() { + writer.reserve_section_index(); + reloc_str_id = Some(writer.add_section_name(reloc_name)); + } + section_offsets.push(SectionOffsets { + index, + offset, + str_id, + // Relocation data is reserved later. + reloc_offset: 0, + reloc_str_id, + }); + } + + // Calculate index of symbols and add symbol strings to strtab. + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + writer.reserve_null_symbol_index(); + // Local symbols must come before global. 
+ for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + let section_index = symbol.section.id().map(|s| section_offsets[s.0].index); + symbol_offsets[index].index = writer.reserve_symbol_index(section_index); + } + } + let symtab_num_local = writer.symbol_count(); + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + let section_index = symbol.section.id().map(|s| section_offsets[s.0].index); + symbol_offsets[index].index = writer.reserve_symbol_index(section_index); + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.kind != SymbolKind::Section && !symbol.name.is_empty() { + symbol_offsets[index].str_id = Some(writer.add_string(&symbol.name)); + } + } + + // Calculate size of symbols. + writer.reserve_symtab_section_index(); + writer.reserve_symtab(); + if writer.symtab_shndx_needed() { + writer.reserve_symtab_shndx_section_index(); + } + writer.reserve_symtab_shndx(); + writer.reserve_strtab_section_index(); + writer.reserve_strtab(); + + // Calculate size of relocations. + for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + section_offsets[index].reloc_offset = writer.reserve_relocations(count, is_rela); + } + } + + // Calculate size of section headers. + writer.reserve_shstrtab_section_index(); + writer.reserve_shstrtab(); + writer.reserve_section_headers(); + + // Start writing. 
+ let e_type = elf::ET_REL; + let e_machine = match self.architecture { + Architecture::Aarch64 => elf::EM_AARCH64, + Architecture::Arm => elf::EM_ARM, + Architecture::Avr => elf::EM_AVR, + Architecture::Bpf => elf::EM_BPF, + Architecture::I386 => elf::EM_386, + Architecture::X86_64 => elf::EM_X86_64, + Architecture::X86_64_X32 => elf::EM_X86_64, + Architecture::Hexagon => elf::EM_HEXAGON, + Architecture::LoongArch64 => elf::EM_LOONGARCH, + Architecture::Mips => elf::EM_MIPS, + Architecture::Mips64 => elf::EM_MIPS, + Architecture::Msp430 => elf::EM_MSP430, + Architecture::PowerPc => elf::EM_PPC, + Architecture::PowerPc64 => elf::EM_PPC64, + Architecture::Riscv32 => elf::EM_RISCV, + Architecture::Riscv64 => elf::EM_RISCV, + Architecture::S390x => elf::EM_S390, + Architecture::Sparc64 => elf::EM_SPARCV9, + Architecture::Xtensa => elf::EM_XTENSA, + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }; + let (os_abi, abi_version, e_flags) = if let FileFlags::Elf { + os_abi, + abi_version, + e_flags, + } = self.flags + { + (os_abi, abi_version, e_flags) + } else { + (elf::ELFOSABI_NONE, 0, 0) + }; + writer.write_file_header(&FileHeader { + os_abi, + abi_version, + e_type, + e_machine, + e_entry: 0, + e_flags, + })?; + + // Write section data. + for comdat in &self.comdats { + writer.write_comdat_header(); + for section in &comdat.sections { + writer.write_comdat_entry(section_offsets[section.0].index); + } + } + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + writer.write_align(section.align as usize); + debug_assert_eq!(section_offsets[index].offset, writer.len()); + writer.write(§ion.data); + } + } + + // Write symbols. + writer.write_null_symbol(); + let mut write_symbol = |index: usize, symbol: &Symbol| -> Result<()> { + let st_info = if let SymbolFlags::Elf { st_info, .. 
} = symbol.flags { + st_info + } else { + let st_type = match symbol.kind { + SymbolKind::Null => elf::STT_NOTYPE, + SymbolKind::Text => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_FUNC + } + } + SymbolKind::Data => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else if symbol.is_common() { + elf::STT_COMMON + } else { + elf::STT_OBJECT + } + } + SymbolKind::Section => elf::STT_SECTION, + SymbolKind::File => elf::STT_FILE, + SymbolKind::Tls => elf::STT_TLS, + SymbolKind::Label => elf::STT_NOTYPE, + SymbolKind::Unknown => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + return Err(Error(format!( + "unimplemented symbol `{}` kind {:?}", + symbol.name().unwrap_or(""), + symbol.kind + ))); + } + } + }; + let st_bind = if symbol.weak { + elf::STB_WEAK + } else if symbol.is_undefined() { + elf::STB_GLOBAL + } else if symbol.is_local() { + elf::STB_LOCAL + } else { + elf::STB_GLOBAL + }; + (st_bind << 4) + st_type + }; + let st_other = if let SymbolFlags::Elf { st_other, .. 
} = symbol.flags { + st_other + } else if symbol.scope == SymbolScope::Linkage { + elf::STV_HIDDEN + } else { + elf::STV_DEFAULT + }; + let (st_shndx, section) = match symbol.section { + SymbolSection::None => { + debug_assert_eq!(symbol.kind, SymbolKind::File); + (elf::SHN_ABS, None) + } + SymbolSection::Undefined => (elf::SHN_UNDEF, None), + SymbolSection::Absolute => (elf::SHN_ABS, None), + SymbolSection::Common => (elf::SHN_COMMON, None), + SymbolSection::Section(id) => (0, Some(section_offsets[id.0].index)), + }; + writer.write_symbol(&Sym { + name: symbol_offsets[index].str_id, + section, + st_info, + st_other, + st_shndx, + st_value: symbol.value, + st_size: symbol.size, + }); + Ok(()) + }; + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + write_symbol(index, symbol)?; + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + write_symbol(index, symbol)?; + } + } + writer.write_symtab_shndx(); + writer.write_strtab(); + + // Write relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + writer.write_align_relocation(); + debug_assert_eq!(section_offsets[index].reloc_offset, writer.len()); + for reloc in §ion.relocations { + let r_type = match self.architecture { + Architecture::Aarch64 => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { + elf::R_AARCH64_ABS64 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { + elf::R_AARCH64_ABS32 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => { + elf::R_AARCH64_ABS16 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 64) => { + elf::R_AARCH64_PREL64 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_AARCH64_PREL32 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 16) => { + elf::R_AARCH64_PREL16 + } + (RelocationKind::Relative, RelocationEncoding::AArch64Call, 26) + | (RelocationKind::PltRelative, RelocationEncoding::AArch64Call, 26) => { + elf::R_AARCH64_CALL26 + } + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Arm => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_ARM_ABS32, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Avr => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_AVR_32, + (RelocationKind::Absolute, _, 16) => elf::R_AVR_16, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Bpf => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 64) => elf::R_BPF_64_64, + (RelocationKind::Absolute, _, 32) => elf::R_BPF_64_32, + (RelocationKind::Elf(x), _, 
_) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::I386 => match (reloc.kind, reloc.size) { + (RelocationKind::Absolute, 32) => elf::R_386_32, + (RelocationKind::Relative, 32) => elf::R_386_PC32, + (RelocationKind::Got, 32) => elf::R_386_GOT32, + (RelocationKind::PltRelative, 32) => elf::R_386_PLT32, + (RelocationKind::GotBaseOffset, 32) => elf::R_386_GOTOFF, + (RelocationKind::GotBaseRelative, 32) => elf::R_386_GOTPC, + (RelocationKind::Absolute, 16) => elf::R_386_16, + (RelocationKind::Relative, 16) => elf::R_386_PC16, + (RelocationKind::Absolute, 8) => elf::R_386_8, + (RelocationKind::Relative, 8) => elf::R_386_PC8, + (RelocationKind::Elf(x), _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::X86_64 | Architecture::X86_64_X32 => { + match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { + elf::R_X86_64_64 + } + (RelocationKind::Relative, _, 32) => elf::R_X86_64_PC32, + (RelocationKind::Got, _, 32) => elf::R_X86_64_GOT32, + (RelocationKind::PltRelative, _, 32) => elf::R_X86_64_PLT32, + (RelocationKind::GotRelative, _, 32) => elf::R_X86_64_GOTPCREL, + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { + elf::R_X86_64_32 + } + (RelocationKind::Absolute, RelocationEncoding::X86Signed, 32) => { + elf::R_X86_64_32S + } + (RelocationKind::Absolute, _, 16) => elf::R_X86_64_16, + (RelocationKind::Relative, _, 16) => elf::R_X86_64_PC16, + (RelocationKind::Absolute, _, 8) => elf::R_X86_64_8, + (RelocationKind::Relative, _, 8) => elf::R_X86_64_PC8, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!( + "unimplemented relocation {:?}", + reloc + ))); + } + } + } + Architecture::Hexagon => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_HEX_32, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return 
Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::LoongArch64 => match (reloc.kind, reloc.encoding, reloc.size) + { + (RelocationKind::Absolute, _, 32) => elf::R_LARCH_32, + (RelocationKind::Absolute, _, 64) => elf::R_LARCH_64, + (RelocationKind::Relative, _, 32) => elf::R_LARCH_32_PCREL, + (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 16) + | ( + RelocationKind::PltRelative, + RelocationEncoding::LoongArchBranch, + 16, + ) => elf::R_LARCH_B16, + (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 21) + | ( + RelocationKind::PltRelative, + RelocationEncoding::LoongArchBranch, + 21, + ) => elf::R_LARCH_B21, + (RelocationKind::Relative, RelocationEncoding::LoongArchBranch, 26) + | ( + RelocationKind::PltRelative, + RelocationEncoding::LoongArchBranch, + 26, + ) => elf::R_LARCH_B26, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Mips | Architecture::Mips64 => { + match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 16) => elf::R_MIPS_16, + (RelocationKind::Absolute, _, 32) => elf::R_MIPS_32, + (RelocationKind::Absolute, _, 64) => elf::R_MIPS_64, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!( + "unimplemented relocation {:?}", + reloc + ))); + } + } + } + Architecture::Msp430 => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_MSP430_32, + (RelocationKind::Absolute, _, 16) => elf::R_MSP430_16_BYTE, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::PowerPc => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_PPC_ADDR32, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::PowerPc64 => 
match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_PPC64_ADDR32, + (RelocationKind::Absolute, _, 64) => elf::R_PPC64_ADDR64, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Riscv32 | Architecture::Riscv64 => { + match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_RISCV_32, + (RelocationKind::Absolute, _, 64) => elf::R_RISCV_64, + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_RISCV_32_PCREL + } + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!( + "unimplemented relocation {:?}", + reloc + ))); + } + } + } + Architecture::S390x => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 8) => { + elf::R_390_8 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 16) => { + elf::R_390_16 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 32) => { + elf::R_390_32 + } + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { + elf::R_390_64 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 16) => { + elf::R_390_PC16 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_390_PC32 + } + (RelocationKind::Relative, RelocationEncoding::Generic, 64) => { + elf::R_390_PC64 + } + (RelocationKind::Relative, RelocationEncoding::S390xDbl, 16) => { + elf::R_390_PC16DBL + } + (RelocationKind::Relative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_PC32DBL + } + (RelocationKind::PltRelative, RelocationEncoding::S390xDbl, 16) => { + elf::R_390_PLT16DBL + } + (RelocationKind::PltRelative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_PLT32DBL + } + (RelocationKind::Got, RelocationEncoding::Generic, 16) => { + elf::R_390_GOT16 + } + (RelocationKind::Got, RelocationEncoding::Generic, 32) => { + elf::R_390_GOT32 + } + 
(RelocationKind::Got, RelocationEncoding::Generic, 64) => { + elf::R_390_GOT64 + } + (RelocationKind::GotRelative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_GOTENT + } + (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 16) => { + elf::R_390_GOTOFF16 + } + (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 32) => { + elf::R_390_GOTOFF32 + } + (RelocationKind::GotBaseOffset, RelocationEncoding::Generic, 64) => { + elf::R_390_GOTOFF64 + } + (RelocationKind::GotBaseRelative, RelocationEncoding::Generic, 64) => { + elf::R_390_GOTPC + } + (RelocationKind::GotBaseRelative, RelocationEncoding::S390xDbl, 32) => { + elf::R_390_GOTPCDBL + } + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Sparc64 => match (reloc.kind, reloc.encoding, reloc.size) { + // TODO: use R_SPARC_32/R_SPARC_64 if aligned. + (RelocationKind::Absolute, _, 32) => elf::R_SPARC_UA32, + (RelocationKind::Absolute, _, 64) => elf::R_SPARC_UA64, + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::Xtensa => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, _, 32) => elf::R_XTENSA_32, + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => { + elf::R_XTENSA_32_PCREL + } + (RelocationKind::Elf(x), _, _) => x, + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + _ => { + if let RelocationKind::Elf(x) = reloc.kind { + x + } else { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + } + }; + let r_sym = symbol_offsets[reloc.symbol.0].index.0; + writer.write_relocation( + is_rela, + &Rel { + r_offset: reloc.offset, + r_sym, + r_type, + r_addend: reloc.addend, + }, + ); + } + } + } + + writer.write_shstrtab(); + + // Write section headers. 
+ writer.write_null_section_header(); + + let symtab_index = writer.symtab_index(); + for (comdat, comdat_offset) in self.comdats.iter().zip(comdat_offsets.iter()) { + writer.write_comdat_section_header( + comdat_offset.str_id, + symtab_index, + symbol_offsets[comdat.symbol.0].index, + comdat_offset.offset, + comdat.sections.len(), + ); + } + for (index, section) in self.sections.iter().enumerate() { + let sh_type = match section.kind { + SectionKind::UninitializedData | SectionKind::UninitializedTls => elf::SHT_NOBITS, + SectionKind::Note => elf::SHT_NOTE, + SectionKind::Elf(sh_type) => sh_type, + _ => elf::SHT_PROGBITS, + }; + let sh_flags = if let SectionFlags::Elf { sh_flags } = section.flags { + sh_flags + } else { + match section.kind { + SectionKind::Text => elf::SHF_ALLOC | elf::SHF_EXECINSTR, + SectionKind::Data => elf::SHF_ALLOC | elf::SHF_WRITE, + SectionKind::Tls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::UninitializedData => elf::SHF_ALLOC | elf::SHF_WRITE, + SectionKind::UninitializedTls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::ReadOnlyData => elf::SHF_ALLOC, + SectionKind::ReadOnlyString => { + elf::SHF_ALLOC | elf::SHF_STRINGS | elf::SHF_MERGE + } + SectionKind::OtherString => elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::Other + | SectionKind::Debug + | SectionKind::Metadata + | SectionKind::Linker + | SectionKind::Note + | SectionKind::Elf(_) => 0, + SectionKind::Unknown | SectionKind::Common | SectionKind::TlsVariables => { + return Err(Error(format!( + "unimplemented section `{}` kind {:?}", + section.name().unwrap_or(""), + section.kind + ))); + } + } + .into() + }; + // TODO: not sure if this is correct, maybe user should determine this + let sh_entsize = match section.kind { + SectionKind::ReadOnlyString | SectionKind::OtherString => 1, + _ => 0, + }; + writer.write_section_header(&SectionHeader { + name: Some(section_offsets[index].str_id), + sh_type, + sh_flags, + sh_addr: 0, + sh_offset: 
section_offsets[index].offset as u64, + sh_size: section.size, + sh_link: 0, + sh_info: 0, + sh_addralign: section.align, + sh_entsize, + }); + + if !section.relocations.is_empty() { + writer.write_relocation_section_header( + section_offsets[index].reloc_str_id.unwrap(), + section_offsets[index].index, + symtab_index, + section_offsets[index].reloc_offset, + section.relocations.len(), + is_rela, + ); + } + } + + writer.write_symtab_section_header(symtab_num_local); + writer.write_symtab_shndx_section_header(); + writer.write_strtab_section_header(); + writer.write_shstrtab_section_header(); + + debug_assert_eq!(writer.reserved_len(), writer.len()); + + Ok(()) + } +} diff --git a/third_party/rust/object/src/write/elf/writer.rs b/third_party/rust/object/src/write/elf/writer.rs new file mode 100644 index 0000000000..3c9d85a126 --- /dev/null +++ b/third_party/rust/object/src/write/elf/writer.rs @@ -0,0 +1,1955 @@ +//! Helper for writing ELF files. +use alloc::string::String; +use alloc::vec::Vec; +use core::mem; + +use crate::elf; +use crate::endian::*; +use crate::write::string::{StringId, StringTable}; +use crate::write::util; +use crate::write::{Error, Result, WritableBuffer}; + +/// The index of an ELF section. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionIndex(pub u32); + +/// The index of an ELF symbol. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SymbolIndex(pub u32); + +/// A helper for writing ELF files. +/// +/// Writing uses a two phase approach. The first phase builds up all of the information +/// that may need to be known ahead of time: +/// - build string tables +/// - reserve section indices +/// - reserve symbol indices +/// - reserve file ranges for headers and sections +/// +/// Some of the information has ordering requirements. For example, strings must be added +/// to string tables before reserving the file range for the string table. 
Symbol indices
+/// must be reserved after reserving the section indices they reference. There are debug
+/// asserts to check some of these requirements.
+///
+/// The second phase writes everything out in order. Thus the caller must ensure writing
+/// is in the same order that file ranges were reserved. There are debug asserts to assist
+/// with checking this.
+#[allow(missing_debug_implementations)]
+pub struct Writer<'a> {
+    // Encoding parameters. `elf_align` is 8 for 64-bit files and 4 otherwise (see `new`),
+    // and is the alignment applied to the program header and section header tables.
+    // `is_mips64el` is only known once `write_file_header` has seen `e_machine`.
+    endian: Endianness,
+    is_64: bool,
+    is_mips64el: bool,
+    elf_align: usize,
+
+    // Output buffer, and the file length reserved so far (`reserved_len`). The length
+    // actually written is tracked by the buffer itself (`len`).
+    buffer: &'a mut dyn WritableBuffer,
+    len: usize,
+
+    // Reserved offset and entry count of the program header table.
+    segment_offset: usize,
+    segment_num: u32,
+
+    // Reserved offset and entry count of the section header table.
+    section_offset: usize,
+    section_num: u32,
+
+    // Section header string table (`.shstrtab`): pending names, the id of its own name,
+    // its section index, its reserved offset and its serialized bytes.
+    shstrtab: StringTable<'a>,
+    shstrtab_str_id: Option<StringId>,
+    shstrtab_index: SectionIndex,
+    shstrtab_offset: usize,
+    shstrtab_data: Vec<u8>,
+
+    // String table (`.strtab`); `need_strtab` is set by `add_string`.
+    need_strtab: bool,
+    strtab: StringTable<'a>,
+    strtab_str_id: Option<StringId>,
+    strtab_index: SectionIndex,
+    strtab_offset: usize,
+    strtab_data: Vec<u8>,
+
+    // Symbol table bookkeeping.
+    symtab_str_id: Option<StringId>,
+    symtab_index: SectionIndex,
+    symtab_offset: usize,
+    symtab_num: u32,
+
+    // NOTE(review): presumably the extended section index table that accompanies the
+    // symbol table; the code using these fields is outside this chunk -- confirm.
+    need_symtab_shndx: bool,
+    symtab_shndx_str_id: Option<StringId>,
+    symtab_shndx_offset: usize,
+    symtab_shndx_data: Vec<u8>,
+
+    // NOTE(review): the remaining groups repeat the same pattern (name id, reserved
+    // offset, size or count), presumably for the dynamic linking and GNU versioning
+    // sections their prefixes name; their users are not visible here -- confirm.
+    need_dynstr: bool,
+    dynstr: StringTable<'a>,
+    dynstr_str_id: Option<StringId>,
+    dynstr_index: SectionIndex,
+    dynstr_offset: usize,
+    dynstr_data: Vec<u8>,
+
+    dynsym_str_id: Option<StringId>,
+    dynsym_index: SectionIndex,
+    dynsym_offset: usize,
+    dynsym_num: u32,
+
+    dynamic_str_id: Option<StringId>,
+    dynamic_offset: usize,
+    dynamic_num: usize,
+
+    hash_str_id: Option<StringId>,
+    hash_offset: usize,
+    hash_size: usize,
+
+    gnu_hash_str_id: Option<StringId>,
+    gnu_hash_offset: usize,
+    gnu_hash_size: usize,
+
+    gnu_versym_str_id: Option<StringId>,
+    gnu_versym_offset: usize,
+
+    gnu_verdef_str_id: Option<StringId>,
+    gnu_verdef_offset: usize,
+    gnu_verdef_size: usize,
+    gnu_verdef_count: u16,
+    gnu_verdef_remaining: u16,
+    gnu_verdaux_remaining: u16,
+
+    gnu_verneed_str_id: Option<StringId>,
+    gnu_verneed_offset: usize,
+    gnu_verneed_size: usize,
+    gnu_verneed_count: u16,
+    gnu_verneed_remaining: u16,
+    gnu_vernaux_remaining: u16,
+}
+
+impl<'a> Writer<'a> {
+    /// Create a new `Writer` for the given endianness and ELF class.
+    ///
+    /// Nothing is written to `buffer` here. Every offset, count and section index starts
+    /// at zero, every string table starts empty and every `need_*` flag starts `false`.
+    pub fn new(endian: Endianness, is_64: bool, buffer: &'a mut dyn WritableBuffer) -> Self {
+        // Alignment used for the program header and section header tables.
+        let elf_align = if is_64 { 8 } else { 4 };
+        Writer {
+            endian,
+            is_64,
+            // Determined later.
+            is_mips64el: false,
+            elf_align,
+
+            buffer,
+            len: 0,
+
+            segment_offset: 0,
+            segment_num: 0,
+
+            section_offset: 0,
+            section_num: 0,
+
+            // For `.shstrtab` and `.strtab`, a zero offset or `SectionIndex(0)` is what the
+            // corresponding `write_*` methods treat as "not reserved".
+            shstrtab: StringTable::default(),
+            shstrtab_str_id: None,
+            shstrtab_index: SectionIndex(0),
+            shstrtab_offset: 0,
+            shstrtab_data: Vec::new(),
+
+            need_strtab: false,
+            strtab: StringTable::default(),
+            strtab_str_id: None,
+            strtab_index: SectionIndex(0),
+            strtab_offset: 0,
+            strtab_data: Vec::new(),
+
+            symtab_str_id: None,
+            symtab_index: SectionIndex(0),
+            symtab_offset: 0,
+            symtab_num: 0,
+
+            need_symtab_shndx: false,
+            symtab_shndx_str_id: None,
+            symtab_shndx_offset: 0,
+            symtab_shndx_data: Vec::new(),
+
+            need_dynstr: false,
+            dynstr: StringTable::default(),
+            dynstr_str_id: None,
+            dynstr_index: SectionIndex(0),
+            dynstr_offset: 0,
+            dynstr_data: Vec::new(),
+
+            dynsym_str_id: None,
+            dynsym_index: SectionIndex(0),
+            dynsym_offset: 0,
+            dynsym_num: 0,
+
+            dynamic_str_id: None,
+            dynamic_offset: 0,
+            dynamic_num: 0,
+
+            hash_str_id: None,
+            hash_offset: 0,
+            hash_size: 0,
+
+            gnu_hash_str_id: None,
+            gnu_hash_offset: 0,
+            gnu_hash_size: 0,
+
+            gnu_versym_str_id: None,
+            gnu_versym_offset: 0,
+
+            gnu_verdef_str_id: None,
+            gnu_verdef_offset: 0,
+            gnu_verdef_size: 0,
+            gnu_verdef_count: 0,
+            gnu_verdef_remaining: 0,
+            gnu_verdaux_remaining: 0,
+
+            gnu_verneed_str_id: None,
+            gnu_verneed_offset: 0,
+            gnu_verneed_size: 0,
+            gnu_verneed_count: 0,
+            gnu_verneed_remaining: 0,
+            gnu_vernaux_remaining: 0,
+        }
+    }
+
+    /// Return the current file length that has been
reserved.
+    pub fn reserved_len(&self) -> usize {
+        self.len
+    }
+
+    /// Return the number of bytes written to the output buffer so far.
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Reserve `len` bytes of the file, starting at a multiple of `align_start`.
+    ///
+    /// Returns the offset at which the reserved range begins. A zero-length request
+    /// reserves nothing and returns the current reserved length without aligning it.
+    pub fn reserve(&mut self, len: usize, align_start: usize) -> usize {
+        if len == 0 {
+            return self.len;
+        }
+        let start = util::align(self.len, align_start);
+        self.len = start + len;
+        start
+    }
+
+    /// Pad the output so that the next write starts at a multiple of `align_start`.
+    pub fn write_align(&mut self, align_start: usize) {
+        util::write_align(self.buffer, align_start);
+    }
+
+    /// Append raw bytes to the output.
+    ///
+    /// Section contents are typically written with this.
+    pub fn write(&mut self, data: &[u8]) {
+        self.buffer.write_bytes(data);
+    }
+
+    /// Extend the reserved length up to the file offset `offset`.
+    pub fn reserve_until(&mut self, offset: usize) {
+        debug_assert!(offset >= self.len);
+        self.len = offset;
+    }
+
+    /// Pad the output up to the file offset `offset`.
+    pub fn pad_until(&mut self, offset: usize) {
+        debug_assert!(offset >= self.buffer.len());
+        self.buffer.resize(offset);
+    }
+
+    // Size in bytes of the file header for the configured ELF class.
+    fn file_header_size(&self) -> usize {
+        if self.is_64 {
+            mem::size_of::<elf::FileHeader64<Endianness>>()
+        } else {
+            mem::size_of::<elf::FileHeader32<Endianness>>()
+        }
+    }
+
+    /// Reserve space for the file header.
+    ///
+    /// Nothing may have been reserved before this; the header sits at offset 0.
+    pub fn reserve_file_header(&mut self) {
+        debug_assert_eq!(self.len, 0);
+        let header_len = self.file_header_size();
+        self.reserve(header_len, 1);
+    }
+
+    /// Write the file header.
+    ///
+    /// This must be at the start of the file.
+    ///
+    /// Fields that can be derived from known information are automatically set by this function.
+ pub fn write_file_header(&mut self, header: &FileHeader) -> Result<()> { + debug_assert_eq!(self.buffer.len(), 0); + + self.is_mips64el = + self.is_64 && self.endian.is_little_endian() && header.e_machine == elf::EM_MIPS; + + // Start writing. + self.buffer + .reserve(self.len) + .map_err(|_| Error(String::from("Cannot allocate buffer")))?; + + // Write file header. + let e_ident = elf::Ident { + magic: elf::ELFMAG, + class: if self.is_64 { + elf::ELFCLASS64 + } else { + elf::ELFCLASS32 + }, + data: if self.endian.is_little_endian() { + elf::ELFDATA2LSB + } else { + elf::ELFDATA2MSB + }, + version: elf::EV_CURRENT, + os_abi: header.os_abi, + abi_version: header.abi_version, + padding: [0; 7], + }; + + let e_ehsize = self.file_header_size() as u16; + + let e_phoff = self.segment_offset as u64; + let e_phentsize = if self.segment_num == 0 { + 0 + } else { + self.program_header_size() as u16 + }; + // TODO: overflow + let e_phnum = self.segment_num as u16; + + let e_shoff = self.section_offset as u64; + let e_shentsize = if self.section_num == 0 { + 0 + } else { + self.section_header_size() as u16 + }; + let e_shnum = if self.section_num >= elf::SHN_LORESERVE.into() { + 0 + } else { + self.section_num as u16 + }; + let e_shstrndx = if self.shstrtab_index.0 >= elf::SHN_LORESERVE.into() { + elf::SHN_XINDEX + } else { + self.shstrtab_index.0 as u16 + }; + + let endian = self.endian; + if self.is_64 { + let file = elf::FileHeader64 { + e_ident, + e_type: U16::new(endian, header.e_type), + e_machine: U16::new(endian, header.e_machine), + e_version: U32::new(endian, elf::EV_CURRENT.into()), + e_entry: U64::new(endian, header.e_entry), + e_phoff: U64::new(endian, e_phoff), + e_shoff: U64::new(endian, e_shoff), + e_flags: U32::new(endian, header.e_flags), + e_ehsize: U16::new(endian, e_ehsize), + e_phentsize: U16::new(endian, e_phentsize), + e_phnum: U16::new(endian, e_phnum), + e_shentsize: U16::new(endian, e_shentsize), + e_shnum: U16::new(endian, e_shnum), + e_shstrndx: 
U16::new(endian, e_shstrndx), + }; + self.buffer.write(&file) + } else { + let file = elf::FileHeader32 { + e_ident, + e_type: U16::new(endian, header.e_type), + e_machine: U16::new(endian, header.e_machine), + e_version: U32::new(endian, elf::EV_CURRENT.into()), + e_entry: U32::new(endian, header.e_entry as u32), + e_phoff: U32::new(endian, e_phoff as u32), + e_shoff: U32::new(endian, e_shoff as u32), + e_flags: U32::new(endian, header.e_flags), + e_ehsize: U16::new(endian, e_ehsize), + e_phentsize: U16::new(endian, e_phentsize), + e_phnum: U16::new(endian, e_phnum), + e_shentsize: U16::new(endian, e_shentsize), + e_shnum: U16::new(endian, e_shnum), + e_shstrndx: U16::new(endian, e_shstrndx), + }; + self.buffer.write(&file); + } + + Ok(()) + } + + fn program_header_size(&self) -> usize { + if self.is_64 { + mem::size_of::<elf::ProgramHeader64<Endianness>>() + } else { + mem::size_of::<elf::ProgramHeader32<Endianness>>() + } + } + + /// Reserve the range for the program headers. + pub fn reserve_program_headers(&mut self, num: u32) { + debug_assert_eq!(self.segment_offset, 0); + if num == 0 { + return; + } + self.segment_num = num; + self.segment_offset = + self.reserve(num as usize * self.program_header_size(), self.elf_align); + } + + /// Write alignment padding bytes prior to the program headers. + pub fn write_align_program_headers(&mut self) { + if self.segment_offset == 0 { + return; + } + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.segment_offset, self.buffer.len()); + } + + /// Write a program header. 
+    pub fn write_program_header(&mut self, header: &ProgramHeader) {
+        let e = self.endian;
+        if !self.is_64 {
+            // 32-bit class: the 64-bit fields of `header` are narrowed with `as u32`.
+            let raw = elf::ProgramHeader32 {
+                p_type: U32::new(e, header.p_type),
+                p_offset: U32::new(e, header.p_offset as u32),
+                p_vaddr: U32::new(e, header.p_vaddr as u32),
+                p_paddr: U32::new(e, header.p_paddr as u32),
+                p_filesz: U32::new(e, header.p_filesz as u32),
+                p_memsz: U32::new(e, header.p_memsz as u32),
+                p_flags: U32::new(e, header.p_flags),
+                p_align: U32::new(e, header.p_align as u32),
+            };
+            self.buffer.write(&raw);
+            return;
+        }
+        let raw = elf::ProgramHeader64 {
+            p_type: U32::new(e, header.p_type),
+            p_flags: U32::new(e, header.p_flags),
+            p_offset: U64::new(e, header.p_offset),
+            p_vaddr: U64::new(e, header.p_vaddr),
+            p_paddr: U64::new(e, header.p_paddr),
+            p_filesz: U64::new(e, header.p_filesz),
+            p_memsz: U64::new(e, header.p_memsz),
+            p_align: U64::new(e, header.p_align),
+        };
+        self.buffer.write(&raw);
+    }
+
+    /// Reserve index 0 of the section table for the null section header.
+    ///
+    /// Reserving any other section index does this implicitly; call it directly
+    /// only to force a section table that contains nothing but the null entry.
+    ///
+    /// This must be called before [`Self::reserve_section_headers`].
+    pub fn reserve_null_section_index(&mut self) -> SectionIndex {
+        debug_assert_eq!(self.section_num, 0);
+        // Make sure the table holds at least the null entry.
+        self.section_num = self.section_num.max(1);
+        SectionIndex(0)
+    }
+
+    /// Reserve a section table index.
+    ///
+    /// Automatically also reserves the null section header if required.
+    ///
+    /// This must be called before [`Self::reserve_section_headers`].
+    pub fn reserve_section_index(&mut self) -> SectionIndex {
+        debug_assert_eq!(self.section_offset, 0);
+        // Index 0 is the null section header, so the first index handed out is 1.
+        let index = self.section_num.max(1);
+        self.section_num = index + 1;
+        SectionIndex(index)
+    }
+
+    // Size in bytes of one section header for the configured ELF class.
+    fn section_header_size(&self) -> usize {
+        if self.is_64 {
+            mem::size_of::<elf::SectionHeader64<Endianness>>()
+        } else {
+            mem::size_of::<elf::SectionHeader32<Endianness>>()
+        }
+    }
+
+    /// Reserve space for the section header table.
+    ///
+    /// Does nothing when no section index has been reserved. Call this only after
+    /// [`Self::reserve_section_index`] and every other function that reserves
+    /// section indices.
+    pub fn reserve_section_headers(&mut self) {
+        debug_assert_eq!(self.section_offset, 0);
+        if self.section_num != 0 {
+            let table_len = self.section_num as usize * self.section_header_size();
+            self.section_offset = self.reserve(table_len, self.elf_align);
+        }
+    }
+
+    /// Write the null section header, after aligning the output for the table.
+    ///
+    /// It has to be written before any other section header. Does nothing when
+    /// no section index has been reserved.
+    pub fn write_null_section_header(&mut self) {
+        if self.section_num == 0 {
+            return;
+        }
+        util::write_align(self.buffer, self.elf_align);
+        debug_assert_eq!(self.section_offset, self.buffer.len());
+
+        // A section count or string table index of `SHN_LORESERVE` or more is stored
+        // in this header; otherwise the fields stay zero.
+        let sh_size: u64 = if self.section_num >= elf::SHN_LORESERVE.into() {
+            self.section_num.into()
+        } else {
+            0
+        };
+        let sh_link: u32 = if self.shstrtab_index.0 >= elf::SHN_LORESERVE.into() {
+            self.shstrtab_index.0
+        } else {
+            0
+        };
+        let null_header = SectionHeader {
+            name: None,
+            sh_type: 0,
+            sh_flags: 0,
+            sh_addr: 0,
+            sh_offset: 0,
+            sh_size,
+            sh_link,
+            // TODO: e_phnum overflow
+            sh_info: 0,
+            sh_addralign: 0,
+            sh_entsize: 0,
+        };
+        self.write_section_header(&null_header);
+    }
+
+    /// Write a section header.
+ pub fn write_section_header(&mut self, section: &SectionHeader) { + let sh_name = if let Some(name) = section.name { + self.shstrtab.get_offset(name) as u32 + } else { + 0 + }; + let endian = self.endian; + if self.is_64 { + let section = elf::SectionHeader64 { + sh_name: U32::new(endian, sh_name), + sh_type: U32::new(endian, section.sh_type), + sh_flags: U64::new(endian, section.sh_flags), + sh_addr: U64::new(endian, section.sh_addr), + sh_offset: U64::new(endian, section.sh_offset), + sh_size: U64::new(endian, section.sh_size), + sh_link: U32::new(endian, section.sh_link), + sh_info: U32::new(endian, section.sh_info), + sh_addralign: U64::new(endian, section.sh_addralign), + sh_entsize: U64::new(endian, section.sh_entsize), + }; + self.buffer.write(§ion); + } else { + let section = elf::SectionHeader32 { + sh_name: U32::new(endian, sh_name), + sh_type: U32::new(endian, section.sh_type), + sh_flags: U32::new(endian, section.sh_flags as u32), + sh_addr: U32::new(endian, section.sh_addr as u32), + sh_offset: U32::new(endian, section.sh_offset as u32), + sh_size: U32::new(endian, section.sh_size as u32), + sh_link: U32::new(endian, section.sh_link), + sh_info: U32::new(endian, section.sh_info), + sh_addralign: U32::new(endian, section.sh_addralign as u32), + sh_entsize: U32::new(endian, section.sh_entsize as u32), + }; + self.buffer.write(§ion); + } + } + + /// Add a section name to the section header string table. + /// + /// This will be stored in the `.shstrtab` section. + /// + /// This must be called before [`Self::reserve_shstrtab`]. + pub fn add_section_name(&mut self, name: &'a [u8]) -> StringId { + debug_assert_eq!(self.shstrtab_offset, 0); + self.shstrtab.add(name) + } + + /// Reserve the range for the section header string table. + /// + /// This range is used for a section named `.shstrtab`. + /// + /// This function does nothing if no sections were reserved. + /// This must be called after [`Self::add_section_name`]. 
+ /// and other functions that reserve section names and indices. + pub fn reserve_shstrtab(&mut self) { + debug_assert_eq!(self.shstrtab_offset, 0); + if self.section_num == 0 { + return; + } + // Start with null section name. + self.shstrtab_data = vec![0]; + self.shstrtab.write(1, &mut self.shstrtab_data); + self.shstrtab_offset = self.reserve(self.shstrtab_data.len(), 1); + } + + /// Write the section header string table. + /// + /// This function does nothing if the section was not reserved. + pub fn write_shstrtab(&mut self) { + if self.shstrtab_offset == 0 { + return; + } + debug_assert_eq!(self.shstrtab_offset, self.buffer.len()); + self.buffer.write_bytes(&self.shstrtab_data); + } + + /// Reserve the section index for the section header string table. + /// + /// This must be called before [`Self::reserve_shstrtab`] + /// and [`Self::reserve_section_headers`]. + pub fn reserve_shstrtab_section_index(&mut self) -> SectionIndex { + debug_assert_eq!(self.shstrtab_index, SectionIndex(0)); + self.shstrtab_str_id = Some(self.add_section_name(&b".shstrtab"[..])); + self.shstrtab_index = self.reserve_section_index(); + self.shstrtab_index + } + + /// Write the section header for the section header string table. + /// + /// This function does nothing if the section index was not reserved. + pub fn write_shstrtab_section_header(&mut self) { + if self.shstrtab_index == SectionIndex(0) { + return; + } + self.write_section_header(&SectionHeader { + name: self.shstrtab_str_id, + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: self.shstrtab_offset as u64, + sh_size: self.shstrtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }); + } + + /// Add a string to the string table. + /// + /// This will be stored in the `.strtab` section. + /// + /// This must be called before [`Self::reserve_strtab`]. 
+ pub fn add_string(&mut self, name: &'a [u8]) -> StringId { + debug_assert_eq!(self.strtab_offset, 0); + self.need_strtab = true; + self.strtab.add(name) + } + + /// Return true if `.strtab` is needed. + pub fn strtab_needed(&self) -> bool { + self.need_strtab + } + + /// Reserve the range for the string table. + /// + /// This range is used for a section named `.strtab`. + /// + /// This function does nothing if no strings or symbols were defined. + /// This must be called after [`Self::add_string`]. + pub fn reserve_strtab(&mut self) { + debug_assert_eq!(self.strtab_offset, 0); + if !self.need_strtab { + return; + } + // Start with null string. + self.strtab_data = vec![0]; + self.strtab.write(1, &mut self.strtab_data); + self.strtab_offset = self.reserve(self.strtab_data.len(), 1); + } + + /// Write the string table. + /// + /// This function does nothing if the section was not reserved. + pub fn write_strtab(&mut self) { + if self.strtab_offset == 0 { + return; + } + debug_assert_eq!(self.strtab_offset, self.buffer.len()); + self.buffer.write_bytes(&self.strtab_data); + } + + /// Reserve the section index for the string table. + /// + /// This must be called before [`Self::reserve_section_headers`]. + pub fn reserve_strtab_section_index(&mut self) -> SectionIndex { + debug_assert_eq!(self.strtab_index, SectionIndex(0)); + self.strtab_str_id = Some(self.add_section_name(&b".strtab"[..])); + self.strtab_index = self.reserve_section_index(); + self.strtab_index + } + + /// Write the section header for the string table. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_strtab_section_header(&mut self) { + if self.strtab_index == SectionIndex(0) { + return; + } + self.write_section_header(&SectionHeader { + name: self.strtab_str_id, + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: self.strtab_offset as u64, + sh_size: self.strtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }); + } + + /// Reserve the null symbol table entry. + /// + /// This will be stored in the `.symtab` section. + /// + /// The null symbol table entry is usually automatically reserved, + /// but this can be used to force an empty symbol table. + /// + /// This must be called before [`Self::reserve_symtab`]. + pub fn reserve_null_symbol_index(&mut self) -> SymbolIndex { + debug_assert_eq!(self.symtab_offset, 0); + debug_assert_eq!(self.symtab_num, 0); + self.symtab_num = 1; + // The symtab must link to a strtab. + self.need_strtab = true; + SymbolIndex(0) + } + + /// Reserve a symbol table entry. + /// + /// This will be stored in the `.symtab` section. + /// + /// `section_index` is used to determine whether `.symtab_shndx` is required. + /// + /// Automatically also reserves the null symbol if required. + /// Callers may assume that the returned indices will be sequential + /// starting at 1. + /// + /// This must be called before [`Self::reserve_symtab`] and + /// [`Self::reserve_symtab_shndx`]. + pub fn reserve_symbol_index(&mut self, section_index: Option<SectionIndex>) -> SymbolIndex { + debug_assert_eq!(self.symtab_offset, 0); + debug_assert_eq!(self.symtab_shndx_offset, 0); + if self.symtab_num == 0 { + self.symtab_num = 1; + // The symtab must link to a strtab. + self.need_strtab = true; + } + let index = self.symtab_num; + self.symtab_num += 1; + if let Some(section_index) = section_index { + if section_index.0 >= elf::SHN_LORESERVE.into() { + self.need_symtab_shndx = true; + } + } + SymbolIndex(index) + } + + /// Return the number of reserved symbol table entries. 
+ /// + /// Includes the null symbol. + pub fn symbol_count(&self) -> u32 { + self.symtab_num + } + + fn symbol_size(&self) -> usize { + if self.is_64 { + mem::size_of::<elf::Sym64<Endianness>>() + } else { + mem::size_of::<elf::Sym32<Endianness>>() + } + } + + /// Reserve the range for the symbol table. + /// + /// This range is used for a section named `.symtab`. + /// This function does nothing if no symbols were reserved. + /// This must be called after [`Self::reserve_symbol_index`]. + pub fn reserve_symtab(&mut self) { + debug_assert_eq!(self.symtab_offset, 0); + if self.symtab_num == 0 { + return; + } + self.symtab_offset = self.reserve( + self.symtab_num as usize * self.symbol_size(), + self.elf_align, + ); + } + + /// Write the null symbol. + /// + /// This must be the first symbol that is written. + /// This function does nothing if no symbols were reserved. + pub fn write_null_symbol(&mut self) { + if self.symtab_num == 0 { + return; + } + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.symtab_offset, self.buffer.len()); + if self.is_64 { + self.buffer.write(&elf::Sym64::<Endianness>::default()); + } else { + self.buffer.write(&elf::Sym32::<Endianness>::default()); + } + + if self.need_symtab_shndx { + self.symtab_shndx_data.write_pod(&U32::new(self.endian, 0)); + } + } + + /// Write a symbol. 
+ pub fn write_symbol(&mut self, sym: &Sym) { + let st_name = if let Some(name) = sym.name { + self.strtab.get_offset(name) as u32 + } else { + 0 + }; + let st_shndx = if let Some(section) = sym.section { + if section.0 >= elf::SHN_LORESERVE as u32 { + elf::SHN_XINDEX + } else { + section.0 as u16 + } + } else { + sym.st_shndx + }; + + let endian = self.endian; + if self.is_64 { + let sym = elf::Sym64 { + st_name: U32::new(endian, st_name), + st_info: sym.st_info, + st_other: sym.st_other, + st_shndx: U16::new(endian, st_shndx), + st_value: U64::new(endian, sym.st_value), + st_size: U64::new(endian, sym.st_size), + }; + self.buffer.write(&sym); + } else { + let sym = elf::Sym32 { + st_name: U32::new(endian, st_name), + st_info: sym.st_info, + st_other: sym.st_other, + st_shndx: U16::new(endian, st_shndx), + st_value: U32::new(endian, sym.st_value as u32), + st_size: U32::new(endian, sym.st_size as u32), + }; + self.buffer.write(&sym); + } + + if self.need_symtab_shndx { + let section_index = sym.section.unwrap_or(SectionIndex(0)); + self.symtab_shndx_data + .write_pod(&U32::new(self.endian, section_index.0)); + } + } + + /// Reserve the section index for the symbol table. + /// + /// This must be called before [`Self::reserve_section_headers`]. + pub fn reserve_symtab_section_index(&mut self) -> SectionIndex { + debug_assert_eq!(self.symtab_index, SectionIndex(0)); + self.symtab_str_id = Some(self.add_section_name(&b".symtab"[..])); + self.symtab_index = self.reserve_section_index(); + self.symtab_index + } + + /// Return the section index of the symbol table. + pub fn symtab_index(&mut self) -> SectionIndex { + self.symtab_index + } + + /// Write the section header for the symbol table. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_symtab_section_header(&mut self, num_local: u32) { + if self.symtab_index == SectionIndex(0) { + return; + } + self.write_section_header(&SectionHeader { + name: self.symtab_str_id, + sh_type: elf::SHT_SYMTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: self.symtab_offset as u64, + sh_size: self.symtab_num as u64 * self.symbol_size() as u64, + sh_link: self.strtab_index.0, + sh_info: num_local, + sh_addralign: self.elf_align as u64, + sh_entsize: self.symbol_size() as u64, + }); + } + + /// Return true if `.symtab_shndx` is needed. + pub fn symtab_shndx_needed(&self) -> bool { + self.need_symtab_shndx + } + + /// Reserve the range for the extended section indices for the symbol table. + /// + /// This range is used for a section named `.symtab_shndx`. + /// This also reserves a section index. + /// + /// This function does nothing if extended section indices are not needed. + /// This must be called after [`Self::reserve_symbol_index`]. + pub fn reserve_symtab_shndx(&mut self) { + debug_assert_eq!(self.symtab_shndx_offset, 0); + if !self.need_symtab_shndx { + return; + } + self.symtab_shndx_offset = self.reserve(self.symtab_num as usize * 4, 4); + self.symtab_shndx_data.reserve(self.symtab_num as usize * 4); + } + + /// Write the extended section indices for the symbol table. + /// + /// This function does nothing if the section was not reserved. + pub fn write_symtab_shndx(&mut self) { + if self.symtab_shndx_offset == 0 { + return; + } + debug_assert_eq!(self.symtab_shndx_offset, self.buffer.len()); + debug_assert_eq!(self.symtab_num as usize * 4, self.symtab_shndx_data.len()); + self.buffer.write_bytes(&self.symtab_shndx_data); + } + + /// Reserve the section index for the extended section indices symbol table. + /// + /// You should check [`Self::symtab_shndx_needed`] before calling this + /// unless you have other means of knowing if this section is needed. + /// + /// This must be called before [`Self::reserve_section_headers`]. 
+ pub fn reserve_symtab_shndx_section_index(&mut self) -> SectionIndex { + debug_assert!(self.symtab_shndx_str_id.is_none()); + self.symtab_shndx_str_id = Some(self.add_section_name(&b".symtab_shndx"[..])); + self.reserve_section_index() + } + + /// Write the section header for the extended section indices for the symbol table. + /// + /// This function does nothing if the section index was not reserved. + pub fn write_symtab_shndx_section_header(&mut self) { + if self.symtab_shndx_str_id.is_none() { + return; + } + let sh_size = if self.symtab_shndx_offset == 0 { + 0 + } else { + (self.symtab_num * 4) as u64 + }; + self.write_section_header(&SectionHeader { + name: self.symtab_shndx_str_id, + sh_type: elf::SHT_SYMTAB_SHNDX, + sh_flags: 0, + sh_addr: 0, + sh_offset: self.symtab_shndx_offset as u64, + sh_size, + sh_link: self.symtab_index.0, + sh_info: 0, + sh_addralign: 4, + sh_entsize: 4, + }); + } + + /// Add a string to the dynamic string table. + /// + /// This will be stored in the `.dynstr` section. + /// + /// This must be called before [`Self::reserve_dynstr`]. + pub fn add_dynamic_string(&mut self, name: &'a [u8]) -> StringId { + debug_assert_eq!(self.dynstr_offset, 0); + self.need_dynstr = true; + self.dynstr.add(name) + } + + /// Get a string that was previously added to the dynamic string table. + /// + /// Panics if the string was not added. + pub fn get_dynamic_string(&self, name: &'a [u8]) -> StringId { + self.dynstr.get_id(name) + } + + /// Return true if `.dynstr` is needed. + pub fn dynstr_needed(&self) -> bool { + self.need_dynstr + } + + /// Reserve the range for the dynamic string table. + /// + /// This range is used for a section named `.dynstr`. + /// + /// This function does nothing if no dynamic strings or symbols were defined. + /// This must be called after [`Self::add_dynamic_string`]. + pub fn reserve_dynstr(&mut self) { + debug_assert_eq!(self.dynstr_offset, 0); + if !self.need_dynstr { + return; + } + // Start with null string. 
+ self.dynstr_data = vec![0]; + self.dynstr.write(1, &mut self.dynstr_data); + self.dynstr_offset = self.reserve(self.dynstr_data.len(), 1); + } + + /// Write the dynamic string table. + /// + /// This function does nothing if the section was not reserved. + pub fn write_dynstr(&mut self) { + if self.dynstr_offset == 0 { + return; + } + debug_assert_eq!(self.dynstr_offset, self.buffer.len()); + self.buffer.write_bytes(&self.dynstr_data); + } + + /// Reserve the section index for the dynamic string table. + /// + /// This must be called before [`Self::reserve_section_headers`]. + pub fn reserve_dynstr_section_index(&mut self) -> SectionIndex { + debug_assert_eq!(self.dynstr_index, SectionIndex(0)); + self.dynstr_str_id = Some(self.add_section_name(&b".dynstr"[..])); + self.dynstr_index = self.reserve_section_index(); + self.dynstr_index + } + + /// Return the section index of the dynamic string table. + pub fn dynstr_index(&mut self) -> SectionIndex { + self.dynstr_index + } + + /// Write the section header for the dynamic string table. + /// + /// This function does nothing if the section index was not reserved. + pub fn write_dynstr_section_header(&mut self, sh_addr: u64) { + if self.dynstr_index == SectionIndex(0) { + return; + } + self.write_section_header(&SectionHeader { + name: self.dynstr_str_id, + sh_type: elf::SHT_STRTAB, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.dynstr_offset as u64, + sh_size: self.dynstr_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }); + } + + /// Reserve the null dynamic symbol table entry. + /// + /// This will be stored in the `.dynsym` section. + /// + /// The null dynamic symbol table entry is usually automatically reserved, + /// but this can be used to force an empty dynamic symbol table. + /// + /// This must be called before [`Self::reserve_dynsym`]. 
+ pub fn reserve_null_dynamic_symbol_index(&mut self) -> SymbolIndex { + debug_assert_eq!(self.dynsym_offset, 0); + debug_assert_eq!(self.dynsym_num, 0); + self.dynsym_num = 1; + // The symtab must link to a strtab. + self.need_dynstr = true; + SymbolIndex(0) + } + + /// Reserve a dynamic symbol table entry. + /// + /// This will be stored in the `.dynsym` section. + /// + /// Automatically also reserves the null symbol if required. + /// Callers may assume that the returned indices will be sequential + /// starting at 1. + /// + /// This must be called before [`Self::reserve_dynsym`]. + pub fn reserve_dynamic_symbol_index(&mut self) -> SymbolIndex { + debug_assert_eq!(self.dynsym_offset, 0); + if self.dynsym_num == 0 { + self.dynsym_num = 1; + // The symtab must link to a strtab. + self.need_dynstr = true; + } + let index = self.dynsym_num; + self.dynsym_num += 1; + SymbolIndex(index) + } + + /// Return the number of reserved dynamic symbols. + /// + /// Includes the null symbol. + pub fn dynamic_symbol_count(&mut self) -> u32 { + self.dynsym_num + } + + /// Reserve the range for the dynamic symbol table. + /// + /// This range is used for a section named `.dynsym`. + /// + /// This function does nothing if no dynamic symbols were reserved. + /// This must be called after [`Self::reserve_dynamic_symbol_index`]. + pub fn reserve_dynsym(&mut self) { + debug_assert_eq!(self.dynsym_offset, 0); + if self.dynsym_num == 0 { + return; + } + self.dynsym_offset = self.reserve( + self.dynsym_num as usize * self.symbol_size(), + self.elf_align, + ); + } + + /// Write the null dynamic symbol. + /// + /// This must be the first dynamic symbol that is written. + /// This function does nothing if no dynamic symbols were reserved. 
+ pub fn write_null_dynamic_symbol(&mut self) { + if self.dynsym_num == 0 { + return; + } + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.dynsym_offset, self.buffer.len()); + if self.is_64 { + self.buffer.write(&elf::Sym64::<Endianness>::default()); + } else { + self.buffer.write(&elf::Sym32::<Endianness>::default()); + } + } + + /// Write a dynamic symbol. + pub fn write_dynamic_symbol(&mut self, sym: &Sym) { + let st_name = if let Some(name) = sym.name { + self.dynstr.get_offset(name) as u32 + } else { + 0 + }; + + let st_shndx = if let Some(section) = sym.section { + if section.0 >= elf::SHN_LORESERVE as u32 { + // TODO: we don't actually write out .dynsym_shndx yet. + // This is unlikely to be needed though. + elf::SHN_XINDEX + } else { + section.0 as u16 + } + } else { + sym.st_shndx + }; + + let endian = self.endian; + if self.is_64 { + let sym = elf::Sym64 { + st_name: U32::new(endian, st_name), + st_info: sym.st_info, + st_other: sym.st_other, + st_shndx: U16::new(endian, st_shndx), + st_value: U64::new(endian, sym.st_value), + st_size: U64::new(endian, sym.st_size), + }; + self.buffer.write(&sym); + } else { + let sym = elf::Sym32 { + st_name: U32::new(endian, st_name), + st_info: sym.st_info, + st_other: sym.st_other, + st_shndx: U16::new(endian, st_shndx), + st_value: U32::new(endian, sym.st_value as u32), + st_size: U32::new(endian, sym.st_size as u32), + }; + self.buffer.write(&sym); + } + } + + /// Reserve the section index for the dynamic symbol table. + /// + /// This must be called before [`Self::reserve_section_headers`]. + pub fn reserve_dynsym_section_index(&mut self) -> SectionIndex { + debug_assert_eq!(self.dynsym_index, SectionIndex(0)); + self.dynsym_str_id = Some(self.add_section_name(&b".dynsym"[..])); + self.dynsym_index = self.reserve_section_index(); + self.dynsym_index + } + + /// Return the section index of the dynamic symbol table. 
+ pub fn dynsym_index(&mut self) -> SectionIndex { + self.dynsym_index + } + + /// Write the section header for the dynamic symbol table. + /// + /// This function does nothing if the section index was not reserved. + pub fn write_dynsym_section_header(&mut self, sh_addr: u64, num_local: u32) { + if self.dynsym_index == SectionIndex(0) { + return; + } + self.write_section_header(&SectionHeader { + name: self.dynsym_str_id, + sh_type: elf::SHT_DYNSYM, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.dynsym_offset as u64, + sh_size: self.dynsym_num as u64 * self.symbol_size() as u64, + sh_link: self.dynstr_index.0, + sh_info: num_local, + sh_addralign: self.elf_align as u64, + sh_entsize: self.symbol_size() as u64, + }); + } + + fn dyn_size(&self) -> usize { + if self.is_64 { + mem::size_of::<elf::Dyn64<Endianness>>() + } else { + mem::size_of::<elf::Dyn32<Endianness>>() + } + } + + /// Reserve the range for the `.dynamic` section. + /// + /// This function does nothing if `dynamic_num` is zero. + pub fn reserve_dynamic(&mut self, dynamic_num: usize) { + debug_assert_eq!(self.dynamic_offset, 0); + if dynamic_num == 0 { + return; + } + self.dynamic_num = dynamic_num; + self.dynamic_offset = self.reserve(dynamic_num * self.dyn_size(), self.elf_align); + } + + /// Write alignment padding bytes prior to the `.dynamic` section. + /// + /// This function does nothing if the section was not reserved. + pub fn write_align_dynamic(&mut self) { + if self.dynamic_offset == 0 { + return; + } + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.dynamic_offset, self.buffer.len()); + } + + /// Write a dynamic string entry. + pub fn write_dynamic_string(&mut self, tag: u32, id: StringId) { + self.write_dynamic(tag, self.dynstr.get_offset(id) as u64); + } + + /// Write a dynamic value entry. 
+ pub fn write_dynamic(&mut self, d_tag: u32, d_val: u64) { + debug_assert!(self.dynamic_offset <= self.buffer.len()); + let endian = self.endian; + if self.is_64 { + let d = elf::Dyn64 { + d_tag: U64::new(endian, d_tag.into()), + d_val: U64::new(endian, d_val), + }; + self.buffer.write(&d); + } else { + let d = elf::Dyn32 { + d_tag: U32::new(endian, d_tag), + d_val: U32::new(endian, d_val as u32), + }; + self.buffer.write(&d); + } + debug_assert!( + self.dynamic_offset + self.dynamic_num * self.dyn_size() >= self.buffer.len() + ); + } + + /// Reserve the section index for the dynamic table. + pub fn reserve_dynamic_section_index(&mut self) -> SectionIndex { + debug_assert!(self.dynamic_str_id.is_none()); + self.dynamic_str_id = Some(self.add_section_name(&b".dynamic"[..])); + self.reserve_section_index() + } + + /// Write the section header for the dynamic table. + /// + /// This function does nothing if the section index was not reserved. + pub fn write_dynamic_section_header(&mut self, sh_addr: u64) { + if self.dynamic_str_id.is_none() { + return; + } + self.write_section_header(&SectionHeader { + name: self.dynamic_str_id, + sh_type: elf::SHT_DYNAMIC, + sh_flags: (elf::SHF_WRITE | elf::SHF_ALLOC).into(), + sh_addr, + sh_offset: self.dynamic_offset as u64, + sh_size: (self.dynamic_num * self.dyn_size()) as u64, + sh_link: self.dynstr_index.0, + sh_info: 0, + sh_addralign: self.elf_align as u64, + sh_entsize: self.dyn_size() as u64, + }); + } + + fn rel_size(&self, is_rela: bool) -> usize { + if self.is_64 { + if is_rela { + mem::size_of::<elf::Rela64<Endianness>>() + } else { + mem::size_of::<elf::Rel64<Endianness>>() + } + } else { + if is_rela { + mem::size_of::<elf::Rela32<Endianness>>() + } else { + mem::size_of::<elf::Rel32<Endianness>>() + } + } + } + + /// Reserve a file range for a SysV hash section. + /// + /// `symbol_count` is the number of symbols in the hash, + /// not the total number of symbols. 
+ pub fn reserve_hash(&mut self, bucket_count: u32, chain_count: u32) { + self.hash_size = mem::size_of::<elf::HashHeader<Endianness>>() + + bucket_count as usize * 4 + + chain_count as usize * 4; + self.hash_offset = self.reserve(self.hash_size, self.elf_align); + } + + /// Write a SysV hash section. + /// + /// `chain_count` is the number of symbols in the hash. + /// The argument to `hash` will be in the range `0..chain_count`. + pub fn write_hash<F>(&mut self, bucket_count: u32, chain_count: u32, hash: F) + where + F: Fn(u32) -> Option<u32>, + { + let mut buckets = vec![U32::new(self.endian, 0); bucket_count as usize]; + let mut chains = vec![U32::new(self.endian, 0); chain_count as usize]; + for i in 0..chain_count { + if let Some(hash) = hash(i) { + let bucket = hash % bucket_count; + chains[i as usize] = buckets[bucket as usize]; + buckets[bucket as usize] = U32::new(self.endian, i); + } + } + + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.hash_offset, self.buffer.len()); + self.buffer.write(&elf::HashHeader { + bucket_count: U32::new(self.endian, bucket_count), + chain_count: U32::new(self.endian, chain_count), + }); + self.buffer.write_slice(&buckets); + self.buffer.write_slice(&chains); + } + + /// Reserve the section index for the SysV hash table. + pub fn reserve_hash_section_index(&mut self) -> SectionIndex { + debug_assert!(self.hash_str_id.is_none()); + self.hash_str_id = Some(self.add_section_name(&b".hash"[..])); + self.reserve_section_index() + } + + /// Write the section header for the SysV hash table. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_hash_section_header(&mut self, sh_addr: u64) { + if self.hash_str_id.is_none() { + return; + } + self.write_section_header(&SectionHeader { + name: self.hash_str_id, + sh_type: elf::SHT_HASH, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.hash_offset as u64, + sh_size: self.hash_size as u64, + sh_link: self.dynsym_index.0, + sh_info: 0, + sh_addralign: self.elf_align as u64, + sh_entsize: 4, + }); + } + + /// Reserve a file range for a GNU hash section. + /// + /// `symbol_count` is the number of symbols in the hash, + /// not the total number of symbols. + pub fn reserve_gnu_hash(&mut self, bloom_count: u32, bucket_count: u32, symbol_count: u32) { + self.gnu_hash_size = mem::size_of::<elf::GnuHashHeader<Endianness>>() + + bloom_count as usize * self.elf_align + + bucket_count as usize * 4 + + symbol_count as usize * 4; + self.gnu_hash_offset = self.reserve(self.gnu_hash_size, self.elf_align); + } + + /// Write a GNU hash section. + /// + /// `symbol_count` is the number of symbols in the hash. + /// The argument to `hash` will be in the range `0..symbol_count`. + /// + /// This requires that symbols are already sorted by bucket. + pub fn write_gnu_hash<F>( + &mut self, + symbol_base: u32, + bloom_shift: u32, + bloom_count: u32, + bucket_count: u32, + symbol_count: u32, + hash: F, + ) where + F: Fn(u32) -> u32, + { + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.gnu_hash_offset, self.buffer.len()); + self.buffer.write(&elf::GnuHashHeader { + bucket_count: U32::new(self.endian, bucket_count), + symbol_base: U32::new(self.endian, symbol_base), + bloom_count: U32::new(self.endian, bloom_count), + bloom_shift: U32::new(self.endian, bloom_shift), + }); + + // Calculate and write bloom filter. 
+ if self.is_64 { + let mut bloom_filters = vec![0; bloom_count as usize]; + for i in 0..symbol_count { + let h = hash(i); + bloom_filters[((h / 64) & (bloom_count - 1)) as usize] |= + 1 << (h % 64) | 1 << ((h >> bloom_shift) % 64); + } + for bloom_filter in bloom_filters { + self.buffer.write(&U64::new(self.endian, bloom_filter)); + } + } else { + let mut bloom_filters = vec![0; bloom_count as usize]; + for i in 0..symbol_count { + let h = hash(i); + bloom_filters[((h / 32) & (bloom_count - 1)) as usize] |= + 1 << (h % 32) | 1 << ((h >> bloom_shift) % 32); + } + for bloom_filter in bloom_filters { + self.buffer.write(&U32::new(self.endian, bloom_filter)); + } + } + + // Write buckets. + // + // This requires that symbols are already sorted by bucket. + let mut bucket = 0; + for i in 0..symbol_count { + let symbol_bucket = hash(i) % bucket_count; + while bucket < symbol_bucket { + self.buffer.write(&U32::new(self.endian, 0)); + bucket += 1; + } + if bucket == symbol_bucket { + self.buffer.write(&U32::new(self.endian, symbol_base + i)); + bucket += 1; + } + } + while bucket < bucket_count { + self.buffer.write(&U32::new(self.endian, 0)); + bucket += 1; + } + + // Write hash values. + for i in 0..symbol_count { + let mut h = hash(i); + if i == symbol_count - 1 || h % bucket_count != hash(i + 1) % bucket_count { + h |= 1; + } else { + h &= !1; + } + self.buffer.write(&U32::new(self.endian, h)); + } + } + + /// Reserve the section index for the GNU hash table. + pub fn reserve_gnu_hash_section_index(&mut self) -> SectionIndex { + debug_assert!(self.gnu_hash_str_id.is_none()); + self.gnu_hash_str_id = Some(self.add_section_name(&b".gnu.hash"[..])); + self.reserve_section_index() + } + + /// Write the section header for the GNU hash table. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_gnu_hash_section_header(&mut self, sh_addr: u64) { + if self.gnu_hash_str_id.is_none() { + return; + } + self.write_section_header(&SectionHeader { + name: self.gnu_hash_str_id, + sh_type: elf::SHT_GNU_HASH, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.gnu_hash_offset as u64, + sh_size: self.gnu_hash_size as u64, + sh_link: self.dynsym_index.0, + sh_info: 0, + sh_addralign: self.elf_align as u64, + sh_entsize: 0, + }); + } + + /// Reserve the range for the `.gnu.version` section. + /// + /// This function does nothing if no dynamic symbols were reserved. + pub fn reserve_gnu_versym(&mut self) { + debug_assert_eq!(self.gnu_versym_offset, 0); + if self.dynsym_num == 0 { + return; + } + self.gnu_versym_offset = self.reserve(self.dynsym_num as usize * 2, 2); + } + + /// Write the null symbol version entry. + /// + /// This must be the first symbol version that is written. + /// This function does nothing if no dynamic symbols were reserved. + pub fn write_null_gnu_versym(&mut self) { + if self.dynsym_num == 0 { + return; + } + util::write_align(self.buffer, 2); + debug_assert_eq!(self.gnu_versym_offset, self.buffer.len()); + self.write_gnu_versym(0); + } + + /// Write a symbol version entry. + pub fn write_gnu_versym(&mut self, versym: u16) { + self.buffer.write(&U16::new(self.endian, versym)); + } + + /// Reserve the section index for the `.gnu.version` section. + pub fn reserve_gnu_versym_section_index(&mut self) -> SectionIndex { + debug_assert!(self.gnu_versym_str_id.is_none()); + self.gnu_versym_str_id = Some(self.add_section_name(&b".gnu.version"[..])); + self.reserve_section_index() + } + + /// Write the section header for the `.gnu.version` section. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_gnu_versym_section_header(&mut self, sh_addr: u64) { + if self.gnu_versym_str_id.is_none() { + return; + } + self.write_section_header(&SectionHeader { + name: self.gnu_versym_str_id, + sh_type: elf::SHT_GNU_VERSYM, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.gnu_versym_offset as u64, + sh_size: self.dynsym_num as u64 * 2, + sh_link: self.dynsym_index.0, + sh_info: 0, + sh_addralign: 2, + sh_entsize: 2, + }); + } + + /// Reserve the range for the `.gnu.version_d` section. + pub fn reserve_gnu_verdef(&mut self, verdef_count: usize, verdaux_count: usize) { + debug_assert_eq!(self.gnu_verdef_offset, 0); + if verdef_count == 0 { + return; + } + self.gnu_verdef_size = verdef_count * mem::size_of::<elf::Verdef<Endianness>>() + + verdaux_count * mem::size_of::<elf::Verdaux<Endianness>>(); + self.gnu_verdef_offset = self.reserve(self.gnu_verdef_size, self.elf_align); + self.gnu_verdef_count = verdef_count as u16; + self.gnu_verdef_remaining = self.gnu_verdef_count; + } + + /// Write alignment padding bytes prior to a `.gnu.version_d` section. + pub fn write_align_gnu_verdef(&mut self) { + if self.gnu_verdef_offset == 0 { + return; + } + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.gnu_verdef_offset, self.buffer.len()); + } + + /// Write a version definition entry. 
+ pub fn write_gnu_verdef(&mut self, verdef: &Verdef) { + debug_assert_ne!(self.gnu_verdef_remaining, 0); + self.gnu_verdef_remaining -= 1; + let vd_next = if self.gnu_verdef_remaining == 0 { + 0 + } else { + mem::size_of::<elf::Verdef<Endianness>>() as u32 + + verdef.aux_count as u32 * mem::size_of::<elf::Verdaux<Endianness>>() as u32 + }; + + self.gnu_verdaux_remaining = verdef.aux_count; + let vd_aux = if verdef.aux_count == 0 { + 0 + } else { + mem::size_of::<elf::Verdef<Endianness>>() as u32 + }; + + self.buffer.write(&elf::Verdef { + vd_version: U16::new(self.endian, verdef.version), + vd_flags: U16::new(self.endian, verdef.flags), + vd_ndx: U16::new(self.endian, verdef.index), + vd_cnt: U16::new(self.endian, verdef.aux_count), + vd_hash: U32::new(self.endian, elf::hash(self.dynstr.get_string(verdef.name))), + vd_aux: U32::new(self.endian, vd_aux), + vd_next: U32::new(self.endian, vd_next), + }); + self.write_gnu_verdaux(verdef.name); + } + + /// Write a version definition auxiliary entry. + pub fn write_gnu_verdaux(&mut self, name: StringId) { + debug_assert_ne!(self.gnu_verdaux_remaining, 0); + self.gnu_verdaux_remaining -= 1; + let vda_next = if self.gnu_verdaux_remaining == 0 { + 0 + } else { + mem::size_of::<elf::Verdaux<Endianness>>() as u32 + }; + self.buffer.write(&elf::Verdaux { + vda_name: U32::new(self.endian, self.dynstr.get_offset(name) as u32), + vda_next: U32::new(self.endian, vda_next), + }); + } + + /// Reserve the section index for the `.gnu.version_d` section. + pub fn reserve_gnu_verdef_section_index(&mut self) -> SectionIndex { + debug_assert!(self.gnu_verdef_str_id.is_none()); + self.gnu_verdef_str_id = Some(self.add_section_name(&b".gnu.version_d"[..])); + self.reserve_section_index() + } + + /// Write the section header for the `.gnu.version_d` section. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_gnu_verdef_section_header(&mut self, sh_addr: u64) { + if self.gnu_verdef_str_id.is_none() { + return; + } + self.write_section_header(&SectionHeader { + name: self.gnu_verdef_str_id, + sh_type: elf::SHT_GNU_VERDEF, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.gnu_verdef_offset as u64, + sh_size: self.gnu_verdef_size as u64, + sh_link: self.dynstr_index.0, + sh_info: self.gnu_verdef_count.into(), + sh_addralign: self.elf_align as u64, + sh_entsize: 0, + }); + } + + /// Reserve the range for the `.gnu.version_r` section. + pub fn reserve_gnu_verneed(&mut self, verneed_count: usize, vernaux_count: usize) { + debug_assert_eq!(self.gnu_verneed_offset, 0); + if verneed_count == 0 { + return; + } + self.gnu_verneed_size = verneed_count * mem::size_of::<elf::Verneed<Endianness>>() + + vernaux_count * mem::size_of::<elf::Vernaux<Endianness>>(); + self.gnu_verneed_offset = self.reserve(self.gnu_verneed_size, self.elf_align); + self.gnu_verneed_count = verneed_count as u16; + self.gnu_verneed_remaining = self.gnu_verneed_count; + } + + /// Write alignment padding bytes prior to a `.gnu.version_r` section. + pub fn write_align_gnu_verneed(&mut self) { + if self.gnu_verneed_offset == 0 { + return; + } + util::write_align(self.buffer, self.elf_align); + debug_assert_eq!(self.gnu_verneed_offset, self.buffer.len()); + } + + /// Write a version need entry. 
+ pub fn write_gnu_verneed(&mut self, verneed: &Verneed) { + debug_assert_ne!(self.gnu_verneed_remaining, 0); + self.gnu_verneed_remaining -= 1; + let vn_next = if self.gnu_verneed_remaining == 0 { + 0 + } else { + mem::size_of::<elf::Verneed<Endianness>>() as u32 + + verneed.aux_count as u32 * mem::size_of::<elf::Vernaux<Endianness>>() as u32 + }; + + self.gnu_vernaux_remaining = verneed.aux_count; + let vn_aux = if verneed.aux_count == 0 { + 0 + } else { + mem::size_of::<elf::Verneed<Endianness>>() as u32 + }; + + self.buffer.write(&elf::Verneed { + vn_version: U16::new(self.endian, verneed.version), + vn_cnt: U16::new(self.endian, verneed.aux_count), + vn_file: U32::new(self.endian, self.dynstr.get_offset(verneed.file) as u32), + vn_aux: U32::new(self.endian, vn_aux), + vn_next: U32::new(self.endian, vn_next), + }); + } + + /// Write a version need auxiliary entry. + pub fn write_gnu_vernaux(&mut self, vernaux: &Vernaux) { + debug_assert_ne!(self.gnu_vernaux_remaining, 0); + self.gnu_vernaux_remaining -= 1; + let vna_next = if self.gnu_vernaux_remaining == 0 { + 0 + } else { + mem::size_of::<elf::Vernaux<Endianness>>() as u32 + }; + self.buffer.write(&elf::Vernaux { + vna_hash: U32::new(self.endian, elf::hash(self.dynstr.get_string(vernaux.name))), + vna_flags: U16::new(self.endian, vernaux.flags), + vna_other: U16::new(self.endian, vernaux.index), + vna_name: U32::new(self.endian, self.dynstr.get_offset(vernaux.name) as u32), + vna_next: U32::new(self.endian, vna_next), + }); + } + + /// Reserve the section index for the `.gnu.version_r` section. + pub fn reserve_gnu_verneed_section_index(&mut self) -> SectionIndex { + debug_assert!(self.gnu_verneed_str_id.is_none()); + self.gnu_verneed_str_id = Some(self.add_section_name(&b".gnu.version_r"[..])); + self.reserve_section_index() + } + + /// Write the section header for the `.gnu.version_r` section. + /// + /// This function does nothing if the section index was not reserved. 
+ pub fn write_gnu_verneed_section_header(&mut self, sh_addr: u64) { + if self.gnu_verneed_str_id.is_none() { + return; + } + self.write_section_header(&SectionHeader { + name: self.gnu_verneed_str_id, + sh_type: elf::SHT_GNU_VERNEED, + sh_flags: elf::SHF_ALLOC.into(), + sh_addr, + sh_offset: self.gnu_verneed_offset as u64, + sh_size: self.gnu_verneed_size as u64, + sh_link: self.dynstr_index.0, + sh_info: self.gnu_verneed_count.into(), + sh_addralign: self.elf_align as u64, + sh_entsize: 0, + }); + } + + /// Reserve a file range for the given number of relocations. + /// + /// Returns the offset of the range. + pub fn reserve_relocations(&mut self, count: usize, is_rela: bool) -> usize { + self.reserve(count * self.rel_size(is_rela), self.elf_align) + } + + /// Write alignment padding bytes prior to a relocation section. + pub fn write_align_relocation(&mut self) { + util::write_align(self.buffer, self.elf_align); + } + + /// Write a relocation. + pub fn write_relocation(&mut self, is_rela: bool, rel: &Rel) { + let endian = self.endian; + if self.is_64 { + if is_rela { + let rel = elf::Rela64 { + r_offset: U64::new(endian, rel.r_offset), + r_info: elf::Rela64::r_info(endian, self.is_mips64el, rel.r_sym, rel.r_type), + r_addend: I64::new(endian, rel.r_addend), + }; + self.buffer.write(&rel); + } else { + let rel = elf::Rel64 { + r_offset: U64::new(endian, rel.r_offset), + r_info: elf::Rel64::r_info(endian, rel.r_sym, rel.r_type), + }; + self.buffer.write(&rel); + } + } else { + if is_rela { + let rel = elf::Rela32 { + r_offset: U32::new(endian, rel.r_offset as u32), + r_info: elf::Rel32::r_info(endian, rel.r_sym, rel.r_type as u8), + r_addend: I32::new(endian, rel.r_addend as i32), + }; + self.buffer.write(&rel); + } else { + let rel = elf::Rel32 { + r_offset: U32::new(endian, rel.r_offset as u32), + r_info: elf::Rel32::r_info(endian, rel.r_sym, rel.r_type as u8), + }; + self.buffer.write(&rel); + } + } + } + + /// Write the section header for a relocation section. 
+ /// + /// `section` is the index of the section the relocations apply to, + /// or 0 if none. + /// + /// `symtab` is the index of the symbol table the relocations refer to, + /// or 0 if none. + /// + /// `offset` is the file offset of the relocations. + pub fn write_relocation_section_header( + &mut self, + name: StringId, + section: SectionIndex, + symtab: SectionIndex, + offset: usize, + count: usize, + is_rela: bool, + ) { + self.write_section_header(&SectionHeader { + name: Some(name), + sh_type: if is_rela { elf::SHT_RELA } else { elf::SHT_REL }, + sh_flags: elf::SHF_INFO_LINK.into(), + sh_addr: 0, + sh_offset: offset as u64, + sh_size: (count * self.rel_size(is_rela)) as u64, + sh_link: symtab.0, + sh_info: section.0, + sh_addralign: self.elf_align as u64, + sh_entsize: self.rel_size(is_rela) as u64, + }); + } + + /// Reserve a file range for a COMDAT section. + /// + /// `count` is the number of sections in the COMDAT group. + /// + /// Returns the offset of the range. + pub fn reserve_comdat(&mut self, count: usize) -> usize { + self.reserve((count + 1) * 4, 4) + } + + /// Write `GRP_COMDAT` at the start of the COMDAT section. + pub fn write_comdat_header(&mut self) { + util::write_align(self.buffer, 4); + self.buffer.write(&U32::new(self.endian, elf::GRP_COMDAT)); + } + + /// Write an entry in a COMDAT section. + pub fn write_comdat_entry(&mut self, entry: SectionIndex) { + self.buffer.write(&U32::new(self.endian, entry.0)); + } + + /// Write the section header for a COMDAT section. 
pub fn write_comdat_section_header(
    &mut self,
    name: StringId,
    symtab: SectionIndex,
    symbol: SymbolIndex,
    offset: usize,
    count: usize,
) {
    // The section holds one leading word plus one 4-byte word per member.
    let size_in_bytes = (count + 1) * 4;
    let header = SectionHeader {
        name: Some(name),
        sh_type: elf::SHT_GROUP,
        sh_flags: 0,
        sh_addr: 0,
        sh_offset: offset as u64,
        sh_size: size_in_bytes as u64,
        sh_link: symtab.0,
        sh_info: symbol.0,
        sh_addralign: 4,
        sh_entsize: 4,
    };
    self.write_section_header(&header);
}
}

/// Native endian version of [`elf::FileHeader64`].
///
/// Every field is a plain integer.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct FileHeader {
    pub os_abi: u8,
    pub abi_version: u8,
    pub e_type: u16,
    pub e_machine: u16,
    pub e_entry: u64,
    pub e_flags: u32,
}

/// Native endian version of [`elf::ProgramHeader64`].
///
/// Every field is a plain integer.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct ProgramHeader {
    pub p_type: u32,
    pub p_flags: u32,
    pub p_offset: u64,
    pub p_vaddr: u64,
    pub p_paddr: u64,
    pub p_filesz: u64,
    pub p_memsz: u64,
    pub p_align: u64,
}

/// Native endian version of [`elf::SectionHeader64`].
///
/// The section name is given as an optional [`StringId`] rather than a raw
/// string table offset.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct SectionHeader {
    pub name: Option<StringId>,
    pub sh_type: u32,
    pub sh_flags: u64,
    pub sh_addr: u64,
    pub sh_offset: u64,
    pub sh_size: u64,
    pub sh_link: u32,
    pub sh_info: u32,
    pub sh_addralign: u64,
    pub sh_entsize: u64,
}

/// Native endian version of [`elf::Sym64`].
///
/// The symbol name is given as an optional [`StringId`], and the section
/// may be given as an optional [`SectionIndex`] alongside `st_shndx`.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Sym {
    pub name: Option<StringId>,
    pub section: Option<SectionIndex>,
    pub st_info: u8,
    pub st_other: u8,
    pub st_shndx: u16,
    pub st_value: u64,
    pub st_size: u64,
}

/// Unified native endian version of [`elf::Rel64`] and [`elf::Rela64`].
///
/// The symbol index and relocation type are kept as separate fields.
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Rel {
    pub r_offset: u64,
    pub r_sym: u32,
    pub r_type: u32,
    pub r_addend: i64,
}

/// Information required for writing [`elf::Verdef`].
+#[allow(missing_docs)] +#[derive(Debug, Clone)] +pub struct Verdef { + pub version: u16, + pub flags: u16, + pub index: u16, + pub aux_count: u16, + /// The name for the first [`elf::Verdaux`] entry. + pub name: StringId, +} + +/// Information required for writing [`elf::Verneed`]. +#[allow(missing_docs)] +#[derive(Debug, Clone)] +pub struct Verneed { + pub version: u16, + pub aux_count: u16, + pub file: StringId, +} + +/// Information required for writing [`elf::Vernaux`]. +#[allow(missing_docs)] +#[derive(Debug, Clone)] +pub struct Vernaux { + pub flags: u16, + pub index: u16, + pub name: StringId, +} diff --git a/third_party/rust/object/src/write/macho.rs b/third_party/rust/object/src/write/macho.rs new file mode 100644 index 0000000000..8ef722fae6 --- /dev/null +++ b/third_party/rust/object/src/write/macho.rs @@ -0,0 +1,886 @@ +use core::mem; + +use crate::endian::*; +use crate::macho; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; +use crate::AddressSize; + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + address: u64, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + emit: bool, + index: usize, + str_id: Option<StringId>, +} + +impl<'a> Object<'a> { + pub(crate) fn macho_set_subsections_via_symbols(&mut self) { + let flags = match self.flags { + FileFlags::MachO { flags } => flags, + _ => 0, + }; + self.flags = FileFlags::MachO { + flags: flags | macho::MH_SUBSECTIONS_VIA_SYMBOLS, + }; + } + + pub(crate) fn macho_segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match segment { + StandardSegment::Text => &b"__TEXT"[..], + StandardSegment::Data => &b"__DATA"[..], + StandardSegment::Debug => &b"__DWARF"[..], + } + } + + pub(crate) fn macho_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&b"__TEXT"[..], &b"__text"[..], 
SectionKind::Text), + StandardSection::Data => (&b"__DATA"[..], &b"__data"[..], SectionKind::Data), + StandardSection::ReadOnlyData => { + (&b"__TEXT"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + StandardSection::ReadOnlyDataWithRel => { + (&b"__DATA"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + StandardSection::ReadOnlyString => ( + &b"__TEXT"[..], + &b"__cstring"[..], + SectionKind::ReadOnlyString, + ), + StandardSection::UninitializedData => ( + &b"__DATA"[..], + &b"__bss"[..], + SectionKind::UninitializedData, + ), + StandardSection::Tls => (&b"__DATA"[..], &b"__thread_data"[..], SectionKind::Tls), + StandardSection::UninitializedTls => ( + &b"__DATA"[..], + &b"__thread_bss"[..], + SectionKind::UninitializedTls, + ), + StandardSection::TlsVariables => ( + &b"__DATA"[..], + &b"__thread_vars"[..], + SectionKind::TlsVariables, + ), + StandardSection::Common => (&b"__DATA"[..], &b"__common"[..], SectionKind::Common), + } + } + + fn macho_tlv_bootstrap(&mut self) -> SymbolId { + match self.tlv_bootstrap { + Some(id) => id, + None => { + let id = self.add_symbol(Symbol { + name: b"_tlv_bootstrap".to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Dynamic, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }); + self.tlv_bootstrap = Some(id); + id + } + } + } + + /// Create the `__thread_vars` entry for a TLS variable. + /// + /// The symbol given by `symbol_id` will be updated to point to this entry. + /// + /// A new `SymbolId` will be returned. The caller must update this symbol + /// to point to the initializer. + /// + /// If `symbol_id` is not for a TLS variable, then it is returned unchanged. + pub(crate) fn macho_add_thread_var(&mut self, symbol_id: SymbolId) -> SymbolId { + let symbol = self.symbol_mut(symbol_id); + if symbol.kind != SymbolKind::Tls { + return symbol_id; + } + + // Create the initializer symbol. 
+ let mut name = symbol.name.clone(); + name.extend_from_slice(b"$tlv$init"); + let init_symbol_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Tls, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }); + + // Add the tlv entry. + // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer - used when mapped by the runtime + // - pointer to symbol initializer + let section = self.section_id(StandardSection::TlsVariables); + let address_size = self.architecture.address_size().unwrap().bytes(); + let size = u64::from(address_size) * 3; + let data = vec![0; size as usize]; + let offset = self.append_section_data(section, &data, u64::from(address_size)); + + let tlv_bootstrap = self.macho_tlv_bootstrap(); + self.add_relocation( + section, + Relocation { + offset, + size: address_size * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: tlv_bootstrap, + addend: 0, + }, + ) + .unwrap(); + self.add_relocation( + section, + Relocation { + offset: offset + u64::from(address_size) * 2, + size: address_size * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: init_symbol_id, + addend: 0, + }, + ) + .unwrap(); + + // Update the symbol to point to the tlv. 
+ let symbol = self.symbol_mut(symbol_id); + symbol.value = offset; + symbol.size = size; + symbol.section = SymbolSection::Section(section); + + init_symbol_id + } + + pub(crate) fn macho_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + let constant = match relocation.kind { + // AArch64Call relocations have special handling for the addend, so don't adjust it + RelocationKind::Relative if relocation.encoding == RelocationEncoding::AArch64Call => 0, + RelocationKind::Relative + | RelocationKind::GotRelative + | RelocationKind::PltRelative => relocation.addend + 4, + _ => relocation.addend, + }; + // Aarch64 relocs of these sizes act as if they are double-word length + if self.architecture == Architecture::Aarch64 && matches!(relocation.size, 12 | 21 | 26) { + relocation.size = 32; + } + relocation.addend -= constant; + constant + } + + pub(crate) fn macho_write(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { + let address_size = self.architecture.address_size().unwrap(); + let endian = self.endian; + let macho32 = MachO32 { endian }; + let macho64 = MachO64 { endian }; + let macho: &dyn MachO = match address_size { + AddressSize::U8 | AddressSize::U16 | AddressSize::U32 => &macho32, + AddressSize::U64 => &macho64, + }; + let pointer_align = address_size.bytes() as usize; + + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + + // Calculate size of Mach-O header. + offset += macho.mach_header_size(); + + // Calculate size of commands. + let mut ncmds = 0; + let command_offset = offset; + + // Calculate size of segment command and section headers. + let segment_command_offset = offset; + let segment_command_len = + macho.segment_command_size() + self.sections.len() * macho.section_header_size(); + offset += segment_command_len; + ncmds += 1; + + // Calculate size of symtab command. 
+ let symtab_command_offset = offset; + let symtab_command_len = mem::size_of::<macho::SymtabCommand<Endianness>>(); + offset += symtab_command_len; + ncmds += 1; + + let sizeofcmds = offset - command_offset; + + // Calculate size of section data. + let mut segment_file_offset = None; + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + let mut address = 0; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].index = 1 + index; + if !section.is_bss() { + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + if segment_file_offset.is_none() { + segment_file_offset = Some(offset); + } + offset += len; + } else { + section_offsets[index].offset = offset; + } + address = align_u64(address, section.align); + section_offsets[index].address = address; + address += section.size; + } + } + for (index, section) in self.sections.iter().enumerate() { + if section.kind.is_bss() { + assert!(section.data.is_empty()); + address = align_u64(address, section.align); + section_offsets[index].address = address; + address += section.size; + } + } + let segment_file_offset = segment_file_offset.unwrap_or(offset); + let segment_file_size = offset - segment_file_offset; + debug_assert!(segment_file_size as u64 <= address); + + // Count symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut nsyms = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + // The unified API allows creating symbols that we don't emit, so filter + // them out here. + // + // Since we don't actually emit the symbol kind, we validate it here too. 
+ match symbol.kind { + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => {} + SymbolKind::File | SymbolKind::Section => continue, + SymbolKind::Unknown => { + if symbol.section != SymbolSection::Undefined { + return Err(Error(format!( + "defined symbol `{}` with unknown kind", + symbol.name().unwrap_or(""), + ))); + } + } + SymbolKind::Null | SymbolKind::Label => { + return Err(Error(format!( + "unimplemented symbol `{}` kind {:?}", + symbol.name().unwrap_or(""), + symbol.kind + ))); + } + } + symbol_offsets[index].emit = true; + symbol_offsets[index].index = nsyms; + nsyms += 1; + if !symbol.name.is_empty() { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = nsyms * macho.nlist_size(); + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + // Start with null name. + let mut strtab_data = vec![0]; + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + + // Calculate size of relocations. + for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, 4); + section_offsets[index].reloc_offset = offset; + let len = count * mem::size_of::<macho::Relocation<Endianness>>(); + offset += len; + } + } + + // Start writing. + buffer + .reserve(offset) + .map_err(|_| Error(String::from("Cannot allocate buffer")))?; + + // Write file header. 
+ let (cputype, cpusubtype) = match self.architecture { + Architecture::Arm => (macho::CPU_TYPE_ARM, macho::CPU_SUBTYPE_ARM_ALL), + Architecture::Aarch64 => (macho::CPU_TYPE_ARM64, macho::CPU_SUBTYPE_ARM64_ALL), + Architecture::I386 => (macho::CPU_TYPE_X86, macho::CPU_SUBTYPE_I386_ALL), + Architecture::X86_64 => (macho::CPU_TYPE_X86_64, macho::CPU_SUBTYPE_X86_64_ALL), + Architecture::PowerPc => (macho::CPU_TYPE_POWERPC, macho::CPU_SUBTYPE_POWERPC_ALL), + Architecture::PowerPc64 => (macho::CPU_TYPE_POWERPC64, macho::CPU_SUBTYPE_POWERPC_ALL), + _ => { + return Err(Error(format!( + "unimplemented architecture {:?}", + self.architecture + ))); + } + }; + + let flags = match self.flags { + FileFlags::MachO { flags } => flags, + _ => 0, + }; + macho.write_mach_header( + buffer, + MachHeader { + cputype, + cpusubtype, + filetype: macho::MH_OBJECT, + ncmds, + sizeofcmds: sizeofcmds as u32, + flags, + }, + ); + + // Write segment command. + debug_assert_eq!(segment_command_offset, buffer.len()); + macho.write_segment_command( + buffer, + SegmentCommand { + cmdsize: segment_command_len as u32, + segname: [0; 16], + vmaddr: 0, + vmsize: address, + fileoff: segment_file_offset as u64, + filesize: segment_file_size as u64, + maxprot: macho::VM_PROT_READ | macho::VM_PROT_WRITE | macho::VM_PROT_EXECUTE, + initprot: macho::VM_PROT_READ | macho::VM_PROT_WRITE | macho::VM_PROT_EXECUTE, + nsects: self.sections.len() as u32, + flags: 0, + }, + ); + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + let mut sectname = [0; 16]; + sectname + .get_mut(..section.name.len()) + .ok_or_else(|| { + Error(format!( + "section name `{}` is too long", + section.name().unwrap_or(""), + )) + })? + .copy_from_slice(§ion.name); + let mut segname = [0; 16]; + segname + .get_mut(..section.segment.len()) + .ok_or_else(|| { + Error(format!( + "segment name `{}` is too long", + section.segment().unwrap_or(""), + )) + })? 
+ .copy_from_slice(§ion.segment); + let flags = if let SectionFlags::MachO { flags } = section.flags { + flags + } else { + match section.kind { + SectionKind::Text => { + macho::S_ATTR_PURE_INSTRUCTIONS | macho::S_ATTR_SOME_INSTRUCTIONS + } + SectionKind::Data => 0, + SectionKind::ReadOnlyData => 0, + SectionKind::ReadOnlyString => macho::S_CSTRING_LITERALS, + SectionKind::UninitializedData | SectionKind::Common => macho::S_ZEROFILL, + SectionKind::Tls => macho::S_THREAD_LOCAL_REGULAR, + SectionKind::UninitializedTls => macho::S_THREAD_LOCAL_ZEROFILL, + SectionKind::TlsVariables => macho::S_THREAD_LOCAL_VARIABLES, + SectionKind::Debug => macho::S_ATTR_DEBUG, + SectionKind::OtherString => macho::S_CSTRING_LITERALS, + SectionKind::Other | SectionKind::Linker | SectionKind::Metadata => 0, + SectionKind::Note | SectionKind::Unknown | SectionKind::Elf(_) => { + return Err(Error(format!( + "unimplemented section `{}` kind {:?}", + section.name().unwrap_or(""), + section.kind + ))); + } + } + }; + macho.write_section( + buffer, + SectionHeader { + sectname, + segname, + addr: section_offsets[index].address, + size: section.size, + offset: section_offsets[index].offset as u32, + align: section.align.trailing_zeros(), + reloff: section_offsets[index].reloc_offset as u32, + nreloc: section.relocations.len() as u32, + flags, + }, + ); + } + + // Write symtab command. + debug_assert_eq!(symtab_command_offset, buffer.len()); + let symtab_command = macho::SymtabCommand { + cmd: U32::new(endian, macho::LC_SYMTAB), + cmdsize: U32::new(endian, symtab_command_len as u32), + symoff: U32::new(endian, symtab_offset as u32), + nsyms: U32::new(endian, nsyms as u32), + stroff: U32::new(endian, strtab_offset as u32), + strsize: U32::new(endian, strtab_data.len() as u32), + }; + buffer.write(&symtab_command); + + // Write section data. 
+ for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.write_bytes(§ion.data); + } + } + debug_assert_eq!(segment_file_offset + segment_file_size, buffer.len()); + + // Write symtab. + write_align(buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol_offsets[index].emit { + continue; + } + // TODO: N_STAB + let (mut n_type, n_sect) = match symbol.section { + SymbolSection::Undefined => (macho::N_UNDF | macho::N_EXT, 0), + SymbolSection::Absolute => (macho::N_ABS, 0), + SymbolSection::Section(id) => (macho::N_SECT, id.0 + 1), + SymbolSection::None | SymbolSection::Common => { + return Err(Error(format!( + "unimplemented symbol `{}` section {:?}", + symbol.name().unwrap_or(""), + symbol.section + ))); + } + }; + match symbol.scope { + SymbolScope::Unknown | SymbolScope::Compilation => {} + SymbolScope::Linkage => { + n_type |= macho::N_EXT | macho::N_PEXT; + } + SymbolScope::Dynamic => { + n_type |= macho::N_EXT; + } + } + + let n_desc = if let SymbolFlags::MachO { n_desc } = symbol.flags { + n_desc + } else { + let mut n_desc = 0; + if symbol.weak { + if symbol.is_undefined() { + n_desc |= macho::N_WEAK_REF; + } else { + n_desc |= macho::N_WEAK_DEF; + } + } + n_desc + }; + + let n_value = match symbol.section.id() { + Some(section) => section_offsets[section.0].address + symbol.value, + None => symbol.value, + }; + + let n_strx = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + + macho.write_nlist( + buffer, + Nlist { + n_strx: n_strx as u32, + n_type, + n_sect: n_sect as u8, + n_desc, + n_value, + }, + ); + } + + // Write strtab. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.write_bytes(&strtab_data); + + // Write relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(buffer, 4); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_extern; + let mut r_symbolnum; + let symbol = &self.symbols[reloc.symbol.0]; + if symbol.kind == SymbolKind::Section { + r_symbolnum = section_offsets[symbol.section.id().unwrap().0].index as u32; + r_extern = false; + } else { + r_symbolnum = symbol_offsets[reloc.symbol.0].index as u32; + r_extern = true; + } + let r_length = match reloc.size { + 8 => 0, + 16 => 1, + 32 => 2, + 64 => 3, + _ => return Err(Error(format!("unimplemented reloc size {:?}", reloc))), + }; + let (r_pcrel, r_type) = match self.architecture { + Architecture::I386 => match reloc.kind { + RelocationKind::Absolute => (false, macho::GENERIC_RELOC_VANILLA), + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.addend) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { + (false, macho::X86_64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::Generic, -4) => { + (true, macho::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86RipRelative, -4) => { + (true, macho::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86Branch, -4) => { + (true, macho::X86_64_RELOC_BRANCH) + } + (RelocationKind::PltRelative, RelocationEncoding::X86Branch, -4) => { + (true, macho::X86_64_RELOC_BRANCH) + } + (RelocationKind::GotRelative, RelocationEncoding::Generic, -4) => { + (true, macho::X86_64_RELOC_GOT) + } + ( + RelocationKind::GotRelative, + RelocationEncoding::X86RipRelativeMovq, + -4, + ) => (true, macho::X86_64_RELOC_GOT_LOAD), + (RelocationKind::MachO { value, relative }, _, _) => (relative, value), + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + 
Architecture::Aarch64 => match (reloc.kind, reloc.encoding, reloc.addend) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { + (false, macho::ARM64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::AArch64Call, 0) => { + (true, macho::ARM64_RELOC_BRANCH26) + } + // Non-zero addend, so we have to encode the addend separately + (RelocationKind::Relative, RelocationEncoding::AArch64Call, value) => { + // first emit the BR26 relocation + let reloc_info = macho::RelocationInfo { + r_address: reloc.offset as u32, + r_symbolnum, + r_pcrel: true, + r_length, + r_extern: true, + r_type: macho::ARM64_RELOC_BRANCH26, + }; + buffer.write(&reloc_info.relocation(endian)); + + // set up a separate relocation for the addend + r_symbolnum = value as u32; + (false, macho::ARM64_RELOC_ADDEND) + } + ( + RelocationKind::MachO { value, relative }, + RelocationEncoding::Generic, + 0, + ) => (relative, value), + _ => { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + }, + _ => { + if let RelocationKind::MachO { value, relative } = reloc.kind { + (relative, value) + } else { + return Err(Error(format!("unimplemented relocation {:?}", reloc))); + } + } + }; + let reloc_info = macho::RelocationInfo { + r_address: reloc.offset as u32, + r_symbolnum, + r_pcrel, + r_length, + r_extern, + r_type, + }; + buffer.write(&reloc_info.relocation(endian)); + } + } + } + + debug_assert_eq!(offset, buffer.len()); + + Ok(()) + } +} + +struct MachHeader { + cputype: u32, + cpusubtype: u32, + filetype: u32, + ncmds: u32, + sizeofcmds: u32, + flags: u32, +} + +struct SegmentCommand { + cmdsize: u32, + segname: [u8; 16], + vmaddr: u64, + vmsize: u64, + fileoff: u64, + filesize: u64, + maxprot: u32, + initprot: u32, + nsects: u32, + flags: u32, +} + +pub struct SectionHeader { + sectname: [u8; 16], + segname: [u8; 16], + addr: u64, + size: u64, + offset: u32, + align: u32, + reloff: u32, + nreloc: u32, + flags: u32, +} + +struct Nlist { + n_strx: 
u32, + n_type: u8, + n_sect: u8, + n_desc: u16, + n_value: u64, +} + +trait MachO { + fn mach_header_size(&self) -> usize; + fn segment_command_size(&self) -> usize; + fn section_header_size(&self) -> usize; + fn nlist_size(&self) -> usize; + fn write_mach_header(&self, buffer: &mut dyn WritableBuffer, section: MachHeader); + fn write_segment_command(&self, buffer: &mut dyn WritableBuffer, segment: SegmentCommand); + fn write_section(&self, buffer: &mut dyn WritableBuffer, section: SectionHeader); + fn write_nlist(&self, buffer: &mut dyn WritableBuffer, nlist: Nlist); +} + +struct MachO32<E> { + endian: E, +} + +impl<E: Endian> MachO for MachO32<E> { + fn mach_header_size(&self) -> usize { + mem::size_of::<macho::MachHeader32<E>>() + } + + fn segment_command_size(&self) -> usize { + mem::size_of::<macho::SegmentCommand32<E>>() + } + + fn section_header_size(&self) -> usize { + mem::size_of::<macho::Section32<E>>() + } + + fn nlist_size(&self) -> usize { + mem::size_of::<macho::Nlist32<E>>() + } + + fn write_mach_header(&self, buffer: &mut dyn WritableBuffer, header: MachHeader) { + let endian = self.endian; + let magic = if endian.is_big_endian() { + macho::MH_MAGIC + } else { + macho::MH_CIGAM + }; + let header = macho::MachHeader32 { + magic: U32::new(BigEndian, magic), + cputype: U32::new(endian, header.cputype), + cpusubtype: U32::new(endian, header.cpusubtype), + filetype: U32::new(endian, header.filetype), + ncmds: U32::new(endian, header.ncmds), + sizeofcmds: U32::new(endian, header.sizeofcmds), + flags: U32::new(endian, header.flags), + }; + buffer.write(&header); + } + + fn write_segment_command(&self, buffer: &mut dyn WritableBuffer, segment: SegmentCommand) { + let endian = self.endian; + let segment = macho::SegmentCommand32 { + cmd: U32::new(endian, macho::LC_SEGMENT), + cmdsize: U32::new(endian, segment.cmdsize), + segname: segment.segname, + vmaddr: U32::new(endian, segment.vmaddr as u32), + vmsize: U32::new(endian, segment.vmsize as u32), + fileoff: 
U32::new(endian, segment.fileoff as u32), + filesize: U32::new(endian, segment.filesize as u32), + maxprot: U32::new(endian, segment.maxprot), + initprot: U32::new(endian, segment.initprot), + nsects: U32::new(endian, segment.nsects), + flags: U32::new(endian, segment.flags), + }; + buffer.write(&segment); + } + + fn write_section(&self, buffer: &mut dyn WritableBuffer, section: SectionHeader) { + let endian = self.endian; + let section = macho::Section32 { + sectname: section.sectname, + segname: section.segname, + addr: U32::new(endian, section.addr as u32), + size: U32::new(endian, section.size as u32), + offset: U32::new(endian, section.offset), + align: U32::new(endian, section.align), + reloff: U32::new(endian, section.reloff), + nreloc: U32::new(endian, section.nreloc), + flags: U32::new(endian, section.flags), + reserved1: U32::default(), + reserved2: U32::default(), + }; + buffer.write(§ion); + } + + fn write_nlist(&self, buffer: &mut dyn WritableBuffer, nlist: Nlist) { + let endian = self.endian; + let nlist = macho::Nlist32 { + n_strx: U32::new(endian, nlist.n_strx), + n_type: nlist.n_type, + n_sect: nlist.n_sect, + n_desc: U16::new(endian, nlist.n_desc), + n_value: U32::new(endian, nlist.n_value as u32), + }; + buffer.write(&nlist); + } +} + +struct MachO64<E> { + endian: E, +} + +impl<E: Endian> MachO for MachO64<E> { + fn mach_header_size(&self) -> usize { + mem::size_of::<macho::MachHeader64<E>>() + } + + fn segment_command_size(&self) -> usize { + mem::size_of::<macho::SegmentCommand64<E>>() + } + + fn section_header_size(&self) -> usize { + mem::size_of::<macho::Section64<E>>() + } + + fn nlist_size(&self) -> usize { + mem::size_of::<macho::Nlist64<E>>() + } + + fn write_mach_header(&self, buffer: &mut dyn WritableBuffer, header: MachHeader) { + let endian = self.endian; + let magic = if endian.is_big_endian() { + macho::MH_MAGIC_64 + } else { + macho::MH_CIGAM_64 + }; + let header = macho::MachHeader64 { + magic: U32::new(BigEndian, magic), + 
cputype: U32::new(endian, header.cputype), + cpusubtype: U32::new(endian, header.cpusubtype), + filetype: U32::new(endian, header.filetype), + ncmds: U32::new(endian, header.ncmds), + sizeofcmds: U32::new(endian, header.sizeofcmds), + flags: U32::new(endian, header.flags), + reserved: U32::default(), + }; + buffer.write(&header); + } + + fn write_segment_command(&self, buffer: &mut dyn WritableBuffer, segment: SegmentCommand) { + let endian = self.endian; + let segment = macho::SegmentCommand64 { + cmd: U32::new(endian, macho::LC_SEGMENT_64), + cmdsize: U32::new(endian, segment.cmdsize), + segname: segment.segname, + vmaddr: U64::new(endian, segment.vmaddr), + vmsize: U64::new(endian, segment.vmsize), + fileoff: U64::new(endian, segment.fileoff), + filesize: U64::new(endian, segment.filesize), + maxprot: U32::new(endian, segment.maxprot), + initprot: U32::new(endian, segment.initprot), + nsects: U32::new(endian, segment.nsects), + flags: U32::new(endian, segment.flags), + }; + buffer.write(&segment); + } + + fn write_section(&self, buffer: &mut dyn WritableBuffer, section: SectionHeader) { + let endian = self.endian; + let section = macho::Section64 { + sectname: section.sectname, + segname: section.segname, + addr: U64::new(endian, section.addr), + size: U64::new(endian, section.size), + offset: U32::new(endian, section.offset), + align: U32::new(endian, section.align), + reloff: U32::new(endian, section.reloff), + nreloc: U32::new(endian, section.nreloc), + flags: U32::new(endian, section.flags), + reserved1: U32::default(), + reserved2: U32::default(), + reserved3: U32::default(), + }; + buffer.write(&section); + } + + fn write_nlist(&self, buffer: &mut dyn WritableBuffer, nlist: Nlist) { + let endian = self.endian; + let nlist = macho::Nlist64 { + n_strx: U32::new(endian, nlist.n_strx), + n_type: nlist.n_type, + n_sect: nlist.n_sect, + n_desc: U16::new(endian, nlist.n_desc), + n_value: U64Bytes::new(endian, nlist.n_value), + }; + buffer.write(&nlist); + } +} diff 
--git a/third_party/rust/object/src/write/mod.rs b/third_party/rust/object/src/write/mod.rs new file mode 100644 index 0000000000..aa4980b1ee --- /dev/null +++ b/third_party/rust/object/src/write/mod.rs @@ -0,0 +1,918 @@ +//! Interface for writing object files. + +use alloc::borrow::Cow; +use alloc::string::String; +use alloc::vec::Vec; +use core::{fmt, result, str}; +#[cfg(not(feature = "std"))] +use hashbrown::HashMap; +#[cfg(feature = "std")] +use std::{boxed::Box, collections::HashMap, error, io}; + +use crate::endian::{Endianness, U32, U64}; +use crate::{ + Architecture, BinaryFormat, ComdatKind, FileFlags, RelocationEncoding, RelocationKind, + SectionFlags, SectionKind, SymbolFlags, SymbolKind, SymbolScope, +}; + +#[cfg(feature = "coff")] +mod coff; +#[cfg(feature = "coff")] +pub use coff::CoffExportStyle; + +#[cfg(feature = "elf")] +pub mod elf; + +#[cfg(feature = "macho")] +mod macho; + +#[cfg(feature = "pe")] +pub mod pe; + +mod string; +pub use string::StringId; + +mod util; +pub use util::*; + +/// The error type used within the write module. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Error(String); + +impl fmt::Display for Error { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(&self.0) + } +} + +#[cfg(feature = "std")] +impl error::Error for Error {} + +/// The result type used within the write module. +pub type Result<T> = result::Result<T, Error>; + +/// A writable object file. +#[derive(Debug)] +pub struct Object<'a> { + format: BinaryFormat, + architecture: Architecture, + endian: Endianness, + sections: Vec<Section<'a>>, + standard_sections: HashMap<StandardSection, SectionId>, + symbols: Vec<Symbol>, + symbol_map: HashMap<Vec<u8>, SymbolId>, + stub_symbols: HashMap<SymbolId, SymbolId>, + comdats: Vec<Comdat>, + /// File flags that are specific to each file format. + pub flags: FileFlags, + /// The symbol name mangling scheme. + pub mangling: Mangling, + /// Mach-O "_tlv_bootstrap" symbol. 
+ tlv_bootstrap: Option<SymbolId>, +} + +impl<'a> Object<'a> { + /// Create an empty object file. + pub fn new(format: BinaryFormat, architecture: Architecture, endian: Endianness) -> Object<'a> { + Object { + format, + architecture, + endian, + sections: Vec::new(), + standard_sections: HashMap::new(), + symbols: Vec::new(), + symbol_map: HashMap::new(), + stub_symbols: HashMap::new(), + comdats: Vec::new(), + flags: FileFlags::None, + mangling: Mangling::default(format, architecture), + tlv_bootstrap: None, + } + } + + /// Return the file format. + #[inline] + pub fn format(&self) -> BinaryFormat { + self.format + } + + /// Return the architecture. + #[inline] + pub fn architecture(&self) -> Architecture { + self.architecture + } + + /// Return the current mangling setting. + #[inline] + pub fn mangling(&self) -> Mangling { + self.mangling + } + + /// Specify the mangling setting. + #[inline] + pub fn set_mangling(&mut self, mangling: Mangling) { + self.mangling = mangling; + } + + /// Return the name for a standard segment. + /// + /// This will vary based on the file format. + #[allow(unused_variables)] + pub fn segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => &[], + #[cfg(feature = "elf")] + BinaryFormat::Elf => &[], + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_segment_name(segment), + _ => unimplemented!(), + } + } + + /// Get the section with the given `SectionId`. + #[inline] + pub fn section(&self, section: SectionId) -> &Section<'a> { + &self.sections[section.0] + } + + /// Mutably get the section with the given `SectionId`. + #[inline] + pub fn section_mut(&mut self, section: SectionId) -> &mut Section<'a> { + &mut self.sections[section.0] + } + + /// Set the data for an existing section. + /// + /// Must not be called for sections that already have data, or that contain uninitialized data. 
+ pub fn set_section_data<T>(&mut self, section: SectionId, data: T, align: u64) + where + T: Into<Cow<'a, [u8]>>, + { + self.sections[section.0].set_data(data, align) + } + + /// Append data to an existing section. Returns the section offset of the data. + pub fn append_section_data(&mut self, section: SectionId, data: &[u8], align: u64) -> u64 { + self.sections[section.0].append_data(data, align) + } + + /// Append zero-initialized data to an existing section. Returns the section offset of the data. + pub fn append_section_bss(&mut self, section: SectionId, size: u64, align: u64) -> u64 { + self.sections[section.0].append_bss(size, align) + } + + /// Return the `SectionId` of a standard section. + /// + /// If the section doesn't already exist then it is created. + pub fn section_id(&mut self, section: StandardSection) -> SectionId { + self.standard_sections + .get(&section) + .cloned() + .unwrap_or_else(|| { + let (segment, name, kind) = self.section_info(section); + self.add_section(segment.to_vec(), name.to_vec(), kind) + }) + } + + /// Add a new section and return its `SectionId`. + /// + /// This also creates a section symbol. + pub fn add_section(&mut self, segment: Vec<u8>, name: Vec<u8>, kind: SectionKind) -> SectionId { + let id = SectionId(self.sections.len()); + self.sections.push(Section { + segment, + name, + kind, + size: 0, + align: 1, + data: Cow::Borrowed(&[]), + relocations: Vec::new(), + symbol: None, + flags: SectionFlags::None, + }); + + // Add to self.standard_sections if required. This may match multiple standard sections. 
+ let section = &self.sections[id.0]; + for standard_section in StandardSection::all() { + if !self.standard_sections.contains_key(standard_section) { + let (segment, name, kind) = self.section_info(*standard_section); + if segment == &*section.segment && name == &*section.name && kind == section.kind { + self.standard_sections.insert(*standard_section, id); + } + } + } + + id + } + + fn section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => self.coff_section_info(section), + #[cfg(feature = "elf")] + BinaryFormat::Elf => self.elf_section_info(section), + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_section_info(section), + _ => unimplemented!(), + } + } + + /// Add a subsection. Returns the `SectionId` and section offset of the data. + pub fn add_subsection( + &mut self, + section: StandardSection, + name: &[u8], + data: &[u8], + align: u64, + ) -> (SectionId, u64) { + let section_id = if self.has_subsections_via_symbols() { + self.set_subsections_via_symbols(); + self.section_id(section) + } else { + let (segment, name, kind) = self.subsection_info(section, name); + self.add_section(segment.to_vec(), name, kind) + }; + let offset = self.append_section_data(section_id, data, align); + (section_id, offset) + } + + fn has_subsections_via_symbols(&self) -> bool { + match self.format { + BinaryFormat::Coff | BinaryFormat::Elf => false, + BinaryFormat::MachO => true, + _ => unimplemented!(), + } + } + + fn set_subsections_via_symbols(&mut self) { + match self.format { + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_set_subsections_via_symbols(), + _ => unimplemented!(), + } + } + + fn subsection_info( + &self, + section: StandardSection, + value: &[u8], + ) -> (&'static [u8], Vec<u8>, SectionKind) { + let (segment, section, kind) = self.section_info(section); + let name = self.subsection_name(section, value); + 
(segment, name, kind) + } + + #[allow(unused_variables)] + fn subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + debug_assert!(!self.has_subsections_via_symbols()); + match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => self.coff_subsection_name(section, value), + #[cfg(feature = "elf")] + BinaryFormat::Elf => self.elf_subsection_name(section, value), + _ => unimplemented!(), + } + } + + /// Get the COMDAT section group with the given `ComdatId`. + #[inline] + pub fn comdat(&self, comdat: ComdatId) -> &Comdat { + &self.comdats[comdat.0] + } + + /// Mutably get the COMDAT section group with the given `ComdatId`. + #[inline] + pub fn comdat_mut(&mut self, comdat: ComdatId) -> &mut Comdat { + &mut self.comdats[comdat.0] + } + + /// Add a new COMDAT section group and return its `ComdatId`. + pub fn add_comdat(&mut self, comdat: Comdat) -> ComdatId { + let comdat_id = ComdatId(self.comdats.len()); + self.comdats.push(comdat); + comdat_id + } + + /// Get the `SymbolId` of the symbol with the given name. + pub fn symbol_id(&self, name: &[u8]) -> Option<SymbolId> { + self.symbol_map.get(name).cloned() + } + + /// Get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol(&self, symbol: SymbolId) -> &Symbol { + &self.symbols[symbol.0] + } + + /// Mutably get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol_mut(&mut self, symbol: SymbolId) -> &mut Symbol { + &mut self.symbols[symbol.0] + } + + /// Add a new symbol and return its `SymbolId`. + pub fn add_symbol(&mut self, mut symbol: Symbol) -> SymbolId { + // Defined symbols must have a scope. + debug_assert!(symbol.is_undefined() || symbol.scope != SymbolScope::Unknown); + if symbol.kind == SymbolKind::Section { + // There can only be one section symbol, but update its flags, since + // the automatically generated section symbol will have none. 
+ let symbol_id = self.section_symbol(symbol.section.id().unwrap()); + if symbol.flags != SymbolFlags::None { + self.symbol_mut(symbol_id).flags = symbol.flags; + } + return symbol_id; + } + if !symbol.name.is_empty() + && (symbol.kind == SymbolKind::Text + || symbol.kind == SymbolKind::Data + || symbol.kind == SymbolKind::Tls) + { + let unmangled_name = symbol.name.clone(); + if let Some(prefix) = self.mangling.global_prefix() { + symbol.name.insert(0, prefix); + } + let symbol_id = self.add_raw_symbol(symbol); + self.symbol_map.insert(unmangled_name, symbol_id); + symbol_id + } else { + self.add_raw_symbol(symbol) + } + } + + fn add_raw_symbol(&mut self, symbol: Symbol) -> SymbolId { + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(symbol); + symbol_id + } + + /// Return true if the file format supports `StandardSection::UninitializedTls`. + #[inline] + pub fn has_uninitialized_tls(&self) -> bool { + self.format != BinaryFormat::Coff + } + + /// Return true if the file format supports `StandardSection::Common`. + #[inline] + pub fn has_common(&self) -> bool { + self.format == BinaryFormat::MachO + } + + /// Add a new common symbol and return its `SymbolId`. + /// + /// For Mach-O, this appends the symbol to the `__common` section. + pub fn add_common_symbol(&mut self, mut symbol: Symbol, size: u64, align: u64) -> SymbolId { + if self.has_common() { + let symbol_id = self.add_symbol(symbol); + let section = self.section_id(StandardSection::Common); + self.add_symbol_bss(symbol_id, section, size, align); + symbol_id + } else { + symbol.section = SymbolSection::Common; + symbol.size = size; + self.add_symbol(symbol) + } + } + + /// Add a new file symbol and return its `SymbolId`. 
+ pub fn add_file_symbol(&mut self, name: Vec<u8>) -> SymbolId { + self.add_raw_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::File, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::None, + flags: SymbolFlags::None, + }) + } + + /// Get the symbol for a section. + pub fn section_symbol(&mut self, section_id: SectionId) -> SymbolId { + let section = &mut self.sections[section_id.0]; + if let Some(symbol) = section.symbol { + return symbol; + } + let name = if self.format == BinaryFormat::Coff { + section.name.clone() + } else { + Vec::new() + }; + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Section, + scope: SymbolScope::Compilation, + weak: false, + section: SymbolSection::Section(section_id), + flags: SymbolFlags::None, + }); + section.symbol = Some(symbol_id); + symbol_id + } + + /// Append data to an existing section, and update a symbol to refer to it. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the added data via the `__thread_vars` entry. + /// + /// Returns the section offset of the data. + pub fn add_symbol_data( + &mut self, + symbol_id: SymbolId, + section: SectionId, + data: &[u8], + align: u64, + ) -> u64 { + let offset = self.append_section_data(section, data, align); + self.set_symbol_data(symbol_id, section, offset, data.len() as u64); + offset + } + + /// Append zero-initialized data to an existing section, and update a symbol to refer to it. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the added data via the `__thread_vars` entry. + /// + /// Returns the section offset of the data. 
+ pub fn add_symbol_bss( + &mut self, + symbol_id: SymbolId, + section: SectionId, + size: u64, + align: u64, + ) -> u64 { + let offset = self.append_section_bss(section, size, align); + self.set_symbol_data(symbol_id, section, offset, size); + offset + } + + /// Update a symbol to refer to the given data within a section. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the data via the `__thread_vars` entry. + #[allow(unused_mut)] + pub fn set_symbol_data( + &mut self, + mut symbol_id: SymbolId, + section: SectionId, + offset: u64, + size: u64, + ) { + // Defined symbols must have a scope. + debug_assert!(self.symbol(symbol_id).scope != SymbolScope::Unknown); + match self.format { + #[cfg(feature = "macho")] + BinaryFormat::MachO => symbol_id = self.macho_add_thread_var(symbol_id), + _ => {} + } + let symbol = self.symbol_mut(symbol_id); + symbol.value = offset; + symbol.size = size; + symbol.section = SymbolSection::Section(section); + } + + /// Convert a symbol to a section symbol and offset. + /// + /// Returns `None` if the symbol does not have a section. + pub fn symbol_section_and_offset(&mut self, symbol_id: SymbolId) -> Option<(SymbolId, u64)> { + let symbol = self.symbol(symbol_id); + if symbol.kind == SymbolKind::Section { + return Some((symbol_id, 0)); + } + let symbol_offset = symbol.value; + let section = symbol.section.id()?; + let section_symbol = self.section_symbol(section); + Some((section_symbol, symbol_offset)) + } + + /// Add a relocation to a section. + /// + /// Relocations must only be added after the referenced symbols have been added + /// and defined (if applicable). 
+ pub fn add_relocation(&mut self, section: SectionId, mut relocation: Relocation) -> Result<()> { + let addend = match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => self.coff_fixup_relocation(&mut relocation), + #[cfg(feature = "elf")] + BinaryFormat::Elf => self.elf_fixup_relocation(&mut relocation)?, + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_fixup_relocation(&mut relocation), + _ => unimplemented!(), + }; + if addend != 0 { + self.write_relocation_addend(section, &relocation, addend)?; + } + self.sections[section.0].relocations.push(relocation); + Ok(()) + } + + fn write_relocation_addend( + &mut self, + section: SectionId, + relocation: &Relocation, + addend: i64, + ) -> Result<()> { + let data = self.sections[section.0].data_mut(); + let offset = relocation.offset as usize; + match relocation.size { + 32 => data.write_at(offset, &U32::new(self.endian, addend as u32)), + 64 => data.write_at(offset, &U64::new(self.endian, addend as u64)), + _ => { + return Err(Error(format!( + "unimplemented relocation addend {:?}", + relocation + ))); + } + } + .map_err(|_| { + Error(format!( + "invalid relocation offset {}+{} (max {})", + relocation.offset, + relocation.size, + data.len() + )) + }) + } + + /// Write the object to a `Vec`. + pub fn write(&self) -> Result<Vec<u8>> { + let mut buffer = Vec::new(); + self.emit(&mut buffer)?; + Ok(buffer) + } + + /// Write the object to a `Write` implementation. + /// + /// Also flushes the writer. + /// + /// It is advisable to use a buffered writer like [`BufWriter`](std::io::BufWriter) + /// instead of an unbuffered writer like [`File`](std::fs::File). + #[cfg(feature = "std")] + pub fn write_stream<W: io::Write>(&self, w: W) -> result::Result<(), Box<dyn error::Error>> { + let mut stream = StreamingBuffer::new(w); + self.emit(&mut stream)?; + stream.result()?; + stream.into_inner().flush()?; + Ok(()) + } + + /// Write the object to a `WritableBuffer`. 
+ pub fn emit(&self, buffer: &mut dyn WritableBuffer) -> Result<()> { + match self.format { + #[cfg(feature = "coff")] + BinaryFormat::Coff => self.coff_write(buffer), + #[cfg(feature = "elf")] + BinaryFormat::Elf => self.elf_write(buffer), + #[cfg(feature = "macho")] + BinaryFormat::MachO => self.macho_write(buffer), + _ => unimplemented!(), + } + } +} + +/// A standard segment kind. +#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[non_exhaustive] +pub enum StandardSegment { + Text, + Data, + Debug, +} + +/// A standard section kind. +#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[non_exhaustive] +pub enum StandardSection { + Text, + Data, + ReadOnlyData, + ReadOnlyDataWithRel, + ReadOnlyString, + UninitializedData, + Tls, + /// Zero-fill TLS initializers. Unsupported for COFF. + UninitializedTls, + /// TLS variable structures. Only supported for Mach-O. + TlsVariables, + /// Common data. Only supported for Mach-O. + Common, +} + +impl StandardSection { + /// Return the section kind of a standard section. + pub fn kind(self) -> SectionKind { + match self { + StandardSection::Text => SectionKind::Text, + StandardSection::Data => SectionKind::Data, + StandardSection::ReadOnlyData | StandardSection::ReadOnlyDataWithRel => { + SectionKind::ReadOnlyData + } + StandardSection::ReadOnlyString => SectionKind::ReadOnlyString, + StandardSection::UninitializedData => SectionKind::UninitializedData, + StandardSection::Tls => SectionKind::Tls, + StandardSection::UninitializedTls => SectionKind::UninitializedTls, + StandardSection::TlsVariables => SectionKind::TlsVariables, + StandardSection::Common => SectionKind::Common, + } + } + + // TODO: remembering to update this is error-prone, can we do better? 
+ fn all() -> &'static [StandardSection] { + &[ + StandardSection::Text, + StandardSection::Data, + StandardSection::ReadOnlyData, + StandardSection::ReadOnlyDataWithRel, + StandardSection::ReadOnlyString, + StandardSection::UninitializedData, + StandardSection::Tls, + StandardSection::UninitializedTls, + StandardSection::TlsVariables, + StandardSection::Common, + ] + } +} + +/// An identifier used to reference a section. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionId(usize); + +/// A section in an object file. +#[derive(Debug)] +pub struct Section<'a> { + segment: Vec<u8>, + name: Vec<u8>, + kind: SectionKind, + size: u64, + align: u64, + data: Cow<'a, [u8]>, + relocations: Vec<Relocation>, + symbol: Option<SymbolId>, + /// Section flags that are specific to each file format. + pub flags: SectionFlags, +} + +impl<'a> Section<'a> { + /// Try to convert the name to a utf8 string. + #[inline] + pub fn name(&self) -> Option<&str> { + str::from_utf8(&self.name).ok() + } + + /// Try to convert the segment to a utf8 string. + #[inline] + pub fn segment(&self) -> Option<&str> { + str::from_utf8(&self.segment).ok() + } + + /// Return true if this section contains zerofill data. + #[inline] + pub fn is_bss(&self) -> bool { + self.kind.is_bss() + } + + /// Set the data for a section. + /// + /// Must not be called for sections that already have data, or that contain uninitialized data. + pub fn set_data<T>(&mut self, data: T, align: u64) + where + T: Into<Cow<'a, [u8]>>, + { + debug_assert!(!self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + debug_assert!(self.data.is_empty()); + self.data = data.into(); + self.size = self.data.len() as u64; + self.align = align; + } + + /// Append data to a section. + /// + /// Must not be called for sections that contain uninitialized data. 
+ pub fn append_data(&mut self, append_data: &[u8], align: u64) -> u64 { + debug_assert!(!self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + if self.align < align { + self.align = align; + } + let align = align as usize; + let data = self.data.to_mut(); + let mut offset = data.len(); + if offset & (align - 1) != 0 { + offset += align - (offset & (align - 1)); + data.resize(offset, 0); + } + data.extend_from_slice(append_data); + self.size = data.len() as u64; + offset as u64 + } + + /// Append uninitialized data to a section. + /// + /// Must not be called for sections that contain initialized data. + pub fn append_bss(&mut self, size: u64, align: u64) -> u64 { + debug_assert!(self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + if self.align < align { + self.align = align; + } + let mut offset = self.size; + if offset & (align - 1) != 0 { + offset += align - (offset & (align - 1)); + self.size = offset; + } + self.size += size; + offset as u64 + } + + /// Returns the section as-built so far. + /// + /// This requires that the section is not a bss section. + pub fn data(&self) -> &[u8] { + debug_assert!(!self.is_bss()); + &self.data + } + + /// Returns the section as-built so far. + /// + /// This requires that the section is not a bss section. + pub fn data_mut(&mut self) -> &mut [u8] { + debug_assert!(!self.is_bss()); + self.data.to_mut() + } +} + +/// The section where a symbol is defined. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum SymbolSection { + /// The section is not applicable for this symbol (such as file symbols). + None, + /// The symbol is undefined. + Undefined, + /// The symbol has an absolute value. + Absolute, + /// The symbol is a zero-initialized symbol that will be combined with duplicate definitions. + Common, + /// The symbol is defined in the given section. + Section(SectionId), +} + +impl SymbolSection { + /// Returns the section id for the section where the symbol is defined. 
+ /// + /// May return `None` if the symbol is not defined in a section. + #[inline] + pub fn id(self) -> Option<SectionId> { + if let SymbolSection::Section(id) = self { + Some(id) + } else { + None + } + } +} + +/// An identifier used to reference a symbol. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SymbolId(usize); + +/// A symbol in an object file. +#[derive(Debug)] +pub struct Symbol { + /// The name of the symbol. + pub name: Vec<u8>, + /// The value of the symbol. + /// + /// If the symbol is defined in a section, then this is the section offset of the symbol. + pub value: u64, + /// The size of the symbol. + pub size: u64, + /// The kind of the symbol. + pub kind: SymbolKind, + /// The scope of the symbol. + pub scope: SymbolScope, + /// Whether the symbol has weak binding. + pub weak: bool, + /// The section containing the symbol. + pub section: SymbolSection, + /// Symbol flags that are specific to each file format. + pub flags: SymbolFlags<SectionId>, +} + +impl Symbol { + /// Try to convert the name to a utf8 string. + #[inline] + pub fn name(&self) -> Option<&str> { + str::from_utf8(&self.name).ok() + } + + /// Return true if the symbol is undefined. + #[inline] + pub fn is_undefined(&self) -> bool { + self.section == SymbolSection::Undefined + } + + /// Return true if the symbol is common data. + /// + /// Note: does not check for `SymbolSection::Section` with `SectionKind::Common`. + #[inline] + pub fn is_common(&self) -> bool { + self.section == SymbolSection::Common + } + + /// Return true if the symbol scope is local. + #[inline] + pub fn is_local(&self) -> bool { + self.scope == SymbolScope::Compilation + } +} + +/// A relocation in an object file. +#[derive(Debug)] +pub struct Relocation { + /// The section offset of the place of the relocation. + pub offset: u64, + /// The size in bits of the place of relocation. + pub size: u8, + /// The operation used to calculate the result of the relocation. 
+ pub kind: RelocationKind, + /// Information about how the result of the relocation operation is encoded in the place. + pub encoding: RelocationEncoding, + /// The symbol referred to by the relocation. + /// + /// This may be a section symbol. + pub symbol: SymbolId, + /// The addend to use in the relocation calculation. + /// + /// This may be in addition to an implicit addend stored at the place of the relocation. + pub addend: i64, +} + +/// An identifier used to reference a COMDAT section group. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ComdatId(usize); + +/// A COMDAT section group. +#[derive(Debug)] +pub struct Comdat { + /// The COMDAT selection kind. + /// + /// This determines the way in which the linker resolves multiple definitions of the COMDAT + /// sections. + pub kind: ComdatKind, + /// The COMDAT symbol. + /// + /// If this symbol is referenced, then all sections in the group will be included by the + /// linker. + pub symbol: SymbolId, + /// The sections in the group. + pub sections: Vec<SectionId>, +} + +/// The symbol name mangling scheme. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum Mangling { + /// No symbol mangling. + None, + /// Windows COFF symbol mangling. + Coff, + /// Windows COFF i386 symbol mangling. + CoffI386, + /// ELF symbol mangling. + Elf, + /// Mach-O symbol mangling. + MachO, +} + +impl Mangling { + /// Return the default symbol mangling for the given format and architecture. + pub fn default(format: BinaryFormat, architecture: Architecture) -> Self { + match (format, architecture) { + (BinaryFormat::Coff, Architecture::I386) => Mangling::CoffI386, + (BinaryFormat::Coff, _) => Mangling::Coff, + (BinaryFormat::Elf, _) => Mangling::Elf, + (BinaryFormat::MachO, _) => Mangling::MachO, + _ => Mangling::None, + } + } + + /// Return the prefix to use for global symbols. 
+ pub fn global_prefix(self) -> Option<u8> { + match self { + Mangling::None | Mangling::Elf | Mangling::Coff => None, + Mangling::CoffI386 | Mangling::MachO => Some(b'_'), + } + } +} diff --git a/third_party/rust/object/src/write/pe.rs b/third_party/rust/object/src/write/pe.rs new file mode 100644 index 0000000000..70da3a0937 --- /dev/null +++ b/third_party/rust/object/src/write/pe.rs @@ -0,0 +1,847 @@ +//! Helper for writing PE files. +use alloc::string::String; +use alloc::vec::Vec; +use core::mem; + +use crate::endian::{LittleEndian as LE, *}; +use crate::pe; +use crate::write::util; +use crate::write::{Error, Result, WritableBuffer}; + +/// A helper for writing PE files. +/// +/// Writing uses a two phase approach. The first phase reserves file ranges and virtual +/// address ranges for everything in the order that they will be written. +/// +/// The second phase writes everything out in order. Thus the caller must ensure writing +/// is in the same order that file ranges were reserved. +#[allow(missing_debug_implementations)] +pub struct Writer<'a> { + is_64: bool, + section_alignment: u32, + file_alignment: u32, + + buffer: &'a mut dyn WritableBuffer, + len: u32, + virtual_len: u32, + headers_len: u32, + + code_address: u32, + data_address: u32, + code_len: u32, + data_len: u32, + bss_len: u32, + + nt_headers_offset: u32, + data_directories: Vec<DataDirectory>, + section_header_num: u16, + sections: Vec<Section>, + + symbol_offset: u32, + symbol_num: u32, + + reloc_blocks: Vec<RelocBlock>, + relocs: Vec<U16<LE>>, + reloc_offset: u32, +} + +impl<'a> Writer<'a> { + /// Create a new `Writer`. 
+ pub fn new( + is_64: bool, + section_alignment: u32, + file_alignment: u32, + buffer: &'a mut dyn WritableBuffer, + ) -> Self { + Writer { + is_64, + section_alignment, + file_alignment, + + buffer, + len: 0, + virtual_len: 0, + headers_len: 0, + + code_address: 0, + data_address: 0, + code_len: 0, + data_len: 0, + bss_len: 0, + + nt_headers_offset: 0, + data_directories: Vec::new(), + section_header_num: 0, + sections: Vec::new(), + + symbol_offset: 0, + symbol_num: 0, + + reloc_blocks: Vec::new(), + relocs: Vec::new(), + reloc_offset: 0, + } + } + + /// Return the current virtual address size that has been reserved. + /// + /// This is only valid after section headers have been reserved. + pub fn virtual_len(&self) -> u32 { + self.virtual_len + } + + /// Reserve a virtual address range with the given size. + /// + /// The reserved length will be increased to match the section alignment. + /// + /// Returns the aligned offset of the start of the range. + pub fn reserve_virtual(&mut self, len: u32) -> u32 { + let offset = self.virtual_len; + self.virtual_len += len; + self.virtual_len = util::align_u32(self.virtual_len, self.section_alignment); + offset + } + + /// Reserve up to the given virtual address. + /// + /// The reserved length will be increased to match the section alignment. + pub fn reserve_virtual_until(&mut self, address: u32) { + debug_assert!(self.virtual_len <= address); + self.virtual_len = util::align_u32(address, self.section_alignment); + } + + /// Return the current file length that has been reserved. + pub fn reserved_len(&self) -> u32 { + self.len + } + + /// Return the current file length that has been written. + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + self.buffer.len() + } + + /// Reserve a file range with the given size and starting alignment. + /// + /// Returns the aligned offset of the start of the range. 
+ pub fn reserve(&mut self, len: u32, align_start: u32) -> u32 { + if len == 0 { + return self.len; + } + self.reserve_align(align_start); + let offset = self.len; + self.len += len; + offset + } + + /// Reserve a file range with the given size and using the file alignment. + /// + /// Returns the aligned offset of the start of the range. + pub fn reserve_file(&mut self, len: u32) -> u32 { + self.reserve(len, self.file_alignment) + } + + /// Write data. + pub fn write(&mut self, data: &[u8]) { + self.buffer.write_bytes(data); + } + + /// Reserve alignment padding bytes. + pub fn reserve_align(&mut self, align_start: u32) { + self.len = util::align_u32(self.len, align_start); + } + + /// Write alignment padding bytes. + pub fn write_align(&mut self, align_start: u32) { + util::write_align(self.buffer, align_start as usize); + } + + /// Write padding up to the next multiple of file alignment. + pub fn write_file_align(&mut self) { + self.write_align(self.file_alignment); + } + + /// Reserve the file range up to the given file offset. + pub fn reserve_until(&mut self, offset: u32) { + debug_assert!(self.len <= offset); + self.len = offset; + } + + /// Write padding up to the given file offset. + pub fn pad_until(&mut self, offset: u32) { + debug_assert!(self.buffer.len() <= offset as usize); + self.buffer.resize(offset as usize); + } + + /// Reserve the range for the DOS header. + /// + /// This must be at the start of the file. + /// + /// When writing, you may use `write_custom_dos_header` or `write_empty_dos_header`. + pub fn reserve_dos_header(&mut self) { + debug_assert_eq!(self.len, 0); + self.reserve(mem::size_of::<pe::ImageDosHeader>() as u32, 1); + } + + /// Write a custom DOS header. + /// + /// This must be at the start of the file. + pub fn write_custom_dos_header(&mut self, dos_header: &pe::ImageDosHeader) -> Result<()> { + debug_assert_eq!(self.buffer.len(), 0); + + // Start writing. 
+ self.buffer + .reserve(self.len as usize) + .map_err(|_| Error(String::from("Cannot allocate buffer")))?; + + self.buffer.write(dos_header); + Ok(()) + } + + /// Write the DOS header for a file without a stub. + /// + /// This must be at the start of the file. + /// + /// Uses default values for all fields. + pub fn write_empty_dos_header(&mut self) -> Result<()> { + self.write_custom_dos_header(&pe::ImageDosHeader { + e_magic: U16::new(LE, pe::IMAGE_DOS_SIGNATURE), + e_cblp: U16::new(LE, 0), + e_cp: U16::new(LE, 0), + e_crlc: U16::new(LE, 0), + e_cparhdr: U16::new(LE, 0), + e_minalloc: U16::new(LE, 0), + e_maxalloc: U16::new(LE, 0), + e_ss: U16::new(LE, 0), + e_sp: U16::new(LE, 0), + e_csum: U16::new(LE, 0), + e_ip: U16::new(LE, 0), + e_cs: U16::new(LE, 0), + e_lfarlc: U16::new(LE, 0), + e_ovno: U16::new(LE, 0), + e_res: [U16::new(LE, 0); 4], + e_oemid: U16::new(LE, 0), + e_oeminfo: U16::new(LE, 0), + e_res2: [U16::new(LE, 0); 10], + e_lfanew: U32::new(LE, self.nt_headers_offset), + }) + } + + /// Reserve a fixed DOS header and stub. + /// + /// Use `reserve_dos_header` and `reserve` if you need a custom stub. + pub fn reserve_dos_header_and_stub(&mut self) { + self.reserve_dos_header(); + self.reserve(64, 1); + } + + /// Write a fixed DOS header and stub. + /// + /// Use `write_custom_dos_header` and `write` if you need a custom stub. 
+ pub fn write_dos_header_and_stub(&mut self) -> Result<()> { + self.write_custom_dos_header(&pe::ImageDosHeader { + e_magic: U16::new(LE, pe::IMAGE_DOS_SIGNATURE), + e_cblp: U16::new(LE, 0x90), + e_cp: U16::new(LE, 3), + e_crlc: U16::new(LE, 0), + e_cparhdr: U16::new(LE, 4), + e_minalloc: U16::new(LE, 0), + e_maxalloc: U16::new(LE, 0xffff), + e_ss: U16::new(LE, 0), + e_sp: U16::new(LE, 0xb8), + e_csum: U16::new(LE, 0), + e_ip: U16::new(LE, 0), + e_cs: U16::new(LE, 0), + e_lfarlc: U16::new(LE, 0x40), + e_ovno: U16::new(LE, 0), + e_res: [U16::new(LE, 0); 4], + e_oemid: U16::new(LE, 0), + e_oeminfo: U16::new(LE, 0), + e_res2: [U16::new(LE, 0); 10], + e_lfanew: U32::new(LE, self.nt_headers_offset), + })?; + + #[rustfmt::skip] + self.buffer.write_bytes(&[ + 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, + 0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68, + 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, + 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, + 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, + 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, + 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x0d, 0x0d, 0x0a, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]); + + Ok(()) + } + + fn nt_headers_size(&self) -> u32 { + if self.is_64 { + mem::size_of::<pe::ImageNtHeaders64>() as u32 + } else { + mem::size_of::<pe::ImageNtHeaders32>() as u32 + } + } + + fn optional_header_size(&self) -> u32 { + let size = if self.is_64 { + mem::size_of::<pe::ImageOptionalHeader64>() as u32 + } else { + mem::size_of::<pe::ImageOptionalHeader32>() as u32 + }; + size + self.data_directories.len() as u32 * mem::size_of::<pe::ImageDataDirectory>() as u32 + } + + /// Return the offset of the NT headers, if reserved. + pub fn nt_headers_offset(&self) -> u32 { + self.nt_headers_offset + } + + /// Reserve the range for the NT headers. 
+ pub fn reserve_nt_headers(&mut self, data_directory_num: usize) { + debug_assert_eq!(self.nt_headers_offset, 0); + self.nt_headers_offset = self.reserve(self.nt_headers_size(), 8); + self.data_directories = vec![DataDirectory::default(); data_directory_num]; + self.reserve( + data_directory_num as u32 * mem::size_of::<pe::ImageDataDirectory>() as u32, + 1, + ); + } + + /// Set the virtual address and size of a data directory. + pub fn set_data_directory(&mut self, index: usize, virtual_address: u32, size: u32) { + self.data_directories[index] = DataDirectory { + virtual_address, + size, + } + } + + /// Write the NT headers. + pub fn write_nt_headers(&mut self, nt_headers: NtHeaders) { + self.pad_until(self.nt_headers_offset); + self.buffer.write(&U32::new(LE, pe::IMAGE_NT_SIGNATURE)); + let file_header = pe::ImageFileHeader { + machine: U16::new(LE, nt_headers.machine), + number_of_sections: U16::new(LE, self.section_header_num), + time_date_stamp: U32::new(LE, nt_headers.time_date_stamp), + pointer_to_symbol_table: U32::new(LE, self.symbol_offset), + number_of_symbols: U32::new(LE, self.symbol_num), + size_of_optional_header: U16::new(LE, self.optional_header_size() as u16), + characteristics: U16::new(LE, nt_headers.characteristics), + }; + self.buffer.write(&file_header); + if self.is_64 { + let optional_header = pe::ImageOptionalHeader64 { + magic: U16::new(LE, pe::IMAGE_NT_OPTIONAL_HDR64_MAGIC), + major_linker_version: nt_headers.major_linker_version, + minor_linker_version: nt_headers.minor_linker_version, + size_of_code: U32::new(LE, self.code_len), + size_of_initialized_data: U32::new(LE, self.data_len), + size_of_uninitialized_data: U32::new(LE, self.bss_len), + address_of_entry_point: U32::new(LE, nt_headers.address_of_entry_point), + base_of_code: U32::new(LE, self.code_address), + image_base: U64::new(LE, nt_headers.image_base), + section_alignment: U32::new(LE, self.section_alignment), + file_alignment: U32::new(LE, self.file_alignment), + 
major_operating_system_version: U16::new( + LE, + nt_headers.major_operating_system_version, + ), + minor_operating_system_version: U16::new( + LE, + nt_headers.minor_operating_system_version, + ), + major_image_version: U16::new(LE, nt_headers.major_image_version), + minor_image_version: U16::new(LE, nt_headers.minor_image_version), + major_subsystem_version: U16::new(LE, nt_headers.major_subsystem_version), + minor_subsystem_version: U16::new(LE, nt_headers.minor_subsystem_version), + win32_version_value: U32::new(LE, 0), + size_of_image: U32::new(LE, self.virtual_len), + size_of_headers: U32::new(LE, self.headers_len), + check_sum: U32::new(LE, 0), + subsystem: U16::new(LE, nt_headers.subsystem), + dll_characteristics: U16::new(LE, nt_headers.dll_characteristics), + size_of_stack_reserve: U64::new(LE, nt_headers.size_of_stack_reserve), + size_of_stack_commit: U64::new(LE, nt_headers.size_of_stack_commit), + size_of_heap_reserve: U64::new(LE, nt_headers.size_of_heap_reserve), + size_of_heap_commit: U64::new(LE, nt_headers.size_of_heap_commit), + loader_flags: U32::new(LE, 0), + number_of_rva_and_sizes: U32::new(LE, self.data_directories.len() as u32), + }; + self.buffer.write(&optional_header); + } else { + let optional_header = pe::ImageOptionalHeader32 { + magic: U16::new(LE, pe::IMAGE_NT_OPTIONAL_HDR32_MAGIC), + major_linker_version: nt_headers.major_linker_version, + minor_linker_version: nt_headers.minor_linker_version, + size_of_code: U32::new(LE, self.code_len), + size_of_initialized_data: U32::new(LE, self.data_len), + size_of_uninitialized_data: U32::new(LE, self.bss_len), + address_of_entry_point: U32::new(LE, nt_headers.address_of_entry_point), + base_of_code: U32::new(LE, self.code_address), + base_of_data: U32::new(LE, self.data_address), + image_base: U32::new(LE, nt_headers.image_base as u32), + section_alignment: U32::new(LE, self.section_alignment), + file_alignment: U32::new(LE, self.file_alignment), + major_operating_system_version: U16::new( + 
LE, + nt_headers.major_operating_system_version, + ), + minor_operating_system_version: U16::new( + LE, + nt_headers.minor_operating_system_version, + ), + major_image_version: U16::new(LE, nt_headers.major_image_version), + minor_image_version: U16::new(LE, nt_headers.minor_image_version), + major_subsystem_version: U16::new(LE, nt_headers.major_subsystem_version), + minor_subsystem_version: U16::new(LE, nt_headers.minor_subsystem_version), + win32_version_value: U32::new(LE, 0), + size_of_image: U32::new(LE, self.virtual_len), + size_of_headers: U32::new(LE, self.headers_len), + check_sum: U32::new(LE, 0), + subsystem: U16::new(LE, nt_headers.subsystem), + dll_characteristics: U16::new(LE, nt_headers.dll_characteristics), + size_of_stack_reserve: U32::new(LE, nt_headers.size_of_stack_reserve as u32), + size_of_stack_commit: U32::new(LE, nt_headers.size_of_stack_commit as u32), + size_of_heap_reserve: U32::new(LE, nt_headers.size_of_heap_reserve as u32), + size_of_heap_commit: U32::new(LE, nt_headers.size_of_heap_commit as u32), + loader_flags: U32::new(LE, 0), + number_of_rva_and_sizes: U32::new(LE, self.data_directories.len() as u32), + }; + self.buffer.write(&optional_header); + } + + for dir in &self.data_directories { + self.buffer.write(&pe::ImageDataDirectory { + virtual_address: U32::new(LE, dir.virtual_address), + size: U32::new(LE, dir.size), + }) + } + } + + /// Reserve the section headers. + /// + /// The number of reserved section headers must be the same as the number of sections that + /// are later reserved. + // TODO: change this to a maximum number of sections? + pub fn reserve_section_headers(&mut self, section_header_num: u16) { + debug_assert_eq!(self.section_header_num, 0); + self.section_header_num = section_header_num; + self.reserve( + u32::from(section_header_num) * mem::size_of::<pe::ImageSectionHeader>() as u32, + 1, + ); + // Padding before sections must be included in headers_len. 
+ self.reserve_align(self.file_alignment); + self.headers_len = self.len; + self.reserve_virtual(self.len); + } + + /// Write the section headers. + /// + /// This uses information that was recorded when the sections were reserved. + pub fn write_section_headers(&mut self) { + debug_assert_eq!(self.section_header_num as usize, self.sections.len()); + for section in &self.sections { + let section_header = pe::ImageSectionHeader { + name: section.name, + virtual_size: U32::new(LE, section.range.virtual_size), + virtual_address: U32::new(LE, section.range.virtual_address), + size_of_raw_data: U32::new(LE, section.range.file_size), + pointer_to_raw_data: U32::new(LE, section.range.file_offset), + pointer_to_relocations: U32::new(LE, 0), + pointer_to_linenumbers: U32::new(LE, 0), + number_of_relocations: U16::new(LE, 0), + number_of_linenumbers: U16::new(LE, 0), + characteristics: U32::new(LE, section.characteristics), + }; + self.buffer.write(§ion_header); + } + } + + /// Reserve a section. + /// + /// Returns the file range and virtual address range that are reserved + /// for the section. + pub fn reserve_section( + &mut self, + name: [u8; 8], + characteristics: u32, + virtual_size: u32, + data_size: u32, + ) -> SectionRange { + let virtual_address = self.reserve_virtual(virtual_size); + + // Padding after section must be included in section file size. + let file_size = util::align_u32(data_size, self.file_alignment); + let file_offset = if file_size != 0 { + self.reserve(file_size, self.file_alignment) + } else { + 0 + }; + + // Sizes in optional header use the virtual size with the file alignment. 
+ let aligned_virtual_size = util::align_u32(virtual_size, self.file_alignment); + if characteristics & pe::IMAGE_SCN_CNT_CODE != 0 { + if self.code_address == 0 { + self.code_address = virtual_address; + } + self.code_len += aligned_virtual_size; + } else if characteristics & pe::IMAGE_SCN_CNT_INITIALIZED_DATA != 0 { + if self.data_address == 0 { + self.data_address = virtual_address; + } + self.data_len += aligned_virtual_size; + } else if characteristics & pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0 { + if self.data_address == 0 { + self.data_address = virtual_address; + } + self.bss_len += aligned_virtual_size; + } + + let range = SectionRange { + virtual_address, + virtual_size, + file_offset, + file_size, + }; + self.sections.push(Section { + name, + characteristics, + range, + }); + range + } + + /// Write the data for a section. + pub fn write_section(&mut self, offset: u32, data: &[u8]) { + if data.is_empty() { + return; + } + self.pad_until(offset); + self.write(data); + self.write_align(self.file_alignment); + } + + /// Reserve a `.text` section. + /// + /// Contains executable code. + pub fn reserve_text_section(&mut self, size: u32) -> SectionRange { + self.reserve_section( + *b".text\0\0\0", + pe::IMAGE_SCN_CNT_CODE | pe::IMAGE_SCN_MEM_EXECUTE | pe::IMAGE_SCN_MEM_READ, + size, + size, + ) + } + + /// Reserve a `.data` section. + /// + /// Contains initialized data. + /// + /// May also contain uninitialized data if `virtual_size` is greater than `data_size`. + pub fn reserve_data_section(&mut self, virtual_size: u32, data_size: u32) -> SectionRange { + self.reserve_section( + *b".data\0\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ | pe::IMAGE_SCN_MEM_WRITE, + virtual_size, + data_size, + ) + } + + /// Reserve a `.rdata` section. + /// + /// Contains read-only initialized data. 
+ pub fn reserve_rdata_section(&mut self, size: u32) -> SectionRange { + self.reserve_section( + *b".rdata\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ, + size, + size, + ) + } + + /// Reserve a `.bss` section. + /// + /// Contains uninitialized data. + pub fn reserve_bss_section(&mut self, size: u32) -> SectionRange { + self.reserve_section( + *b".bss\0\0\0\0", + pe::IMAGE_SCN_CNT_UNINITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ | pe::IMAGE_SCN_MEM_WRITE, + size, + 0, + ) + } + + /// Reserve an `.idata` section. + /// + /// Contains import tables. Note that it is permissible to store import tables in a different + /// section. + /// + /// This also sets the `pe::IMAGE_DIRECTORY_ENTRY_IMPORT` data directory. + pub fn reserve_idata_section(&mut self, size: u32) -> SectionRange { + let range = self.reserve_section( + *b".idata\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ | pe::IMAGE_SCN_MEM_WRITE, + size, + size, + ); + let dir = &mut self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_IMPORT]; + debug_assert_eq!(dir.virtual_address, 0); + *dir = DataDirectory { + virtual_address: range.virtual_address, + size, + }; + range + } + + /// Reserve an `.edata` section. + /// + /// Contains export tables. + /// + /// This also sets the `pe::IMAGE_DIRECTORY_ENTRY_EXPORT` data directory. + pub fn reserve_edata_section(&mut self, size: u32) -> SectionRange { + let range = self.reserve_section( + *b".edata\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ, + size, + size, + ); + let dir = &mut self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_EXPORT]; + debug_assert_eq!(dir.virtual_address, 0); + *dir = DataDirectory { + virtual_address: range.virtual_address, + size, + }; + range + } + + /// Reserve a `.pdata` section. + /// + /// Contains exception information. + /// + /// This also sets the `pe::IMAGE_DIRECTORY_ENTRY_EXCEPTION` data directory. 
+ pub fn reserve_pdata_section(&mut self, size: u32) -> SectionRange { + let range = self.reserve_section( + *b".pdata\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ, + size, + size, + ); + let dir = &mut self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_EXCEPTION]; + debug_assert_eq!(dir.virtual_address, 0); + *dir = DataDirectory { + virtual_address: range.virtual_address, + size, + }; + range + } + + /// Reserve a `.xdata` section. + /// + /// Contains exception information. + pub fn reserve_xdata_section(&mut self, size: u32) -> SectionRange { + self.reserve_section( + *b".xdata\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ, + size, + size, + ) + } + + /// Reserve a `.rsrc` section. + /// + /// Contains the resource directory. + /// + /// This also sets the `pe::IMAGE_DIRECTORY_ENTRY_RESOURCE` data directory. + pub fn reserve_rsrc_section(&mut self, size: u32) -> SectionRange { + let range = self.reserve_section( + *b".rsrc\0\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA | pe::IMAGE_SCN_MEM_READ, + size, + size, + ); + let dir = &mut self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_RESOURCE]; + debug_assert_eq!(dir.virtual_address, 0); + *dir = DataDirectory { + virtual_address: range.virtual_address, + size, + }; + range + } + + /// Add a base relocation. + /// + /// `typ` must be one of the `IMAGE_REL_BASED_*` constants. + pub fn add_reloc(&mut self, mut virtual_address: u32, typ: u16) { + let reloc = U16::new(LE, typ << 12 | (virtual_address & 0xfff) as u16); + virtual_address &= !0xfff; + if let Some(block) = self.reloc_blocks.last_mut() { + if block.virtual_address == virtual_address { + self.relocs.push(reloc); + block.count += 1; + return; + } + // Blocks must have an even number of relocations. 
+ if block.count & 1 != 0 { + self.relocs.push(U16::new(LE, 0)); + block.count += 1; + } + debug_assert!(block.virtual_address < virtual_address); + } + self.relocs.push(reloc); + self.reloc_blocks.push(RelocBlock { + virtual_address, + count: 1, + }); + } + + /// Return true if a base relocation has been added. + pub fn has_relocs(&mut self) -> bool { + !self.relocs.is_empty() + } + + /// Reserve a `.reloc` section. + /// + /// This contains the base relocations that were added with `add_reloc`. + /// + /// This also sets the `pe::IMAGE_DIRECTORY_ENTRY_BASERELOC` data directory. + pub fn reserve_reloc_section(&mut self) -> SectionRange { + if let Some(block) = self.reloc_blocks.last_mut() { + // Blocks must have an even number of relocations. + if block.count & 1 != 0 { + self.relocs.push(U16::new(LE, 0)); + block.count += 1; + } + } + let size = self.reloc_blocks.iter().map(RelocBlock::size).sum(); + let range = self.reserve_section( + *b".reloc\0\0", + pe::IMAGE_SCN_CNT_INITIALIZED_DATA + | pe::IMAGE_SCN_MEM_READ + | pe::IMAGE_SCN_MEM_DISCARDABLE, + size, + size, + ); + let dir = &mut self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_BASERELOC]; + debug_assert_eq!(dir.virtual_address, 0); + *dir = DataDirectory { + virtual_address: range.virtual_address, + size, + }; + self.reloc_offset = range.file_offset; + range + } + + /// Write a `.reloc` section. + /// + /// This contains the base relocations that were added with `add_reloc`. 
+ pub fn write_reloc_section(&mut self) { + if self.reloc_offset == 0 { + return; + } + self.pad_until(self.reloc_offset); + + let mut total = 0; + for block in &self.reloc_blocks { + self.buffer.write(&pe::ImageBaseRelocation { + virtual_address: U32::new(LE, block.virtual_address), + size_of_block: U32::new(LE, block.size()), + }); + self.buffer + .write_slice(&self.relocs[total..][..block.count as usize]); + total += block.count as usize; + } + debug_assert_eq!(total, self.relocs.len()); + + self.write_align(self.file_alignment); + } + + /// Reserve the certificate table. + /// + /// This also sets the `pe::IMAGE_DIRECTORY_ENTRY_SECURITY` data directory. + // TODO: reserve individual certificates + pub fn reserve_certificate_table(&mut self, size: u32) { + let size = util::align_u32(size, 8); + let offset = self.reserve(size, 8); + let dir = &mut self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_SECURITY]; + debug_assert_eq!(dir.virtual_address, 0); + *dir = DataDirectory { + virtual_address: offset, + size, + }; + } + + /// Write the certificate table. + // TODO: write individual certificates + pub fn write_certificate_table(&mut self, data: &[u8]) { + let dir = self.data_directories[pe::IMAGE_DIRECTORY_ENTRY_SECURITY]; + self.pad_until(dir.virtual_address); + self.write(data); + self.pad_until(dir.virtual_address + dir.size); + } +} + +/// Information required for writing [`pe::ImageNtHeaders32`] or [`pe::ImageNtHeaders64`]. 
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct NtHeaders {
    // ImageFileHeader
    //
    // These values are copied into the file header by `write_nt_headers`; the
    // remaining file header fields come from the writer's own state.
    pub machine: u16,
    pub time_date_stamp: u32,
    pub characteristics: u16,
    // ImageOptionalHeader
    //
    // These values are copied into the optional header by `write_nt_headers`.
    // The `u64` fields are written in full for 64-bit files and narrowed with
    // `as u32` for 32-bit files.
    pub major_linker_version: u8,
    pub minor_linker_version: u8,
    pub address_of_entry_point: u32,
    pub image_base: u64,
    pub major_operating_system_version: u16,
    pub minor_operating_system_version: u16,
    pub major_image_version: u16,
    pub minor_image_version: u16,
    pub major_subsystem_version: u16,
    pub minor_subsystem_version: u16,
    pub subsystem: u16,
    pub dll_characteristics: u16,
    pub size_of_stack_reserve: u64,
    pub size_of_stack_commit: u64,
    pub size_of_heap_reserve: u64,
    pub size_of_heap_commit: u64,
}

/// One entry of the data directory table.
///
/// The default value (all zeros) is what `reserve_nt_headers` fills the table
/// with; entries are written out as `pe::ImageDataDirectory` by
/// `write_nt_headers`.
#[derive(Default, Clone, Copy)]
struct DataDirectory {
    // Address of the data. Most directories store the virtual address of a
    // section here; the certificate table stores a file offset instead.
    virtual_address: u32,
    // Size of the data in bytes.
    size: u32,
}

/// Information required for writing [`pe::ImageSectionHeader`].
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct Section {
    // Section name, padded with null bytes (for example `b".text\0\0\0"`).
    pub name: [u8; pe::IMAGE_SIZEOF_SHORT_NAME],
    // Bitwise OR of `pe::IMAGE_SCN_*` flags.
    pub characteristics: u32,
    // Where the section lives in the file and in memory.
    pub range: SectionRange,
}

/// The file range and virtual address range for a section.
#[allow(missing_docs)]
#[derive(Debug, Default, Clone, Copy)]
pub struct SectionRange {
    // Start of the section in memory, as returned by `reserve_virtual`.
    pub virtual_address: u32,
    // Size of the section in memory, exactly as requested by the caller.
    pub virtual_size: u32,
    // Start of the section data in the file; 0 if `file_size` is 0.
    pub file_offset: u32,
    // Size of the section data in the file, rounded up to the file alignment.
    pub file_size: u32,
}

/// A group of base relocations that share one page address.
struct RelocBlock {
    // Page address: the relocation address with the low 12 bits cleared.
    virtual_address: u32,
    // Number of 16-bit entries in the block, including any padding entry.
    count: u32,
}

impl RelocBlock {
    /// Size in bytes of the block as written to the file: the
    /// `pe::ImageBaseRelocation` header followed by `count` 16-bit entries.
    fn size(&self) -> u32 {
        mem::size_of::<pe::ImageBaseRelocation>() as u32 + self.count * mem::size_of::<u16>() as u32
    }
}
diff --git a/third_party/rust/object/src/write/string.rs b/third_party/rust/object/src/write/string.rs new file mode 100644 index 0000000000..3bdfcccaaf --- /dev/null +++ b/third_party/rust/object/src/write/string.rs @@ -0,0 +1,159 @@
use alloc::vec::Vec;

// The hasher has to be named explicitly when `std` is unavailable.
#[cfg(feature = "std")]
type IndexSet<K> = indexmap::IndexSet<K>;
#[cfg(not(feature = "std"))]
type IndexSet<K> = indexmap::IndexSet<K, hashbrown::hash_map::DefaultHashBuilder>;

/// An identifier for an entry in a string table.
///
/// This wraps the insertion index of the string within its [`StringTable`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StringId(usize);

/// A set of byte strings that is written out as a table of null-terminated
/// strings, sharing storage between a string and its suffixes.
#[derive(Debug, Default)]
pub(crate) struct StringTable<'a> {
    // The unique strings, in insertion order; the index is the `StringId`.
    strings: IndexSet<&'a [u8]>,
    // Offset of each string, indexed by `StringId`. Empty until `write` is called.
    offsets: Vec<usize>,
}

impl<'a> StringTable<'a> {
    /// Add a string to the string table.
    ///
    /// Adding a string that is already present returns the existing id.
    ///
    /// Panics if the string table has already been written, or
    /// if the string contains a null byte.
    pub fn add(&mut self, string: &'a [u8]) -> StringId {
        assert!(self.offsets.is_empty());
        assert!(!string.contains(&0));
        let id = self.strings.insert_full(string).0;
        StringId(id)
    }

    /// Return the id of the given string.
    ///
    /// Panics if the string is not in the string table.
    pub fn get_id(&self, string: &[u8]) -> StringId {
        let id = self.strings.get_index_of(string).unwrap();
        StringId(id)
    }

    /// Return the string for the given id.
    ///
    /// Panics if the string is not in the string table.
    pub fn get_string(&self, id: StringId) -> &'a [u8] {
        self.strings.get_index(id.0).unwrap()
    }

    /// Return the offset of the given string.
    ///
    /// Panics if the string table has not been written, or
    /// if the string is not in the string table.
    pub fn get_offset(&self, id: StringId) -> usize {
        self.offsets[id.0]
    }

    /// Append the string table to the given `Vec`, and
    /// calculate the list of string offsets.
    ///
    /// `base` is the initial string table offset. For example,
    /// this should be 1 for ELF, to account for the initial
    /// null byte (which must have been written by the caller).
    ///
    /// Panics if the string table has already been written.
    pub fn write(&mut self, base: usize, w: &mut Vec<u8>) {
        assert!(self.offsets.is_empty());

        // Order the ids so that a string which is a suffix of another string
        // directly follows a string that ends with it.
        let mut ids: Vec<_> = (0..self.strings.len()).collect();
        sort(&mut ids, 1, &self.strings);

        self.offsets = vec![0; ids.len()];
        // `offset` is the table offset just past the last string written.
        let mut offset = base;
        // The most recent string that was actually written out.
        let mut previous = &[][..];
        for id in ids {
            let string = self.strings.get_index(id).unwrap();
            if previous.ends_with(string) {
                // Reuse the tail of the previous string: step back over its
                // null terminator and over the shared suffix.
                self.offsets[id] = offset - string.len() - 1;
            } else {
                self.offsets[id] = offset;
                w.extend_from_slice(string);
                w.push(0);
                offset += string.len() + 1;
                previous = string;
            }
        }
    }
}

// Multi-key quicksort.
//
// Ordering is such that if a string is a suffix of at least one other string,
// then it is placed immediately after one of those strings. That is:
// - comparison starts at the end of the string
// - shorter strings come later
//
// Based on the implementation in LLVM.
// `ids` holds the string indices to order, and `pos` is the 1-based distance
// from the end of each string of the byte currently being compared.
fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) {
    loop {
        if ids.len() <= 1 {
            return;
        }

        // Three-way partition around the byte of the first string:
        // `ids[..lower]` have a greater byte, `ids[lower..upper]` an equal
        // byte, and `ids[upper..]` a smaller byte.
        let pivot = byte(ids[0], pos, strings);
        let mut lower = 0;
        let mut upper = ids.len();
        let mut i = 1;
        while i < upper {
            let b = byte(ids[i], pos, strings);
            if b > pivot {
                ids.swap(lower, i);
                lower += 1;
                i += 1;
            } else if b < pivot {
                upper -= 1;
                ids.swap(upper, i);
                // `i` is not advanced: the element swapped in is still unexamined.
            } else {
                i += 1;
            }
        }

        // The outer partitions differ from the pivot at `pos`, so they are
        // sorted independently starting from the same position.
        sort(&mut ids[..lower], pos, strings);
        sort(&mut ids[upper..], pos, strings);

        // A pivot of 0 means these strings have no byte at `pos`, so there is
        // nothing further to compare.
        if pivot == 0 {
            return;
        }
        // Continue with the equal partition at the next byte from the end.
        ids = &mut ids[lower..upper];
        pos += 1;
    }
}

// Return the byte that is `pos` bytes from the end of the string (1 is the
// last byte), or 0 if the string is shorter than `pos`.
fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 {
    let string = strings.get_index(id).unwrap();
    let len = string.len();
    if len >= pos {
        string[len - pos]
    } else {
        // We know the strings don't contain null bytes.
        0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn string_table() {
        let mut table = StringTable::default();
        let id0 = table.add(b"");
        let id1 = table.add(b"foo");
        let id2 = table.add(b"bar");
        let id3 = table.add(b"foobar");

        let mut data = Vec::new();
        data.push(0);
        table.write(1, &mut data);
        // Only "foobar" and "foo" are stored; "bar" and "" reuse their tails.
        assert_eq!(data, b"\0foobar\0foo\0");

        assert_eq!(table.get_offset(id0), 11);
        assert_eq!(table.get_offset(id1), 8);
        assert_eq!(table.get_offset(id2), 4);
        assert_eq!(table.get_offset(id3), 1);
    }
}
diff --git a/third_party/rust/object/src/write/util.rs b/third_party/rust/object/src/write/util.rs new file mode 100644 index 0000000000..51bc3515f2 --- /dev/null +++ b/third_party/rust/object/src/write/util.rs @@ -0,0 +1,210 @@
use alloc::vec::Vec;
#[cfg(feature = "std")]
use std::{io, mem};

use crate::pod::{bytes_of, bytes_of_slice, Pod};

/// Trait for writable buffer.
#[allow(clippy::len_without_is_empty)]
pub trait WritableBuffer {
    /// Returns position/offset for data to be written at.
    ///
    /// Should only be used in debug assertions
    fn len(&self) -> usize;

    /// Reserves specified number of bytes in the buffer.
    ///
    /// This will be called exactly once before writing anything to the buffer,
    /// and the given size is the exact total number of bytes that will be written.
    fn reserve(&mut self, size: usize) -> Result<(), ()>;

    /// Writes zero bytes at the end of the buffer until the buffer
    /// has the specified length.
    fn resize(&mut self, new_len: usize);

    /// Writes the specified slice of bytes at the end of the buffer.
    fn write_bytes(&mut self, val: &[u8]);

    /// Writes the specified `Pod` type at the end of the buffer.
    // The `Self: Sized` bound means this generic method is not callable
    // through `dyn WritableBuffer`, which has the inherent `write` below.
    fn write_pod<T: Pod>(&mut self, val: &T)
    where
        Self: Sized,
    {
        self.write_bytes(bytes_of(val))
    }

    /// Writes the specified `Pod` slice at the end of the buffer.
    // See `write_pod` for why this requires `Self: Sized`.
    fn write_pod_slice<T: Pod>(&mut self, val: &[T])
    where
        Self: Sized,
    {
        self.write_bytes(bytes_of_slice(val))
    }
}

// Inherent methods on the trait object, giving `dyn WritableBuffer` the same
// `Pod` helpers under the names `write` and `write_slice`.
impl<'a> dyn WritableBuffer + 'a {
    /// Writes the specified `Pod` type at the end of the buffer.
    pub fn write<T: Pod>(&mut self, val: &T) {
        self.write_bytes(bytes_of(val))
    }

    /// Writes the specified `Pod` slice at the end of the buffer.
    pub fn write_slice<T: Pod>(&mut self, val: &[T]) {
        self.write_bytes(bytes_of_slice(val))
    }
}

// In-memory buffer. The `self.len()`, `self.reserve(..)` and `self.resize(..)`
// calls below resolve to `Vec`'s inherent methods, not back to this trait.
impl WritableBuffer for Vec<u8> {
    #[inline]
    fn len(&self) -> usize {
        self.len()
    }

    #[inline]
    fn reserve(&mut self, size: usize) -> Result<(), ()> {
        debug_assert!(self.is_empty());
        self.reserve(size);
        Ok(())
    }

    #[inline]
    fn resize(&mut self, new_len: usize) {
        // Only growing is supported.
        debug_assert!(new_len >= self.len());
        self.resize(new_len, 0);
    }

    #[inline]
    fn write_bytes(&mut self, val: &[u8]) {
        // Writes are expected to stay within the capacity obtained by `reserve`.
        debug_assert!(self.len() + val.len() <= self.capacity());
        self.extend_from_slice(val)
    }
}

/// A [`WritableBuffer`] that streams data to a [`Write`](std::io::Write) implementation.
+/// +/// [`Self::result`] must be called to determine if an I/O error occurred during writing. +/// +/// It is advisable to use a buffered writer like [`BufWriter`](std::io::BufWriter) +/// instead of an unbuffered writer like [`File`](std::fs::File). +#[cfg(feature = "std")] +#[derive(Debug)] +pub struct StreamingBuffer<W> { + writer: W, + len: usize, + result: Result<(), io::Error>, +} + +#[cfg(feature = "std")] +impl<W> StreamingBuffer<W> { + /// Create a new `StreamingBuffer` backed by the given writer. + pub fn new(writer: W) -> Self { + StreamingBuffer { + writer, + len: 0, + result: Ok(()), + } + } + + /// Unwraps this [`StreamingBuffer`] giving back the original writer. + pub fn into_inner(self) -> W { + self.writer + } + + /// Returns any error that occurred during writing. + pub fn result(&mut self) -> Result<(), io::Error> { + mem::replace(&mut self.result, Ok(())) + } +} + +#[cfg(feature = "std")] +impl<W: io::Write> WritableBuffer for StreamingBuffer<W> { + #[inline] + fn len(&self) -> usize { + self.len + } + + #[inline] + fn reserve(&mut self, _size: usize) -> Result<(), ()> { + Ok(()) + } + + #[inline] + fn resize(&mut self, new_len: usize) { + debug_assert!(self.len <= new_len); + while self.len < new_len { + let write_amt = (new_len - self.len - 1) % 1024 + 1; + self.write_bytes(&[0; 1024][..write_amt]); + } + } + + #[inline] + fn write_bytes(&mut self, val: &[u8]) { + if self.result.is_ok() { + self.result = self.writer.write_all(val); + } + self.len += val.len(); + } +} + +/// A trait for mutable byte slices. +/// +/// It provides convenience methods for `Pod` types. 
+pub(crate) trait BytesMut { + fn write_at<T: Pod>(self, offset: usize, val: &T) -> Result<(), ()>; +} + +impl<'a> BytesMut for &'a mut [u8] { + #[inline] + fn write_at<T: Pod>(self, offset: usize, val: &T) -> Result<(), ()> { + let src = bytes_of(val); + let dest = self.get_mut(offset..).ok_or(())?; + let dest = dest.get_mut(..src.len()).ok_or(())?; + dest.copy_from_slice(src); + Ok(()) + } +} + +pub(crate) fn align(offset: usize, size: usize) -> usize { + (offset + (size - 1)) & !(size - 1) +} + +#[allow(dead_code)] +pub(crate) fn align_u32(offset: u32, size: u32) -> u32 { + (offset + (size - 1)) & !(size - 1) +} + +#[allow(dead_code)] +pub(crate) fn align_u64(offset: u64, size: u64) -> u64 { + (offset + (size - 1)) & !(size - 1) +} + +pub(crate) fn write_align(buffer: &mut dyn WritableBuffer, size: usize) { + let new_len = align(buffer.len(), size); + buffer.resize(new_len); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn bytes_mut() { + let data = vec![0x01, 0x23, 0x45, 0x67]; + + let mut bytes = data.clone(); + bytes.extend_from_slice(bytes_of(&u16::to_be(0x89ab))); + assert_eq!(bytes, [0x01, 0x23, 0x45, 0x67, 0x89, 0xab]); + + let mut bytes = data.clone(); + assert_eq!(bytes.write_at(0, &u16::to_be(0x89ab)), Ok(())); + assert_eq!(bytes, [0x89, 0xab, 0x45, 0x67]); + + let mut bytes = data.clone(); + assert_eq!(bytes.write_at(2, &u16::to_be(0x89ab)), Ok(())); + assert_eq!(bytes, [0x01, 0x23, 0x89, 0xab]); + + assert_eq!(bytes.write_at(3, &u16::to_be(0x89ab)), Err(())); + assert_eq!(bytes.write_at(4, &u16::to_be(0x89ab)), Err(())); + assert_eq!(vec![].write_at(0, &u32::to_be(0x89ab)), Err(())); + } +} |