diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/object/src/write | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/object/src/write')
-rw-r--r-- | third_party/rust/object/src/write/coff.rs | 486 | ||||
-rw-r--r-- | third_party/rust/object/src/write/elf.rs | 736 | ||||
-rw-r--r-- | third_party/rust/object/src/write/macho.rs | 558 | ||||
-rw-r--r-- | third_party/rust/object/src/write/mod.rs | 674 | ||||
-rw-r--r-- | third_party/rust/object/src/write/string.rs | 140 | ||||
-rw-r--r-- | third_party/rust/object/src/write/util.rs | 14 |
6 files changed, 2608 insertions, 0 deletions
diff --git a/third_party/rust/object/src/write/coff.rs b/third_party/rust/object/src/write/coff.rs new file mode 100644 index 0000000000..5077bc7f0a --- /dev/null +++ b/third_party/rust/object/src/write/coff.rs @@ -0,0 +1,486 @@ +use crc32fast; +use scroll::ctx::SizeWith; +use scroll::IOwrite; +use std::iter; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod coff { + pub use goblin::pe::characteristic::*; + pub use goblin::pe::header::*; + pub use goblin::pe::relocation::*; + pub use goblin::pe::section_table::*; + pub use goblin::pe::symbol::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + offset: usize, + str_id: Option<StringId>, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, + aux_count: u8, +} + +impl Object { + pub(crate) fn coff_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rdata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + StandardSection::Tls => (&[], &b".tls$"[..], SectionKind::Tls), + StandardSection::UninitializedTls => { + // Unsupported section. + (&[], &[], SectionKind::UninitializedTls) + } + StandardSection::TlsVariables => { + // Unsupported section. 
+                (&[], &[], SectionKind::TlsVariables)
+            }
+        }
+    }
+
+    /// Builds a COFF subsection name by joining `section` and `value` with `$`.
+    pub(crate) fn coff_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> {
+        let mut name = section.to_vec();
+        name.push(b'$');
+        name.extend(value);
+        name
+    }
+
+    /// Rewrites `relocation` into the form the COFF writer emits and returns the
+    /// constant that was split off from its addend.
+    ///
+    /// `GotRelative` relocations are redirected to a `.refptr.` stub symbol and
+    /// `PltRelative` relocations are treated as plain `Relative` ones. The
+    /// returned constant is subtracted from `relocation.addend`, so afterwards
+    /// the addend holds only the part that the relocation type itself encodes
+    /// (`0` for non-relative kinds, `-4` through `-9` for relative ones).
+    /// NOTE(review): the caller is presumably expected to store the returned
+    /// constant in the section data as the implicit addend -- confirm at the
+    /// call site.
+    ///
+    /// # Panics
+    ///
+    /// Panics via `unimplemented!` for architectures other than `I386` and
+    /// `X86_64`.
+    pub(crate) fn coff_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 {
+        if relocation.kind == RelocationKind::GotRelative {
+            // Use a stub symbol for the relocation instead.
+            // This isn't really a GOT, but it's a similar purpose.
+            // TODO: need to handle DLL imports differently?
+            relocation.kind = RelocationKind::Relative;
+            relocation.symbol = self.coff_add_stub_symbol(relocation.symbol);
+        } else if relocation.kind == RelocationKind::PltRelative {
+            // Windows doesn't need a separate relocation type for
+            // references to functions in import libraries.
+            // For convenience, treat this the same as Relative.
+            relocation.kind = RelocationKind::Relative;
+        }
+
+        let constant = match self.architecture {
+            Architecture::I386 => match relocation.kind {
+                RelocationKind::Relative => {
+                    // IMAGE_REL_I386_REL32
+                    relocation.addend + 4
+                }
+                _ => relocation.addend,
+            },
+            Architecture::X86_64 => match relocation.kind {
+                RelocationKind::Relative => {
+                    // IMAGE_REL_AMD64_REL32 through to IMAGE_REL_AMD64_REL32_5
+                    // encode addends -4 through -9 in the relocation type
+                    // itself (see the type selection in `coff_write`), so for
+                    // those addends nothing is split off. The range check was
+                    // previously inverted and could never match.
+                    if relocation.addend <= -4 && relocation.addend >= -9 {
+                        0
+                    } else {
+                        relocation.addend + 4
+                    }
+                }
+                _ => relocation.addend,
+            },
+            _ => unimplemented!(),
+        };
+        relocation.addend -= constant;
+        constant
+    }
+
+    /// Returns the `.refptr.` stub symbol for `symbol_id`, creating it on first
+    /// use.
+    ///
+    /// A new stub is a pointer-sized `.rdata$.refptr.<name>` section carrying a
+    /// single absolute relocation against `symbol_id`; the result is cached in
+    /// `self.stub_symbols`.
+    fn coff_add_stub_symbol(&mut self, symbol_id: SymbolId) -> SymbolId {
+        if let Some(stub_id) = self.stub_symbols.get(&symbol_id) {
+            return *stub_id;
+        }
+        let stub_size = self.architecture.pointer_width().unwrap().bytes();
+
+        let mut name = b".rdata$.refptr.".to_vec();
+        name.extend(&self.symbols[symbol_id.0].name);
+        let section_id = self.add_section(Vec::new(), name, SectionKind::ReadOnlyData);
+        let section = self.section_mut(section_id);
+        section.set_data(vec![0; stub_size as usize],
u64::from(stub_size)); + section.relocations = vec![Relocation { + offset: 0, + size: stub_size * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: symbol_id, + addend: 0, + }]; + + let mut name = b".refptr.".to_vec(); + name.extend(&self.symbol(symbol_id).name); + let stub_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: u64::from(stub_size), + kind: SymbolKind::Data, + scope: SymbolScope::Compilation, + weak: false, + section: Some(section_id), + }); + self.stub_symbols.insert(symbol_id, stub_id); + + stub_id + } + + pub(crate) fn coff_write(&self) -> Result<Vec<u8>, String> { + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + let mut strtab = StringTable::default(); + + // COFF header. + let ctx = scroll::LE; + offset += coff::CoffHeader::size_with(&ctx); + + // Section headers. + offset += self.sections.len() * coff::SectionTable::size_with(&ctx); + + // Calculate size of section data and add section strings to strtab. + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + for (index, section) in self.sections.iter().enumerate() { + if section.name.len() > 8 { + section_offsets[index].str_id = Some(strtab.add(§ion.name)); + } + + let len = section.data.len(); + if len != 0 { + // TODO: not sure what alignment is required here, but this seems to match LLVM + offset = align(offset, 4); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + + // Calculate size of relocations. + let count = section.relocations.len(); + if count != 0 { + section_offsets[index].reloc_offset = offset; + offset += count * coff::Relocation::size_with(&ctx); + } + } + + // Calculate size of symbols and add symbol strings to strtab. 
+ let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut symtab_count = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + match symbol.kind { + SymbolKind::File => { + // Name goes in auxilary symbol records. + let aux_count = + (symbol.name.len() + coff::COFF_SYMBOL_SIZE - 1) / coff::COFF_SYMBOL_SIZE; + symbol_offsets[index].aux_count = aux_count as u8; + symtab_count += aux_count; + // Don't add name to strtab. + continue; + } + SymbolKind::Section => { + symbol_offsets[index].aux_count = 1; + symtab_count += 1; + } + _ => {} + } + if symbol.name.len() > 8 { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_offset = offset; + let symtab_len = symtab_count * coff::COFF_SYMBOL_SIZE; + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // First 4 bytes of strtab are the length. + strtab.write(4, &mut strtab_data); + let strtab_len = strtab_data.len() + 4; + offset += strtab_len; + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. + let header = coff::CoffHeader { + machine: match self.architecture { + Architecture::I386 => coff::COFF_MACHINE_X86, + Architecture::X86_64 => coff::COFF_MACHINE_X86_64, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }, + number_of_sections: self.sections.len() as u16, + time_date_stamp: 0, + pointer_to_symbol_table: symtab_offset as u32, + number_of_symbol_table: symtab_count as u32, + size_of_optional_header: 0, + characteristics: 0, + }; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write section headers. 
+ for (index, section) in self.sections.iter().enumerate() { + // TODO: IMAGE_SCN_LNK_COMDAT + let characteristics = match section.kind { + SectionKind::Text => { + coff::IMAGE_SCN_CNT_CODE + | coff::IMAGE_SCN_MEM_EXECUTE + | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Data => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::UninitializedData => { + coff::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_WRITE + } + SectionKind::ReadOnlyData | SectionKind::ReadOnlyString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA | coff::IMAGE_SCN_MEM_READ + } + SectionKind::Debug | SectionKind::Other | SectionKind::OtherString => { + coff::IMAGE_SCN_CNT_INITIALIZED_DATA + | coff::IMAGE_SCN_MEM_READ + | coff::IMAGE_SCN_MEM_DISCARDABLE + } + SectionKind::Linker => coff::IMAGE_SCN_LNK_INFO | coff::IMAGE_SCN_LNK_REMOVE, + SectionKind::Tls + | SectionKind::UninitializedTls + | SectionKind::TlsVariables + | SectionKind::Unknown + | SectionKind::Metadata => { + return Err(format!("unimplemented section {:?}", section.kind)) + } + }; + let align = match section.align { + 1 => coff::IMAGE_SCN_ALIGN_1BYTES, + 2 => coff::IMAGE_SCN_ALIGN_2BYTES, + 4 => coff::IMAGE_SCN_ALIGN_4BYTES, + 8 => coff::IMAGE_SCN_ALIGN_8BYTES, + 16 => coff::IMAGE_SCN_ALIGN_16BYTES, + 32 => coff::IMAGE_SCN_ALIGN_32BYTES, + 64 => coff::IMAGE_SCN_ALIGN_64BYTES, + 128 => coff::IMAGE_SCN_ALIGN_128BYTES, + 256 => coff::IMAGE_SCN_ALIGN_256BYTES, + 512 => coff::IMAGE_SCN_ALIGN_512BYTES, + 1024 => coff::IMAGE_SCN_ALIGN_1024BYTES, + 2048 => coff::IMAGE_SCN_ALIGN_2048BYTES, + 4096 => coff::IMAGE_SCN_ALIGN_4096BYTES, + 8192 => coff::IMAGE_SCN_ALIGN_8192BYTES, + _ => return Err(format!("unimplemented section align {}", section.align)), + }; + let mut coff_section = coff::SectionTable { + name: [0; 8], + real_name: None, + virtual_size: if section.data.is_empty() { + section.size as u32 + } else { + 0 + }, + virtual_address: 
0, + size_of_raw_data: section.data.len() as u32, + pointer_to_raw_data: if section.data.is_empty() { + 0 + } else { + section_offsets[index].offset as u32 + }, + pointer_to_relocations: section_offsets[index].reloc_offset as u32, + pointer_to_linenumbers: 0, + number_of_relocations: section.relocations.len() as u16, + number_of_linenumbers: 0, + characteristics: characteristics | align, + }; + if section.name.len() <= 8 { + coff_section.name[..section.name.len()].copy_from_slice(§ion.name); + } else { + let str_offset = strtab.get_offset(section_offsets[index].str_id.unwrap()); + coff_section.set_name_offset(str_offset).unwrap(); + } + buffer.iowrite_with(coff_section, ctx).unwrap(); + } + + // Write section data and relocations. + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, 4); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + + if !section.relocations.is_empty() { + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + //assert!(reloc.implicit_addend); + let typ = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size, reloc.addend) { + (RelocationKind::Absolute, 16, 0) => coff::IMAGE_REL_I386_DIR16, + (RelocationKind::Relative, 16, 0) => coff::IMAGE_REL_I386_REL16, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_I386_DIR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_I386_DIR32NB, + (RelocationKind::SectionIndex, 16, 0) => coff::IMAGE_REL_I386_SECTION, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_I386_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_I386_SECREL7, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_I386_REL32, + (RelocationKind::Coff(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.size, 
reloc.addend) { + (RelocationKind::Absolute, 64, 0) => coff::IMAGE_REL_AMD64_ADDR64, + (RelocationKind::Absolute, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32, + (RelocationKind::ImageOffset, 32, 0) => coff::IMAGE_REL_AMD64_ADDR32NB, + (RelocationKind::Relative, 32, -4) => coff::IMAGE_REL_AMD64_REL32, + (RelocationKind::Relative, 32, -5) => coff::IMAGE_REL_AMD64_REL32_1, + (RelocationKind::Relative, 32, -6) => coff::IMAGE_REL_AMD64_REL32_2, + (RelocationKind::Relative, 32, -7) => coff::IMAGE_REL_AMD64_REL32_3, + (RelocationKind::Relative, 32, -8) => coff::IMAGE_REL_AMD64_REL32_4, + (RelocationKind::Relative, 32, -9) => coff::IMAGE_REL_AMD64_REL32_5, + (RelocationKind::SectionOffset, 32, 0) => coff::IMAGE_REL_AMD64_SECREL, + (RelocationKind::SectionOffset, 7, 0) => coff::IMAGE_REL_AMD64_SECREL7, + (RelocationKind::Coff(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + buffer + .iowrite_with( + coff::Relocation { + virtual_address: reloc.offset as u32, + symbol_table_index: symbol_offsets[reloc.symbol.0].index as u32, + typ, + }, + ctx, + ) + .unwrap(); + } + } + } + + // Write symbols. + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + let mut name = &symbol.name[..]; + let mut section_number = symbol.section.map(|x| x.0 + 1).unwrap_or(0) as i16; + let typ = if symbol.kind == SymbolKind::Text { + coff::IMAGE_SYM_DTYPE_FUNCTION << coff::IMAGE_SYM_DTYPE_SHIFT + } else { + coff::IMAGE_SYM_TYPE_NULL + }; + let storage_class = match symbol.kind { + SymbolKind::File => { + // Name goes in auxilary symbol records. 
+ name = b".file"; + section_number = coff::IMAGE_SYM_DEBUG; + coff::IMAGE_SYM_CLASS_FILE + } + SymbolKind::Section => coff::IMAGE_SYM_CLASS_STATIC, + SymbolKind::Label => coff::IMAGE_SYM_CLASS_LABEL, + SymbolKind::Text | SymbolKind::Data => { + match symbol.scope { + _ if symbol.is_undefined() => coff::IMAGE_SYM_CLASS_EXTERNAL, + // TODO: does this need aux symbol records too? + _ if symbol.weak => coff::IMAGE_SYM_CLASS_WEAK_EXTERNAL, + SymbolScope::Unknown => { + return Err(format!("unimplemented symbol scope {:?}", symbol)) + } + SymbolScope::Compilation => coff::IMAGE_SYM_CLASS_STATIC, + SymbolScope::Linkage | SymbolScope::Dynamic => { + coff::IMAGE_SYM_CLASS_EXTERNAL + } + } + } + _ => return Err(format!("unimplemented symbol {:?}", symbol.kind)), + }; + let number_of_aux_symbols = symbol_offsets[index].aux_count; + let mut coff_symbol = coff::Symbol { + name: [0; 8], + value: symbol.value as u32, + section_number, + typ, + storage_class, + number_of_aux_symbols, + }; + if name.len() <= 8 { + coff_symbol.name[..name.len()].copy_from_slice(name); + } else { + let str_offset = strtab.get_offset(symbol_offsets[index].str_id.unwrap()); + coff_symbol.set_name_offset(str_offset as u32); + } + buffer.iowrite_with(coff_symbol, ctx).unwrap(); + + match symbol.kind { + SymbolKind::File => { + let aux_len = number_of_aux_symbols as usize * coff::COFF_SYMBOL_SIZE; + debug_assert!(aux_len >= symbol.name.len()); + buffer.extend(&symbol.name); + buffer.extend(iter::repeat(0).take(aux_len - symbol.name.len())); + } + SymbolKind::Section => { + debug_assert_eq!(number_of_aux_symbols, 1); + let section = &self.sections[symbol.section.unwrap().0]; + buffer + .iowrite_with( + coff::AuxSectionDefinition { + length: section.data.len() as u32, + number_of_relocations: section.relocations.len() as u16, + number_of_line_numbers: 0, + checksum: checksum(§ion.data), + number: section_number as u16, + // TODO: COMDAT + selection: 0, + unused: [0; 3], + }, + ctx, + ) + .unwrap(); + } + _ 
=> { + debug_assert_eq!(number_of_aux_symbols, 0); + } + } + } + + // Write strtab section. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.iowrite_with(strtab_len as u32, ctx).unwrap(); + buffer.extend(&strtab_data); + + Ok(buffer) + } +} + +// JamCRC +fn checksum(data: &[u8]) -> u32 { + let mut hasher = crc32fast::Hasher::new_with_initial(0xffff_ffff); + hasher.update(data); + !hasher.finalize() +} diff --git a/third_party/rust/object/src/write/elf.rs b/third_party/rust/object/src/write/elf.rs new file mode 100644 index 0000000000..e784794dc8 --- /dev/null +++ b/third_party/rust/object/src/write/elf.rs @@ -0,0 +1,736 @@ +use scroll::ctx::SizeWith; +use scroll::IOwrite; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod elf { + pub use goblin::elf::header::*; + pub use goblin::elf::program_header::*; + pub use goblin::elf::reloc::*; + pub use goblin::elf::section_header::*; + pub use goblin::elf::sym::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + str_id: Option<StringId>, + reloc_index: usize, + reloc_offset: usize, + reloc_len: usize, + reloc_str_id: Option<StringId>, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, +} + +impl Object { + pub(crate) fn elf_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&[], &b".text"[..], SectionKind::Text), + StandardSection::Data => (&[], &b".data"[..], SectionKind::Data), + StandardSection::ReadOnlyData + | StandardSection::ReadOnlyDataWithRel + | StandardSection::ReadOnlyString => (&[], &b".rodata"[..], SectionKind::ReadOnlyData), + StandardSection::UninitializedData => { + (&[], &b".bss"[..], SectionKind::UninitializedData) + } + StandardSection::Tls => (&[], &b".tdata"[..], SectionKind::Tls), + 
StandardSection::UninitializedTls => { + (&[], &b".tbss"[..], SectionKind::UninitializedTls) + } + StandardSection::TlsVariables => { + // Unsupported section. + (&[], &[], SectionKind::TlsVariables) + } + } + } + + pub(crate) fn elf_subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + let mut name = section.to_vec(); + name.push(b'.'); + name.extend(value); + name + } + + fn elf_has_relocation_addend(&self) -> Result<bool, String> { + Ok(match self.architecture { + Architecture::Arm(_) => false, + Architecture::Aarch64(_) => false, + Architecture::I386 => false, + Architecture::X86_64 => true, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }) + } + + pub(crate) fn elf_fixup_relocation( + &mut self, + mut relocation: &mut Relocation, + ) -> Result<i64, String> { + // Return true if we should use a section symbol to avoid preemption. + fn want_section_symbol(relocation: &Relocation, symbol: &Symbol) -> bool { + if symbol.scope != SymbolScope::Dynamic { + // Only dynamic symbols can be preemptible. + return false; + } + match symbol.kind { + SymbolKind::Text | SymbolKind::Data => {} + _ => return false, + } + match relocation.kind { + // Anything using GOT or PLT is preemptible. + // We also require that `Other` relocations must already be correct. + RelocationKind::Got + | RelocationKind::GotRelative + | RelocationKind::GotBaseRelative + | RelocationKind::PltRelative + | RelocationKind::Elf(_) => return false, + // Absolute relocations are preemptible for non-local data. + // TODO: not sure if this rule is exactly correct + // This rule was added to handle global data references in debuginfo. + // Maybe this should be a new relocation kind so that the caller can decide. + RelocationKind::Absolute => { + if symbol.kind == SymbolKind::Data { + return false; + } + } + _ => {} + } + true + } + + // Use section symbols for relocations where required to avoid preemption. 
+ // Otherwise, the linker will fail with: + // relocation R_X86_64_PC32 against symbol `SomeSymbolName' can not be used when + // making a shared object; recompile with -fPIC + let symbol = &self.symbols[relocation.symbol.0]; + if want_section_symbol(relocation, symbol) { + if let Some(section) = symbol.section { + relocation.addend += symbol.value as i64; + relocation.symbol = self.section_symbol(section); + } + } + + // Determine whether the addend is stored in the relocation or the data. + if self.elf_has_relocation_addend()? { + Ok(0) + } else { + let constant = relocation.addend; + relocation.addend = 0; + Ok(constant) + } + } + + pub(crate) fn elf_write(&self) -> Result<Vec<u8>, String> { + let (container, pointer_align) = match self.architecture.pointer_width().unwrap() { + PointerWidth::U16 | PointerWidth::U32 => (goblin::container::Container::Little, 4), + PointerWidth::U64 => (goblin::container::Container::Big, 8), + }; + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => goblin::container::Endian::Little, + Endianness::Big => goblin::container::Endian::Big, + }; + let ctx = goblin::container::Ctx::new(container, endian); + let is_rela = self.elf_has_relocation_addend()?; + let reloc_ctx = (is_rela, ctx); + + // Calculate offsets of everything. + let mut offset = 0; + + // ELF header. + let e_ehsize = elf::Header::size_with(&ctx); + offset += e_ehsize; + + // Create reloc section header names. + let reloc_names: Vec<_> = self + .sections + .iter() + .map(|section| { + let mut reloc_name = Vec::new(); + if !section.relocations.is_empty() { + reloc_name.extend_from_slice(if is_rela { + &b".rela"[..] + } else { + &b".rel"[..] + }); + reloc_name.extend_from_slice(§ion.name); + } + reloc_name + }) + .collect(); + + // Calculate size of section data. + let mut shstrtab = StringTable::default(); + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + // Null section. 
+ let mut e_shnum = 1; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].str_id = Some(shstrtab.add(§ion.name)); + section_offsets[index].index = e_shnum; + e_shnum += 1; + + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + + if !section.relocations.is_empty() { + section_offsets[index].reloc_str_id = Some(shstrtab.add(&reloc_names[index])); + section_offsets[index].reloc_index = e_shnum; + e_shnum += 1; + } + } + + // Calculate index of symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + // Null symbol. + let mut symtab_count = 1; + // Local symbols must come before global. + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + } + } + let symtab_count_local = symtab_count; + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + symbol_offsets[index].index = symtab_count; + symtab_count += 1; + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.kind != SymbolKind::Section { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + let symtab_str_id = shstrtab.add(&b".symtab"[..]); + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = symtab_count * elf::Sym::size_with(&ctx); + offset += symtab_len; + let symtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of symtab_shndx. 
+ let mut need_symtab_shndx = false; + for symbol in &self.symbols { + let index = symbol + .section + .map(|s| section_offsets[s.0].index) + .unwrap_or(0); + if index >= elf::SHN_LORESERVE as usize { + need_symtab_shndx = true; + break; + } + } + let symtab_shndx_offset = offset; + let mut symtab_shndx_str_id = None; + let mut symtab_shndx_len = 0; + if need_symtab_shndx { + symtab_shndx_str_id = Some(shstrtab.add(&b".symtab_shndx"[..])); + symtab_shndx_len = symtab_count * 4; + offset += symtab_shndx_len; + e_shnum += 1; + } + + // Calculate size of strtab. + let strtab_str_id = shstrtab.add(&b".strtab"[..]); + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // Null name. + strtab_data.push(0); + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + let strtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of relocations. + for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, pointer_align); + section_offsets[index].reloc_offset = offset; + let len = count * elf::Reloc::size_with(&reloc_ctx); + section_offsets[index].reloc_len = len; + offset += len; + } + } + + // Calculate size of shstrtab. + let shstrtab_str_id = shstrtab.add(&b".shstrtab"[..]); + let shstrtab_offset = offset; + let mut shstrtab_data = Vec::new(); + // Null section name. + shstrtab_data.push(0); + shstrtab.write(1, &mut shstrtab_data); + offset += shstrtab_data.len(); + let shstrtab_index = e_shnum; + e_shnum += 1; + + // Calculate size of section headers. + offset = align(offset, pointer_align); + let e_shoff = offset; + let e_shentsize = elf::SectionHeader::size_with(&ctx); + offset += e_shnum * e_shentsize; + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. 
+ let e_machine = match self.architecture { + Architecture::Arm(_) => elf::EM_ARM, + Architecture::Aarch64(_) => elf::EM_AARCH64, + Architecture::I386 => elf::EM_386, + Architecture::X86_64 => elf::EM_X86_64, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let mut header = elf::Header { + e_ident: [0; 16], + e_type: elf::ET_REL, + e_machine, + e_version: elf::EV_CURRENT.into(), + e_entry: 0, + e_phoff: 0, + e_shoff: e_shoff as u64, + e_flags: 0, + e_ehsize: e_ehsize as u16, + e_phentsize: 0, + e_phnum: 0, + e_shentsize: e_shentsize as u16, + e_shnum: if e_shnum >= elf::SHN_LORESERVE as usize { + 0 + } else { + e_shnum as u16 + }, + e_shstrndx: if shstrtab_index >= elf::SHN_LORESERVE as usize { + elf::SHN_XINDEX as u16 + } else { + shstrtab_index as u16 + }, + }; + header.e_ident[0..4].copy_from_slice(elf::ELFMAG); + header.e_ident[elf::EI_CLASS] = if container.is_big() { + elf::ELFCLASS64 + } else { + elf::ELFCLASS32 + }; + header.e_ident[elf::EI_DATA] = if endian.is_little() { + elf::ELFDATA2LSB + } else { + elf::ELFDATA2MSB + }; + header.e_ident[elf::EI_VERSION] = elf::EV_CURRENT; + header.e_ident[elf::EI_OSABI] = elf::ELFOSABI_NONE; + header.e_ident[elf::EI_ABIVERSION] = 0; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write section data. + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + } + + // Write symbols. 
+ write_align(&mut buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + buffer + .iowrite_with( + elf::Sym { + st_name: 0, + st_info: 0, + st_other: 0, + st_shndx: 0, + st_value: 0, + st_size: 0, + }, + ctx, + ) + .unwrap(); + let mut symtab_shndx = Vec::new(); + if need_symtab_shndx { + symtab_shndx.iowrite_with(0, ctx.le).unwrap(); + } + let mut write_symbol = |index: usize, symbol: &Symbol| { + let st_type = match symbol.kind { + SymbolKind::Unknown | SymbolKind::Null => elf::STT_NOTYPE, + SymbolKind::Text => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_FUNC + } + } + SymbolKind::Data => { + if symbol.is_undefined() { + elf::STT_NOTYPE + } else { + elf::STT_OBJECT + } + } + SymbolKind::Section => elf::STT_SECTION, + SymbolKind::File => elf::STT_FILE, + SymbolKind::Common => elf::STT_COMMON, + SymbolKind::Tls => elf::STT_TLS, + SymbolKind::Label => elf::STT_NOTYPE, + }; + let st_bind = if symbol.is_undefined() { + elf::STB_GLOBAL + } else if symbol.is_local() { + elf::STB_LOCAL + } else if symbol.weak { + elf::STB_WEAK + } else { + elf::STB_GLOBAL + }; + let st_other = if symbol.scope == SymbolScope::Linkage { + elf::STV_HIDDEN + } else { + elf::STV_DEFAULT + }; + let st_shndx = match symbol.kind { + SymbolKind::File => { + if need_symtab_shndx { + symtab_shndx.iowrite_with(0, ctx.le).unwrap(); + } + elf::SHN_ABS as usize + } + _ => { + let index = symbol + .section + .map(|s| section_offsets[s.0].index) + .unwrap_or(elf::SHN_UNDEF as usize); + if need_symtab_shndx { + symtab_shndx.iowrite_with(index as u32, ctx.le).unwrap(); + } + if index >= elf::SHN_LORESERVE as usize { + elf::SHN_XINDEX as usize + } else { + index + } + } + }; + let st_name = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::Sym { + st_name, + st_info: (st_bind << 4) + st_type, + st_other, + st_shndx, + st_value: symbol.value, + st_size: symbol.size, + }, + ctx, + ) + .unwrap(); 
+ }; + for (index, symbol) in self.symbols.iter().enumerate() { + if symbol.is_local() { + write_symbol(index, symbol); + } + } + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_local() { + write_symbol(index, symbol); + } + } + if need_symtab_shndx { + debug_assert_eq!(symtab_shndx_offset, buffer.len()); + debug_assert_eq!(symtab_shndx_len, symtab_shndx.len()); + buffer.extend(&symtab_shndx); + } + + // Write strtab section. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.extend(&strtab_data); + + // Write relocations. + for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(&mut buffer, pointer_align); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_type = match self.architecture { + Architecture::I386 => match (reloc.kind, reloc.size) { + (RelocationKind::Absolute, 32) => elf::R_386_32, + (RelocationKind::Relative, 32) => elf::R_386_PC32, + (RelocationKind::Got, 32) => elf::R_386_GOT32, + (RelocationKind::PltRelative, 32) => elf::R_386_PLT32, + (RelocationKind::GotBaseOffset, 32) => elf::R_386_GOTOFF, + (RelocationKind::GotBaseRelative, 32) => elf::R_386_GOTPC, + (RelocationKind::Absolute, 16) => elf::R_386_16, + (RelocationKind::Relative, 16) => elf::R_386_PC16, + (RelocationKind::Absolute, 8) => elf::R_386_8, + (RelocationKind::Relative, 8) => elf::R_386_PC8, + (RelocationKind::Elf(x), _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.size) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 64) => { + elf::R_X86_64_64 + } + (RelocationKind::Relative, _, 32) => elf::R_X86_64_PC32, + (RelocationKind::Got, _, 32) => elf::R_X86_64_GOT32, + (RelocationKind::PltRelative, _, 32) => elf::R_X86_64_PLT32, + (RelocationKind::GotRelative, _, 32) => elf::R_X86_64_GOTPCREL, + (RelocationKind::Absolute, 
RelocationEncoding::Generic, 32) => { + elf::R_X86_64_32 + } + (RelocationKind::Absolute, RelocationEncoding::X86Signed, 32) => { + elf::R_X86_64_32S + } + (RelocationKind::Absolute, _, 16) => elf::R_X86_64_16, + (RelocationKind::Relative, _, 16) => elf::R_X86_64_PC16, + (RelocationKind::Absolute, _, 8) => elf::R_X86_64_8, + (RelocationKind::Relative, _, 8) => elf::R_X86_64_PC8, + (RelocationKind::Elf(x), _, _) => x, + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let r_sym = symbol_offsets[reloc.symbol.0].index; + buffer + .iowrite_with( + elf::Reloc { + r_offset: reloc.offset, + r_addend: Some(reloc.addend), + r_sym, + r_type, + }, + reloc_ctx, + ) + .unwrap(); + } + } + } + + // Write shstrtab section. + debug_assert_eq!(shstrtab_offset, buffer.len()); + buffer.extend(&shstrtab_data); + + // Write section headers. + write_align(&mut buffer, pointer_align); + debug_assert_eq!(e_shoff, buffer.len()); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: 0, + sh_type: 0, + sh_flags: 0, + sh_addr: 0, + sh_offset: 0, + sh_size: if e_shnum >= elf::SHN_LORESERVE as usize { + e_shnum as u64 + } else { + 0 + }, + sh_link: if shstrtab_index >= elf::SHN_LORESERVE as usize { + shstrtab_index as u32 + } else { + 0 + }, + // TODO: e_phnum overflow + sh_info: 0, + sh_addralign: 0, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + for (index, section) in self.sections.iter().enumerate() { + let sh_type = match section.kind { + SectionKind::UninitializedData | SectionKind::UninitializedTls => elf::SHT_NOBITS, + _ => elf::SHT_PROGBITS, + }; + let sh_flags = match section.kind { + SectionKind::Text => elf::SHF_ALLOC | elf::SHF_EXECINSTR, + SectionKind::Data => elf::SHF_ALLOC | elf::SHF_WRITE, + SectionKind::Tls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::UninitializedData => elf::SHF_ALLOC | elf::SHF_WRITE, + 
SectionKind::UninitializedTls => elf::SHF_ALLOC | elf::SHF_WRITE | elf::SHF_TLS, + SectionKind::ReadOnlyData => elf::SHF_ALLOC, + SectionKind::ReadOnlyString => elf::SHF_ALLOC | elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::OtherString => elf::SHF_STRINGS | elf::SHF_MERGE, + SectionKind::Other + | SectionKind::Debug + | SectionKind::Unknown + | SectionKind::Metadata + | SectionKind::Linker => 0, + SectionKind::TlsVariables => { + return Err(format!("unimplemented section {:?}", section.kind)) + } + }; + // TODO: not sure if this is correct, maybe user should determine this + let sh_entsize = match section.kind { + SectionKind::ReadOnlyString | SectionKind::OtherString => 1, + _ => 0, + }; + let sh_name = section_offsets[index] + .str_id + .map(|id| shstrtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name, + sh_type, + sh_flags: sh_flags.into(), + sh_addr: 0, + sh_offset: section_offsets[index].offset as u64, + sh_size: section.size, + sh_link: 0, + sh_info: 0, + sh_addralign: section.align, + sh_entsize, + }, + ctx, + ) + .unwrap(); + + if !section.relocations.is_empty() { + let sh_name = section_offsets[index] + .reloc_str_id + .map(|id| shstrtab.get_offset(id)) + .unwrap_or(0); + buffer + .iowrite_with( + elf::SectionHeader { + sh_name, + sh_type: if is_rela { elf::SHT_RELA } else { elf::SHT_REL }, + sh_flags: elf::SHF_INFO_LINK.into(), + sh_addr: 0, + sh_offset: section_offsets[index].reloc_offset as u64, + sh_size: section_offsets[index].reloc_len as u64, + sh_link: symtab_index as u32, + sh_info: section_offsets[index].index as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Reloc::size_with(&reloc_ctx) as u64, + }, + ctx, + ) + .unwrap(); + } + } + + // Write symtab section header. 
+ buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(symtab_str_id), + sh_type: elf::SHT_SYMTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: symtab_offset as u64, + sh_size: symtab_len as u64, + sh_link: strtab_index as u32, + sh_info: symtab_count_local as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Sym::size_with(&ctx) as u64, + }, + ctx, + ) + .unwrap(); + + // Write symtab_shndx section header. + if need_symtab_shndx { + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(symtab_shndx_str_id.unwrap()), + sh_type: elf::SHT_SYMTAB_SHNDX, + sh_flags: 0, + sh_addr: 0, + sh_offset: symtab_shndx_offset as u64, + sh_size: symtab_shndx_len as u64, + sh_link: strtab_index as u32, + sh_info: symtab_count_local as u32, + sh_addralign: pointer_align as u64, + sh_entsize: elf::Sym::size_with(&ctx) as u64, + }, + ctx, + ) + .unwrap(); + } + + // Write strtab section header. + buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(strtab_str_id), + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: strtab_offset as u64, + sh_size: strtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + + // Write shstrtab section header. 
+ buffer + .iowrite_with( + elf::SectionHeader { + sh_name: shstrtab.get_offset(shstrtab_str_id), + sh_type: elf::SHT_STRTAB, + sh_flags: 0, + sh_addr: 0, + sh_offset: shstrtab_offset as u64, + sh_size: shstrtab_data.len() as u64, + sh_link: 0, + sh_info: 0, + sh_addralign: 1, + sh_entsize: 0, + }, + ctx, + ) + .unwrap(); + + Ok(buffer) + } +} diff --git a/third_party/rust/object/src/write/macho.rs b/third_party/rust/object/src/write/macho.rs new file mode 100644 index 0000000000..356a44b34d --- /dev/null +++ b/third_party/rust/object/src/write/macho.rs @@ -0,0 +1,558 @@ +use scroll::ctx::SizeWith; +use scroll::{IOwrite, Pwrite}; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::write::string::*; +use crate::write::util::*; +use crate::write::*; + +mod mach { + pub use goblin::mach::constants::cputype::*; + pub use goblin::mach::constants::*; + pub use goblin::mach::header::*; + pub use goblin::mach::load_command::*; + pub use goblin::mach::relocation::*; + pub use goblin::mach::segment::*; + pub use goblin::mach::symbols::*; +} + +#[derive(Default, Clone, Copy)] +struct SectionOffsets { + index: usize, + offset: usize, + address: u64, + reloc_offset: usize, +} + +#[derive(Default, Clone, Copy)] +struct SymbolOffsets { + index: usize, + str_id: Option<StringId>, +} + +impl Object { + pub(crate) fn macho_segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match segment { + StandardSegment::Text => &b"__TEXT"[..], + StandardSegment::Data => &b"__DATA"[..], + StandardSegment::Debug => &b"__DWARF"[..], + } + } + + pub(crate) fn macho_section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match section { + StandardSection::Text => (&b"__TEXT"[..], &b"__text"[..], SectionKind::Text), + StandardSection::Data => (&b"__DATA"[..], &b"__data"[..], SectionKind::Data), + StandardSection::ReadOnlyData => { + (&b"__TEXT"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + 
StandardSection::ReadOnlyDataWithRel => { + (&b"__DATA"[..], &b"__const"[..], SectionKind::ReadOnlyData) + } + StandardSection::ReadOnlyString => ( + &b"__TEXT"[..], + &b"__cstring"[..], + SectionKind::ReadOnlyString, + ), + StandardSection::UninitializedData => ( + &b"__DATA"[..], + &b"__bss"[..], + SectionKind::UninitializedData, + ), + StandardSection::Tls => (&b"__DATA"[..], &b"__thread_data"[..], SectionKind::Tls), + StandardSection::UninitializedTls => ( + &b"__DATA"[..], + &b"__thread_bss"[..], + SectionKind::UninitializedTls, + ), + StandardSection::TlsVariables => ( + &b"__DATA"[..], + &b"__thread_vars"[..], + SectionKind::TlsVariables, + ), + } + } + + fn macho_tlv_bootstrap(&mut self) -> SymbolId { + match self.tlv_bootstrap { + Some(id) => id, + None => { + let id = self.add_symbol(Symbol { + name: b"_tlv_bootstrap".to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Dynamic, + weak: false, + section: None, + }); + self.tlv_bootstrap = Some(id); + id + } + } + } + + /// Create the `__thread_vars` entry for a TLS variable. + /// + /// The symbol given by `symbol_id` will be updated to point to this entry. + /// + /// A new `SymbolId` will be returned. The caller must update this symbol + /// to point to the initializer. + /// + /// If `symbol_id` is not for a TLS variable, then it is returned unchanged. + pub(crate) fn macho_add_thread_var(&mut self, symbol_id: SymbolId) -> SymbolId { + let symbol = self.symbol_mut(symbol_id); + if symbol.kind != SymbolKind::Tls { + return symbol_id; + } + + // Create the initializer symbol. + let mut name = symbol.name.clone(); + name.extend(b"$tlv$init"); + let init_symbol_id = self.add_raw_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Tls, + scope: SymbolScope::Compilation, + weak: false, + section: None, + }); + + // Add the tlv entry. 
+ // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer - used when mapped by the runtime + // - pointer to symbol initializer + let section = self.section_id(StandardSection::TlsVariables); + let pointer_width = self.architecture.pointer_width().unwrap().bytes(); + let size = u64::from(pointer_width) * 3; + let data = vec![0; size as usize]; + let offset = self.append_section_data(section, &data, u64::from(pointer_width)); + + let tlv_bootstrap = self.macho_tlv_bootstrap(); + self.add_relocation( + section, + Relocation { + offset: offset, + size: pointer_width * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: tlv_bootstrap, + addend: 0, + }, + ) + .unwrap(); + self.add_relocation( + section, + Relocation { + offset: offset + u64::from(pointer_width) * 2, + size: pointer_width * 8, + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + symbol: init_symbol_id, + addend: 0, + }, + ) + .unwrap(); + + // Update the symbol to point to the tlv. 
+ let symbol = self.symbol_mut(symbol_id); + symbol.value = offset; + symbol.size = size; + symbol.section = Some(section); + + init_symbol_id + } + + pub(crate) fn macho_fixup_relocation(&mut self, mut relocation: &mut Relocation) -> i64 { + let constant = match relocation.kind { + RelocationKind::Relative + | RelocationKind::GotRelative + | RelocationKind::PltRelative => relocation.addend + 4, + _ => relocation.addend, + }; + relocation.addend -= constant; + constant + } + + pub(crate) fn macho_write(&self) -> Result<Vec<u8>, String> { + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => goblin::container::Endian::Little, + Endianness::Big => goblin::container::Endian::Big, + }; + let (container, pointer_align) = match self.architecture.pointer_width().unwrap() { + PointerWidth::U16 | PointerWidth::U32 => (goblin::container::Container::Little, 4), + PointerWidth::U64 => (goblin::container::Container::Big, 8), + }; + let ctx = goblin::container::Ctx::new(container, endian); + + // Calculate offsets of everything, and build strtab. + let mut offset = 0; + + // Calculate size of Mach-O header. + offset += mach::Header::size_with(&ctx); + + // Calculate size of commands. + let mut ncmds = 0; + let command_offset = offset; + + // Calculate size of segment command and section headers. + let segment_command_offset = offset; + let segment_command_len = + mach::Segment::size_with(&ctx) + self.sections.len() * mach::Section::size_with(&ctx); + offset += segment_command_len; + ncmds += 1; + + // Calculate size of symtab command. + let symtab_command_offset = offset; + let symtab_command_len = mach::SymtabCommand::size_with(&ctx.le); + offset += symtab_command_len; + ncmds += 1; + + let sizeofcmds = offset - command_offset; + + // Calculate size of section data. 
+ let segment_data_offset = offset; + let mut section_offsets = vec![SectionOffsets::default(); self.sections.len()]; + let mut address = 0; + for (index, section) in self.sections.iter().enumerate() { + section_offsets[index].index = 1 + index; + let len = section.data.len(); + if len != 0 { + offset = align(offset, section.align as usize); + section_offsets[index].offset = offset; + offset += len; + } else { + section_offsets[index].offset = offset; + } + address = align_u64(address, section.align); + section_offsets[index].address = address; + address += section.size; + } + let segment_data_size = offset - segment_data_offset; + + // Count symbols and add symbol strings to strtab. + let mut strtab = StringTable::default(); + let mut symbol_offsets = vec![SymbolOffsets::default(); self.symbols.len()]; + let mut nsyms = 0; + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_undefined() { + match symbol.kind { + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => {} + SymbolKind::File | SymbolKind::Section => continue, + _ => return Err(format!("unimplemented symbol {:?}", symbol)), + } + } + symbol_offsets[index].index = nsyms; + nsyms += 1; + if !symbol.name.is_empty() { + symbol_offsets[index].str_id = Some(strtab.add(&symbol.name)); + } + } + + // Calculate size of symtab. + offset = align(offset, pointer_align); + let symtab_offset = offset; + let symtab_len = nsyms * mach::Nlist::size_with(&ctx); + offset += symtab_len; + + // Calculate size of strtab. + let strtab_offset = offset; + let mut strtab_data = Vec::new(); + // Null name. + strtab_data.push(0); + strtab.write(1, &mut strtab_data); + offset += strtab_data.len(); + + // Calculate size of relocations. 
+ for (index, section) in self.sections.iter().enumerate() { + let count = section.relocations.len(); + if count != 0 { + offset = align(offset, 4); + section_offsets[index].reloc_offset = offset; + let len = count * mach::RelocationInfo::size_with(&ctx.le); + offset += len; + } + } + + // Start writing. + let mut buffer = Vec::with_capacity(offset); + + // Write file header. + let (cputype, cpusubtype) = match self.architecture { + Architecture::Arm(_) => (mach::CPU_TYPE_ARM, mach::CPU_SUBTYPE_ARM_ALL), + Architecture::Aarch64(_) => (mach::CPU_TYPE_ARM64, mach::CPU_SUBTYPE_ARM64_ALL), + Architecture::I386 => (mach::CPU_TYPE_I386, mach::CPU_SUBTYPE_I386_ALL), + Architecture::X86_64 => (mach::CPU_TYPE_X86_64, mach::CPU_SUBTYPE_X86_64_ALL), + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + + let header = mach::Header { + magic: if ctx.is_big() { + mach::MH_MAGIC_64 + } else { + mach::MH_MAGIC + }, + cputype, + cpusubtype, + filetype: mach::MH_OBJECT, + ncmds, + sizeofcmds: sizeofcmds as u32, + flags: if self.subsection_via_symbols { + mach::MH_SUBSECTIONS_VIA_SYMBOLS + } else { + 0 + }, + reserved: 0, + }; + buffer.iowrite_with(header, ctx).unwrap(); + + // Write segment command. 
+ debug_assert_eq!(segment_command_offset, buffer.len()); + let mut segment_command = mach::Segment::new(ctx, &[]); + segment_command.cmd = if ctx.is_big() { + mach::LC_SEGMENT_64 + } else { + mach::LC_SEGMENT + }; + segment_command.cmdsize = segment_command_len as u32; + segment_command.segname = [0; 16]; + segment_command.vmaddr = 0; + segment_command.vmsize = address; + segment_command.fileoff = segment_data_offset as u64; + segment_command.filesize = segment_data_size as u64; + segment_command.maxprot = mach::VM_PROT_READ | mach::VM_PROT_WRITE | mach::VM_PROT_EXECUTE; + segment_command.initprot = mach::VM_PROT_READ | mach::VM_PROT_WRITE | mach::VM_PROT_EXECUTE; + segment_command.nsects = self.sections.len() as u32; + segment_command.flags = 0; + buffer.iowrite_with(segment_command, ctx).unwrap(); + + // Write section headers. + for (index, section) in self.sections.iter().enumerate() { + let mut sectname = [0; 16]; + sectname.pwrite(&*section.name, 0).unwrap(); + let mut segname = [0; 16]; + segname.pwrite(&*section.segment, 0).unwrap(); + let flags = match section.kind { + SectionKind::Text => { + mach::S_ATTR_PURE_INSTRUCTIONS | mach::S_ATTR_SOME_INSTRUCTIONS + } + SectionKind::Data => 0, + SectionKind::ReadOnlyData => 0, + SectionKind::ReadOnlyString => mach::S_CSTRING_LITERALS, + SectionKind::UninitializedData => mach::S_ZEROFILL, + SectionKind::Tls => mach::S_THREAD_LOCAL_REGULAR, + SectionKind::UninitializedTls => mach::S_THREAD_LOCAL_ZEROFILL, + SectionKind::TlsVariables => mach::S_THREAD_LOCAL_VARIABLES, + SectionKind::Debug => mach::S_ATTR_DEBUG, + SectionKind::OtherString => mach::S_CSTRING_LITERALS, + SectionKind::Other + | SectionKind::Unknown + | SectionKind::Linker + | SectionKind::Metadata => 0, + }; + buffer + .iowrite_with( + mach::Section { + sectname, + segname, + addr: section_offsets[index].address, + size: section.size, + offset: section_offsets[index].offset as u32, + align: section.align.trailing_zeros(), + reloff: 
section_offsets[index].reloc_offset as u32, + nreloc: section.relocations.len() as u32, + flags, + }, + ctx, + ) + .unwrap(); + } + + // Write symtab command. + debug_assert_eq!(symtab_command_offset, buffer.len()); + buffer + .iowrite_with( + mach::SymtabCommand { + cmd: mach::LC_SYMTAB, + cmdsize: symtab_command_len as u32, + symoff: symtab_offset as u32, + nsyms: nsyms as u32, + stroff: strtab_offset as u32, + strsize: strtab_data.len() as u32, + }, + ctx.le, + ) + .unwrap(); + + // Write section data. + debug_assert_eq!(segment_data_offset, buffer.len()); + for (index, section) in self.sections.iter().enumerate() { + let len = section.data.len(); + if len != 0 { + write_align(&mut buffer, section.align as usize); + debug_assert_eq!(section_offsets[index].offset, buffer.len()); + buffer.extend(§ion.data); + } + } + + // Write symtab. + write_align(&mut buffer, pointer_align); + debug_assert_eq!(symtab_offset, buffer.len()); + for (index, symbol) in self.symbols.iter().enumerate() { + if !symbol.is_undefined() { + match symbol.kind { + SymbolKind::Text | SymbolKind::Data | SymbolKind::Tls => {} + SymbolKind::File | SymbolKind::Section => continue, + _ => return Err(format!("unimplemented symbol {:?}", symbol)), + } + } + // TODO: N_STAB + // TODO: N_ABS + let mut n_type = if symbol.is_undefined() { + mach::N_UNDF | mach::N_EXT + } else { + mach::N_SECT + }; + match symbol.scope { + SymbolScope::Unknown | SymbolScope::Compilation => {} + SymbolScope::Linkage => { + n_type |= mach::N_EXT | mach::N_PEXT; + } + SymbolScope::Dynamic => { + n_type |= mach::N_EXT; + } + } + + let mut n_desc = 0; + if symbol.weak { + if symbol.is_undefined() { + n_desc |= mach::N_WEAK_REF; + } else { + n_desc |= mach::N_WEAK_DEF; + } + } + + let n_value = match symbol.section { + Some(section) => section_offsets[section.0].address + symbol.value, + None => symbol.value, + }; + + let n_strx = symbol_offsets[index] + .str_id + .map(|id| strtab.get_offset(id)) + .unwrap_or(0); + + buffer + 
.iowrite_with( + mach::Nlist { + n_strx, + n_type, + n_sect: symbol.section.map(|x| x.0 + 1).unwrap_or(0), + n_desc, + n_value, + }, + ctx, + ) + .unwrap(); + } + + // Write strtab. + debug_assert_eq!(strtab_offset, buffer.len()); + buffer.extend(&strtab_data); + + // Write relocations. + for (index, section) in self.sections.iter().enumerate() { + if !section.relocations.is_empty() { + write_align(&mut buffer, 4); + debug_assert_eq!(section_offsets[index].reloc_offset, buffer.len()); + for reloc in §ion.relocations { + let r_extern; + let r_symbolnum; + let symbol = &self.symbols[reloc.symbol.0]; + if symbol.kind == SymbolKind::Section { + r_symbolnum = section_offsets[symbol.section.unwrap().0].index as u32; + r_extern = 0; + } else { + r_symbolnum = symbol_offsets[reloc.symbol.0].index as u32; + r_extern = 1; + } + let r_length = match reloc.size { + 8 => 0, + 16 => 1, + 32 => 2, + 64 => 3, + _ => return Err(format!("unimplemented reloc size {:?}", reloc)), + }; + let (r_pcrel, r_type) = match self.architecture { + Architecture::I386 => match reloc.kind { + RelocationKind::Absolute => (0, mach::GENERIC_RELOC_VANILLA), + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + Architecture::X86_64 => match (reloc.kind, reloc.encoding, reloc.addend) { + (RelocationKind::Absolute, RelocationEncoding::Generic, 0) => { + (0, mach::X86_64_RELOC_UNSIGNED) + } + (RelocationKind::Relative, RelocationEncoding::Generic, -4) => { + (1, mach::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86RipRelative, -4) => { + (1, mach::X86_64_RELOC_SIGNED) + } + (RelocationKind::Relative, RelocationEncoding::X86Branch, -4) => { + (1, mach::X86_64_RELOC_BRANCH) + } + (RelocationKind::PltRelative, RelocationEncoding::X86Branch, -4) => { + (1, mach::X86_64_RELOC_BRANCH) + } + (RelocationKind::GotRelative, RelocationEncoding::Generic, -4) => { + (1, mach::X86_64_RELOC_GOT) + } + ( + RelocationKind::GotRelative, + 
RelocationEncoding::X86RipRelativeMovq, + -4, + ) => (1, mach::X86_64_RELOC_GOT_LOAD), + (RelocationKind::MachO { value, relative }, _, _) => { + (u32::from(relative), value) + } + _ => return Err(format!("unimplemented relocation {:?}", reloc)), + }, + _ => { + return Err(format!( + "unimplemented architecture {:?}", + self.architecture + )) + } + }; + let r_info = r_symbolnum + | r_pcrel << 24 + | r_length << 25 + | r_extern << 27 + | u32::from(r_type) << 28; + buffer + .iowrite_with( + mach::RelocationInfo { + r_address: reloc.offset as i32, + r_info, + }, + ctx.le, + ) + .unwrap(); + } + } + } + + Ok(buffer) + } +} diff --git a/third_party/rust/object/src/write/mod.rs b/third_party/rust/object/src/write/mod.rs new file mode 100644 index 0000000000..79ffc16599 --- /dev/null +++ b/third_party/rust/object/src/write/mod.rs @@ -0,0 +1,674 @@ +//! Interface for writing object files. + +#![allow(clippy::collapsible_if)] +#![allow(clippy::cognitive_complexity)] +#![allow(clippy::module_inception)] + +use scroll::Pwrite; +use std::collections::HashMap; +use std::string::String; + +use crate::alloc::vec::Vec; +use crate::target_lexicon::{Architecture, BinaryFormat, Endianness, PointerWidth}; +use crate::{RelocationEncoding, RelocationKind, SectionKind, SymbolKind, SymbolScope}; + +mod coff; +mod elf; +mod macho; +mod string; +mod util; + +/// A writable object file. +#[derive(Debug)] +pub struct Object { + format: BinaryFormat, + architecture: Architecture, + sections: Vec<Section>, + standard_sections: HashMap<StandardSection, SectionId>, + symbols: Vec<Symbol>, + symbol_map: HashMap<Vec<u8>, SymbolId>, + stub_symbols: HashMap<SymbolId, SymbolId>, + subsection_via_symbols: bool, + /// The symbol name mangling scheme. + pub mangling: Mangling, + /// Mach-O "_tlv_bootstrap" symbol. + tlv_bootstrap: Option<SymbolId>, +} + +impl Object { + /// Create an empty object file. 
+ pub fn new(format: BinaryFormat, architecture: Architecture) -> Object { + Object { + format, + architecture, + sections: Vec::new(), + standard_sections: HashMap::new(), + symbols: Vec::new(), + symbol_map: HashMap::new(), + stub_symbols: HashMap::new(), + subsection_via_symbols: false, + mangling: Mangling::default(format, architecture), + tlv_bootstrap: None, + } + } + + /// Return the file format. + #[inline] + pub fn format(&self) -> BinaryFormat { + self.format + } + + /// Return the architecture. + #[inline] + pub fn architecture(&self) -> Architecture { + self.architecture + } + + /// Return the current mangling setting. + #[inline] + pub fn mangling(&self) -> Mangling { + self.mangling + } + + /// Return the current mangling setting. + #[inline] + pub fn set_mangling(&mut self, mangling: Mangling) { + self.mangling = mangling; + } + + /// Return the name for a standard segment. + /// + /// This will vary based on the file format. + pub fn segment_name(&self, segment: StandardSegment) -> &'static [u8] { + match self.format { + BinaryFormat::Elf | BinaryFormat::Coff => &[], + BinaryFormat::Macho => self.macho_segment_name(segment), + _ => unimplemented!(), + } + } + + /// Get the section with the given `SectionId`. + #[inline] + pub fn section(&self, section: SectionId) -> &Section { + &self.sections[section.0] + } + + /// Mutably get the section with the given `SectionId`. + #[inline] + pub fn section_mut(&mut self, section: SectionId) -> &mut Section { + &mut self.sections[section.0] + } + + /// Append data to an existing section. Returns the section offset of the data. + pub fn append_section_data(&mut self, section: SectionId, data: &[u8], align: u64) -> u64 { + self.sections[section.0].append_data(data, align) + } + + /// Append zero-initialized data to an existing section. Returns the section offset of the data. 
+ pub fn append_section_bss(&mut self, section: SectionId, size: u64, align: u64) -> u64 { + self.sections[section.0].append_bss(size, align) + } + + /// Return the `SectionId` of a standard section. + /// + /// If the section doesn't already exist then it is created. + pub fn section_id(&mut self, section: StandardSection) -> SectionId { + self.standard_sections + .get(§ion) + .cloned() + .unwrap_or_else(|| { + let (segment, name, kind) = self.section_info(section); + self.add_section(segment.to_vec(), name.to_vec(), kind) + }) + } + + /// Add a new section and return its `SectionId`. + /// + /// This also creates a section symbol. + pub fn add_section(&mut self, segment: Vec<u8>, name: Vec<u8>, kind: SectionKind) -> SectionId { + let id = SectionId(self.sections.len()); + self.sections.push(Section { + segment, + name, + kind, + size: 0, + align: 1, + data: Vec::new(), + relocations: Vec::new(), + symbol: None, + }); + + // Add to self.standard_sections if required. This may match multiple standard sections. + let section = &self.sections[id.0]; + for standard_section in StandardSection::all() { + if !self.standard_sections.contains_key(standard_section) { + let (segment, name, kind) = self.section_info(*standard_section); + if segment == &*section.segment && name == &*section.name && kind == section.kind { + self.standard_sections.insert(*standard_section, id); + } + } + } + + id + } + + fn section_info( + &self, + section: StandardSection, + ) -> (&'static [u8], &'static [u8], SectionKind) { + match self.format { + BinaryFormat::Elf => self.elf_section_info(section), + BinaryFormat::Coff => self.coff_section_info(section), + BinaryFormat::Macho => self.macho_section_info(section), + _ => unimplemented!(), + } + } + + /// Add a subsection. Returns the `SectionId` and section offset of the data. 
+ pub fn add_subsection( + &mut self, + section: StandardSection, + name: &[u8], + data: &[u8], + align: u64, + ) -> (SectionId, u64) { + let section_id = if self.has_subsection_via_symbols() { + self.subsection_via_symbols = true; + self.section_id(section) + } else { + let (segment, name, kind) = self.subsection_info(section, name); + self.add_section(segment.to_vec(), name, kind) + }; + let offset = self.append_section_data(section_id, data, align); + (section_id, offset) + } + + fn has_subsection_via_symbols(&self) -> bool { + match self.format { + BinaryFormat::Elf | BinaryFormat::Coff => false, + BinaryFormat::Macho => true, + _ => unimplemented!(), + } + } + + fn subsection_info( + &self, + section: StandardSection, + value: &[u8], + ) -> (&'static [u8], Vec<u8>, SectionKind) { + let (segment, section, kind) = self.section_info(section); + let name = self.subsection_name(section, value); + (segment, name, kind) + } + + fn subsection_name(&self, section: &[u8], value: &[u8]) -> Vec<u8> { + debug_assert!(!self.has_subsection_via_symbols()); + match self.format { + BinaryFormat::Elf => self.elf_subsection_name(section, value), + BinaryFormat::Coff => self.coff_subsection_name(section, value), + _ => unimplemented!(), + } + } + + /// Get the `SymbolId` of the symbol with the given name. + pub fn symbol_id(&self, name: &[u8]) -> Option<SymbolId> { + self.symbol_map.get(name).cloned() + } + + /// Get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol(&self, symbol: SymbolId) -> &Symbol { + &self.symbols[symbol.0] + } + + /// Mutably get the symbol with the given `SymbolId`. + #[inline] + pub fn symbol_mut(&mut self, symbol: SymbolId) -> &mut Symbol { + &mut self.symbols[symbol.0] + } + + /// Add a new symbol and return its `SymbolId`. + pub fn add_symbol(&mut self, mut symbol: Symbol) -> SymbolId { + // Defined symbols must have a scope. 
+ debug_assert!(symbol.is_undefined() || symbol.scope != SymbolScope::Unknown); + if symbol.kind == SymbolKind::Section { + return self.section_symbol(symbol.section.unwrap()); + } + if !symbol.name.is_empty() + && (symbol.kind == SymbolKind::Text + || symbol.kind == SymbolKind::Data + || symbol.kind == SymbolKind::Tls) + { + let unmangled_name = symbol.name.clone(); + if let Some(prefix) = self.mangling.global_prefix() { + symbol.name.insert(0, prefix); + } + let symbol_id = self.add_raw_symbol(symbol); + self.symbol_map.insert(unmangled_name, symbol_id); + symbol_id + } else { + self.add_raw_symbol(symbol) + } + } + + fn add_raw_symbol(&mut self, symbol: Symbol) -> SymbolId { + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(symbol); + symbol_id + } + + /// Return true if the file format supports `StandardSection::UninitializedTls`. + pub fn has_uninitialized_tls(&self) -> bool { + self.format != BinaryFormat::Coff + } + + /// Add a new file symbol and return its `SymbolId`. + pub fn add_file_symbol(&mut self, name: Vec<u8>) -> SymbolId { + self.add_raw_symbol(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::File, + scope: SymbolScope::Compilation, + weak: false, + section: None, + }) + } + + /// Get the symbol for a section. + pub fn section_symbol(&mut self, section_id: SectionId) -> SymbolId { + let section = &mut self.sections[section_id.0]; + if let Some(symbol) = section.symbol { + return symbol; + } + let name = if self.format == BinaryFormat::Coff { + section.name.clone() + } else { + Vec::new() + }; + let symbol_id = SymbolId(self.symbols.len()); + self.symbols.push(Symbol { + name, + value: 0, + size: 0, + kind: SymbolKind::Section, + scope: SymbolScope::Compilation, + weak: false, + section: Some(section_id), + }); + section.symbol = Some(symbol_id); + symbol_id + } + + /// Append data to an existing section, and update a symbol to refer to it. 
+ /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the added data via the `__thread_vars` entry. + /// + /// Returns the section offset of the data. + pub fn add_symbol_data( + &mut self, + symbol_id: SymbolId, + section: SectionId, + data: &[u8], + align: u64, + ) -> u64 { + let offset = self.append_section_data(section, data, align); + self.set_symbol_data(symbol_id, section, offset, data.len() as u64); + offset + } + + /// Append zero-initialized data to an existing section, and update a symbol to refer to it. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the added data via the `__thread_vars` entry. + /// + /// Returns the section offset of the data. + pub fn add_symbol_bss( + &mut self, + symbol_id: SymbolId, + section: SectionId, + size: u64, + align: u64, + ) -> u64 { + let offset = self.append_section_bss(section, size, align); + self.set_symbol_data(symbol_id, section, offset, size); + offset + } + + /// Update a symbol to refer to the given data within a section. + /// + /// For Mach-O, this also creates a `__thread_vars` entry for TLS symbols, and the + /// symbol will indirectly point to the data via the `__thread_vars` entry. + pub fn set_symbol_data( + &mut self, + mut symbol_id: SymbolId, + section: SectionId, + offset: u64, + size: u64, + ) { + // Defined symbols must have a scope. + debug_assert!(self.symbol(symbol_id).scope != SymbolScope::Unknown); + if self.format == BinaryFormat::Macho { + symbol_id = self.macho_add_thread_var(symbol_id); + } + let symbol = self.symbol_mut(symbol_id); + symbol.value = offset; + symbol.size = size; + symbol.section = Some(section); + } + + /// Convert a symbol to a section symbol and offset. + /// + /// Returns an error if the symbol is not defined. 
+ pub fn symbol_section_and_offset( + &mut self, + symbol_id: SymbolId, + ) -> Result<(SymbolId, u64), ()> { + let symbol = self.symbol(symbol_id); + if symbol.kind == SymbolKind::Section { + return Ok((symbol_id, 0)); + } + let symbol_offset = symbol.value; + let section = symbol.section.ok_or(())?; + let section_symbol = self.section_symbol(section); + Ok((section_symbol, symbol_offset)) + } + + /// Add a relocation to a section. + /// + /// Relocations must only be added after the referenced symbols have been added + /// and defined (if applicable). + pub fn add_relocation( + &mut self, + section: SectionId, + mut relocation: Relocation, + ) -> Result<(), String> { + let addend = match self.format { + BinaryFormat::Elf => self.elf_fixup_relocation(&mut relocation)?, + BinaryFormat::Coff => self.coff_fixup_relocation(&mut relocation), + BinaryFormat::Macho => self.macho_fixup_relocation(&mut relocation), + _ => unimplemented!(), + }; + if addend != 0 { + self.write_relocation_addend(section, &relocation, addend)?; + } + self.sections[section.0].relocations.push(relocation); + Ok(()) + } + + fn write_relocation_addend( + &mut self, + section: SectionId, + relocation: &Relocation, + addend: i64, + ) -> Result<(), String> { + let endian = match self.architecture.endianness().unwrap() { + Endianness::Little => scroll::LE, + Endianness::Big => scroll::BE, + }; + + let data = &mut self.sections[section.0].data; + if relocation.offset + (u64::from(relocation.size) + 7) / 8 > data.len() as u64 { + return Err(format!( + "invalid relocation offset {}+{} (max {})", + relocation.offset, + relocation.size, + data.len() + )); + } + match relocation.size { + 32 => { + data.pwrite_with(addend as i32, relocation.offset as usize, endian) + .unwrap(); + } + 64 => { + data.pwrite_with(addend, relocation.offset as usize, endian) + .unwrap(); + } + _ => return Err(format!("unimplemented relocation addend {:?}", relocation)), + } + Ok(()) + } + + /// Write the object to a `Vec`. 
pub fn write(&self) -> Result<Vec<u8>, String> {
    // Dispatch to the writer for the object's binary format.
    match self.format {
        BinaryFormat::Macho => self.macho_write(),
        BinaryFormat::Coff => self.coff_write(),
        BinaryFormat::Elf => self.elf_write(),
        _ => unimplemented!(),
    }
}
} // closes the enclosing impl block

/// A standard segment kind.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StandardSegment {
    Text,
    Data,
    Debug,
}

/// A standard section kind.
///
/// Use `StandardSection::kind` to obtain the corresponding `SectionKind`.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum StandardSection {
    Text,
    Data,
    ReadOnlyData,
    ReadOnlyDataWithRel,
    ReadOnlyString,
    UninitializedData,
    Tls,
    /// Zero-fill TLS initializers. Unsupported for COFF.
    UninitializedTls,
    /// TLS variable structures. Only supported for Mach-O.
    TlsVariables,
}

impl StandardSection {
    /// Return the section kind of a standard section.
    pub fn kind(self) -> SectionKind {
        match self {
            StandardSection::Text => SectionKind::Text,
            StandardSection::Data => SectionKind::Data,
            // Both read-only data variants map to the same section kind.
            StandardSection::ReadOnlyData => SectionKind::ReadOnlyData,
            StandardSection::ReadOnlyDataWithRel => SectionKind::ReadOnlyData,
            StandardSection::ReadOnlyString => SectionKind::ReadOnlyString,
            StandardSection::UninitializedData => SectionKind::UninitializedData,
            StandardSection::Tls => SectionKind::Tls,
            StandardSection::UninitializedTls => SectionKind::UninitializedTls,
            StandardSection::TlsVariables => SectionKind::TlsVariables,
        }
    }

    /// Return every standard section, in declaration order.
    fn all() -> &'static [StandardSection] {
        static ALL: [StandardSection; 9] = [
            StandardSection::Text,
            StandardSection::Data,
            StandardSection::ReadOnlyData,
            StandardSection::ReadOnlyDataWithRel,
            StandardSection::ReadOnlyString,
            StandardSection::UninitializedData,
            StandardSection::Tls,
            StandardSection::UninitializedTls,
            StandardSection::TlsVariables,
        ];
        &ALL
    }
}

/// An identifier used to reference a section.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionId(usize); + +/// A section in an object file. +#[derive(Debug)] +pub struct Section { + segment: Vec<u8>, + name: Vec<u8>, + kind: SectionKind, + size: u64, + align: u64, + data: Vec<u8>, + relocations: Vec<Relocation>, + symbol: Option<SymbolId>, +} + +impl Section { + /// Return true if this section contains uninitialized data. + #[inline] + pub fn is_bss(&self) -> bool { + self.kind == SectionKind::UninitializedData || self.kind == SectionKind::UninitializedTls + } + + /// Set the data for a section. + /// + /// Must not be called for sections that already have data, or that contain uninitialized data. + pub fn set_data(&mut self, data: Vec<u8>, align: u64) { + debug_assert!(!self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + debug_assert!(self.data.is_empty()); + self.size = data.len() as u64; + self.data = data; + self.align = align; + } + + /// Append data to a section. + /// + /// Must not be called for sections that contain uninitialized data. + pub fn append_data(&mut self, data: &[u8], align: u64) -> u64 { + debug_assert!(!self.is_bss()); + debug_assert_eq!(align & (align - 1), 0); + if self.align < align { + self.align = align; + } + let align = align as usize; + let mut offset = self.data.len(); + if offset & (align - 1) != 0 { + offset += align - (offset & (align - 1)); + self.data.resize(offset, 0); + } + self.data.extend(data); + self.size = self.data.len() as u64; + offset as u64 + } + + /// Append unitialized data to a section. + /// + /// Must not be called for sections that contain initialized data. 
pub fn append_bss(&mut self, size: u64, align: u64) -> u64 {
    debug_assert!(self.is_bss());
    // An alignment of 0 is treated as 1 (no alignment). Without this, `align - 1`
    // underflows: a panic in debug builds, and in release builds a wrapped mask
    // that resets `self.size` to 0.
    let align = align.max(1);
    debug_assert!(align.is_power_of_two());
    if self.align < align {
        self.align = align;
    }
    let mut offset = self.size;
    let misalignment = offset & (align - 1);
    if misalignment != 0 {
        // Round the current size up to the next multiple of `align`.
        offset += align - misalignment;
        self.size = offset;
    }
    self.size += size;
    offset
}
} // closes `impl Section`

/// An identifier used to reference a symbol.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SymbolId(usize);

/// A symbol in an object file.
#[derive(Debug)]
pub struct Symbol {
    /// The name of the symbol.
    pub name: Vec<u8>,
    /// The value of the symbol.
    ///
    /// If the symbol is defined in a section, then this is the section offset of the symbol.
    pub value: u64,
    /// The size of the symbol.
    pub size: u64,
    /// The kind of the symbol.
    pub kind: SymbolKind,
    /// The scope of the symbol.
    pub scope: SymbolScope,
    /// Whether the symbol has weak binding.
    pub weak: bool,
    /// The section containing the symbol.
    ///
    /// Set to `None` for undefined symbols.
    pub section: Option<SectionId>,
}

impl Symbol {
    /// Return true if the symbol is undefined.
    #[inline]
    pub fn is_undefined(&self) -> bool {
        self.section.is_none()
    }

    /// Return true if the symbol scope is local.
    #[inline]
    pub fn is_local(&self) -> bool {
        self.scope == SymbolScope::Compilation
    }
}

/// A relocation in an object file.
#[derive(Debug)]
pub struct Relocation {
    /// The section offset of the place of the relocation.
    pub offset: u64,
    /// The size in bits of the place of relocation.
    pub size: u8,
    /// The operation used to calculate the result of the relocation.
    pub kind: RelocationKind,
    /// Information about how the result of the relocation operation is encoded in the place.
    pub encoding: RelocationEncoding,
    /// The symbol referred to by the relocation.
    ///
    /// This may be a section symbol.
    pub symbol: SymbolId,
    /// The addend to use in the relocation calculation.
    ///
    /// This may be in addition to an implicit addend stored at the place of the relocation.
    pub addend: i64,
}

/// The symbol name mangling scheme.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mangling {
    /// No symbol mangling.
    None,
    /// Windows COFF symbol mangling.
    Coff,
    /// Windows COFF i386 symbol mangling.
    CoffI386,
    /// ELF symbol mangling.
    Elf,
    /// Mach-O symbol mangling.
    Macho,
}

impl Mangling {
    /// Return the default symbol mangling for the given format and architecture.
    pub fn default(format: BinaryFormat, architecture: Architecture) -> Self {
        match (format, architecture) {
            // The more specific COFF/i386 case must come before the general COFF case.
            (BinaryFormat::Coff, Architecture::I386) => Mangling::CoffI386,
            (BinaryFormat::Coff, _) => Mangling::Coff,
            (BinaryFormat::Elf, _) => Mangling::Elf,
            (BinaryFormat::Macho, _) => Mangling::Macho,
            _ => Mangling::None,
        }
    }

    /// Return the prefix to use for global symbols.
    ///
    /// Returns `None` if the mangling scheme does not add a prefix.
    pub fn global_prefix(self) -> Option<u8> {
        match self {
            Mangling::None | Mangling::Elf | Mangling::Coff => None,
            Mangling::CoffI386 | Mangling::Macho => Some(b'_'),
        }
    }
}

// diff --git a/third_party/rust/object/src/write/string.rs b/third_party/rust/object/src/write/string.rs
// new file mode 100644
// index 0000000000..6a7ac31fe6
// --- /dev/null
// +++ b/third_party/rust/object/src/write/string.rs
// @@ -0,0 +1,140 @@
use indexmap::IndexSet;

use crate::alloc::vec::Vec;

/// An identifier for a string added to a `StringTable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct StringId(usize);

/// A table of unique strings, and their offsets once the table has been written.
#[derive(Debug, Default)]
pub(crate) struct StringTable<'a> {
    // The strings in insertion order; a `StringId` is an index into this set.
    strings: IndexSet<&'a [u8]>,
    // Offset of each string, indexed like `strings`. Empty until the table is written.
    offsets: Vec<usize>,
}

impl<'a> StringTable<'a> {
    /// Add a string to the string table.
    ///
    /// Panics if the string table has already been written, or
    /// if the string contains a null byte.
+ pub fn add(&mut self, string: &'a [u8]) -> StringId { + assert!(self.offsets.is_empty()); + assert!(!string.contains(&0)); + let id = self.strings.insert_full(string).0; + StringId(id) + } + + /// Return the offset of the given string. + /// + /// Panics if the string table has not been written, or + /// if the string is not in the string table. + pub fn get_offset(&self, id: StringId) -> usize { + self.offsets[id.0] + } + + /// Append the string table to the given `Vec`, and + /// calculate the list of string offsets. + /// + /// `base` is the initial string table offset. For example, + /// this should be 1 for ELF, to account for the initial + /// null byte (which must have been written by the caller). + pub fn write(&mut self, base: usize, w: &mut Vec<u8>) { + assert!(self.offsets.is_empty()); + + let mut ids: Vec<_> = (0..self.strings.len()).collect(); + sort(&mut ids, 1, &self.strings); + + self.offsets = vec![0; ids.len()]; + let mut offset = base; + let mut previous = &[][..]; + for id in ids { + let string = self.strings.get_index(id).unwrap(); + if previous.ends_with(string) { + self.offsets[id] = offset - string.len() - 1; + } else { + self.offsets[id] = offset; + w.extend_from_slice(string); + w.push(0); + offset += string.len() + 1; + previous = string; + } + } + } +} + +// Multi-key quicksort. +// +// Ordering is such that if a string is a suffix of at least one other string, +// then it is placed immediately after one of those strings. That is: +// - comparison starts at the end of the string +// - shorter strings come later +// +// Based on the implementation in LLVM. 
+fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) { + loop { + if ids.len() <= 1 { + return; + } + + let pivot = byte(ids[0], pos, strings); + let mut lower = 0; + let mut upper = ids.len(); + let mut i = 1; + while i < upper { + let b = byte(ids[i], pos, strings); + if b > pivot { + ids.swap(lower, i); + lower += 1; + i += 1; + } else if b < pivot { + upper -= 1; + ids.swap(upper, i); + } else { + i += 1; + } + } + + sort(&mut ids[..lower], pos, strings); + sort(&mut ids[upper..], pos, strings); + + if pivot == 0 { + return; + } + ids = &mut ids[lower..upper]; + pos += 1; + } +} + +fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 { + let string = strings.get_index(id).unwrap(); + let len = string.len(); + if len >= pos { + string[len - pos] + } else { + // We know the strings don't contain null bytes. + 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn string_table() { + let mut table = StringTable::default(); + let id0 = table.add(b""); + let id1 = table.add(b"foo"); + let id2 = table.add(b"bar"); + let id3 = table.add(b"foobar"); + + let mut data = Vec::new(); + data.push(0); + table.write(1, &mut data); + assert_eq!(data, b"\0foobar\0foo\0"); + + assert_eq!(table.get_offset(id0), 11); + assert_eq!(table.get_offset(id1), 8); + assert_eq!(table.get_offset(id2), 4); + assert_eq!(table.get_offset(id3), 1); + } +} diff --git a/third_party/rust/object/src/write/util.rs b/third_party/rust/object/src/write/util.rs new file mode 100644 index 0000000000..0ac65424af --- /dev/null +++ b/third_party/rust/object/src/write/util.rs @@ -0,0 +1,14 @@ +use crate::alloc::vec::Vec; + +pub(crate) fn align(offset: usize, size: usize) -> usize { + (offset + (size - 1)) & !(size - 1) +} + +pub(crate) fn align_u64(offset: u64, size: u64) -> u64 { + (offset + (size - 1)) & !(size - 1) +} + +pub(crate) fn write_align(buffer: &mut Vec<u8>, size: usize) { + let offset = align(buffer.len(), size); + buffer.resize(offset, 0); +} |