use std::{ borrow::Cow, collections::HashSet, fmt, path::{Path, PathBuf}, }; use gimli::{EndianSlice, Reader}; use object::{write::Object as WritableObject, FileKind, Object, ObjectSection}; use tracing::{debug, trace}; use crate::{ error::Result, ext::EndianityExt, index::Bucketable, package::{dwo_identifier_of_unit, DwarfObject, InProgressDwarfPackage}, relocate::{add_relocations, Relocate, RelocationMap}, }; mod error; mod ext; mod index; mod package; mod relocate; mod strings; pub use crate::error::Error; /// `Session` is expected to be implemented by users of `thorin`, allowing users of `thorin` to /// decide how to manage data, rather than `thorin` having arenas internally. pub trait Session { /// Returns a reference to `data`'s contents with lifetime `'session`. fn alloc_data<'session>(&'session self, data: Vec) -> &'session [u8]; /// Returns a reference to `data`'s contents with lifetime `'input`. /// /// If `Cow` is borrowed, then return the contained reference (`'input`). If `Cow` is owned, /// then calls `alloc_data` to return a reference of lifetime `'session`, which is guaranteed /// to be longer than `'input`, so can be returned. fn alloc_owned_cow<'input, 'session: 'input>( &'session self, data: Cow<'input, [u8]>, ) -> &'input [u8] { match data { Cow::Borrowed(data) => data, Cow::Owned(data) => self.alloc_data(data), } } /// Returns a reference to `relocation` with lifetime `'session`. fn alloc_relocation<'session>(&'session self, data: Relocations) -> &'session Relocations; /// Returns a reference to contents of file at `path` with lifetime `'session`. fn read_input<'session>(&'session self, path: &Path) -> std::io::Result<&'session [u8]>; } /// Should missing DWARF objects referenced by executables be skipped or result in an error? /// /// Referenced objects that are still missing when the DWARF package is finished will result in /// an error. #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)] pub enum MissingReferencedObjectBehaviour { /// Skip missing referenced DWARF objects - useful if this is expected, i.e. the path in the /// executable is wrong, but the referenced object will be found because it is an input. Skip, /// Error when encountering missing referenced DWARF objects. Error, } impl MissingReferencedObjectBehaviour { /// Should missing referenced objects be skipped? pub fn skip_missing(&self) -> bool { match *self { MissingReferencedObjectBehaviour::Skip => true, MissingReferencedObjectBehaviour::Error => false, } } } /// Builder for DWARF packages, add input objects/packages with `add_input_object` or input objects /// referenced by an executable with `add_executable` before accessing the completed object with /// `finish`. pub struct DwarfPackage<'output, 'session: 'output, Sess: Session> { sess: &'session Sess, maybe_in_progress: Option>, targets: HashSet, } impl<'output, 'session: 'output, Sess> fmt::Debug for DwarfPackage<'output, 'session, Sess> where Sess: Session, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("DwarfPackage") .field("in_progress", &self.maybe_in_progress) .field("target_count", &self.targets.len()) .finish() } } impl<'output, 'session: 'output, Sess> DwarfPackage<'output, 'session, Sess> where Sess: Session, { /// Create a new `DwarfPackage` with the provided `Session` implementation. pub fn new(sess: &'session Sess) -> Self { Self { sess, maybe_in_progress: None, targets: HashSet::new() } } /// Add an input object to the in-progress package. #[tracing::instrument(level = "trace", skip(obj))] fn process_input_object<'input>(&mut self, obj: &'input object::File<'input>) -> Result<()> { if self.maybe_in_progress.is_none() { self.maybe_in_progress = Some(InProgressDwarfPackage::new(obj.architecture(), obj.endianness())); } let encoding = if let Some(section) = obj.section_by_name(".debug_info.dwo") { let data = section.compressed_data()?.decompress()?; let data_ref = self.sess.alloc_owned_cow(data); let debug_info = gimli::DebugInfo::new(data_ref, obj.endianness().as_runtime_endian()); debug_info .units() .next() .map_err(Error::ParseUnitHeader)? .map(|root_header| root_header.encoding()) .ok_or(Error::NoCompilationUnits)? } else { debug!("no `.debug_info.dwo` in input dwarf object"); return Ok(()); }; let sess = self.sess; self.maybe_in_progress .as_mut() .expect("`process_input_object` is broken") .add_input_object(sess, obj, encoding) } /// Add input objects referenced by executable to the DWARF package. #[tracing::instrument(level = "trace")] pub fn add_executable( &mut self, path: &Path, missing_behaviour: MissingReferencedObjectBehaviour, ) -> Result<()> { let data = self.sess.read_input(path).map_err(Error::ReadInput)?; let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?; let mut load_section = |id: gimli::SectionId| -> Result<_> { let mut relocations = RelocationMap::default(); let data = match obj.section_by_name(&id.name()) { Some(ref section) => { add_relocations(&mut relocations, &obj, section)?; section.compressed_data()?.decompress()? } // Use a non-zero capacity so that `ReaderOffsetId`s are unique. None => Cow::Owned(Vec::with_capacity(1)), }; let data_ref = self.sess.alloc_owned_cow(data); let reader = EndianSlice::new(data_ref, obj.endianness().as_runtime_endian()); let section = reader; let relocations = self.sess.alloc_relocation(relocations); Ok(Relocate { relocations, section, reader }) }; let dwarf = gimli::Dwarf::load(&mut load_section)?; let mut iter = dwarf.units(); while let Some(header) = iter.next().map_err(Error::ParseUnitHeader)? { let unit = dwarf.unit(header).map_err(Error::ParseUnit)?; let target = match dwo_identifier_of_unit(&dwarf.debug_abbrev, &unit.header)? { Some(target) => target, None => { debug!("no target"); continue; } }; let dwo_name = { let mut cursor = unit.header.entries(&unit.abbreviations); cursor.next_dfs()?; let root = cursor.current().expect("unit w/out root debugging information entry"); let dwo_name = if let Some(val) = root.attr_value(gimli::DW_AT_dwo_name)? { // DWARF 5 val } else if let Some(val) = root.attr_value(gimli::DW_AT_GNU_dwo_name)? { // GNU Extension val } else { return Err(Error::MissingDwoName(target.index())); }; dwarf.attr_string(&unit, dwo_name)?.to_string()?.into_owned() }; // Prepend the compilation directory if it exists. let mut path = if let Some(comp_dir) = &unit.comp_dir { PathBuf::from(comp_dir.to_string()?.into_owned()) } else { PathBuf::new() }; path.push(dwo_name); // Only add `DwoId`s to the targets, not `DebugTypeSignature`s. There doesn't // appear to be a "skeleton type unit" to find the corresponding unit of (there are // normal type units in an executable, but should we expect to find a corresponding // split type unit for those?). if matches!(target, DwarfObject::Compilation(_)) { // Input objects are processed first, if a DWARF object referenced by this // executable was already found then don't add it to the target and try to add it // again. if let Some(package) = &self.maybe_in_progress { if package.contained_units().contains(&target) { continue; } } debug!(?target, "adding target"); self.targets.insert(target); } match self.add_input_object(&path) { Ok(()) => (), Err(Error::ReadInput(..)) if missing_behaviour.skip_missing() => (), Err(e) => return Err(e), } } Ok(()) } /// Add an input object to the DWARF package. /// /// Input object must be an archive or an elf object. #[tracing::instrument(level = "trace")] pub fn add_input_object(&mut self, path: &Path) -> Result<()> { let data = self.sess.read_input(&path).map_err(Error::ReadInput)?; let kind = FileKind::parse(data).map_err(Error::ParseFileKind)?; trace!(?kind); match kind { FileKind::Archive => { let archive = object::read::archive::ArchiveFile::parse(data) .map_err(Error::ParseArchiveFile)?; for member in archive.members() { let member = member.map_err(Error::ParseArchiveMember)?; let data = member.data(data)?; let kind = if let Ok(kind) = FileKind::parse(data) { kind } else { trace!("skipping non-elf archive member"); continue; }; trace!(?kind, "archive member"); match kind { FileKind::Elf32 | FileKind::Elf64 => { let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?; self.process_input_object(&obj)?; } _ => { trace!("skipping non-elf archive member"); } } } Ok(()) } FileKind::Elf32 | FileKind::Elf64 => { let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?; self.process_input_object(&obj) } _ => Err(Error::InvalidInputKind), } } /// Returns the `object::write::Object` containing the created DWARF package. /// /// Returns an `Error::MissingReferencedUnit` if DWARF objects referenced by executables were /// not subsequently found. /// Returns an `Error::NoOutputObjectCreated` if no input objects or executables were provided. #[tracing::instrument(level = "trace")] pub fn finish(self) -> Result> { match self.maybe_in_progress { Some(package) => { if let Some(missing) = self.targets.difference(package.contained_units()).next() { return Err(Error::MissingReferencedUnit(missing.index())); } package.finish() } None if !self.targets.is_empty() => { let first_missing_unit = self .targets .iter() .next() .copied() .expect("non-empty map doesn't have first element"); Err(Error::MissingReferencedUnit(first_missing_unit.index())) } None => Err(Error::NoOutputObjectCreated), } } }