//! This crate provides types for identifiers of object files, such as executables, dynamic //! libraries or debug companion files. The concept originates in Google Breakpad and defines two //! types: //! //! - [`CodeId`]: Identifies the file containing source code, i.e. the actual library or //! executable. The identifier is platform dependent and implementation defined. Thus, there is //! no canonical representation. //! - [`DebugId`]: Identifies a debug information file, which may or may not use information from //! the Code ID. The contents are also implementation defined, but as opposed to `CodeId`, the //! structure is streamlined across platforms. It is also guaranteed to be 32 bytes in size. //! //! [`CodeId`]: struct.CodeId.html //! [`DebugId`]: struct.DebugId.html #![warn(missing_docs)] use std::error; use std::fmt; use std::fmt::Write; use std::str; use uuid::{Bytes, Uuid}; /// Indicates an error parsing a [`DebugId`](struct.DebugId.html). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ParseDebugIdError; impl error::Error for ParseDebugIdError {} impl fmt::Display for ParseDebugIdError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "invalid debug identifier") } } #[derive(Clone, Copy, Debug)] struct ParseOptions { allow_hyphens: bool, require_appendix: bool, allow_tail: bool, } /// Unique identifier for debug information files and their debug information. /// /// This type is analogous to [`CodeId`], except that it identifies a debug file instead of the /// actual library or executable. One some platforms, a `DebugId` is an alias for a `CodeId` but the /// exact rules around this are complex. On Windows, the identifiers are completely different and /// refer to separate files. /// /// The string representation must be between 33 and 40 characters long and consist of: /// /// 1. 36 character hyphenated hex representation of the UUID field /// 2. 1-16 character lowercase hex representation of the u32 appendix /// /// The debug identifier is compatible to Google Breakpad. Use [`DebugId::breakpad`] to get a /// breakpad string representation of this debug identifier. /// /// There is one exception to this: for the old PDB 2.0 format the debug identifier consists /// of only a 32-bit integer + age resulting in a string representation of between 9 and 16 /// hex characters. /// /// # Example /// /// ``` /// # extern crate debugid; /// use std::str::FromStr; /// use debugid::DebugId; /// /// # fn foo() -> Result<(), ::debugid::ParseDebugIdError> { /// let id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a")?; /// assert_eq!("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a".to_string(), id.to_string()); /// # Ok(()) /// # } /// /// # fn main() { foo().unwrap() } /// ``` /// /// # In-memory representation /// /// The in-memory representation takes up 32 bytes and can be directly written to storage /// and mapped back into an object reference. /// /// ``` /// use std::str::FromStr; /// use debugid::DebugId; /// /// let debug_id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a").unwrap(); /// /// let slice = &[debug_id]; /// let ptr = slice.as_ptr() as *const u8; /// let len = std::mem::size_of_val(slice); /// let buf: &[u8] = unsafe { std::slice::from_raw_parts(ptr, len) }; /// /// let mut new_buf: Vec = Vec::new(); /// std::io::copy(&mut std::io::Cursor::new(buf), &mut new_buf).unwrap(); /// /// let ptr = new_buf.as_ptr() as *const DebugId; /// let new_debug_id = unsafe { &*ptr }; /// /// assert_eq!(*new_debug_id, debug_id); /// ``` /// /// As long the bytes were written using the same major version of this crate you will be /// able to read it again like this. /// /// [`CodeId`]: struct.CodeId.html /// [`DebugId::breakpad`]: struct.DebugId.html#method.breakpad // This needs to be backwards compatible also in its exact in-memory byte-layout since this // struct is directly mapped from disk in e.g. Symbolic SymCache formats. The first version // of this struct was defined as: // // ```rust // struct DebugId { // uuid: Uuid, // appendix: u32, // _padding: [u8; 12], // } // ``` // // For this reason the current `typ` byte represents the type of `DebugId` stored in the // `Bytes`: // // - `0u8`: The `bytes` field contains a UUID. // - `1u8`: The first 4 bytes of the `bytes` field contain a big-endian u32, the remaining // bytes are 0. #[repr(C, packed)] #[derive(Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)] pub struct DebugId { bytes: Bytes, appendix: u32, _padding: [u8; 11], typ: u8, } impl DebugId { /// Constructs an empty debug identifier, containing only zeros. pub fn nil() -> Self { Self::default() } /// Constructs a `DebugId` from its `uuid`. pub fn from_uuid(uuid: Uuid) -> Self { Self::from_parts(uuid, 0) } /// Constructs a `DebugId` from its `uuid` and `appendix` parts. pub fn from_parts(uuid: Uuid, appendix: u32) -> Self { DebugId { bytes: *uuid.as_bytes(), appendix, typ: 0, _padding: [0; 11], } } /// Constructs a `DebugId` from a Microsoft little-endian GUID and age. pub fn from_guid_age(guid: &[u8], age: u32) -> Result { if guid.len() != 16 { return Err(ParseDebugIdError); } let uuid = Uuid::from_bytes([ guid[3], guid[2], guid[1], guid[0], guid[5], guid[4], guid[7], guid[6], guid[8], guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15], ]); Ok(DebugId::from_parts(uuid, age)) } /// Constructs a `DebugId` from a PDB 2.0 timestamp and age. pub fn from_pdb20(timestamp: u32, age: u32) -> Self { // The big-endian byte-order here has to match the one used to read this number in // the DebugId::timestamp method. DebugId { bytes: [ (timestamp >> 24) as u8, (timestamp >> 16) as u8, (timestamp >> 8) as u8, timestamp as u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, ], appendix: age, _padding: [0u8; 11], typ: 1u8, } } /// Parses a breakpad identifier from a string. pub fn from_breakpad(string: &str) -> Result { let options = ParseOptions { allow_hyphens: false, require_appendix: true, allow_tail: false, }; Self::parse_str(string, options).ok_or(ParseDebugIdError) } /// Returns the UUID part of the code module's debug_identifier. /// /// If this is a debug identifier for the PDB 2.0 format an invalid UUID is returned /// where only the first 4 bytes are filled in and the remainder of the bytes are 0. /// This means the UUID has variant [`uuid::Variant::NCS`] and an unknown version, /// [`Uuid::get_version`] will return `None`, which is not a valid UUID. /// /// This may seem odd however does seem reasonable: /// /// - Every [`DebugId`] can be represented as [`Uuid`] and will still mostly look /// reasonable e.g. in comparisons etc. /// - The PDB 2.0 format is very old and very unlikely to appear practically. pub fn uuid(&self) -> Uuid { Uuid::from_bytes(self.bytes) } /// Returns the appendix part of the code module's debug identifier. /// /// On Windows, this is an incrementing counter to identify the build. /// On all other platforms, this value will always be zero. pub fn appendix(&self) -> u32 { self.appendix } /// Returns whether this identifier is nil, i.e. it consists only of zeros. pub fn is_nil(&self) -> bool { self.bytes == [0u8; 16] && self.appendix == 0 } /// Returns whether this identifier is from the PDB 2.0 format. pub fn is_pdb20(&self) -> bool { self.typ == 1 } /// Returns a wrapper which when formatted via `fmt::Display` will format a /// a breakpad identifier. pub fn breakpad(&self) -> BreakpadFormat<'_> { BreakpadFormat { inner: self } } fn parse_str(string: &str, options: ParseOptions) -> Option { let is_hyphenated = string.get(8..9) == Some("-"); if is_hyphenated && !options.allow_hyphens || !string.is_ascii() { return None; } // Can the PDB 2.0 format match? This can never be true for a valid UUID. let min_len = if is_hyphenated { 10 } else { 9 }; let max_len = if is_hyphenated { 17 } else { 16 }; if min_len <= string.len() && string.len() <= max_len { let timestamp_str = string.get(..8)?; let timestamp = u32::from_str_radix(timestamp_str, 16).ok()?; let appendix_str = match is_hyphenated { true => string.get(9..)?, false => string.get(8..)?, }; let appendix = u32::from_str_radix(appendix_str, 16).ok()?; return Some(Self::from_pdb20(timestamp, appendix)); } let uuid_len = if is_hyphenated { 36 } else { 32 }; let uuid = string.get(..uuid_len)?.parse().ok()?; if !options.require_appendix && string.len() == uuid_len { return Some(Self::from_parts(uuid, 0)); } let mut appendix_str = &string[uuid_len..]; if is_hyphenated ^ appendix_str.starts_with('-') { return None; // Require a hyphen if and only if we're hyphenated. } else if is_hyphenated { appendix_str = &appendix_str[1..]; // Skip the hyphen for parsing. } if options.allow_tail && appendix_str.len() > 8 { appendix_str = &appendix_str[..8]; } // Parse the appendix, which fails on empty strings. let appendix = u32::from_str_radix(appendix_str, 16).ok()?; Some(Self::from_parts(uuid, appendix)) } /// Returns the PDB 2.0 timestamp. /// /// Only valid if you know this is a PDB 2.0 debug identifier. fn timestamp(&self) -> u32 { u32::from_be_bytes([self.bytes[0], self.bytes[1], self.bytes[2], self.bytes[3]]) } } impl fmt::Debug for DebugId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let uuid = self.uuid(); f.debug_struct("DebugId") .field("uuid", &uuid.hyphenated().to_string()) .field("appendix", &self.appendix()) .finish() } } impl fmt::Display for DebugId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.is_pdb20() { true => { let timestamp = self.timestamp(); write!(f, "{:08X}", timestamp)?; } false => { let uuid = self.uuid(); uuid.fmt(f)?; } } if self.appendix > 0 { write!(f, "-{:x}", { self.appendix })?; } Ok(()) } } impl str::FromStr for DebugId { type Err = ParseDebugIdError; fn from_str(string: &str) -> Result { let options = ParseOptions { allow_hyphens: true, require_appendix: false, allow_tail: true, }; Self::parse_str(string, options).ok_or(ParseDebugIdError) } } impl From for DebugId { fn from(uuid: Uuid) -> Self { DebugId::from_uuid(uuid) } } impl From<(Uuid, u32)> for DebugId { fn from(tuple: (Uuid, u32)) -> Self { let (uuid, appendix) = tuple; DebugId::from_parts(uuid, appendix) } } /// Wrapper around [`DebugId`] for Breakpad formatting. /// /// **Example:** /// /// ``` /// # extern crate debugid; /// use std::str::FromStr; /// use debugid::DebugId; /// /// # fn foo() -> Result<(), debugid::ParseDebugIdError> { /// let id = DebugId::from_breakpad("DFB8E43AF2423D73A453AEB6A777EF75a")?; /// assert_eq!("DFB8E43AF2423D73A453AEB6A777EF75a".to_string(), id.breakpad().to_string()); /// # Ok(()) /// # } /// /// # fn main() { foo().unwrap() } /// ``` /// /// [`DebugId`]: struct.DebugId.html #[derive(Debug)] pub struct BreakpadFormat<'a> { inner: &'a DebugId, } impl<'a> fmt::Display for BreakpadFormat<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.inner.is_pdb20() { true => { let timestamp = self.inner.timestamp(); write!(f, "{:08X}{:x}", timestamp, self.inner.appendix()) } false => { let uuid = self.inner.uuid(); write!(f, "{:X}{:x}", uuid.simple(), self.inner.appendix()) } } } } /// Indicates an error parsing a [`CodeId`](struct.CodeId.html). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ParseCodeIdError; impl error::Error for ParseCodeIdError {} impl fmt::Display for ParseCodeIdError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "invalid code identifier") } } /// Unique platform-dependent identifier of code files. /// /// This identifier assumes a string representation that depends on the platform and compiler used. /// The representation only retains hex characters and canonically stores lower case. /// /// There are the following known formats: /// /// - **MachO UUID**: The unique identifier of a Mach binary, specified in the `LC_UUID` load /// command header. /// - **GNU Build ID**: Contents of the `.gnu.build-id` note or section contents formatted as /// lowercase hex string. /// - **PE Timestamp**: Timestamp and size of image values from a Windows PE header. The size of /// image value is truncated, so the length of the `CodeId` might not be a multiple of 2. #[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct CodeId { inner: String, } impl CodeId { /// Constructs an empty code identifier. pub fn nil() -> Self { Self::default() } /// Constructs a `CodeId` from its string representation. pub fn new(mut string: String) -> Self { string.retain(|c| c.is_ascii_hexdigit()); string.make_ascii_lowercase(); CodeId { inner: string } } /// Constructs a `CodeId` from a binary slice. pub fn from_binary(slice: &[u8]) -> Self { let mut string = String::with_capacity(slice.len() * 2); for byte in slice { write!(&mut string, "{:02x}", byte).expect(""); } Self::new(string) } /// Returns whether this identifier is nil, i.e. it is empty. pub fn is_nil(&self) -> bool { self.inner.is_empty() } /// Returns the string representation of this code identifier. pub fn as_str(&self) -> &str { self.inner.as_str() } } impl fmt::Display for CodeId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(&self.inner) } } impl fmt::Debug for CodeId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "CodeId({})", self) } } impl From for CodeId { fn from(string: String) -> Self { Self::new(string) } } impl From<&'_ str> for CodeId { fn from(string: &str) -> Self { Self::new(string.into()) } } impl AsRef for CodeId { fn as_ref(&self) -> &str { self.as_str() } } impl str::FromStr for CodeId { type Err = ParseCodeIdError; fn from_str(string: &str) -> Result { Ok(Self::new(string.into())) } } #[cfg(feature = "serde")] mod serde_support { use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor}; use serde::ser::{Serialize, Serializer}; use super::*; impl Serialize for CodeId { fn serialize(&self, serializer: S) -> Result { serializer.serialize_str(self.as_str()) } } impl<'de> Deserialize<'de> for CodeId { fn deserialize>(deserializer: D) -> Result { let string = String::deserialize(deserializer)?; Ok(CodeId::new(string)) } } impl<'de> Deserialize<'de> for DebugId { fn deserialize>(deserializer: D) -> Result { struct V; impl<'de> Visitor<'de> for V { type Value = DebugId; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("DebugId") } fn visit_str(self, value: &str) -> Result { value .parse() .map_err(|_| de::Error::invalid_value(Unexpected::Str(value), &self)) } } deserializer.deserialize_str(V) } } impl Serialize for DebugId { fn serialize(&self, serializer: S) -> Result { serializer.serialize_str(&self.to_string()) } } }