summaryrefslogtreecommitdiffstats
path: root/third_party/rust/debugid/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/debugid/src/lib.rs')
-rw-r--r--third_party/rust/debugid/src/lib.rs543
1 files changed, 543 insertions, 0 deletions
diff --git a/third_party/rust/debugid/src/lib.rs b/third_party/rust/debugid/src/lib.rs
new file mode 100644
index 0000000000..7022051a41
--- /dev/null
+++ b/third_party/rust/debugid/src/lib.rs
@@ -0,0 +1,543 @@
+//! This crate provides types for identifiers of object files, such as executables, dynamic
+//! libraries or debug companion files. The concept originates in Google Breakpad and defines two
+//! types:
+//!
+//! - [`CodeId`]: Identifies the file containing source code, i.e. the actual library or
+//! executable. The identifier is platform dependent and implementation defined. Thus, there is
+//! no canonical representation.
+//! - [`DebugId`]: Identifies a debug information file, which may or may not use information from
+//! the Code ID. The contents are also implementation defined, but as opposed to `CodeId`, the
+//! structure is streamlined across platforms. It is also guaranteed to be 32 bytes in size.
+//!
+//! [`CodeId`]: struct.CodeId.html
+//! [`DebugId`]: struct.DebugId.html
+
+#![warn(missing_docs)]
+
+use std::error;
+use std::fmt;
+use std::fmt::Write;
+use std::str;
+
+use uuid::{Bytes, Uuid};
+
+/// Indicates an error parsing a [`DebugId`](struct.DebugId.html).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct ParseDebugIdError;
+
+impl error::Error for ParseDebugIdError {}
+
+impl fmt::Display for ParseDebugIdError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "invalid debug identifier")
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+struct ParseOptions {
+ allow_hyphens: bool,
+ require_appendix: bool,
+ allow_tail: bool,
+}
+
+/// Unique identifier for debug information files and their debug information.
+///
+/// This type is analogous to [`CodeId`], except that it identifies a debug file instead of the
+/// actual library or executable. One some platforms, a `DebugId` is an alias for a `CodeId` but the
+/// exact rules around this are complex. On Windows, the identifiers are completely different and
+/// refer to separate files.
+///
+/// The string representation must be between 33 and 40 characters long and consist of:
+///
+/// 1. 36 character hyphenated hex representation of the UUID field
+/// 2. 1-16 character lowercase hex representation of the u32 appendix
+///
+/// The debug identifier is compatible to Google Breakpad. Use [`DebugId::breakpad`] to get a
+/// breakpad string representation of this debug identifier.
+///
+/// There is one exception to this: for the old PDB 2.0 format the debug identifier consists
+/// of only a 32-bit integer + age resulting in a string representation of between 9 and 16
+/// hex characters.
+///
+/// # Example
+///
+/// ```
+/// # extern crate debugid;
+/// use std::str::FromStr;
+/// use debugid::DebugId;
+///
+/// # fn foo() -> Result<(), ::debugid::ParseDebugIdError> {
+/// let id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a")?;
+/// assert_eq!("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a".to_string(), id.to_string());
+/// # Ok(())
+/// # }
+///
+/// # fn main() { foo().unwrap() }
+/// ```
+///
+/// # In-memory representation
+///
+/// The in-memory representation takes up 32 bytes and can be directly written to storage
+/// and mapped back into an object reference.
+///
+/// ```
+/// use std::str::FromStr;
+/// use debugid::DebugId;
+///
+/// let debug_id = DebugId::from_str("dfb8e43a-f242-3d73-a453-aeb6a777ef75-a").unwrap();
+///
+/// let slice = &[debug_id];
+/// let ptr = slice.as_ptr() as *const u8;
+/// let len = std::mem::size_of_val(slice);
+/// let buf: &[u8] = unsafe { std::slice::from_raw_parts(ptr, len) };
+///
+/// let mut new_buf: Vec<u8> = Vec::new();
+/// std::io::copy(&mut std::io::Cursor::new(buf), &mut new_buf).unwrap();
+///
+/// let ptr = new_buf.as_ptr() as *const DebugId;
+/// let new_debug_id = unsafe { &*ptr };
+///
+/// assert_eq!(*new_debug_id, debug_id);
+/// ```
+///
+/// As long the bytes were written using the same major version of this crate you will be
+/// able to read it again like this.
+///
+/// [`CodeId`]: struct.CodeId.html
+/// [`DebugId::breakpad`]: struct.DebugId.html#method.breakpad
+// This needs to be backwards compatible also in its exact in-memory byte-layout since this
+// struct is directly mapped from disk in e.g. Symbolic SymCache formats. The first version
+// of this struct was defined as:
+//
+// ```rust
+// struct DebugId {
+// uuid: Uuid,
+// appendix: u32,
+// _padding: [u8; 12],
+// }
+// ```
+//
+// For this reason the current `typ` byte represents the type of `DebugId` stored in the
+// `Bytes`:
+//
+// - `0u8`: The `bytes` field contains a UUID.
+// - `1u8`: The first 4 bytes of the `bytes` field contain a big-endian u32, the remaining
+// bytes are 0.
+#[repr(C, packed)]
+#[derive(Default, Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
+pub struct DebugId {
+ bytes: Bytes,
+ appendix: u32,
+ _padding: [u8; 11],
+ typ: u8,
+}
+
+impl DebugId {
+ /// Constructs an empty debug identifier, containing only zeros.
+ pub fn nil() -> Self {
+ Self::default()
+ }
+
+ /// Constructs a `DebugId` from its `uuid`.
+ pub fn from_uuid(uuid: Uuid) -> Self {
+ Self::from_parts(uuid, 0)
+ }
+
+ /// Constructs a `DebugId` from its `uuid` and `appendix` parts.
+ pub fn from_parts(uuid: Uuid, appendix: u32) -> Self {
+ DebugId {
+ bytes: *uuid.as_bytes(),
+ appendix,
+ typ: 0,
+ _padding: [0; 11],
+ }
+ }
+
+ /// Constructs a `DebugId` from a Microsoft little-endian GUID and age.
+ pub fn from_guid_age(guid: &[u8], age: u32) -> Result<Self, ParseDebugIdError> {
+ if guid.len() != 16 {
+ return Err(ParseDebugIdError);
+ }
+
+ let uuid = Uuid::from_bytes([
+ guid[3], guid[2], guid[1], guid[0], guid[5], guid[4], guid[7], guid[6], guid[8],
+ guid[9], guid[10], guid[11], guid[12], guid[13], guid[14], guid[15],
+ ]);
+
+ Ok(DebugId::from_parts(uuid, age))
+ }
+
+ /// Constructs a `DebugId` from a PDB 2.0 timestamp and age.
+ pub fn from_pdb20(timestamp: u32, age: u32) -> Self {
+ // The big-endian byte-order here has to match the one used to read this number in
+ // the DebugId::timestamp method.
+ DebugId {
+ bytes: [
+ (timestamp >> 24) as u8,
+ (timestamp >> 16) as u8,
+ (timestamp >> 8) as u8,
+ timestamp as u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ 0u8,
+ ],
+ appendix: age,
+ _padding: [0u8; 11],
+ typ: 1u8,
+ }
+ }
+
+ /// Parses a breakpad identifier from a string.
+ pub fn from_breakpad(string: &str) -> Result<Self, ParseDebugIdError> {
+ let options = ParseOptions {
+ allow_hyphens: false,
+ require_appendix: true,
+ allow_tail: false,
+ };
+ Self::parse_str(string, options).ok_or(ParseDebugIdError)
+ }
+
+ /// Returns the UUID part of the code module's debug_identifier.
+ ///
+ /// If this is a debug identifier for the PDB 2.0 format an invalid UUID is returned
+ /// where only the first 4 bytes are filled in and the remainder of the bytes are 0.
+ /// This means the UUID has variant [`uuid::Variant::NCS`] and an unknown version,
+ /// [`Uuid::get_version`] will return `None`, which is not a valid UUID.
+ ///
+ /// This may seem odd however does seem reasonable:
+ ///
+ /// - Every [`DebugId`] can be represented as [`Uuid`] and will still mostly look
+ /// reasonable e.g. in comparisons etc.
+ /// - The PDB 2.0 format is very old and very unlikely to appear practically.
+ pub fn uuid(&self) -> Uuid {
+ Uuid::from_bytes(self.bytes)
+ }
+
+ /// Returns the appendix part of the code module's debug identifier.
+ ///
+ /// On Windows, this is an incrementing counter to identify the build.
+ /// On all other platforms, this value will always be zero.
+ pub fn appendix(&self) -> u32 {
+ self.appendix
+ }
+
+ /// Returns whether this identifier is nil, i.e. it consists only of zeros.
+ pub fn is_nil(&self) -> bool {
+ self.bytes == [0u8; 16] && self.appendix == 0
+ }
+
+ /// Returns whether this identifier is from the PDB 2.0 format.
+ pub fn is_pdb20(&self) -> bool {
+ self.typ == 1
+ }
+
+ /// Returns a wrapper which when formatted via `fmt::Display` will format a
+ /// a breakpad identifier.
+ pub fn breakpad(&self) -> BreakpadFormat<'_> {
+ BreakpadFormat { inner: self }
+ }
+
+ fn parse_str(string: &str, options: ParseOptions) -> Option<Self> {
+ let is_hyphenated = string.get(8..9) == Some("-");
+ if is_hyphenated && !options.allow_hyphens || !string.is_ascii() {
+ return None;
+ }
+
+ // Can the PDB 2.0 format match? This can never be true for a valid UUID.
+ let min_len = if is_hyphenated { 10 } else { 9 };
+ let max_len = if is_hyphenated { 17 } else { 16 };
+ if min_len <= string.len() && string.len() <= max_len {
+ let timestamp_str = string.get(..8)?;
+ let timestamp = u32::from_str_radix(timestamp_str, 16).ok()?;
+ let appendix_str = match is_hyphenated {
+ true => string.get(9..)?,
+ false => string.get(8..)?,
+ };
+ let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
+ return Some(Self::from_pdb20(timestamp, appendix));
+ }
+
+ let uuid_len = if is_hyphenated { 36 } else { 32 };
+ let uuid = string.get(..uuid_len)?.parse().ok()?;
+ if !options.require_appendix && string.len() == uuid_len {
+ return Some(Self::from_parts(uuid, 0));
+ }
+
+ let mut appendix_str = &string[uuid_len..];
+ if is_hyphenated ^ appendix_str.starts_with('-') {
+ return None; // Require a hyphen if and only if we're hyphenated.
+ } else if is_hyphenated {
+ appendix_str = &appendix_str[1..]; // Skip the hyphen for parsing.
+ }
+
+ if options.allow_tail && appendix_str.len() > 8 {
+ appendix_str = &appendix_str[..8];
+ }
+
+ // Parse the appendix, which fails on empty strings.
+ let appendix = u32::from_str_radix(appendix_str, 16).ok()?;
+ Some(Self::from_parts(uuid, appendix))
+ }
+
+ /// Returns the PDB 2.0 timestamp.
+ ///
+ /// Only valid if you know this is a PDB 2.0 debug identifier.
+ fn timestamp(&self) -> u32 {
+ u32::from_be_bytes([self.bytes[0], self.bytes[1], self.bytes[2], self.bytes[3]])
+ }
+}
+
+impl fmt::Debug for DebugId {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let uuid = self.uuid();
+ f.debug_struct("DebugId")
+ .field("uuid", &uuid.hyphenated().to_string())
+ .field("appendix", &self.appendix())
+ .finish()
+ }
+}
+
+impl fmt::Display for DebugId {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self.is_pdb20() {
+ true => {
+ let timestamp = self.timestamp();
+ write!(f, "{:08X}", timestamp)?;
+ }
+ false => {
+ let uuid = self.uuid();
+ uuid.fmt(f)?;
+ }
+ }
+ if self.appendix > 0 {
+ write!(f, "-{:x}", { self.appendix })?;
+ }
+ Ok(())
+ }
+}
+
+impl str::FromStr for DebugId {
+ type Err = ParseDebugIdError;
+
+ fn from_str(string: &str) -> Result<Self, ParseDebugIdError> {
+ let options = ParseOptions {
+ allow_hyphens: true,
+ require_appendix: false,
+ allow_tail: true,
+ };
+ Self::parse_str(string, options).ok_or(ParseDebugIdError)
+ }
+}
+
+impl From<Uuid> for DebugId {
+ fn from(uuid: Uuid) -> Self {
+ DebugId::from_uuid(uuid)
+ }
+}
+
+impl From<(Uuid, u32)> for DebugId {
+ fn from(tuple: (Uuid, u32)) -> Self {
+ let (uuid, appendix) = tuple;
+ DebugId::from_parts(uuid, appendix)
+ }
+}
+
+/// Wrapper around [`DebugId`] for Breakpad formatting.
+///
+/// **Example:**
+///
+/// ```
+/// # extern crate debugid;
+/// use std::str::FromStr;
+/// use debugid::DebugId;
+///
+/// # fn foo() -> Result<(), debugid::ParseDebugIdError> {
+/// let id = DebugId::from_breakpad("DFB8E43AF2423D73A453AEB6A777EF75a")?;
+/// assert_eq!("DFB8E43AF2423D73A453AEB6A777EF75a".to_string(), id.breakpad().to_string());
+/// # Ok(())
+/// # }
+///
+/// # fn main() { foo().unwrap() }
+/// ```
+///
+/// [`DebugId`]: struct.DebugId.html
+#[derive(Debug)]
+pub struct BreakpadFormat<'a> {
+ inner: &'a DebugId,
+}
+
+impl<'a> fmt::Display for BreakpadFormat<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self.inner.is_pdb20() {
+ true => {
+ let timestamp = self.inner.timestamp();
+ write!(f, "{:08X}{:x}", timestamp, self.inner.appendix())
+ }
+ false => {
+ let uuid = self.inner.uuid();
+ write!(f, "{:X}{:x}", uuid.simple(), self.inner.appendix())
+ }
+ }
+ }
+}
+
+/// Indicates an error parsing a [`CodeId`](struct.CodeId.html).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct ParseCodeIdError;
+
+impl error::Error for ParseCodeIdError {}
+
+impl fmt::Display for ParseCodeIdError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "invalid code identifier")
+ }
+}
+
+/// Unique platform-dependent identifier of code files.
+///
+/// This identifier assumes a string representation that depends on the platform and compiler used.
+/// The representation only retains hex characters and canonically stores lower case.
+///
+/// There are the following known formats:
+///
+/// - **MachO UUID**: The unique identifier of a Mach binary, specified in the `LC_UUID` load
+/// command header.
+/// - **GNU Build ID**: Contents of the `.gnu.build-id` note or section contents formatted as
+/// lowercase hex string.
+/// - **PE Timestamp**: Timestamp and size of image values from a Windows PE header. The size of
+/// image value is truncated, so the length of the `CodeId` might not be a multiple of 2.
+#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct CodeId {
+ inner: String,
+}
+
+impl CodeId {
+ /// Constructs an empty code identifier.
+ pub fn nil() -> Self {
+ Self::default()
+ }
+
+ /// Constructs a `CodeId` from its string representation.
+ pub fn new(mut string: String) -> Self {
+ string.retain(|c| c.is_ascii_hexdigit());
+ string.make_ascii_lowercase();
+ CodeId { inner: string }
+ }
+
+ /// Constructs a `CodeId` from a binary slice.
+ pub fn from_binary(slice: &[u8]) -> Self {
+ let mut string = String::with_capacity(slice.len() * 2);
+
+ for byte in slice {
+ write!(&mut string, "{:02x}", byte).expect("");
+ }
+
+ Self::new(string)
+ }
+
+ /// Returns whether this identifier is nil, i.e. it is empty.
+ pub fn is_nil(&self) -> bool {
+ self.inner.is_empty()
+ }
+
+ /// Returns the string representation of this code identifier.
+ pub fn as_str(&self) -> &str {
+ self.inner.as_str()
+ }
+}
+
+impl fmt::Display for CodeId {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.write_str(&self.inner)
+ }
+}
+
+impl fmt::Debug for CodeId {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "CodeId({})", self)
+ }
+}
+
+impl From<String> for CodeId {
+ fn from(string: String) -> Self {
+ Self::new(string)
+ }
+}
+
+impl From<&'_ str> for CodeId {
+ fn from(string: &str) -> Self {
+ Self::new(string.into())
+ }
+}
+
+impl AsRef<str> for CodeId {
+ fn as_ref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+impl str::FromStr for CodeId {
+ type Err = ParseCodeIdError;
+
+ fn from_str(string: &str) -> Result<Self, ParseCodeIdError> {
+ Ok(Self::new(string.into()))
+ }
+}
+
+#[cfg(feature = "serde")]
+mod serde_support {
+ use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor};
+ use serde::ser::{Serialize, Serializer};
+
+ use super::*;
+
+ impl Serialize for CodeId {
+ fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+ serializer.serialize_str(self.as_str())
+ }
+ }
+
+ impl<'de> Deserialize<'de> for CodeId {
+ fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+ let string = String::deserialize(deserializer)?;
+ Ok(CodeId::new(string))
+ }
+ }
+
+ impl<'de> Deserialize<'de> for DebugId {
+ fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
+ struct V;
+
+ impl<'de> Visitor<'de> for V {
+ type Value = DebugId;
+
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ formatter.write_str("DebugId")
+ }
+
+ fn visit_str<E: de::Error>(self, value: &str) -> Result<DebugId, E> {
+ value
+ .parse()
+ .map_err(|_| de::Error::invalid_value(Unexpected::Str(value), &self))
+ }
+ }
+
+ deserializer.deserialize_str(V)
+ }
+ }
+
+ impl Serialize for DebugId {
+ fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+ serializer.serialize_str(&self.to_string())
+ }
+ }
+}