diff options
Diffstat (limited to 'src/tools/rust-demangler')
-rw-r--r-- | src/tools/rust-demangler/Cargo.toml | 16 | ||||
-rw-r--r-- | src/tools/rust-demangler/README.md | 36 | ||||
-rw-r--r-- | src/tools/rust-demangler/src/lib.rs | 21 | ||||
-rw-r--r-- | src/tools/rust-demangler/src/main.rs | 97 | ||||
-rw-r--r-- | src/tools/rust-demangler/tests/lib.rs | 84 |
5 files changed, 254 insertions, 0 deletions
diff --git a/src/tools/rust-demangler/Cargo.toml b/src/tools/rust-demangler/Cargo.toml new file mode 100644 index 000000000..2bb73b326 --- /dev/null +++ b/src/tools/rust-demangler/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "rust-demangler" +version = "0.0.1" +edition = "2021" + +[dependencies] +regex = "1.0" +rustc-demangle = "0.1.17" + +[lib] +name = "rust_demangler" +doctest = false + +[[bin]] +name = "rust-demangler" +test = false diff --git a/src/tools/rust-demangler/README.md b/src/tools/rust-demangler/README.md new file mode 100644 index 000000000..4e8a689a1 --- /dev/null +++ b/src/tools/rust-demangler/README.md @@ -0,0 +1,36 @@ +# rust-demangler + +_Demangles rustc mangled names._ + +`rust-demangler` supports the requirements of the [`llvm-cov show -Xdemangler` +option](https://llvm.org/docs/CommandGuide/llvm-cov.html#cmdoption-llvm-cov-show-xdemangler), +to perform Rust-specific symbol demangling: + +> _The demangler is expected to read a newline-separated list of symbols from +> stdin and write a newline-separated list of the same length to stdout._ + +To use `rust-demangler` with `llvm-cov` for example: + +```shell +$ TARGET="${PWD}/build/x86_64-unknown-linux-gnu" +$ "${TARGET}"/llvm/bin/llvm-cov show \ + --Xdemangler=path/to/rust-demangler \ + --instr-profile=main.profdata ./main --show-line-counts-or-regions +``` + +`rust-demangler` is a Rust "extended tool", used in Rust compiler tests, and +optionally included in Rust distributions that enable coverage profiling. Symbol +demangling is implemented using the +[rustc-demangle](https://crates.io/crates/rustc-demangle) crate. + +_(Note, for Rust developers, the third-party tool +[`rustfilt`](https://crates.io/crates/rustfilt) also supports `llvm-cov` symbol +demangling. 
`rustfilt` is a more generalized tool that searches any body of +text, using pattern matching, to find and demangle Rust symbols.)_ + +## License + +Rust-demangler is distributed under the terms of both the MIT license and the +Apache License (Version 2.0). + +See [LICENSE-APACHE](/LICENSE-APACHE) and [LICENSE-MIT](/LICENSE-MIT) for details. diff --git a/src/tools/rust-demangler/src/lib.rs b/src/tools/rust-demangler/src/lib.rs new file mode 100644 index 000000000..1d972229d --- /dev/null +++ b/src/tools/rust-demangler/src/lib.rs @@ -0,0 +1,21 @@ +use regex::Regex; +use rustc_demangle::demangle; +use std::str::Lines; + +const REPLACE_COLONS: &str = "::"; + +pub fn create_disambiguator_re() -> Regex { + Regex::new(r"\[[a-f0-9]{5,16}\]::").unwrap() +} + +pub fn demangle_lines(lines: Lines<'_>, strip_crate_disambiguators: Option<Regex>) -> Vec<String> { + let mut demangled_lines = Vec::new(); + for mangled in lines { + let mut demangled = demangle(mangled).to_string(); + if let Some(re) = &strip_crate_disambiguators { + demangled = re.replace_all(&demangled, REPLACE_COLONS).to_string(); + } + demangled_lines.push(demangled); + } + demangled_lines +} diff --git a/src/tools/rust-demangler/src/main.rs b/src/tools/rust-demangler/src/main.rs new file mode 100644 index 000000000..1b5ef5d24 --- /dev/null +++ b/src/tools/rust-demangler/src/main.rs @@ -0,0 +1,97 @@ +//! Demangles rustc mangled names. +//! +//! Note regarding crate disambiguators: +//! +//! Some demangled symbol paths can include "crate disambiguator" suffixes, represented as a large +//! hexadecimal value enclosed in square brackets, and appended as a suffix to the +//! original crate name. For example, the `core` crate, here, includes a disambiguator: +//! +//! ```rust +//! <generics::Firework<f64> as core[a7a74cee373f048]::ops::drop::Drop>::drop +//! ``` +//! +//! These disambiguators are known to vary depending on environmental circumstances. As a result, +//! 
tests that compare results including demangled names can fail across development environments, +//! particularly with cross-platform testing. Also, the resulting crate paths are not syntactically +//! valid, and don't match the original source symbol paths, which can impact development tools. +//! +//! For these reasons, by default, `rust-demangler` uses a heuristic to remove crate disambiguators +//! from their original demangled representation before printing them to standard output. If crate +//! disambiguators are required, add the `-d` (or `--disambiguators`) flag, and the disambiguators +//! will not be removed. +//! +//! Also note that the disambiguators are stripped by a Regex pattern that is tolerant of some +//! variation in the number of hexadecimal digits. The disambiguators come from a hash value, which +//! typically generates a 16-digit hex representation on a 64-bit architecture; however, leading +//! zeros are not included, which can shorten the hex digit length, and a different hash algorithm, +//! which might also be dependent on the architecture, might shorten the length even further. A +//! minimum length of 5 digits is assumed, which should be more than sufficient to support hex +//! representations that generate only 8 digits of precision with an extremely rare (but not +//! impossible) result with up to 3 leading zeros. +//! +//! Using a minimum number of digits less than 5 risks the possibility of stripping demangled name +//! components with a similar pattern. For example, some closures instantiated multiple times +//! include their own disambiguators, demangled as non-hashed zero-based indexes in square brackets. +//! These disambiguators seem to have more analytical value (for instance, in coverage analysis), so +//! they are not removed. 
+ +use rust_demangler::*; +use std::io::{self, Read, Write}; + +fn main() -> io::Result<()> { + // FIXME(richkadel): Issue #77615 discussed updating the `rustc-demangle` library to provide + // an option to generate demangled names without including crate disambiguators. If that + // happens, update this tool to use that option (if the `-d` flag is not set) instead of stripping + // them via the Regex heuristic. Then update the doc comments and help. + + // Strip hashed hexadecimal crate disambiguators. Leading zeros are not enforced, and can be + // different across different platform/architecture types, so while 16 hex digits are common, + // they can also be shorter. + // + // Also note that a demangled symbol path may include the `[<digits>]` pattern, with zero-based + // indexes (such as for closures, and possibly for types defined in anonymous scopes). Preferably + // these should not be stripped. + // + // The minimum length of 5 digits supports the possibility that some target architecture (maybe + // a 32-bit or smaller architecture) could generate a hash value with a maximum of 8 digits, + // and more than three leading zeros should be extremely unlikely. Conversely, it should be + // sufficient to assume the zero-based indexes for closures and anonymous scopes will never + // exceed the value 9999. + let mut strip_crate_disambiguators = Some(create_disambiguator_re()); + + let mut args = std::env::args(); + let progname = args.next().unwrap(); + for arg in args { + if arg == "--disambiguators" || arg == "-d" { + strip_crate_disambiguators = None; + } else { + eprintln!(); + eprintln!("Usage: {} [-d|--disambiguators]", progname); + eprintln!(); + eprintln!( + "This tool converts a list of Rust mangled symbols (one per line) into a\n\ + corresponding list of demangled symbols." 
+ ); + eprintln!(); + eprintln!( + "With -d (--disambiguators), Rust symbols mangled with the v0 symbol mangler may\n\ + include crate disambiguators (a hexadecimal hash value, typically up to 16 digits\n\ + long, enclosed in square brackets)." + ); + eprintln!(); + eprintln!( + "By default, crate disambiguators are removed, using a heuristics-based regular\n\ + expression. (See the `rust-demangler` doc comments for more information.)" + ); + eprintln!(); + std::process::exit(1) + } + } + + let mut buffer = String::new(); + io::stdin().read_to_string(&mut buffer)?; + let mut demangled_lines = demangle_lines(buffer.lines(), strip_crate_disambiguators); + demangled_lines.push("".to_string()); // ensure a trailing newline + io::stdout().write_all(demangled_lines.join("\n").as_bytes())?; + Ok(()) +} diff --git a/src/tools/rust-demangler/tests/lib.rs b/src/tools/rust-demangler/tests/lib.rs new file mode 100644 index 000000000..85019df78 --- /dev/null +++ b/src/tools/rust-demangler/tests/lib.rs @@ -0,0 +1,84 @@ +use rust_demangler::*; + +const MANGLED_INPUT: &str = r" +_RNvC6_123foo3bar +_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y +_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_ +_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_ +_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std +INtC8arrayvec8ArrayVechKj7b_E +_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E +_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E +_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E +_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E +_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO +_RC3foo.llvm.9D1C9369 +_RC3foo.llvm.9D1C9369@@16 
+_RNvC9backtrace3foo.llvm.A5310EB9 +_RNvNtNtNtNtCs92dm3009vxr_4rand4rngs7adapter9reseeding4fork23FORK_HANDLER_REGISTERED.0.0 +"; + +const DEMANGLED_OUTPUT: &str = r" +123foo[0]::bar +utf8_idents[317d481089b8c8fe]::საჭმელად_გემრიელი_სადილი +cc[4d6468d6c9fd4bb3]::spawn::{closure#0}::{closure#0} +<core[846817f741e54dfd]::slice::Iter<u8> as core[846817f741e54dfd]::iter::iterator::Iterator>::rposition::<core[846817f741e54dfd]::slice::memchr::memrchr::{closure#1}>::{closure#0} +alloc[f15a878b47eb696b]::alloc::box_free::<dyn alloc[f15a878b47eb696b]::boxed::FnBox<(), Output = ()>> +INtC8arrayvec8ArrayVechKj7b_E +<const_generic[317d481089b8c8fe]::Unsigned<11u8>> +<const_generic[317d481089b8c8fe]::Signed<152i16>> +<const_generic[317d481089b8c8fe]::Signed<-11i8>> +<const_generic[317d481089b8c8fe]::Bool<false>> +<const_generic[317d481089b8c8fe]::Bool<true>> +<const_generic[317d481089b8c8fe]::Char<'v'>> +<const_generic[317d481089b8c8fe]::Char<'\n'>> +<const_generic[317d481089b8c8fe]::Char<'∂'>> +<const_generic[317d481089b8c8fe]::Foo<_>>::foo::FOO +foo[0] +foo[0] +backtrace[0]::foo +rand[693ea8e72247470f]::rngs::adapter::reseeding::fork::FORK_HANDLER_REGISTERED.0.0 +"; + +const DEMANGLED_OUTPUT_NO_CRATE_DISAMBIGUATORS: &str = r" +123foo[0]::bar +utf8_idents::საჭმელად_გემრიელი_სადილი +cc::spawn::{closure#0}::{closure#0} +<core::slice::Iter<u8> as core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0} +alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>> +INtC8arrayvec8ArrayVechKj7b_E +<const_generic::Unsigned<11u8>> +<const_generic::Signed<152i16>> +<const_generic::Signed<-11i8>> +<const_generic::Bool<false>> +<const_generic::Bool<true>> +<const_generic::Char<'v'>> +<const_generic::Char<'\n'>> +<const_generic::Char<'∂'>> +<const_generic::Foo<_>>::foo::FOO +foo[0] +foo[0] +backtrace[0]::foo +rand::rngs::adapter::reseeding::fork::FORK_HANDLER_REGISTERED.0.0 +"; + +#[test] +fn test_demangle_lines() { + let demangled_lines = 
demangle_lines(MANGLED_INPUT.lines(), None); + for (expected, actual) in DEMANGLED_OUTPUT.lines().zip(demangled_lines) { + assert_eq!(expected, actual); + } +} + +#[test] +fn test_demangle_lines_no_crate_disambiguators() { + let demangled_lines = demangle_lines(MANGLED_INPUT.lines(), Some(create_disambiguator_re())); + for (expected, actual) in DEMANGLED_OUTPUT_NO_CRATE_DISAMBIGUATORS.lines().zip(demangled_lines) + { + assert_eq!(expected, actual); + } +} |