summaryrefslogtreecommitdiffstats
path: root/src/tools/rust-demangler
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /src/tools/rust-demangler
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tools/rust-demangler')
-rw-r--r--src/tools/rust-demangler/Cargo.toml16
-rw-r--r--src/tools/rust-demangler/README.md36
-rw-r--r--src/tools/rust-demangler/src/lib.rs21
-rw-r--r--src/tools/rust-demangler/src/main.rs97
-rw-r--r--src/tools/rust-demangler/tests/lib.rs84
5 files changed, 254 insertions, 0 deletions
diff --git a/src/tools/rust-demangler/Cargo.toml b/src/tools/rust-demangler/Cargo.toml
new file mode 100644
index 000000000..2bb73b326
--- /dev/null
+++ b/src/tools/rust-demangler/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "rust-demangler"
+version = "0.0.1"
+edition = "2021"
+
+[dependencies]
+regex = "1.0"
+rustc-demangle = "0.1.17"
+
+[lib]
+name = "rust_demangler"
+doctest = false
+
+[[bin]]
+name = "rust-demangler"
+test = false
diff --git a/src/tools/rust-demangler/README.md b/src/tools/rust-demangler/README.md
new file mode 100644
index 000000000..4e8a689a1
--- /dev/null
+++ b/src/tools/rust-demangler/README.md
@@ -0,0 +1,36 @@
+# rust-demangler
+
+_Demangles rustc mangled names._
+
+`rust-demangler` supports the requirements of the [`llvm-cov show -Xdemangler`
+option](https://llvm.org/docs/CommandGuide/llvm-cov.html#cmdoption-llvm-cov-show-xdemangler),
+to perform Rust-specific symbol demangling:
+
+> _The demangler is expected to read a newline-separated list of symbols from
+> stdin and write a newline-separated list of the same length to stdout._
+
+For example, to use `rust-demangler` with `llvm-cov`:
+
+```shell
+$ TARGET="${PWD}/build/x86_64-unknown-linux-gnu"
+$ "${TARGET}"/llvm/bin/llvm-cov show \
+ --Xdemangler=path/to/rust-demangler \
+ --instr-profile=main.profdata ./main --show-line-counts-or-regions
+```
+
+`rust-demangler` is a Rust "extended tool", used in Rust compiler tests, and
+optionally included in Rust distributions that enable coverage profiling. Symbol
+demangling is implemented using the
+[rustc-demangle](https://crates.io/crates/rustc-demangle) crate.
+
+_(Note, for Rust developers, the third-party tool
+[`rustfilt`](https://crates.io/crates/rustfilt) also supports `llvm-cov` symbol
+demangling. `rustfilt` is a more generalized tool that searches any body of
+text, using pattern matching, to find and demangle Rust symbols.)_
+
+## License
+
+Rust-demangler is distributed under the terms of both the MIT license and the
+Apache License (Version 2.0).
+
+See [LICENSE-APACHE](/LICENSE-APACHE) and [LICENSE-MIT](/LICENSE-MIT) for details.
diff --git a/src/tools/rust-demangler/src/lib.rs b/src/tools/rust-demangler/src/lib.rs
new file mode 100644
index 000000000..1d972229d
--- /dev/null
+++ b/src/tools/rust-demangler/src/lib.rs
@@ -0,0 +1,21 @@
+use regex::Regex;
+use rustc_demangle::demangle;
+use std::str::Lines;
+
+// Replacement text passed to `replace_all` in `demangle_lines`. With the pattern built by
+// `create_disambiguator_re`, each `[<hex>]::` match collapses to the bare `::` path separator.
+const REPLACE_COLONS: &str = "::";
+
+/// Compiles the regular expression that recognizes a crate disambiguator: 5 to 16 lowercase
+/// hexadecimal digits enclosed in square brackets and immediately followed by `::`.
+///
+/// # Panics
+///
+/// Panics if the pattern fails to compile.
+pub fn create_disambiguator_re() -> Regex {
+    let pattern = r"\[[a-f0-9]{5,16}\]::";
+    Regex::new(pattern).unwrap()
+}
+
+/// Demangles every line of `lines`, returning one output string per input line, in input order.
+///
+/// When `strip_crate_disambiguators` is `Some`, every match of that regex in a demangled line is
+/// replaced with `::`. When it is `None`, the demangled text is returned as produced by
+/// `rustc_demangle::demangle`.
+pub fn demangle_lines(lines: Lines<'_>, strip_crate_disambiguators: Option<Regex>) -> Vec<String> {
+    let stripper = strip_crate_disambiguators.as_ref();
+    lines
+        .map(|symbol| {
+            let readable = demangle(symbol).to_string();
+            match stripper {
+                Some(re) => re.replace_all(&readable, REPLACE_COLONS).into_owned(),
+                None => readable,
+            }
+        })
+        .collect()
+}
diff --git a/src/tools/rust-demangler/src/main.rs b/src/tools/rust-demangler/src/main.rs
new file mode 100644
index 000000000..1b5ef5d24
--- /dev/null
+++ b/src/tools/rust-demangler/src/main.rs
@@ -0,0 +1,97 @@
+//! Demangles rustc mangled names.
+//!
+//! Note regarding crate disambiguators:
+//!
+//! Some demangled symbol paths can include "crate disambiguator" suffixes, represented as a large
+//! hexadecimal value enclosed in square brackets, and appended to the name of the crate. For
+//! example, the `core` crate, here, includes a disambiguator:
+//!
+//! ```rust
+//! <generics::Firework<f64> as core[a7a74cee373f048]::ops::drop::Drop>::drop
+//! ```
+//!
+//! These disambiguators are known to vary depending on environmental circumstances. As a result,
+//! tests that compare results including demangled names can fail across development environments,
+//! particularly with cross-platform testing. Also, the resulting crate paths are not syntactically
+//! valid, and don't match the original source symbol paths, which can impact development tools.
+//!
+//! For these reasons, by default, `rust-demangler` uses a heuristic to remove crate disambiguators
+//! from their original demangled representation before printing them to standard output. If crate
+//! disambiguators are required, add the `-d` (or `--disambiguators`) flag, and the disambiguators
+//! will not be removed.
+//!
+//! Also note that the disambiguators are stripped by a Regex pattern that is tolerant to some
+//! variation in the number of hexadecimal digits. The disambiguators come from a hash value, which
+//! typically generates a 16-digit hex representation on a 64-bit architecture; however, leading
+//! zeros are not included, which can shorten the hex digit length, and a different hash algorithm
+//! that might also be dependent on the architecture, might shorten the length even further. A
+//! minimum length of 5 digits is assumed, which should be more than sufficient to support hex
+//! representations that generate only 8-digits of precision with an extremely rare (but not
+//! impossible) result with up to 3 leading zeros.
+//!
+//! Using a minimum number of digits less than 5 risks the possibility of stripping demangled name
+//! components with a similar pattern. For example, some closures instantiated multiple times
+//! include their own disambiguators, demangled as non-hashed zero-based indexes in square brackets.
+//! These disambiguators seem to have more analytical value (for instance, in coverage analysis), so
+//! they are not removed.
+
+use rust_demangler::*;
+use std::io::{self, Read, Write};
+
+fn main() -> io::Result<()> {
+ // FIXME(richkadel): In Issue #77615 discussed updating the `rustc-demangle` library, to provide
+ // an option to generate demangled names without including crate disambiguators. If that
+ // happens, update this tool to use that option (if the `-d` flag is not set) instead stripping
+ // them via the Regex heuristic. The update the doc comments and help.
+
+ // Strip hashed hexadecimal crate disambiguators. Leading zeros are not enforced, and can be
+ // different across different platform/architecture types, so while 16 hex digits are common,
+ // they can also be shorter.
+ //
+ // Also note that a demangled symbol path may include the `[<digits>]` pattern, with zero-based
+ // indexes (such as for closures, and possibly for types defined in anonymous scopes). Preferably
+ // these should not be stripped.
+ //
+ // The minimum length of 5 digits supports the possibility that some target architecture (maybe
+ // a 32-bit or smaller architecture) could generate a hash value with a maximum of 8 digits,
+ // and more than three leading zeros should be extremely unlikely. Conversely, it should be
+ // sufficient to assume the zero-based indexes for closures and anonymous scopes will never
+ // exceed the value 9999.
+ let mut strip_crate_disambiguators = Some(create_disambiguator_re());
+
+ let mut args = std::env::args();
+ let progname = args.next().unwrap();
+ for arg in args {
+ if arg == "--disambiguators" || arg == "-d" {
+ strip_crate_disambiguators = None;
+ } else {
+ eprintln!();
+ eprintln!("Usage: {} [-d|--disambiguators]", progname);
+ eprintln!();
+ eprintln!(
+ "This tool converts a list of Rust mangled symbols (one per line) into a\n\
+ corresponding list of demangled symbols."
+ );
+ eprintln!();
+ eprintln!(
+ "With -d (--disambiguators), Rust symbols mangled with the v0 symbol mangler may\n\
+ include crate disambiguators (a hexadecimal hash value, typically up to 16 digits\n\
+ long, enclosed in square brackets)."
+ );
+ eprintln!();
+ eprintln!(
+ "By default, crate disambiguators are removed, using a heuristics-based regular\n\
+ expression. (See the `rust-demangler` doc comments for more information.)"
+ );
+ eprintln!();
+ std::process::exit(1)
+ }
+ }
+
+ let mut buffer = String::new();
+ io::stdin().read_to_string(&mut buffer)?;
+ let mut demangled_lines = demangle_lines(buffer.lines(), strip_crate_disambiguators);
+ demangled_lines.push("".to_string()); // ensure a trailing newline
+ io::stdout().write_all(demangled_lines.join("\n").as_bytes())?;
+ Ok(())
+}
diff --git a/src/tools/rust-demangler/tests/lib.rs b/src/tools/rust-demangler/tests/lib.rs
new file mode 100644
index 000000000..85019df78
--- /dev/null
+++ b/src/tools/rust-demangler/tests/lib.rs
@@ -0,0 +1,84 @@
+use rust_demangler::*;
+
+// Mangled symbols, one per line, fed to `demangle_lines` by the tests in this file. The raw
+// string begins with a newline, so the first line seen by `lines()` is empty.
+const MANGLED_INPUT: &str = r"
+_RNvC6_123foo3bar
+_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y
+_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_
+_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_
+_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std
+INtC8arrayvec8ArrayVechKj7b_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_8UnsignedKhb_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKs98_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_6SignedKanb_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb0_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_4BoolKb1_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc76_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKca_E
+_RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E
+_RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO
+_RC3foo.llvm.9D1C9369
+_RC3foo.llvm.9D1C9369@@16
+_RNvC9backtrace3foo.llvm.A5310EB9
+_RNvNtNtNtNtCs92dm3009vxr_4rand4rngs7adapter9reseeding4fork23FORK_HANDLER_REGISTERED.0.0
+";
+
+// Expected demangling of each `MANGLED_INPUT` line, in the same order, when no stripping regex is
+// passed to `demangle_lines` (crate disambiguators are kept).
+const DEMANGLED_OUTPUT: &str = r"
+123foo[0]::bar
+utf8_idents[317d481089b8c8fe]::საჭმელად_გემრიელი_სადილი
+cc[4d6468d6c9fd4bb3]::spawn::{closure#0}::{closure#0}
+<core[846817f741e54dfd]::slice::Iter<u8> as core[846817f741e54dfd]::iter::iterator::Iterator>::rposition::<core[846817f741e54dfd]::slice::memchr::memrchr::{closure#1}>::{closure#0}
+alloc[f15a878b47eb696b]::alloc::box_free::<dyn alloc[f15a878b47eb696b]::boxed::FnBox<(), Output = ()>>
+INtC8arrayvec8ArrayVechKj7b_E
+<const_generic[317d481089b8c8fe]::Unsigned<11u8>>
+<const_generic[317d481089b8c8fe]::Signed<152i16>>
+<const_generic[317d481089b8c8fe]::Signed<-11i8>>
+<const_generic[317d481089b8c8fe]::Bool<false>>
+<const_generic[317d481089b8c8fe]::Bool<true>>
+<const_generic[317d481089b8c8fe]::Char<'v'>>
+<const_generic[317d481089b8c8fe]::Char<'\n'>>
+<const_generic[317d481089b8c8fe]::Char<'∂'>>
+<const_generic[317d481089b8c8fe]::Foo<_>>::foo::FOO
+foo[0]
+foo[0]
+backtrace[0]::foo
+rand[693ea8e72247470f]::rngs::adapter::reseeding::fork::FORK_HANDLER_REGISTERED.0.0
+";
+
+// Expected demangling of each `MANGLED_INPUT` line, in the same order, when the regex from
+// `create_disambiguator_re()` is passed to `demangle_lines`. The hashed crate disambiguators are
+// gone, while short bracketed indexes such as `[0]` remain.
+const DEMANGLED_OUTPUT_NO_CRATE_DISAMBIGUATORS: &str = r"
+123foo[0]::bar
+utf8_idents::საჭმელად_გემრიელი_სადილი
+cc::spawn::{closure#0}::{closure#0}
+<core::slice::Iter<u8> as core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}
+alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>
+INtC8arrayvec8ArrayVechKj7b_E
+<const_generic::Unsigned<11u8>>
+<const_generic::Signed<152i16>>
+<const_generic::Signed<-11i8>>
+<const_generic::Bool<false>>
+<const_generic::Bool<true>>
+<const_generic::Char<'v'>>
+<const_generic::Char<'\n'>>
+<const_generic::Char<'∂'>>
+<const_generic::Foo<_>>::foo::FOO
+foo[0]
+foo[0]
+backtrace[0]::foo
+rand::rngs::adapter::reseeding::fork::FORK_HANDLER_REGISTERED.0.0
+";
+
+/// Checks that, with no stripping regex, each line of `MANGLED_INPUT` demangles to the
+/// corresponding line of `DEMANGLED_OUTPUT`.
+#[test]
+fn test_demangle_lines() {
+    let demangled_lines = demangle_lines(MANGLED_INPUT.lines(), None);
+    // `zip` stops at the shorter side, so compare the line counts first; otherwise missing or
+    // extra output lines would go unnoticed.
+    assert_eq!(DEMANGLED_OUTPUT.lines().count(), demangled_lines.len());
+    for (expected, actual) in DEMANGLED_OUTPUT.lines().zip(demangled_lines) {
+        assert_eq!(expected, actual);
+    }
+}
+
+/// Checks that, with the regex from `create_disambiguator_re()`, each line of `MANGLED_INPUT`
+/// demangles to the corresponding line of `DEMANGLED_OUTPUT_NO_CRATE_DISAMBIGUATORS`.
+#[test]
+fn test_demangle_lines_no_crate_disambiguators() {
+    let demangled_lines = demangle_lines(MANGLED_INPUT.lines(), Some(create_disambiguator_re()));
+    // `zip` stops at the shorter side, so compare the line counts first; otherwise missing or
+    // extra output lines would go unnoticed.
+    assert_eq!(DEMANGLED_OUTPUT_NO_CRATE_DISAMBIGUATORS.lines().count(), demangled_lines.len());
+    for (expected, actual) in DEMANGLED_OUTPUT_NO_CRATE_DISAMBIGUATORS.lines().zip(demangled_lines)
+    {
+        assert_eq!(expected, actual);
+    }
+}