diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_symbol_mangling | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1. (upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_symbol_mangling')
-rw-r--r-- | compiler/rustc_symbol_mangling/Cargo.toml | 20 | ||||
-rw-r--r-- | compiler/rustc_symbol_mangling/src/legacy.rs | 464 | ||||
-rw-r--r-- | compiler/rustc_symbol_mangling/src/lib.rs | 277 | ||||
-rw-r--r-- | compiler/rustc_symbol_mangling/src/test.rs | 74 | ||||
-rw-r--r-- | compiler/rustc_symbol_mangling/src/typeid.rs | 18 | ||||
-rw-r--r-- | compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs | 929 | ||||
-rw-r--r-- | compiler/rustc_symbol_mangling/src/v0.rs | 844 |
7 files changed, 2626 insertions, 0 deletions
diff --git a/compiler/rustc_symbol_mangling/Cargo.toml b/compiler/rustc_symbol_mangling/Cargo.toml new file mode 100644 index 000000000..b104a40c2 --- /dev/null +++ b/compiler/rustc_symbol_mangling/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "rustc_symbol_mangling" +version = "0.0.0" +edition = "2021" + +[lib] +doctest = false + +[dependencies] +bitflags = "1.2.1" +tracing = "0.1" +punycode = "0.4.0" +rustc-demangle = "0.1.21" + +rustc_span = { path = "../rustc_span" } +rustc_middle = { path = "../rustc_middle" } +rustc_hir = { path = "../rustc_hir" } +rustc_target = { path = "../rustc_target" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_session = { path = "../rustc_session" } diff --git a/compiler/rustc_symbol_mangling/src/legacy.rs b/compiler/rustc_symbol_mangling/src/legacy.rs new file mode 100644 index 000000000..9241fd82c --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/legacy.rs @@ -0,0 +1,464 @@ +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_hir::def_id::CrateNum; +use rustc_hir::definitions::{DefPathData, DisambiguatedDefPathData}; +use rustc_middle::ty::print::{PrettyPrinter, Print, Printer}; +use rustc_middle::ty::subst::{GenericArg, GenericArgKind}; +use rustc_middle::ty::{self, Instance, Ty, TyCtxt, TypeVisitable}; +use rustc_middle::util::common::record_time; + +use tracing::debug; + +use std::fmt::{self, Write}; +use std::mem::{self, discriminant}; + +pub(super) fn mangle<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + instantiating_crate: Option<CrateNum>, +) -> String { + let def_id = instance.def_id(); + + // We want to compute the "type" of this item. Unfortunately, some + // kinds of items (e.g., closures) don't have an entry in the + // item-type array. So walk back up to find the closest parent + // that DOES have an entry. 
+ let mut ty_def_id = def_id; + let instance_ty; + loop { + let key = tcx.def_key(ty_def_id); + match key.disambiguated_data.data { + DefPathData::TypeNs(_) | DefPathData::ValueNs(_) => { + instance_ty = tcx.type_of(ty_def_id); + debug!(?instance_ty); + break; + } + _ => { + // if we're making a symbol for something, there ought + // to be a value or type-def or something in there + // *somewhere* + ty_def_id.index = key.parent.unwrap_or_else(|| { + bug!( + "finding type for {:?}, encountered def-id {:?} with no \ + parent", + def_id, + ty_def_id + ); + }); + } + } + } + + // Erase regions because they may not be deterministic when hashed + // and should not matter anyhow. + let instance_ty = tcx.erase_regions(instance_ty); + + let hash = get_symbol_hash(tcx, instance, instance_ty, instantiating_crate); + + let mut printer = SymbolPrinter { tcx, path: SymbolPath::new(), keep_within_component: false }; + printer + .print_def_path( + def_id, + if let ty::InstanceDef::DropGlue(_, _) = instance.def { + // Add the name of the dropped type to the symbol name + &*instance.substs + } else { + &[] + }, + ) + .unwrap(); + + if let ty::InstanceDef::VTableShim(..) = instance.def { + let _ = printer.write_str("{{vtable-shim}}"); + } + + if let ty::InstanceDef::ReifyShim(..) = instance.def { + let _ = printer.write_str("{{reify-shim}}"); + } + + printer.path.finish(hash) +} + +fn get_symbol_hash<'tcx>( + tcx: TyCtxt<'tcx>, + + // instance this name will be for + instance: Instance<'tcx>, + + // type of the item, without any generic + // parameters substituted; this is + // included in the hash as a kind of + // safeguard. 
+ item_type: Ty<'tcx>, + + instantiating_crate: Option<CrateNum>, +) -> u64 { + let def_id = instance.def_id(); + let substs = instance.substs; + debug!("get_symbol_hash(def_id={:?}, parameters={:?})", def_id, substs); + + tcx.with_stable_hashing_context(|mut hcx| { + let mut hasher = StableHasher::new(); + + record_time(&tcx.sess.perf_stats.symbol_hash_time, || { + // the main symbol name is not necessarily unique; hash in the + // compiler's internal def-path, guaranteeing each symbol has a + // truly unique path + tcx.def_path_hash(def_id).hash_stable(&mut hcx, &mut hasher); + + // Include the main item-type. Note that, in this case, the + // assertions about `needs_subst` may not hold, but this item-type + // ought to be the same for every reference anyway. + assert!(!item_type.has_erasable_regions()); + hcx.while_hashing_spans(false, |hcx| { + item_type.hash_stable(hcx, &mut hasher); + + // If this is a function, we hash the signature as well. + // This is not *strictly* needed, but it may help in some + // situations, see the `run-make/a-b-a-linker-guard` test. + if let ty::FnDef(..) = item_type.kind() { + item_type.fn_sig(tcx).hash_stable(hcx, &mut hasher); + } + + // also include any type parameters (for generic items) + substs.hash_stable(hcx, &mut hasher); + + if let Some(instantiating_crate) = instantiating_crate { + tcx.def_path_hash(instantiating_crate.as_def_id()) + .stable_crate_id() + .hash_stable(hcx, &mut hasher); + } + + // We want to avoid accidental collision between different types of instances. + // Especially, `VTableShim`s and `ReifyShim`s may overlap with their original + // instances without this. + discriminant(&instance.def).hash_stable(hcx, &mut hasher); + }); + }); + + // 64 bits should be enough to avoid collisions. + hasher.finish::<u64>() + }) +} + +// Follow C++ namespace-mangling style, see +// https://en.wikipedia.org/wiki/Name_mangling for more info. 
+// +// It turns out that on macOS you can actually have arbitrary symbols in +// function names (at least when given to LLVM), but this is not possible +// when using unix's linker. Perhaps one day when we just use a linker from LLVM +// we won't need to do this name mangling. The problem with name mangling is +// that it seriously limits the available characters. For example we can't +// have things like &T in symbol names when one would theoretically +// want them for things like impls of traits on that type. +// +// To be able to work on all platforms and get *some* reasonable output, we +// use C++ name-mangling. +#[derive(Debug)] +struct SymbolPath { + result: String, + temp_buf: String, +} + +impl SymbolPath { + fn new() -> Self { + let mut result = + SymbolPath { result: String::with_capacity(64), temp_buf: String::with_capacity(16) }; + result.result.push_str("_ZN"); // _Z == Begin name-sequence, N == nested + result + } + + fn finalize_pending_component(&mut self) { + if !self.temp_buf.is_empty() { + let _ = write!(self.result, "{}{}", self.temp_buf.len(), self.temp_buf); + self.temp_buf.clear(); + } + } + + fn finish(mut self, hash: u64) -> String { + self.finalize_pending_component(); + // E = end name-sequence + let _ = write!(self.result, "17h{:016x}E", hash); + self.result + } +} + +struct SymbolPrinter<'tcx> { + tcx: TyCtxt<'tcx>, + path: SymbolPath, + + // When `true`, `finalize_pending_component` isn't used. + // This is needed when recursing into `path_qualified`, + // or `path_generic_args`, as any nested paths are + // logically within one component. + keep_within_component: bool, +} + +// HACK(eddyb) this relies on using the `fmt` interface to get +// `PrettyPrinter` aka pretty printing of e.g. types in paths, +// symbol names should have their own printing machinery. 
+ +impl<'tcx> Printer<'tcx> for &mut SymbolPrinter<'tcx> { + type Error = fmt::Error; + + type Path = Self; + type Region = Self; + type Type = Self; + type DynExistential = Self; + type Const = Self; + + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } + + fn print_region(self, _region: ty::Region<'_>) -> Result<Self::Region, Self::Error> { + Ok(self) + } + + fn print_type(mut self, ty: Ty<'tcx>) -> Result<Self::Type, Self::Error> { + match *ty.kind() { + // Print all nominal types as paths (unlike `pretty_print_type`). + ty::FnDef(def_id, substs) + | ty::Opaque(def_id, substs) + | ty::Projection(ty::ProjectionTy { item_def_id: def_id, substs }) + | ty::Closure(def_id, substs) + | ty::Generator(def_id, substs, _) => self.print_def_path(def_id, substs), + + // The `pretty_print_type` formatting of array size depends on + // -Zverbose flag, so we cannot reuse it here. + ty::Array(ty, size) => { + self.write_str("[")?; + self = self.print_type(ty)?; + self.write_str("; ")?; + if let Some(size) = size.kind().try_to_bits(self.tcx().data_layout.pointer_size) { + write!(self, "{}", size)? + } else if let ty::ConstKind::Param(param) = size.kind() { + self = param.print(self)? + } else { + self.write_str("_")? + } + self.write_str("]")?; + Ok(self) + } + + _ => self.pretty_print_type(ty), + } + } + + fn print_dyn_existential( + mut self, + predicates: &'tcx ty::List<ty::Binder<'tcx, ty::ExistentialPredicate<'tcx>>>, + ) -> Result<Self::DynExistential, Self::Error> { + let mut first = true; + for p in predicates { + if !first { + write!(self, "+")?; + } + first = false; + self = p.print(self)?; + } + Ok(self) + } + + fn print_const(self, ct: ty::Const<'tcx>) -> Result<Self::Const, Self::Error> { + // only print integers + match (ct.kind(), ct.ty().kind()) { + (ty::ConstKind::Value(ty::ValTree::Leaf(scalar)), ty::Int(_) | ty::Uint(_)) => { + // The `pretty_print_const` formatting depends on -Zverbose + // flag, so we cannot reuse it here. 
+ let signed = matches!(ct.ty().kind(), ty::Int(_)); + write!( + self, + "{:#?}", + ty::ConstInt::new(scalar, signed, ct.ty().is_ptr_sized_integral()) + )?; + } + _ => self.write_str("_")?, + } + Ok(self) + } + + fn path_crate(self, cnum: CrateNum) -> Result<Self::Path, Self::Error> { + self.write_str(self.tcx.crate_name(cnum).as_str())?; + Ok(self) + } + fn path_qualified( + self, + self_ty: Ty<'tcx>, + trait_ref: Option<ty::TraitRef<'tcx>>, + ) -> Result<Self::Path, Self::Error> { + // Similar to `pretty_path_qualified`, but for the other + // types that are printed as paths (see `print_type` above). + match self_ty.kind() { + ty::FnDef(..) + | ty::Opaque(..) + | ty::Projection(_) + | ty::Closure(..) + | ty::Generator(..) + if trait_ref.is_none() => + { + self.print_type(self_ty) + } + + _ => self.pretty_path_qualified(self_ty, trait_ref), + } + } + + fn path_append_impl( + self, + print_prefix: impl FnOnce(Self) -> Result<Self::Path, Self::Error>, + _disambiguated_data: &DisambiguatedDefPathData, + self_ty: Ty<'tcx>, + trait_ref: Option<ty::TraitRef<'tcx>>, + ) -> Result<Self::Path, Self::Error> { + self.pretty_path_append_impl( + |mut cx| { + cx = print_prefix(cx)?; + + if cx.keep_within_component { + // HACK(eddyb) print the path similarly to how `FmtPrinter` prints it. + cx.write_str("::")?; + } else { + cx.path.finalize_pending_component(); + } + + Ok(cx) + }, + self_ty, + trait_ref, + ) + } + fn path_append( + mut self, + print_prefix: impl FnOnce(Self) -> Result<Self::Path, Self::Error>, + disambiguated_data: &DisambiguatedDefPathData, + ) -> Result<Self::Path, Self::Error> { + self = print_prefix(self)?; + + // Skip `::{{extern}}` blocks and `::{{constructor}}` on tuple/unit structs. + if let DefPathData::ForeignMod | DefPathData::Ctor = disambiguated_data.data { + return Ok(self); + } + + if self.keep_within_component { + // HACK(eddyb) print the path similarly to how `FmtPrinter` prints it. 
+ self.write_str("::")?; + } else { + self.path.finalize_pending_component(); + } + + write!(self, "{}", disambiguated_data.data)?; + + Ok(self) + } + fn path_generic_args( + mut self, + print_prefix: impl FnOnce(Self) -> Result<Self::Path, Self::Error>, + args: &[GenericArg<'tcx>], + ) -> Result<Self::Path, Self::Error> { + self = print_prefix(self)?; + + let args = + args.iter().cloned().filter(|arg| !matches!(arg.unpack(), GenericArgKind::Lifetime(_))); + + if args.clone().next().is_some() { + self.generic_delimiters(|cx| cx.comma_sep(args)) + } else { + Ok(self) + } + } +} + +impl<'tcx> PrettyPrinter<'tcx> for &mut SymbolPrinter<'tcx> { + fn should_print_region(&self, _region: ty::Region<'_>) -> bool { + false + } + fn comma_sep<T>(mut self, mut elems: impl Iterator<Item = T>) -> Result<Self, Self::Error> + where + T: Print<'tcx, Self, Output = Self, Error = Self::Error>, + { + if let Some(first) = elems.next() { + self = first.print(self)?; + for elem in elems { + self.write_str(",")?; + self = elem.print(self)?; + } + } + Ok(self) + } + + fn generic_delimiters( + mut self, + f: impl FnOnce(Self) -> Result<Self, Self::Error>, + ) -> Result<Self, Self::Error> { + write!(self, "<")?; + + let kept_within_component = mem::replace(&mut self.keep_within_component, true); + self = f(self)?; + self.keep_within_component = kept_within_component; + + write!(self, ">")?; + + Ok(self) + } +} + +impl fmt::Write for SymbolPrinter<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + // Name sanitation. LLVM will happily accept identifiers with weird names, but + // gas doesn't! + // gas accepts the following characters in symbols: a-z, A-Z, 0-9, ., _, $ + // NVPTX assembly has more strict naming rules than gas, so additionally, dots + // are replaced with '$' there. + + for c in s.chars() { + if self.path.temp_buf.is_empty() { + match c { + 'a'..='z' | 'A'..='Z' | '_' => {} + _ => { + // Underscore-qualify anything that didn't start as an ident. 
+ self.path.temp_buf.push('_'); + } + } + } + match c { + // Escape these with $ sequences + '@' => self.path.temp_buf.push_str("$SP$"), + '*' => self.path.temp_buf.push_str("$BP$"), + '&' => self.path.temp_buf.push_str("$RF$"), + '<' => self.path.temp_buf.push_str("$LT$"), + '>' => self.path.temp_buf.push_str("$GT$"), + '(' => self.path.temp_buf.push_str("$LP$"), + ')' => self.path.temp_buf.push_str("$RP$"), + ',' => self.path.temp_buf.push_str("$C$"), + + '-' | ':' | '.' if self.tcx.has_strict_asm_symbol_naming() => { + // NVPTX doesn't support these characters in symbol names. + self.path.temp_buf.push('$') + } + + // '.' doesn't occur in types and functions, so reuse it + // for ':' and '-' + '-' | ':' => self.path.temp_buf.push('.'), + + // Avoid crashing LLVM in certain (LTO-related) situations, see #60925. + 'm' if self.path.temp_buf.ends_with(".llv") => self.path.temp_buf.push_str("$u6d$"), + + // These are legal symbols + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '$' => self.path.temp_buf.push(c), + + _ => { + self.path.temp_buf.push('$'); + for c in c.escape_unicode().skip(1) { + match c { + '{' => {} + '}' => self.path.temp_buf.push('$'), + c => self.path.temp_buf.push(c), + } + } + } + } + } + + Ok(()) + } +} diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs new file mode 100644 index 000000000..5fc992023 --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -0,0 +1,277 @@ +//! The Rust Linkage Model and Symbol Names +//! ======================================= +//! +//! The semantic model of Rust linkage is, broadly, that "there's no global +//! namespace" between crates. Our aim is to preserve the illusion of this +//! model despite the fact that it's not *quite* possible to implement on +//! modern linkers. We initially didn't use system linkers at all, but have +//! been convinced of their utility. +//! +//! There are a few issues to handle: +//! +//! 
- Linkers operate on a flat namespace, so we have to flatten names. +//! We do this using the C++ namespace-mangling technique. Foo::bar +//! symbols and such. +//! +//! - Symbols for distinct items with the same *name* need to get different +//! linkage-names. Examples of this are monomorphizations of functions or +//! items within anonymous scopes that end up having the same path. +//! +//! - Symbols in different crates but with same names "within" the crate need +//! to get different linkage-names. +//! +//! - Symbol names should be deterministic: Two consecutive runs of the +//! compiler over the same code base should produce the same symbol names for +//! the same items. +//! +//! - Symbol names should not depend on any global properties of the code base, +//! so that small modifications to the code base do not result in all symbols +//! changing. In previous versions of the compiler, symbol names incorporated +//! the SVH (Stable Version Hash) of the crate. This scheme turned out to be +//! infeasible when used in conjunction with incremental compilation because +//! small code changes would invalidate all symbols generated previously. +//! +//! - Even symbols from different versions of the same crate should be able to +//! live next to each other without conflict. +//! +//! In order to fulfill the above requirements the following scheme is used by +//! the compiler: +//! +//! The main tool for avoiding naming conflicts is the incorporation of a 64-bit +//! hash value into every exported symbol name. Anything that makes a difference +//! to the symbol being named, but does not show up in the regular path needs to +//! be fed into this hash: +//! +//! - Different monomorphizations of the same item have the same path but differ +//! in their concrete type parameters, so these parameters are part of the +//! data being digested for the symbol hash. +//! +//! - Rust allows items to be defined in anonymous scopes, such as in +//! 
`fn foo() { { fn bar() {} } { fn bar() {} } }`. Both `bar` functions have +//! the path `foo::bar`, since the anonymous scopes do not contribute to the +//! path of an item. The compiler already handles this case via so-called +//! disambiguating `DefPaths` which use indices to distinguish items with the +//! same name. The DefPaths of the functions above are thus `foo[0]::bar[0]` +//! and `foo[0]::bar[1]`. In order to incorporate this disambiguation +//! information into the symbol name too, these indices are fed into the +//! symbol hash, so that the above two symbols would end up with different +//! hash values. +//! +//! The two measures described above suffice to avoid intra-crate conflicts. In +//! order to also avoid inter-crate conflicts two more measures are taken: +//! +//! - The name of the crate containing the symbol is prepended to the symbol +//! name, i.e., symbols are "crate qualified". For example, a function `foo` in +//! module `bar` in crate `baz` would get a symbol name like +//! `baz::bar::foo::{hash}` instead of just `bar::foo::{hash}`. This avoids +//! simple conflicts between functions from different crates. +//! +//! - In order to be able to also use symbols from two versions of the same +//! crate (which naturally also have the same name), a stronger measure is +//! required: The compiler accepts an arbitrary "disambiguator" value via the +//! `-C metadata` command-line argument. This disambiguator is then fed into +//! the symbol hash of every exported item. Consequently, the symbols in two +//! identical crates but with different disambiguators are not in conflict +//! with each other. This facility is mainly intended to be used by build +//! tools like Cargo. +//! +//! A note on symbol name stability +//! ------------------------------- +//! Previous versions of the compiler resorted to feeding NodeIds into the +//! symbol hash in order to disambiguate between items with the same path. The +//! 
current version of the name generation algorithm takes great care not to do +//! that, since NodeIds are notoriously unstable: A small change to the +//! code base will offset all NodeIds after the change and thus, much as using +//! the SVH in the hash, invalidate an unbounded number of symbol names. This +//! makes re-using previously compiled code for incremental compilation +//! virtually impossible. Thus, symbol hash generation exclusively relies on +//! DefPaths which are much more robust in the face of changes to the code base. + +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(never_type)] +#![recursion_limit = "256"] +#![allow(rustc::potential_query_instability)] + +#[macro_use] +extern crate rustc_middle; + +use rustc_hir::def::DefKind; +use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; +use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; +use rustc_middle::ty::query::Providers; +use rustc_middle::ty::subst::SubstsRef; +use rustc_middle::ty::{self, Instance, TyCtxt}; +use rustc_session::config::SymbolManglingVersion; + +use tracing::debug; + +mod legacy; +mod v0; + +pub mod test; +pub mod typeid; + +/// This function computes the symbol name for the given `instance` and the +/// given instantiating crate. That is, if you know that instance X is +/// instantiated in crate Y, this is the symbol name this instance would have. +pub fn symbol_name_for_instance_in_crate<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + instantiating_crate: CrateNum, +) -> String { + compute_symbol_name(tcx, instance, || instantiating_crate) +} + +pub fn provide(providers: &mut Providers) { + *providers = Providers { symbol_name: symbol_name_provider, ..*providers }; +} + +// The `symbol_name` query provides the symbol name for calling a given +// instance from the local crate. 
In particular, it will also look up the +// correct symbol name of instances from upstream crates. +fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> { + let symbol_name = compute_symbol_name(tcx, instance, || { + // This closure determines the instantiating crate for instances that + // need an instantiating-crate-suffix for their symbol name, in order + // to differentiate between local copies. + if is_generic(instance.substs) { + // For generics we might find re-usable upstream instances. If there + // is one, we rely on the symbol being instantiated locally. + instance.upstream_monomorphization(tcx).unwrap_or(LOCAL_CRATE) + } else { + // For non-generic things that need to avoid naming conflicts, we + // always instantiate a copy in the local crate. + LOCAL_CRATE + } + }); + + ty::SymbolName::new(tcx, &symbol_name) +} + +pub fn typeid_for_trait_ref<'tcx>( + tcx: TyCtxt<'tcx>, + trait_ref: ty::PolyExistentialTraitRef<'tcx>, +) -> String { + v0::mangle_typeid_for_trait_ref(tcx, trait_ref) +} + +/// Computes the symbol name for the given instance. This function will call +/// `compute_instantiating_crate` if it needs to factor the instantiating crate +/// into the symbol name. +fn compute_symbol_name<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + compute_instantiating_crate: impl FnOnce() -> CrateNum, +) -> String { + let def_id = instance.def_id(); + let substs = instance.substs; + + debug!("symbol_name(def_id={:?}, substs={:?})", def_id, substs); + + if let Some(def_id) = def_id.as_local() { + if tcx.proc_macro_decls_static(()) == Some(def_id) { + let stable_crate_id = tcx.sess.local_stable_crate_id(); + return tcx.sess.generate_proc_macro_decls_symbol(stable_crate_id); + } + } + + // FIXME(eddyb) Precompute a custom symbol name based on attributes. 
+ let attrs = if tcx.def_kind(def_id).has_codegen_attrs() { + tcx.codegen_fn_attrs(def_id) + } else { + CodegenFnAttrs::EMPTY + }; + + // Foreign items by default use no mangling for their symbol name. There's a + // few exceptions to this rule though: + // + // * This can be overridden with the `#[link_name]` attribute + // + // * On the wasm32 targets there is a bug (or feature) in LLD [1] where the + // same-named symbol when imported from different wasm modules will get + // hooked up incorrectly. As a result foreign symbols, on the wasm target, + // with a wasm import module, get mangled. Additionally our codegen will + // deduplicate symbols based purely on the symbol name, but for wasm this + // isn't quite right because the same-named symbol on wasm can come from + // different modules. For these reasons if `#[link(wasm_import_module)]` + // is present we mangle everything on wasm because the demangled form will + // show up in the `wasm-import-name` custom attribute in LLVM IR. + // + // [1]: https://bugs.llvm.org/show_bug.cgi?id=44316 + if tcx.is_foreign_item(def_id) + && (!tcx.sess.target.is_like_wasm + || !tcx.wasm_import_module_map(def_id.krate).contains_key(&def_id)) + { + if let Some(name) = attrs.link_name { + return name.to_string(); + } + return tcx.item_name(def_id).to_string(); + } + + if let Some(name) = attrs.export_name { + // Use provided name + return name.to_string(); + } + + if attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) { + // Don't mangle + return tcx.item_name(def_id).to_string(); + } + + // If we're dealing with an instance of a function that's inlined from + // another crate but we're marking it as globally shared to our + // compilation (aka we're not making an internal copy in each of our + // codegen units) then this symbol may become an exported (but hidden + // visibility) symbol. This means that multiple crates may do the same + // and we want to be sure to avoid any symbol conflicts here. 
+ let is_globally_shared_function = matches!( + tcx.def_kind(instance.def_id()), + DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Generator | DefKind::Ctor(..) + ) && matches!( + MonoItem::Fn(instance).instantiation_mode(tcx), + InstantiationMode::GloballyShared { may_conflict: true } + ); + + // If this is an instance of a generic function, we also hash in + // the ID of the instantiating crate. This avoids symbol conflicts + // in case the same instance is emitted in two crates of the same + // project. + let avoid_cross_crate_conflicts = is_generic(substs) || is_globally_shared_function; + + let instantiating_crate = + if avoid_cross_crate_conflicts { Some(compute_instantiating_crate()) } else { None }; + + // Pick the crate responsible for the symbol mangling version, which has to: + // 1. be stable for each instance, whether it's being defined or imported + // 2. obey each crate's own `-C symbol-mangling-version`, as much as possible + // We solve these as follows: + // 1. because symbol names depend on both `def_id` and `instantiating_crate`, + // both their `CrateNum`s are stable for any given instance, so we can pick + // either and have a stable choice of symbol mangling version + // 2. we favor `instantiating_crate` where possible (i.e. 
when `Some`) + let mangling_version_crate = instantiating_crate.unwrap_or(def_id.krate); + let mangling_version = if mangling_version_crate == LOCAL_CRATE { + tcx.sess.opts.get_symbol_mangling_version() + } else { + tcx.symbol_mangling_version(mangling_version_crate) + }; + + let symbol = match mangling_version { + SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate), + SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate), + }; + + debug_assert!( + rustc_demangle::try_demangle(&symbol).is_ok(), + "compute_symbol_name: `{}` cannot be demangled", + symbol + ); + + symbol +} + +fn is_generic(substs: SubstsRef<'_>) -> bool { + substs.non_erasable_generics().next().is_some() +} diff --git a/compiler/rustc_symbol_mangling/src/test.rs b/compiler/rustc_symbol_mangling/src/test.rs new file mode 100644 index 000000000..7249ce04c --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/test.rs @@ -0,0 +1,74 @@ +//! Walks the crate looking for items/impl-items/trait-items that have +//! either a `rustc_symbol_name` or `rustc_def_path` attribute and +//! generates an error giving, respectively, the symbol name or +//! def-path. This is used for unit testing the code that generates +//! paths etc in all kinds of annoying scenarios. + +use rustc_hir::def_id::LocalDefId; +use rustc_middle::ty::print::with_no_trimmed_paths; +use rustc_middle::ty::{subst::InternalSubsts, Instance, TyCtxt}; +use rustc_span::symbol::{sym, Symbol}; + +const SYMBOL_NAME: Symbol = sym::rustc_symbol_name; +const DEF_PATH: Symbol = sym::rustc_def_path; + +pub fn report_symbol_names(tcx: TyCtxt<'_>) { + // if the `rustc_attrs` feature is not enabled, then the + // attributes we are interested in cannot be present anyway, so + // skip the walk. 
+ if !tcx.features().rustc_attrs { + return; + } + + tcx.dep_graph.with_ignore(|| { + let mut symbol_names = SymbolNamesTest { tcx }; + let crate_items = tcx.hir_crate_items(()); + + for id in crate_items.items() { + symbol_names.process_attrs(id.def_id); + } + + for id in crate_items.trait_items() { + symbol_names.process_attrs(id.def_id); + } + + for id in crate_items.impl_items() { + symbol_names.process_attrs(id.def_id); + } + + for id in crate_items.foreign_items() { + symbol_names.process_attrs(id.def_id); + } + }) +} + +struct SymbolNamesTest<'tcx> { + tcx: TyCtxt<'tcx>, +} + +impl SymbolNamesTest<'_> { + fn process_attrs(&mut self, def_id: LocalDefId) { + let tcx = self.tcx; + // The formatting of `tag({})` is chosen so that tests can elect + // to test the entirety of the string, if they choose, or else just + // some subset. + for attr in tcx.get_attrs(def_id.to_def_id(), SYMBOL_NAME) { + let def_id = def_id.to_def_id(); + let instance = Instance::new( + def_id, + tcx.erase_regions(InternalSubsts::identity_for_item(tcx, def_id)), + ); + let mangled = tcx.symbol_name(instance); + tcx.sess.span_err(attr.span, &format!("symbol-name({})", mangled)); + if let Ok(demangling) = rustc_demangle::try_demangle(mangled.name) { + tcx.sess.span_err(attr.span, &format!("demangling({})", demangling)); + tcx.sess.span_err(attr.span, &format!("demangling-alt({:#})", demangling)); + } + } + + for attr in tcx.get_attrs(def_id.to_def_id(), DEF_PATH) { + let path = with_no_trimmed_paths!(tcx.def_path_str(def_id.to_def_id())); + tcx.sess.span_err(attr.span, &format!("def-path({})", path)); + } + } +} diff --git a/compiler/rustc_symbol_mangling/src/typeid.rs b/compiler/rustc_symbol_mangling/src/typeid.rs new file mode 100644 index 000000000..9228bea43 --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/typeid.rs @@ -0,0 +1,18 @@ +// For more information about type metadata and type metadata identifiers for cross-language LLVM +// CFI support, see Type metadata in the design 
document in the tracking issue #89653. + +use rustc_middle::ty::{FnSig, Ty, TyCtxt}; +use rustc_target::abi::call::FnAbi; + +mod typeid_itanium_cxx_abi; +use typeid_itanium_cxx_abi::TypeIdOptions; + +/// Returns a type metadata identifier for the specified FnAbi. +pub fn typeid_for_fnabi<'tcx>(tcx: TyCtxt<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> String { + typeid_itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, TypeIdOptions::NO_OPTIONS) +} + +/// Returns a type metadata identifier for the specified FnSig. +pub fn typeid_for_fnsig<'tcx>(tcx: TyCtxt<'tcx>, fn_sig: &FnSig<'tcx>) -> String { + typeid_itanium_cxx_abi::typeid_for_fnsig(tcx, fn_sig, TypeIdOptions::NO_OPTIONS) +} diff --git a/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs b/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs new file mode 100644 index 000000000..a09b52fbf --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs @@ -0,0 +1,929 @@ +// For more information about type metadata and type metadata identifiers for cross-language LLVM +// CFI support, see Type metadata in the design document in the tracking issue #89653. + +// FIXME(rcvalle): Identify C char and integer type uses and encode them with their respective +// builtin type encodings as specified by the Itanium C++ ABI for extern function types with the "C" +// calling convention to use this encoding for cross-language LLVM CFI. 
+ +use bitflags::bitflags; +use core::fmt::Display; +use rustc_data_structures::base_n; +use rustc_data_structures::fx::FxHashMap; +use rustc_hir as hir; +use rustc_middle::ty::subst::{GenericArg, GenericArgKind, SubstsRef}; +use rustc_middle::ty::{ + self, Binder, Const, ExistentialPredicate, FloatTy, FnSig, IntTy, List, Region, RegionKind, + Term, Ty, TyCtxt, UintTy, +}; +use rustc_span::def_id::DefId; +use rustc_span::symbol::sym; +use rustc_target::abi::call::{Conv, FnAbi}; +use rustc_target::spec::abi::Abi; +use std::fmt::Write as _; + +/// Type and extended type qualifiers. +#[derive(Eq, Hash, PartialEq)] +enum TyQ { + None, + Const, + Mut, +} + +/// Substitution dictionary key. +#[derive(Eq, Hash, PartialEq)] +enum DictKey<'tcx> { + Ty(Ty<'tcx>, TyQ), + Region(Region<'tcx>), + Const(Const<'tcx>), + Predicate(ExistentialPredicate<'tcx>), +} + +bitflags! { + /// Options for typeid_for_fnabi and typeid_for_fnsig. + pub struct TypeIdOptions: u32 { + const NO_OPTIONS = 0; + const GENERALIZE_POINTERS = 1; + const GENERALIZE_REPR_C = 2; + } +} + +/// Options for encode_ty. +type EncodeTyOptions = TypeIdOptions; + +/// Options for transform_ty. +type TransformTyOptions = TypeIdOptions; + +/// Converts a number to a disambiguator (see +/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>). +fn to_disambiguator(num: u64) -> String { + if let Some(num) = num.checked_sub(1) { + format!("s{}_", base_n::encode(num as u128, 62)) + } else { + "s_".to_string() + } +} + +/// Converts a number to a sequence number (see +/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>). +fn to_seq_id(num: usize) -> String { + if let Some(num) = num.checked_sub(1) { + base_n::encode(num as u128, 36).to_uppercase() + } else { + "".to_string() + } +} + +/// Substitutes a component if found in the substitution dictionary (see +/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression>). 
+fn compress<'tcx>( + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + key: DictKey<'tcx>, + comp: &mut String, +) { + match dict.get(&key) { + Some(num) => { + comp.clear(); + let _ = write!(comp, "S{}_", to_seq_id(*num)); + } + None => { + dict.insert(key, dict.len()); + } + } +} + +// FIXME(rcvalle): Move to compiler/rustc_middle/src/ty/sty.rs after C types work is done, possibly +// along with other is_c_type methods. +/// Returns whether a `ty::Ty` is `c_void`. +fn is_c_void_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> bool { + match ty.kind() { + ty::Adt(adt_def, ..) => { + let def_id = adt_def.0.did; + let crate_name = tcx.crate_name(def_id.krate); + if tcx.item_name(def_id).as_str() == "c_void" + && (crate_name == sym::core || crate_name == sym::std || crate_name == sym::libc) + { + true + } else { + false + } + } + _ => false, + } +} + +/// Encodes a const using the Itanium C++ ABI as a literal argument (see +/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling.literal>). +fn encode_const<'tcx>( + tcx: TyCtxt<'tcx>, + c: Const<'tcx>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + options: EncodeTyOptions, +) -> String { + // L<element-type>[n]<element-value>E as literal argument + let mut s = String::from('L'); + + // Element type + s.push_str(&encode_ty(tcx, c.ty(), dict, options)); + + // The only allowed types of const parameters are bool, u8, u16, u32, u64, u128, usize i8, i16, + // i32, i64, i128, isize, and char. The bool value false is encoded as 0 and true as 1. 
+ fn push_signed_value<T: Display + PartialOrd>(s: &mut String, value: T, zero: T) { + if value < zero { + s.push('n') + }; + let _ = write!(s, "{}", value); + } + + fn push_unsigned_value<T: Display>(s: &mut String, value: T) { + let _ = write!(s, "{}", value); + } + + if let Some(scalar_int) = c.kind().try_to_scalar_int() { + let signed = c.ty().is_signed(); + match scalar_int.size().bits() { + 8 if signed => push_signed_value(&mut s, scalar_int.try_to_i8().unwrap(), 0), + 16 if signed => push_signed_value(&mut s, scalar_int.try_to_i16().unwrap(), 0), + 32 if signed => push_signed_value(&mut s, scalar_int.try_to_i32().unwrap(), 0), + 64 if signed => push_signed_value(&mut s, scalar_int.try_to_i64().unwrap(), 0), + 128 if signed => push_signed_value(&mut s, scalar_int.try_to_i128().unwrap(), 0), + 8 => push_unsigned_value(&mut s, scalar_int.try_to_u8().unwrap()), + 16 => push_unsigned_value(&mut s, scalar_int.try_to_u16().unwrap()), + 32 => push_unsigned_value(&mut s, scalar_int.try_to_u32().unwrap()), + 64 => push_unsigned_value(&mut s, scalar_int.try_to_u64().unwrap()), + 128 => push_unsigned_value(&mut s, scalar_int.try_to_u128().unwrap()), + _ => { + bug!("encode_const: unexpected size `{:?}`", scalar_int.size().bits()); + } + }; + } else { + bug!("encode_const: unexpected type `{:?}`", c.ty()); + } + + // Close the "L..E" pair + s.push('E'); + + compress(dict, DictKey::Const(c), &mut s); + + s +} + +/// Encodes a FnSig using the Itanium C++ ABI with vendor extended type qualifiers and types for +/// Rust types that are not used at the FFI boundary. 
+fn encode_fnsig<'tcx>( + tcx: TyCtxt<'tcx>, + fn_sig: &FnSig<'tcx>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + options: TypeIdOptions, +) -> String { + // Function types are delimited by an "F..E" pair + let mut s = String::from("F"); + + let mut encode_ty_options = EncodeTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("encode_fnsig: invalid option(s) `{:?}`", options.bits())); + match fn_sig.abi { + Abi::C { .. } => { + encode_ty_options.insert(EncodeTyOptions::GENERALIZE_REPR_C); + } + _ => { + encode_ty_options.remove(EncodeTyOptions::GENERALIZE_REPR_C); + } + } + + // Encode the return type + let transform_ty_options = TransformTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("encode_fnsig: invalid option(s) `{:?}`", options.bits())); + let ty = transform_ty(tcx, fn_sig.output(), transform_ty_options); + s.push_str(&encode_ty(tcx, ty, dict, encode_ty_options)); + + // Encode the parameter types + let tys = fn_sig.inputs(); + if !tys.is_empty() { + for ty in tys { + let ty = transform_ty(tcx, *ty, transform_ty_options); + s.push_str(&encode_ty(tcx, ty, dict, encode_ty_options)); + } + + if fn_sig.c_variadic { + s.push('z'); + } + } else { + if fn_sig.c_variadic { + s.push('z'); + } else { + // Empty parameter lists, whether declared as () or conventionally as (void), are + // encoded with a void parameter specifier "v". + s.push('v') + } + } + + // Close the "F..E" pair + s.push('E'); + + s +} + +/// Encodes a predicate using the Itanium C++ ABI with vendor extended type qualifiers and types for +/// Rust types that are not used at the FFI boundary. +fn encode_predicate<'tcx>( + tcx: TyCtxt<'tcx>, + predicate: Binder<'tcx, ExistentialPredicate<'tcx>>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + options: EncodeTyOptions, +) -> String { + // u<length><name>[I<element-type1..element-typeN>E], where <element-type> is <subst>, as vendor + // extended type. 
+ let mut s = String::new(); + match predicate.as_ref().skip_binder() { + ty::ExistentialPredicate::Trait(trait_ref) => { + let name = encode_ty_name(tcx, trait_ref.def_id); + let _ = write!(s, "u{}{}", name.len(), &name); + s.push_str(&encode_substs(tcx, trait_ref.substs, dict, options)); + } + ty::ExistentialPredicate::Projection(projection) => { + let name = encode_ty_name(tcx, projection.item_def_id); + let _ = write!(s, "u{}{}", name.len(), &name); + s.push_str(&encode_substs(tcx, projection.substs, dict, options)); + match projection.term { + Term::Ty(ty) => { + s.push_str(&encode_ty(tcx, ty, dict, options)); + } + Term::Const(c) => { + s.push_str(&encode_const(tcx, c, dict, options)); + } + } + } + ty::ExistentialPredicate::AutoTrait(def_id) => { + let name = encode_ty_name(tcx, *def_id); + let _ = write!(s, "u{}{}", name.len(), &name); + } + }; + compress(dict, DictKey::Predicate(*predicate.as_ref().skip_binder()), &mut s); + s +} + +/// Encodes predicates using the Itanium C++ ABI with vendor extended type qualifiers and types for +/// Rust types that are not used at the FFI boundary. +fn encode_predicates<'tcx>( + tcx: TyCtxt<'tcx>, + predicates: &List<Binder<'tcx, ExistentialPredicate<'tcx>>>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + options: EncodeTyOptions, +) -> String { + // <predicate1[..predicateN]>E as part of vendor extended type + let mut s = String::new(); + let predicates: Vec<Binder<'tcx, ExistentialPredicate<'tcx>>> = + predicates.iter().map(|predicate| predicate).collect(); + for predicate in predicates { + s.push_str(&encode_predicate(tcx, predicate, dict, options)); + } + s +} + +/// Encodes a region using the Itanium C++ ABI as a vendor extended type. 
+fn encode_region<'tcx>( + _tcx: TyCtxt<'tcx>, + region: Region<'tcx>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + _options: EncodeTyOptions, +) -> String { + // u6region[I[<region-disambiguator>][<region-index>]E] as vendor extended type + let mut s = String::new(); + match region.kind() { + RegionKind::ReLateBound(debruijn, r) => { + s.push_str("u6regionI"); + // Debruijn index, which identifies the binder, as region disambiguator + let num = debruijn.index() as u64; + if num > 0 { + s.push_str(&to_disambiguator(num)); + } + // Index within the binder + let _ = write!(s, "{}", r.var.index() as u64); + s.push('E'); + compress(dict, DictKey::Region(region), &mut s); + } + RegionKind::ReErased => { + s.push_str("u6region"); + compress(dict, DictKey::Region(region), &mut s); + } + RegionKind::ReEarlyBound(..) + | RegionKind::ReFree(..) + | RegionKind::ReStatic + | RegionKind::ReVar(..) + | RegionKind::RePlaceholder(..) + | RegionKind::ReEmpty(..) => { + bug!("encode_region: unexpected `{:?}`", region.kind()); + } + } + s +} + +/// Encodes substs using the Itanium C++ ABI with vendor extended type qualifiers and types for Rust +/// types that are not used at the FFI boundary. 
+fn encode_substs<'tcx>( + tcx: TyCtxt<'tcx>, + substs: SubstsRef<'tcx>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + options: EncodeTyOptions, +) -> String { + // [I<subst1..substN>E] as part of vendor extended type + let mut s = String::new(); + let substs: Vec<GenericArg<'_>> = substs.iter().map(|subst| subst).collect(); + if !substs.is_empty() { + s.push('I'); + for subst in substs { + match subst.unpack() { + GenericArgKind::Lifetime(region) => { + s.push_str(&encode_region(tcx, region, dict, options)); + } + GenericArgKind::Type(ty) => { + s.push_str(&encode_ty(tcx, ty, dict, options)); + } + GenericArgKind::Const(c) => { + s.push_str(&encode_const(tcx, c, dict, options)); + } + } + } + s.push('E'); + } + s +} + +/// Encodes a ty:Ty name, including its crate and path disambiguators and names. +fn encode_ty_name<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> String { + // Encode <name> for use in u<length><name>[I<element-type1..element-typeN>E], where + // <element-type> is <subst>, using v0's <path> without v0's extended form of paths: + // + // N<namespace-tagN>..N<namespace-tag1> + // C<crate-disambiguator><crate-name> + // <path-disambiguator1><path-name1>..<path-disambiguatorN><path-nameN> + // + // With additional tags for DefPathData::Impl and DefPathData::ForeignMod. 
For instance: + // + // pub type Type1 = impl Send; + // let _: Type1 = <Struct1<i32>>::foo; + // fn foo1(_: Type1) { } + // + // pub type Type2 = impl Send; + // let _: Type2 = <Trait1<i32>>::foo; + // fn foo2(_: Type2) { } + // + // pub type Type3 = impl Send; + // let _: Type3 = <i32 as Trait1<i32>>::foo; + // fn foo3(_: Type3) { } + // + // pub type Type4 = impl Send; + // let _: Type4 = <Struct1<i32> as Trait1<i32>>::foo; + // fn foo3(_: Type4) { } + // + // Are encoded as: + // + // _ZTSFvu29NvNIC1234_5crate8{{impl}}3fooIu3i32EE + // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3dynIu21NtC1234_5crate6Trait1Iu3i32Eu6regionES_EE + // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3i32S_EE + // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu22NtC1234_5crate7Struct1Iu3i32ES_EE + // + // The reason for not using v0's extended form of paths is to use a consistent and simpler + // encoding, as the reasoning for using it isn't relevand for type metadata identifiers (i.e., + // keep symbol names close to how methods are represented in error messages). See + // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#methods. + let mut s = String::new(); + + // Start and namespace tags + let mut def_path = tcx.def_path(def_id); + def_path.data.reverse(); + for disambiguated_data in &def_path.data { + s.push('N'); + s.push_str(match disambiguated_data.data { + hir::definitions::DefPathData::Impl => "I", // Not specified in v0's <namespace> + hir::definitions::DefPathData::ForeignMod => "F", // Not specified in v0's <namespace> + hir::definitions::DefPathData::TypeNs(..) => "t", + hir::definitions::DefPathData::ValueNs(..) 
=> "v", + hir::definitions::DefPathData::ClosureExpr => "C", + hir::definitions::DefPathData::Ctor => "c", + hir::definitions::DefPathData::AnonConst => "k", + hir::definitions::DefPathData::ImplTrait => "i", + hir::definitions::DefPathData::CrateRoot + | hir::definitions::DefPathData::Use + | hir::definitions::DefPathData::GlobalAsm + | hir::definitions::DefPathData::MacroNs(..) + | hir::definitions::DefPathData::LifetimeNs(..) => { + bug!("encode_ty_name: unexpected `{:?}`", disambiguated_data.data); + } + }); + } + + // Crate disambiguator and name + s.push('C'); + s.push_str(&to_disambiguator(tcx.stable_crate_id(def_path.krate).to_u64())); + let crate_name = tcx.crate_name(def_path.krate).to_string(); + let _ = write!(s, "{}{}", crate_name.len(), &crate_name); + + // Disambiguators and names + def_path.data.reverse(); + for disambiguated_data in &def_path.data { + let num = disambiguated_data.disambiguator as u64; + if num > 0 { + s.push_str(&to_disambiguator(num)); + } + + let name = disambiguated_data.data.to_string(); + let _ = write!(s, "{}", name.len()); + + // Prepend a '_' if name starts with a digit or '_' + if let Some(first) = name.as_bytes().get(0) { + if first.is_ascii_digit() || *first == b'_' { + s.push('_'); + } + } else { + bug!("encode_ty_name: invalid name `{:?}`", name); + } + + s.push_str(&name); + } + + s +} + +/// Encodes a ty:Ty using the Itanium C++ ABI with vendor extended type qualifiers and types for +/// Rust types that are not used at the FFI boundary. +fn encode_ty<'tcx>( + tcx: TyCtxt<'tcx>, + ty: Ty<'tcx>, + dict: &mut FxHashMap<DictKey<'tcx>, usize>, + options: EncodeTyOptions, +) -> String { + let mut typeid = String::new(); + + match ty.kind() { + // Primitive types + ty::Bool => { + typeid.push('b'); + } + + ty::Int(..) | ty::Uint(..) | ty::Float(..) 
=> { + // u<length><type-name> as vendor extended type + let mut s = String::from(match ty.kind() { + ty::Int(IntTy::I8) => "u2i8", + ty::Int(IntTy::I16) => "u3i16", + ty::Int(IntTy::I32) => "u3i32", + ty::Int(IntTy::I64) => "u3i64", + ty::Int(IntTy::I128) => "u4i128", + ty::Int(IntTy::Isize) => "u5isize", + ty::Uint(UintTy::U8) => "u2u8", + ty::Uint(UintTy::U16) => "u3u16", + ty::Uint(UintTy::U32) => "u3u32", + ty::Uint(UintTy::U64) => "u3u64", + ty::Uint(UintTy::U128) => "u4u128", + ty::Uint(UintTy::Usize) => "u5usize", + ty::Float(FloatTy::F32) => "u3f32", + ty::Float(FloatTy::F64) => "u3f64", + _ => "", + }); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + ty::Char => { + // u4char as vendor extended type + let mut s = String::from("u4char"); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + ty::Str => { + // u3str as vendor extended type + let mut s = String::from("u3str"); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + ty::Never => { + // u5never as vendor extended type + let mut s = String::from("u5never"); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + // Compound types + // () in Rust is equivalent to void return type in C + _ if ty.is_unit() => { + typeid.push('v'); + } + + // Sequence types + ty::Tuple(tys) => { + // u5tupleI<element-type1..element-typeN>E as vendor extended type + let mut s = String::from("u5tupleI"); + for ty in tys.iter() { + s.push_str(&encode_ty(tcx, ty, dict, options)); + } + s.push('E'); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + ty::Array(ty0, len) => { + // A<array-length><element-type> + let mut s = String::from("A"); + let _ = write!(s, "{}", &len.kind().try_to_scalar().unwrap().to_u64().unwrap()); + s.push_str(&encode_ty(tcx, *ty0, dict, options)); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + 
ty::Slice(ty0) => { + // u5sliceI<element-type>E as vendor extended type + let mut s = String::from("u5sliceI"); + s.push_str(&encode_ty(tcx, *ty0, dict, options)); + s.push('E'); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + // User-defined types + ty::Adt(adt_def, substs) => { + let mut s = String::new(); + let def_id = adt_def.0.did; + if options.contains(EncodeTyOptions::GENERALIZE_REPR_C) && adt_def.repr().c() { + // For for cross-language CFI support, the encoding must be compatible at the FFI + // boundary. For instance: + // + // struct type1 {}; + // void foo(struct type1* bar) {} + // + // Is encoded as: + // + // _ZTSFvP5type1E + // + // So, encode any repr(C) user-defined type for extern function types with the "C" + // calling convention (or extern types [i.e., ty::Foreign]) as <length><name>, where + // <name> is <unscoped-name>. + let name = tcx.item_name(def_id).to_string(); + let _ = write!(s, "{}{}", name.len(), &name); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + } else { + // u<length><name>[I<element-type1..element-typeN>E], where <element-type> is + // <subst>, as vendor extended type. + let name = encode_ty_name(tcx, def_id); + let _ = write!(s, "u{}{}", name.len(), &name); + s.push_str(&encode_substs(tcx, substs, dict, options)); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + } + typeid.push_str(&s); + } + + ty::Foreign(def_id) => { + // <length><name>, where <name> is <unscoped-name> + let mut s = String::new(); + let name = tcx.item_name(*def_id).to_string(); + let _ = write!(s, "{}{}", name.len(), &name); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + // Function types + ty::FnDef(def_id, substs) + | ty::Closure(def_id, substs) + | ty::Generator(def_id, substs, ..) => { + // u<length><name>[I<element-type1..element-typeN>E], where <element-type> is <subst>, + // as vendor extended type. 
+ let mut s = String::new(); + let name = encode_ty_name(tcx, *def_id); + let _ = write!(s, "u{}{}", name.len(), &name); + s.push_str(&encode_substs(tcx, substs, dict, options)); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + // Pointer types + ty::Ref(region, ty0, ..) => { + // [U3mut]u3refI<element-type>E as vendor extended type qualifier and type + let mut s = String::new(); + s.push_str("u3refI"); + s.push_str(&encode_ty(tcx, *ty0, dict, options)); + s.push('E'); + compress(dict, DictKey::Ty(tcx.mk_imm_ref(*region, *ty0), TyQ::None), &mut s); + if ty.is_mutable_ptr() { + s = format!("{}{}", "U3mut", &s); + compress(dict, DictKey::Ty(ty, TyQ::Mut), &mut s); + } + typeid.push_str(&s); + } + + ty::RawPtr(tm) => { + // P[K]<element-type> + let mut s = String::new(); + s.push_str(&encode_ty(tcx, tm.ty, dict, options)); + if !ty.is_mutable_ptr() { + s = format!("{}{}", "K", &s); + compress(dict, DictKey::Ty(tm.ty, TyQ::Const), &mut s); + }; + s = format!("{}{}", "P", &s); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + ty::FnPtr(fn_sig) => { + // PF<return-type><parameter-type1..parameter-typeN>E + let mut s = String::from("P"); + s.push_str(&encode_fnsig(tcx, &fn_sig.skip_binder(), dict, TypeIdOptions::NO_OPTIONS)); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + // Trait types + ty::Dynamic(predicates, region) => { + // u3dynI<element-type1[..element-typeN]>E, where <element-type> is <predicate>, as + // vendor extended type. + let mut s = String::from("u3dynI"); + s.push_str(&encode_predicates(tcx, predicates, dict, options)); + s.push_str(&encode_region(tcx, *region, dict, options)); + s.push('E'); + compress(dict, DictKey::Ty(ty, TyQ::None), &mut s); + typeid.push_str(&s); + } + + // Unexpected types + ty::Bound(..) + | ty::Error(..) + | ty::GeneratorWitness(..) + | ty::Infer(..) + | ty::Opaque(..) + | ty::Param(..) + | ty::Placeholder(..) 
+ | ty::Projection(..) => { + bug!("encode_ty: unexpected `{:?}`", ty.kind()); + } + }; + + typeid +} + +// Transforms a ty:Ty for being encoded and used in the substitution dictionary. It transforms all +// c_void types into unit types unconditionally, and generalizes all pointers if +// TransformTyOptions::GENERALIZE_POINTERS option is set. +fn transform_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>, options: TransformTyOptions) -> Ty<'tcx> { + let mut ty = ty; + + match ty.kind() { + ty::Bool + | ty::Int(..) + | ty::Uint(..) + | ty::Float(..) + | ty::Char + | ty::Str + | ty::Never + | ty::Foreign(..) + | ty::Dynamic(..) => {} + + _ if ty.is_unit() => {} + + ty::Tuple(tys) => { + ty = tcx.mk_tup(tys.iter().map(|ty| transform_ty(tcx, ty, options))); + } + + ty::Array(ty0, len) => { + let len = len.kind().try_to_scalar().unwrap().to_u64().unwrap(); + ty = tcx.mk_array(transform_ty(tcx, *ty0, options), len); + } + + ty::Slice(ty0) => { + ty = tcx.mk_slice(transform_ty(tcx, *ty0, options)); + } + + ty::Adt(adt_def, substs) => { + if is_c_void_ty(tcx, ty) { + ty = tcx.mk_unit(); + } else if options.contains(TransformTyOptions::GENERALIZE_REPR_C) && adt_def.repr().c() + { + ty = tcx.mk_adt(*adt_def, ty::List::empty()); + } else if adt_def.repr().transparent() && adt_def.is_struct() { + let variant = adt_def.non_enum_variant(); + let param_env = tcx.param_env(variant.def_id); + let field = variant.fields.iter().find(|field| { + let ty = tcx.type_of(field.did); + let is_zst = + tcx.layout_of(param_env.and(ty)).map_or(false, |layout| layout.is_zst()); + !is_zst + }); + if field.is_none() { + // Transform repr(transparent) types without non-ZST field into () + ty = tcx.mk_unit(); + } else { + let ty0 = tcx.type_of(field.unwrap().did); + // Generalize any repr(transparent) user-defined type that is either a pointer + // or reference, and either references itself or any other type that contains or + // references itself, to avoid a reference cycle. 
+ if ty0.is_any_ptr() && ty0.contains(ty) { + ty = transform_ty( + tcx, + ty0, + options | TransformTyOptions::GENERALIZE_POINTERS, + ); + } else { + ty = transform_ty(tcx, ty0, options); + } + } + } else { + ty = tcx.mk_adt(*adt_def, transform_substs(tcx, substs, options)); + } + } + + ty::FnDef(def_id, substs) => { + ty = tcx.mk_fn_def(*def_id, transform_substs(tcx, substs, options)); + } + + ty::Closure(def_id, substs) => { + ty = tcx.mk_closure(*def_id, transform_substs(tcx, substs, options)); + } + + ty::Generator(def_id, substs, movability) => { + ty = tcx.mk_generator(*def_id, transform_substs(tcx, substs, options), *movability); + } + + ty::Ref(region, ty0, ..) => { + if options.contains(TransformTyOptions::GENERALIZE_POINTERS) { + if ty.is_mutable_ptr() { + ty = tcx.mk_mut_ref(tcx.lifetimes.re_static, tcx.mk_unit()); + } else { + ty = tcx.mk_imm_ref(tcx.lifetimes.re_static, tcx.mk_unit()); + } + } else { + if ty.is_mutable_ptr() { + ty = tcx.mk_mut_ref(*region, transform_ty(tcx, *ty0, options)); + } else { + ty = tcx.mk_imm_ref(*region, transform_ty(tcx, *ty0, options)); + } + } + } + + ty::RawPtr(tm) => { + if options.contains(TransformTyOptions::GENERALIZE_POINTERS) { + if ty.is_mutable_ptr() { + ty = tcx.mk_mut_ptr(tcx.mk_unit()); + } else { + ty = tcx.mk_imm_ptr(tcx.mk_unit()); + } + } else { + if ty.is_mutable_ptr() { + ty = tcx.mk_mut_ptr(transform_ty(tcx, tm.ty, options)); + } else { + ty = tcx.mk_imm_ptr(transform_ty(tcx, tm.ty, options)); + } + } + } + + ty::FnPtr(fn_sig) => { + if options.contains(TransformTyOptions::GENERALIZE_POINTERS) { + ty = tcx.mk_imm_ptr(tcx.mk_unit()); + } else { + let parameters: Vec<Ty<'tcx>> = fn_sig + .skip_binder() + .inputs() + .iter() + .map(|ty| transform_ty(tcx, *ty, options)) + .collect(); + let output = transform_ty(tcx, fn_sig.skip_binder().output(), options); + ty = tcx.mk_fn_ptr(ty::Binder::bind_with_vars( + tcx.mk_fn_sig( + parameters.iter(), + &output, + fn_sig.c_variadic(), + fn_sig.unsafety(), + 
fn_sig.abi(), + ), + fn_sig.bound_vars(), + )); + } + } + + ty::Bound(..) + | ty::Error(..) + | ty::GeneratorWitness(..) + | ty::Infer(..) + | ty::Opaque(..) + | ty::Param(..) + | ty::Placeholder(..) + | ty::Projection(..) => { + bug!("transform_ty: unexpected `{:?}`", ty.kind()); + } + } + + ty +} + +/// Transforms substs for being encoded and used in the substitution dictionary. +fn transform_substs<'tcx>( + tcx: TyCtxt<'tcx>, + substs: SubstsRef<'tcx>, + options: TransformTyOptions, +) -> SubstsRef<'tcx> { + let substs: Vec<GenericArg<'tcx>> = substs + .iter() + .map(|subst| { + if let GenericArgKind::Type(ty) = subst.unpack() { + if is_c_void_ty(tcx, ty) { + tcx.mk_unit().into() + } else { + transform_ty(tcx, ty, options).into() + } + } else { + subst + } + }) + .collect(); + tcx.mk_substs(substs.iter()) +} + +/// Returns a type metadata identifier for the specified FnAbi using the Itanium C++ ABI with vendor +/// extended type qualifiers and types for Rust types that are not used at the FFI boundary. +pub fn typeid_for_fnabi<'tcx>( + tcx: TyCtxt<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + options: TypeIdOptions, +) -> String { + // A name is mangled by prefixing "_Z" to an encoding of its name, and in the case of functions + // its type. + let mut typeid = String::from("_Z"); + + // Clang uses the Itanium C++ ABI's virtual tables and RTTI typeinfo structure name as type + // metadata identifiers for function pointers. The typeinfo name encoding is a two-character + // code (i.e., 'TS') prefixed to the type encoding for the function. + typeid.push_str("TS"); + + // Function types are delimited by an "F..E" pair + typeid.push('F'); + + // A dictionary of substitution candidates used for compression (see + // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression). 
+ let mut dict: FxHashMap<DictKey<'tcx>, usize> = FxHashMap::default(); + + let mut encode_ty_options = EncodeTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits())); + match fn_abi.conv { + Conv::C => { + encode_ty_options.insert(EncodeTyOptions::GENERALIZE_REPR_C); + } + _ => { + encode_ty_options.remove(EncodeTyOptions::GENERALIZE_REPR_C); + } + } + + // Encode the return type + let transform_ty_options = TransformTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits())); + let ty = transform_ty(tcx, fn_abi.ret.layout.ty, transform_ty_options); + typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); + + // Encode the parameter types + if !fn_abi.c_variadic { + if !fn_abi.args.is_empty() { + for arg in fn_abi.args.iter() { + let ty = transform_ty(tcx, arg.layout.ty, transform_ty_options); + typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); + } + } else { + // Empty parameter lists, whether declared as () or conventionally as (void), are + // encoded with a void parameter specifier "v". + typeid.push('v'); + } + } else { + for n in 0..fn_abi.fixed_count { + let ty = transform_ty(tcx, fn_abi.args[n].layout.ty, transform_ty_options); + typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); + } + + typeid.push('z'); + } + + // Close the "F..E" pair + typeid.push('E'); + + typeid +} + +/// Returns a type metadata identifier for the specified FnSig using the Itanium C++ ABI with vendor +/// extended type qualifiers and types for Rust types that are not used at the FFI boundary. +pub fn typeid_for_fnsig<'tcx>( + tcx: TyCtxt<'tcx>, + fn_sig: &FnSig<'tcx>, + options: TypeIdOptions, +) -> String { + // A name is mangled by prefixing "_Z" to an encoding of its name, and in the case of functions + // its type. 
+ let mut typeid = String::from("_Z"); + + // Clang uses the Itanium C++ ABI's virtual tables and RTTI typeinfo structure name as type + // metadata identifiers for function pointers. The typeinfo name encoding is a two-character + // code (i.e., 'TS') prefixed to the type encoding for the function. + typeid.push_str("TS"); + + // A dictionary of substitution candidates used for compression (see + // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression). + let mut dict: FxHashMap<DictKey<'tcx>, usize> = FxHashMap::default(); + + // Encode the function signature + typeid.push_str(&encode_fnsig(tcx, fn_sig, &mut dict, options)); + + typeid +} diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs new file mode 100644 index 000000000..71fa5a448 --- /dev/null +++ b/compiler/rustc_symbol_mangling/src/v0.rs @@ -0,0 +1,844 @@ +use rustc_data_structures::base_n; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::intern::Interned; +use rustc_hir as hir; +use rustc_hir::def::CtorKind; +use rustc_hir::def_id::{CrateNum, DefId}; +use rustc_hir::definitions::{DefPathData, DisambiguatedDefPathData}; +use rustc_middle::ty::layout::IntegerExt; +use rustc_middle::ty::print::{Print, Printer}; +use rustc_middle::ty::subst::{GenericArg, GenericArgKind, Subst}; +use rustc_middle::ty::{ + self, EarlyBinder, FloatTy, Instance, IntTy, Ty, TyCtxt, TypeVisitable, UintTy, +}; +use rustc_span::symbol::kw; +use rustc_target::abi::Integer; +use rustc_target::spec::abi::Abi; + +use std::fmt::Write; +use std::iter; +use std::ops::Range; + +pub(super) fn mangle<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + instantiating_crate: Option<CrateNum>, +) -> String { + let def_id = instance.def_id(); + // FIXME(eddyb) this should ideally not be needed. 
+ let substs = tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), instance.substs); + + let prefix = "_R"; + let mut cx = &mut SymbolMangler { + tcx, + start_offset: prefix.len(), + paths: FxHashMap::default(), + types: FxHashMap::default(), + consts: FxHashMap::default(), + binders: vec![], + out: String::from(prefix), + }; + + // Append `::{shim:...#0}` to shims that can coexist with a non-shim instance. + let shim_kind = match instance.def { + ty::InstanceDef::VTableShim(_) => Some("vtable"), + ty::InstanceDef::ReifyShim(_) => Some("reify"), + + _ => None, + }; + + cx = if let Some(shim_kind) = shim_kind { + cx.path_append_ns(|cx| cx.print_def_path(def_id, substs), 'S', 0, shim_kind).unwrap() + } else { + cx.print_def_path(def_id, substs).unwrap() + }; + if let Some(instantiating_crate) = instantiating_crate { + cx = cx.print_def_path(instantiating_crate.as_def_id(), &[]).unwrap(); + } + std::mem::take(&mut cx.out) +} + +pub(super) fn mangle_typeid_for_trait_ref<'tcx>( + tcx: TyCtxt<'tcx>, + trait_ref: ty::PolyExistentialTraitRef<'tcx>, +) -> String { + // FIXME(flip1995): See comment in `mangle_typeid_for_fnabi`. + let mut cx = &mut SymbolMangler { + tcx, + start_offset: 0, + paths: FxHashMap::default(), + types: FxHashMap::default(), + consts: FxHashMap::default(), + binders: vec![], + out: String::new(), + }; + cx = cx.print_def_path(trait_ref.def_id(), &[]).unwrap(); + std::mem::take(&mut cx.out) +} + +struct BinderLevel { + /// The range of distances from the root of what's + /// being printed, to the lifetimes in a binder. + /// Specifically, a `BrAnon(i)` lifetime has depth + /// `lifetime_depths.start + i`, going away from the + /// the root and towards its use site, as `i` increases. 
+ /// This is used to flatten rustc's pairing of `BrAnon` + /// (intra-binder disambiguation) with a `DebruijnIndex` + /// (binder addressing), to "true" de Bruijn indices, + /// by subtracting the depth of a certain lifetime, from + /// the innermost depth at its use site. + lifetime_depths: Range<u32>, +} + +struct SymbolMangler<'tcx> { + tcx: TyCtxt<'tcx>, + binders: Vec<BinderLevel>, + out: String, + + /// The length of the prefix in `out` (e.g. 2 for `_R`). + start_offset: usize, + /// The values are start positions in `out`, in bytes. + paths: FxHashMap<(DefId, &'tcx [GenericArg<'tcx>]), usize>, + types: FxHashMap<Ty<'tcx>, usize>, + consts: FxHashMap<ty::Const<'tcx>, usize>, +} + +impl<'tcx> SymbolMangler<'tcx> { + fn push(&mut self, s: &str) { + self.out.push_str(s); + } + + /// Push a `_`-terminated base 62 integer, using the format + /// specified in the RFC as `<base-62-number>`, that is: + /// * `x = 0` is encoded as just the `"_"` terminator + /// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`, + /// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc. + fn push_integer_62(&mut self, x: u64) { + if let Some(x) = x.checked_sub(1) { + base_n::push_str(x as u128, 62, &mut self.out); + } + self.push("_"); + } + + /// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is: + /// * `x = 0` is encoded as `""` (nothing) + /// * `x > 0` is encoded as the `tag` followed by `push_integer_62(x - 1)` + /// e.g. `1` becomes `tag + "_"`, `2` becomes `tag + "0_"`, etc. 
+ fn push_opt_integer_62(&mut self, tag: &str, x: u64) { + if let Some(x) = x.checked_sub(1) { + self.push(tag); + self.push_integer_62(x); + } + } + + fn push_disambiguator(&mut self, dis: u64) { + self.push_opt_integer_62("s", dis); + } + + fn push_ident(&mut self, ident: &str) { + let mut use_punycode = false; + for b in ident.bytes() { + match b { + b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {} + 0x80..=0xff => use_punycode = true, + _ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident), + } + } + + let punycode_string; + let ident = if use_punycode { + self.push("u"); + + // FIXME(eddyb) we should probably roll our own punycode implementation. + let mut punycode_bytes = match punycode::encode(ident) { + Ok(s) => s.into_bytes(), + Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident), + }; + + // Replace `-` with `_`. + if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') { + *c = b'_'; + } + + // FIXME(eddyb) avoid rechecking UTF-8 validity. + punycode_string = String::from_utf8(punycode_bytes).unwrap(); + &punycode_string + } else { + ident + }; + + let _ = write!(self.out, "{}", ident.len()); + + // Write a separating `_` if necessary (leading digit or `_`). 
+ if let Some('_' | '0'..='9') = ident.chars().next() { + self.push("_"); + } + + self.push(ident); + } + + fn path_append_ns<'a>( + mut self: &'a mut Self, + print_prefix: impl FnOnce(&'a mut Self) -> Result<&'a mut Self, !>, + ns: char, + disambiguator: u64, + name: &str, + ) -> Result<&'a mut Self, !> { + self.push("N"); + self.out.push(ns); + self = print_prefix(self)?; + self.push_disambiguator(disambiguator as u64); + self.push_ident(name); + Ok(self) + } + + fn print_backref(&mut self, i: usize) -> Result<&mut Self, !> { + self.push("B"); + self.push_integer_62((i - self.start_offset) as u64); + Ok(self) + } + + fn in_binder<'a, T>( + mut self: &'a mut Self, + value: &ty::Binder<'tcx, T>, + print_value: impl FnOnce(&'a mut Self, &T) -> Result<&'a mut Self, !>, + ) -> Result<&'a mut Self, !> + where + T: TypeVisitable<'tcx>, + { + let regions = if value.has_late_bound_regions() { + self.tcx.collect_referenced_late_bound_regions(value) + } else { + FxHashSet::default() + }; + + let mut lifetime_depths = + self.binders.last().map(|b| b.lifetime_depths.end).map_or(0..0, |i| i..i); + + let lifetimes = regions + .into_iter() + .map(|br| match br { + ty::BrAnon(i) => i, + _ => bug!("symbol_names: non-anonymized region `{:?}` in `{:?}`", br, value), + }) + .max() + .map_or(0, |max| max + 1); + + self.push_opt_integer_62("G", lifetimes as u64); + lifetime_depths.end += lifetimes; + + self.binders.push(BinderLevel { lifetime_depths }); + self = print_value(self, value.as_ref().skip_binder())?; + self.binders.pop(); + + Ok(self) + } +} + +impl<'tcx> Printer<'tcx> for &mut SymbolMangler<'tcx> { + type Error = !; + + type Path = Self; + type Region = Self; + type Type = Self; + type DynExistential = Self; + type Const = Self; + + fn tcx(&self) -> TyCtxt<'tcx> { + self.tcx + } + + fn print_def_path( + mut self, + def_id: DefId, + substs: &'tcx [GenericArg<'tcx>], + ) -> Result<Self::Path, Self::Error> { + if let Some(&i) = self.paths.get(&(def_id, substs)) { + return 
self.print_backref(i); + } + let start = self.out.len(); + + self = self.default_print_def_path(def_id, substs)?; + + // Only cache paths that do not refer to an enclosing + // binder (which would change depending on context). + if !substs.iter().any(|k| k.has_escaping_bound_vars()) { + self.paths.insert((def_id, substs), start); + } + Ok(self) + } + + fn print_impl_path( + mut self, + impl_def_id: DefId, + substs: &'tcx [GenericArg<'tcx>], + mut self_ty: Ty<'tcx>, + mut impl_trait_ref: Option<ty::TraitRef<'tcx>>, + ) -> Result<Self::Path, Self::Error> { + let key = self.tcx.def_key(impl_def_id); + let parent_def_id = DefId { index: key.parent.unwrap(), ..impl_def_id }; + + let mut param_env = self.tcx.param_env_reveal_all_normalized(impl_def_id); + if !substs.is_empty() { + param_env = EarlyBinder(param_env).subst(self.tcx, substs); + } + + match &mut impl_trait_ref { + Some(impl_trait_ref) => { + assert_eq!(impl_trait_ref.self_ty(), self_ty); + *impl_trait_ref = self.tcx.normalize_erasing_regions(param_env, *impl_trait_ref); + self_ty = impl_trait_ref.self_ty(); + } + None => { + self_ty = self.tcx.normalize_erasing_regions(param_env, self_ty); + } + } + + self.push(match impl_trait_ref { + Some(_) => "X", + None => "M", + }); + + // Encode impl generic params if the substitutions contain parameters (implying + // polymorphization is enabled) and this isn't an inherent impl. 
+ if impl_trait_ref.is_some() && substs.iter().any(|a| a.has_param_types_or_consts()) { + self = self.path_generic_args( + |this| { + this.path_append_ns( + |cx| cx.print_def_path(parent_def_id, &[]), + 'I', + key.disambiguated_data.disambiguator as u64, + "", + ) + }, + substs, + )?; + } else { + self.push_disambiguator(key.disambiguated_data.disambiguator as u64); + self = self.print_def_path(parent_def_id, &[])?; + } + + self = self_ty.print(self)?; + + if let Some(trait_ref) = impl_trait_ref { + self = self.print_def_path(trait_ref.def_id, trait_ref.substs)?; + } + + Ok(self) + } + + fn print_region(self, region: ty::Region<'_>) -> Result<Self::Region, Self::Error> { + let i = match *region { + // Erased lifetimes use the index 0, for a + // shorter mangling of `L_`. + ty::ReErased => 0, + + // Late-bound lifetimes use indices starting at 1, + // see `BinderLevel` for more details. + ty::ReLateBound(debruijn, ty::BoundRegion { kind: ty::BrAnon(i), .. }) => { + let binder = &self.binders[self.binders.len() - 1 - debruijn.index()]; + let depth = binder.lifetime_depths.start + i; + + 1 + (self.binders.last().unwrap().lifetime_depths.end - 1 - depth) + } + + _ => bug!("symbol_names: non-erased region `{:?}`", region), + }; + self.push("L"); + self.push_integer_62(i as u64); + Ok(self) + } + + fn print_type(mut self, ty: Ty<'tcx>) -> Result<Self::Type, Self::Error> { + // Basic types, never cached (single-character). 
+ let basic_type = match ty.kind() { + ty::Bool => "b", + ty::Char => "c", + ty::Str => "e", + ty::Tuple(_) if ty.is_unit() => "u", + ty::Int(IntTy::I8) => "a", + ty::Int(IntTy::I16) => "s", + ty::Int(IntTy::I32) => "l", + ty::Int(IntTy::I64) => "x", + ty::Int(IntTy::I128) => "n", + ty::Int(IntTy::Isize) => "i", + ty::Uint(UintTy::U8) => "h", + ty::Uint(UintTy::U16) => "t", + ty::Uint(UintTy::U32) => "m", + ty::Uint(UintTy::U64) => "y", + ty::Uint(UintTy::U128) => "o", + ty::Uint(UintTy::Usize) => "j", + ty::Float(FloatTy::F32) => "f", + ty::Float(FloatTy::F64) => "d", + ty::Never => "z", + + // Placeholders (should be demangled as `_`). + ty::Param(_) | ty::Bound(..) | ty::Placeholder(_) | ty::Infer(_) | ty::Error(_) => "p", + + _ => "", + }; + if !basic_type.is_empty() { + self.push(basic_type); + return Ok(self); + } + + if let Some(&i) = self.types.get(&ty) { + return self.print_backref(i); + } + let start = self.out.len(); + + match *ty.kind() { + // Basic types, handled above. + ty::Bool | ty::Char | ty::Str | ty::Int(_) | ty::Uint(_) | ty::Float(_) | ty::Never => { + unreachable!() + } + ty::Tuple(_) if ty.is_unit() => unreachable!(), + + // Placeholders, also handled as part of basic types. + ty::Param(_) | ty::Bound(..) 
| ty::Placeholder(_) | ty::Infer(_) | ty::Error(_) => { + unreachable!() + } + + ty::Ref(r, ty, mutbl) => { + self.push(match mutbl { + hir::Mutability::Not => "R", + hir::Mutability::Mut => "Q", + }); + if !r.is_erased() { + self = r.print(self)?; + } + self = ty.print(self)?; + } + + ty::RawPtr(mt) => { + self.push(match mt.mutbl { + hir::Mutability::Not => "P", + hir::Mutability::Mut => "O", + }); + self = mt.ty.print(self)?; + } + + ty::Array(ty, len) => { + self.push("A"); + self = ty.print(self)?; + self = self.print_const(len)?; + } + ty::Slice(ty) => { + self.push("S"); + self = ty.print(self)?; + } + + ty::Tuple(tys) => { + self.push("T"); + for ty in tys.iter() { + self = ty.print(self)?; + } + self.push("E"); + } + + // Mangle all nominal types as paths. + ty::Adt(ty::AdtDef(Interned(&ty::AdtDefData { did: def_id, .. }, _)), substs) + | ty::FnDef(def_id, substs) + | ty::Opaque(def_id, substs) + | ty::Projection(ty::ProjectionTy { item_def_id: def_id, substs }) + | ty::Closure(def_id, substs) + | ty::Generator(def_id, substs, _) => { + self = self.print_def_path(def_id, substs)?; + } + ty::Foreign(def_id) => { + self = self.print_def_path(def_id, &[])?; + } + + ty::FnPtr(sig) => { + self.push("F"); + self = self.in_binder(&sig, |mut cx, sig| { + if sig.unsafety == hir::Unsafety::Unsafe { + cx.push("U"); + } + match sig.abi { + Abi::Rust => {} + Abi::C { unwind: false } => cx.push("KC"), + abi => { + cx.push("K"); + let name = abi.name(); + if name.contains('-') { + cx.push_ident(&name.replace('-', "_")); + } else { + cx.push_ident(name); + } + } + } + for &ty in sig.inputs() { + cx = ty.print(cx)?; + } + if sig.c_variadic { + cx.push("v"); + } + cx.push("E"); + sig.output().print(cx) + })?; + } + + ty::Dynamic(predicates, r) => { + self.push("D"); + self = self.print_dyn_existential(predicates)?; + self = r.print(self)?; + } + + ty::GeneratorWitness(_) => bug!("symbol_names: unexpected `GeneratorWitness`"), + } + + // Only cache types that do not refer to 
an enclosing + // binder (which would change depending on context). + if !ty.has_escaping_bound_vars() { + self.types.insert(ty, start); + } + Ok(self) + } + + fn print_dyn_existential( + mut self, + predicates: &'tcx ty::List<ty::Binder<'tcx, ty::ExistentialPredicate<'tcx>>>, + ) -> Result<Self::DynExistential, Self::Error> { + // Okay, so this is a bit tricky. Imagine we have a trait object like + // `dyn for<'a> Foo<'a, Bar = &'a ()>`. When we mangle this, the + // output looks really close to the syntax, where the `Bar = &'a ()` bit + // is under the same binders (`['a]`) as the `Foo<'a>` bit. However, we + // actually desugar these into two separate `ExistentialPredicate`s. We + // can't enter/exit the "binder scope" twice though, because then we + // would mangle the binders twice. (Also, side note, we merging these + // two is kind of difficult, because of potential HRTBs in the Projection + // predicate.) + // + // Also worth mentioning: imagine that we instead had + // `dyn for<'a> Foo<'a, Bar = &'a ()> + Send`. In this case, `Send` is + // under the same binders as `Foo`. Currently, this doesn't matter, + // because only *auto traits* are allowed other than the principal trait + // and all auto traits don't have any generics. Two things could + // make this not an "okay" mangling: + // 1) Instead of mangling only *used* + // bound vars, we want to mangle *all* bound vars (`for<'b> Send` is a + // valid trait predicate); + // 2) We allow multiple "principal" traits in the future, or at least + // allow in any form another trait predicate that can take generics. + // + // Here we assume that predicates have the following structure: + // [<Trait> [{<Projection>}]] [{<Auto>}] + // Since any predicates after the first one shouldn't change the binders, + // just put them all in the binders of the first. 
+ self = self.in_binder(&predicates[0], |mut cx, _| { + for predicate in predicates.iter() { + // It would be nice to be able to validate bound vars here, but + // projections can actually include bound vars from super traits + // because of HRTBs (only in the `Self` type). Also, auto traits + // could have different bound vars *anyways*. + match predicate.as_ref().skip_binder() { + ty::ExistentialPredicate::Trait(trait_ref) => { + // Use a type that can't appear in defaults of type parameters. + let dummy_self = cx.tcx.mk_ty_infer(ty::FreshTy(0)); + let trait_ref = trait_ref.with_self_ty(cx.tcx, dummy_self); + cx = cx.print_def_path(trait_ref.def_id, trait_ref.substs)?; + } + ty::ExistentialPredicate::Projection(projection) => { + let name = cx.tcx.associated_item(projection.item_def_id).name; + cx.push("p"); + cx.push_ident(name.as_str()); + cx = match projection.term { + ty::Term::Ty(ty) => ty.print(cx), + ty::Term::Const(c) => c.print(cx), + }?; + } + ty::ExistentialPredicate::AutoTrait(def_id) => { + cx = cx.print_def_path(*def_id, &[])?; + } + } + } + Ok(cx) + })?; + + self.push("E"); + Ok(self) + } + + fn print_const(mut self, ct: ty::Const<'tcx>) -> Result<Self::Const, Self::Error> { + // We only mangle a typed value if the const can be evaluated. + let ct = ct.eval(self.tcx, ty::ParamEnv::reveal_all()); + match ct.kind() { + ty::ConstKind::Value(_) => {} + + // Placeholders (should be demangled as `_`). + // NOTE(eddyb) despite `Unevaluated` having a `DefId` (and therefore + // a path), even for it we still need to encode a placeholder, as + // the path could refer back to e.g. an `impl` using the constant. + ty::ConstKind::Unevaluated(_) + | ty::ConstKind::Param(_) + | ty::ConstKind::Infer(_) + | ty::ConstKind::Bound(..) + | ty::ConstKind::Placeholder(_) + | ty::ConstKind::Error(_) => { + // Never cached (single-character). 
+ self.push("p"); + return Ok(self); + } + } + + if let Some(&i) = self.consts.get(&ct) { + return self.print_backref(i); + } + + let start = self.out.len(); + let ty = ct.ty(); + + match ty.kind() { + ty::Uint(_) | ty::Int(_) | ty::Bool | ty::Char => { + self = ty.print(self)?; + + let mut bits = ct.eval_bits(self.tcx, ty::ParamEnv::reveal_all(), ty); + + // Negative integer values are mangled using `n` as a "sign prefix". + if let ty::Int(ity) = ty.kind() { + let val = + Integer::from_int_ty(&self.tcx, *ity).size().sign_extend(bits) as i128; + if val < 0 { + self.push("n"); + } + bits = val.unsigned_abs(); + } + + let _ = write!(self.out, "{:x}_", bits); + } + + // FIXME(valtrees): Remove the special case for `str` + // here and fully support unsized constants. + ty::Ref(_, inner_ty, mutbl) => { + self.push(match mutbl { + hir::Mutability::Not => "R", + hir::Mutability::Mut => "Q", + }); + + match inner_ty.kind() { + ty::Str if *mutbl == hir::Mutability::Not => { + match ct.kind() { + ty::ConstKind::Value(valtree) => { + let slice = + valtree.try_to_raw_bytes(self.tcx(), ty).unwrap_or_else(|| { + bug!( + "expected to get raw bytes from valtree {:?} for type {:}", + valtree, ty + ) + }); + let s = std::str::from_utf8(slice).expect("non utf8 str from miri"); + + self.push("e"); + + // FIXME(eddyb) use a specialized hex-encoding loop. + for byte in s.bytes() { + let _ = write!(self.out, "{:02x}", byte); + } + + self.push("_"); + } + + _ => { + bug!("symbol_names: unsupported `&str` constant: {:?}", ct); + } + } + } + _ => { + let pointee_ty = ct + .ty() + .builtin_deref(true) + .expect("tried to dereference on non-ptr type") + .ty; + let dereferenced_const = + self.tcx.mk_const(ty::ConstS { kind: ct.kind(), ty: pointee_ty }); + self = dereferenced_const.print(self)?; + } + } + } + + ty::Array(..) | ty::Tuple(..) | ty::Adt(..) 
| ty::Slice(_) => { + let contents = self.tcx.destructure_const(ct); + let fields = contents.fields.iter().copied(); + + let print_field_list = |mut this: Self| { + for field in fields.clone() { + this = field.print(this)?; + } + this.push("E"); + Ok(this) + }; + + match *ct.ty().kind() { + ty::Array(..) | ty::Slice(_) => { + self.push("A"); + self = print_field_list(self)?; + } + ty::Tuple(..) => { + self.push("T"); + self = print_field_list(self)?; + } + ty::Adt(def, substs) => { + let variant_idx = + contents.variant.expect("destructed const of adt without variant idx"); + let variant_def = &def.variant(variant_idx); + + self.push("V"); + self = self.print_def_path(variant_def.def_id, substs)?; + + match variant_def.ctor_kind { + CtorKind::Const => { + self.push("U"); + } + CtorKind::Fn => { + self.push("T"); + self = print_field_list(self)?; + } + CtorKind::Fictive => { + self.push("S"); + for (field_def, field) in iter::zip(&variant_def.fields, fields) { + // HACK(eddyb) this mimics `path_append`, + // instead of simply using `field_def.ident`, + // just to be able to handle disambiguators. + let disambiguated_field = + self.tcx.def_key(field_def.did).disambiguated_data; + let field_name = disambiguated_field.data.get_opt_name(); + self.push_disambiguator( + disambiguated_field.disambiguator as u64, + ); + self.push_ident(field_name.unwrap_or(kw::Empty).as_str()); + + self = field.print(self)?; + } + self.push("E"); + } + } + } + _ => unreachable!(), + } + } + _ => { + bug!("symbol_names: unsupported constant of type `{}` ({:?})", ct.ty(), ct); + } + } + + // Only cache consts that do not refer to an enclosing + // binder (which would change depending on context). 
+ if !ct.has_escaping_bound_vars() { + self.consts.insert(ct, start); + } + Ok(self) + } + + fn path_crate(self, cnum: CrateNum) -> Result<Self::Path, Self::Error> { + self.push("C"); + let stable_crate_id = self.tcx.def_path_hash(cnum.as_def_id()).stable_crate_id(); + self.push_disambiguator(stable_crate_id.to_u64()); + let name = self.tcx.crate_name(cnum); + self.push_ident(name.as_str()); + Ok(self) + } + + fn path_qualified( + mut self, + self_ty: Ty<'tcx>, + trait_ref: Option<ty::TraitRef<'tcx>>, + ) -> Result<Self::Path, Self::Error> { + assert!(trait_ref.is_some()); + let trait_ref = trait_ref.unwrap(); + + self.push("Y"); + self = self_ty.print(self)?; + self.print_def_path(trait_ref.def_id, trait_ref.substs) + } + + fn path_append_impl( + self, + _: impl FnOnce(Self) -> Result<Self::Path, Self::Error>, + _: &DisambiguatedDefPathData, + _: Ty<'tcx>, + _: Option<ty::TraitRef<'tcx>>, + ) -> Result<Self::Path, Self::Error> { + // Inlined into `print_impl_path` + unreachable!() + } + + fn path_append( + self, + print_prefix: impl FnOnce(Self) -> Result<Self::Path, Self::Error>, + disambiguated_data: &DisambiguatedDefPathData, + ) -> Result<Self::Path, Self::Error> { + let ns = match disambiguated_data.data { + // Extern block segments can be skipped, names from extern blocks + // are effectively living in their parent modules. + DefPathData::ForeignMod => return print_prefix(self), + + // Uppercase categories are more stable than lowercase ones. + DefPathData::TypeNs(_) => 't', + DefPathData::ValueNs(_) => 'v', + DefPathData::ClosureExpr => 'C', + DefPathData::Ctor => 'c', + DefPathData::AnonConst => 'k', + DefPathData::ImplTrait => 'i', + + // These should never show up as `path_append` arguments. 
+ DefPathData::CrateRoot + | DefPathData::Use + | DefPathData::GlobalAsm + | DefPathData::Impl + | DefPathData::MacroNs(_) + | DefPathData::LifetimeNs(_) => { + bug!("symbol_names: unexpected DefPathData: {:?}", disambiguated_data.data) + } + }; + + let name = disambiguated_data.data.get_opt_name(); + + self.path_append_ns( + print_prefix, + ns, + disambiguated_data.disambiguator as u64, + name.unwrap_or(kw::Empty).as_str(), + ) + } + + fn path_generic_args( + mut self, + print_prefix: impl FnOnce(Self) -> Result<Self::Path, Self::Error>, + args: &[GenericArg<'tcx>], + ) -> Result<Self::Path, Self::Error> { + // Don't print any regions if they're all erased. + let print_regions = args.iter().any(|arg| match arg.unpack() { + GenericArgKind::Lifetime(r) => !r.is_erased(), + _ => false, + }); + let args = args.iter().cloned().filter(|arg| match arg.unpack() { + GenericArgKind::Lifetime(_) => print_regions, + _ => true, + }); + + if args.clone().next().is_none() { + return print_prefix(self); + } + + self.push("I"); + self = print_prefix(self)?; + for arg in args { + match arg.unpack() { + GenericArgKind::Lifetime(lt) => { + self = lt.print(self)?; + } + GenericArgKind::Type(ty) => { + self = ty.print(self)?; + } + GenericArgKind::Const(c) => { + self.push("K"); + self = c.print(self)?; + } + } + } + self.push("E"); + + Ok(self) + } +} |