diff options
| author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
|-----------|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
| commit    | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
| tree      | 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_metadata | |
| parent    | Initial commit. (diff) | |
| download  | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz, rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip | |
Adding upstream version 1.64.0+dfsg1 (tag: upstream/1.64.0+dfsg1).
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_metadata')
| Mode       | File | Lines changed |
|------------|------|---------------|
| -rw-r--r-- | compiler/rustc_metadata/Cargo.toml | 31 |
| -rw-r--r-- | compiler/rustc_metadata/src/creader.rs | 1041 |
| -rw-r--r-- | compiler/rustc_metadata/src/dependency_format.rs | 435 |
| -rw-r--r-- | compiler/rustc_metadata/src/foreign_modules.rs | 19 |
| -rw-r--r-- | compiler/rustc_metadata/src/fs.rs | 137 |
| -rw-r--r-- | compiler/rustc_metadata/src/lib.rs | 41 |
| -rw-r--r-- | compiler/rustc_metadata/src/locator.rs | 1222 |
| -rw-r--r-- | compiler/rustc_metadata/src/native_libs.rs | 504 |
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/decoder.rs | 1820 |
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs | 680 |
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs | 65 |
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/encoder.rs | 2302 |
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/mod.rs | 460 |
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/table.rs | 330 |
14 files changed, 9087 insertions, 0 deletions
diff --git a/compiler/rustc_metadata/Cargo.toml b/compiler/rustc_metadata/Cargo.toml new file mode 100644 index 000000000..2c5db9d8b --- /dev/null +++ b/compiler/rustc_metadata/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "rustc_metadata" +version = "0.0.0" +edition = "2021" + +[lib] +doctest = false + +[dependencies] +libloading = "0.7.1" +odht = { version = "0.3.1", features = ["nightly"] } +snap = "1" +tracing = "0.1" +smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } +tempfile = "3.2" +rustc_middle = { path = "../rustc_middle" } +rustc_attr = { path = "../rustc_attr" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_errors = { path = "../rustc_errors" } +rustc_feature = { path = "../rustc_feature" } +rustc_hir = { path = "../rustc_hir" } +rustc_hir_pretty = { path = "../rustc_hir_pretty" } +rustc_target = { path = "../rustc_target" } +rustc_index = { path = "../rustc_index" } +rustc_macros = { path = "../rustc_macros" } +rustc_serialize = { path = "../rustc_serialize" } +rustc_ast = { path = "../rustc_ast" } +rustc_expand = { path = "../rustc_expand" } +rustc_span = { path = "../rustc_span" } +rustc_session = { path = "../rustc_session" } +rustc_type_ir = { path = "../rustc_type_ir" } diff --git a/compiler/rustc_metadata/src/creader.rs b/compiler/rustc_metadata/src/creader.rs new file mode 100644 index 000000000..708d0b1fd --- /dev/null +++ b/compiler/rustc_metadata/src/creader.rs @@ -0,0 +1,1041 @@ +//! 
Validates all used crates and extern libraries and loads their metadata + +use crate::locator::{CrateError, CrateLocator, CratePaths}; +use crate::rmeta::{CrateDep, CrateMetadata, CrateNumMap, CrateRoot, MetadataBlob}; + +use rustc_ast::expand::allocator::AllocatorKind; +use rustc_ast::{self as ast, *}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::svh::Svh; +use rustc_data_structures::sync::Lrc; +use rustc_expand::base::SyntaxExtension; +use rustc_hir::def_id::{CrateNum, LocalDefId, StableCrateId, LOCAL_CRATE}; +use rustc_hir::definitions::Definitions; +use rustc_index::vec::IndexVec; +use rustc_middle::ty::TyCtxt; +use rustc_session::config::{self, CrateType, ExternLocation}; +use rustc_session::cstore::{CrateDepKind, CrateSource, ExternCrate}; +use rustc_session::cstore::{ExternCrateSource, MetadataLoaderDyn}; +use rustc_session::lint; +use rustc_session::output::validate_crate_name; +use rustc_session::search_paths::PathKind; +use rustc_session::Session; +use rustc_span::edition::Edition; +use rustc_span::symbol::{sym, Symbol}; +use rustc_span::{Span, DUMMY_SP}; +use rustc_target::spec::{PanicStrategy, TargetTriple}; + +use proc_macro::bridge::client::ProcMacro; +use std::ops::Fn; +use std::path::Path; +use std::{cmp, env}; +use tracing::{debug, info}; + +#[derive(Clone)] +pub struct CStore { + metas: IndexVec<CrateNum, Option<Lrc<CrateMetadata>>>, + injected_panic_runtime: Option<CrateNum>, + /// This crate needs an allocator and either provides it itself, or finds it in a dependency. + /// If the above is true, then this field denotes the kind of the found allocator. + allocator_kind: Option<AllocatorKind>, + /// This crate has a `#[global_allocator]` item. + has_global_allocator: bool, + + /// This map is used to verify we get no hash conflicts between + /// `StableCrateId` values. 
+ pub(crate) stable_crate_ids: FxHashMap<StableCrateId, CrateNum>, + + /// Unused externs of the crate + unused_externs: Vec<Symbol>, +} + +impl std::fmt::Debug for CStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CStore").finish_non_exhaustive() + } +} + +pub struct CrateLoader<'a> { + // Immutable configuration. + sess: &'a Session, + metadata_loader: Box<MetadataLoaderDyn>, + local_crate_name: Symbol, + // Mutable output. + cstore: CStore, + used_extern_options: FxHashSet<Symbol>, +} + +pub enum LoadedMacro { + MacroDef(ast::Item, Edition), + ProcMacro(SyntaxExtension), +} + +pub(crate) struct Library { + pub source: CrateSource, + pub metadata: MetadataBlob, +} + +enum LoadResult { + Previous(CrateNum), + Loaded(Library), +} + +/// A reference to `CrateMetadata` that can also give access to whole crate store when necessary. +#[derive(Clone, Copy)] +pub(crate) struct CrateMetadataRef<'a> { + pub cdata: &'a CrateMetadata, + pub cstore: &'a CStore, +} + +impl std::ops::Deref for CrateMetadataRef<'_> { + type Target = CrateMetadata; + + fn deref(&self) -> &Self::Target { + self.cdata + } +} + +struct CrateDump<'a>(&'a CStore); + +impl<'a> std::fmt::Debug for CrateDump<'a> { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(fmt, "resolved crates:")?; + for (cnum, data) in self.0.iter_crate_data() { + writeln!(fmt, " name: {}", data.name())?; + writeln!(fmt, " cnum: {}", cnum)?; + writeln!(fmt, " hash: {}", data.hash())?; + writeln!(fmt, " reqd: {:?}", data.dep_kind())?; + let CrateSource { dylib, rlib, rmeta } = data.source(); + if let Some(dylib) = dylib { + writeln!(fmt, " dylib: {}", dylib.0.display())?; + } + if let Some(rlib) = rlib { + writeln!(fmt, " rlib: {}", rlib.0.display())?; + } + if let Some(rmeta) = rmeta { + writeln!(fmt, " rmeta: {}", rmeta.0.display())?; + } + } + Ok(()) + } +} + +impl CStore { + pub fn from_tcx(tcx: TyCtxt<'_>) -> &CStore { + tcx.cstore_untracked() 
+ .as_any() + .downcast_ref::<CStore>() + .expect("`tcx.cstore` is not a `CStore`") + } + + fn alloc_new_crate_num(&mut self) -> CrateNum { + self.metas.push(None); + CrateNum::new(self.metas.len() - 1) + } + + pub fn has_crate_data(&self, cnum: CrateNum) -> bool { + self.metas[cnum].is_some() + } + + pub(crate) fn get_crate_data(&self, cnum: CrateNum) -> CrateMetadataRef<'_> { + let cdata = self.metas[cnum] + .as_ref() + .unwrap_or_else(|| panic!("Failed to get crate data for {:?}", cnum)); + CrateMetadataRef { cdata, cstore: self } + } + + fn set_crate_data(&mut self, cnum: CrateNum, data: CrateMetadata) { + assert!(self.metas[cnum].is_none(), "Overwriting crate metadata entry"); + self.metas[cnum] = Some(Lrc::new(data)); + } + + pub(crate) fn iter_crate_data(&self) -> impl Iterator<Item = (CrateNum, &CrateMetadata)> { + self.metas + .iter_enumerated() + .filter_map(|(cnum, data)| data.as_ref().map(|data| (cnum, &**data))) + } + + fn push_dependencies_in_postorder(&self, deps: &mut Vec<CrateNum>, cnum: CrateNum) { + if !deps.contains(&cnum) { + let data = self.get_crate_data(cnum); + for &dep in data.dependencies().iter() { + if dep != cnum { + self.push_dependencies_in_postorder(deps, dep); + } + } + + deps.push(cnum); + } + } + + pub(crate) fn crate_dependencies_in_postorder(&self, cnum: CrateNum) -> Vec<CrateNum> { + let mut deps = Vec::new(); + if cnum == LOCAL_CRATE { + for (cnum, _) in self.iter_crate_data() { + self.push_dependencies_in_postorder(&mut deps, cnum); + } + } else { + self.push_dependencies_in_postorder(&mut deps, cnum); + } + deps + } + + fn crate_dependencies_in_reverse_postorder(&self, cnum: CrateNum) -> Vec<CrateNum> { + let mut deps = self.crate_dependencies_in_postorder(cnum); + deps.reverse(); + deps + } + + pub(crate) fn injected_panic_runtime(&self) -> Option<CrateNum> { + self.injected_panic_runtime + } + + pub(crate) fn allocator_kind(&self) -> Option<AllocatorKind> { + self.allocator_kind + } + + pub(crate) fn 
has_global_allocator(&self) -> bool { + self.has_global_allocator + } + + pub fn report_unused_deps(&self, tcx: TyCtxt<'_>) { + let json_unused_externs = tcx.sess.opts.json_unused_externs; + + // We put the check for the option before the lint_level_at_node call + // because the call mutates internal state and introducing it + // leads to some ui tests failing. + if !json_unused_externs.is_enabled() { + return; + } + let level = tcx + .lint_level_at_node(lint::builtin::UNUSED_CRATE_DEPENDENCIES, rustc_hir::CRATE_HIR_ID) + .0; + if level != lint::Level::Allow { + let unused_externs = + self.unused_externs.iter().map(|ident| ident.to_ident_string()).collect::<Vec<_>>(); + let unused_externs = unused_externs.iter().map(String::as_str).collect::<Vec<&str>>(); + tcx.sess.parse_sess.span_diagnostic.emit_unused_externs( + level, + json_unused_externs.is_loud(), + &unused_externs, + ); + } + } +} + +impl<'a> CrateLoader<'a> { + pub fn new( + sess: &'a Session, + metadata_loader: Box<MetadataLoaderDyn>, + local_crate_name: &str, + ) -> Self { + let mut stable_crate_ids = FxHashMap::default(); + stable_crate_ids.insert(sess.local_stable_crate_id(), LOCAL_CRATE); + + CrateLoader { + sess, + metadata_loader, + local_crate_name: Symbol::intern(local_crate_name), + cstore: CStore { + // We add an empty entry for LOCAL_CRATE (which maps to zero) in + // order to make array indices in `metas` match with the + // corresponding `CrateNum`. This first entry will always remain + // `None`. 
+ metas: IndexVec::from_elem_n(None, 1), + injected_panic_runtime: None, + allocator_kind: None, + has_global_allocator: false, + stable_crate_ids, + unused_externs: Vec::new(), + }, + used_extern_options: Default::default(), + } + } + + pub fn cstore(&self) -> &CStore { + &self.cstore + } + + pub fn into_cstore(self) -> CStore { + self.cstore + } + + fn existing_match(&self, name: Symbol, hash: Option<Svh>, kind: PathKind) -> Option<CrateNum> { + for (cnum, data) in self.cstore.iter_crate_data() { + if data.name() != name { + tracing::trace!("{} did not match {}", data.name(), name); + continue; + } + + match hash { + Some(hash) if hash == data.hash() => return Some(cnum), + Some(hash) => { + debug!("actual hash {} did not match expected {}", hash, data.hash()); + continue; + } + None => {} + } + + // When the hash is None we're dealing with a top-level dependency + // in which case we may have a specification on the command line for + // this library. Even though an upstream library may have loaded + // something of the same name, we have to make sure it was loaded + // from the exact same location as well. + // + // We're also sure to compare *paths*, not actual byte slices. The + // `source` stores paths which are normalized which may be different + // from the strings on the command line. + let source = self.cstore.get_crate_data(cnum).cdata.source(); + if let Some(entry) = self.sess.opts.externs.get(name.as_str()) { + // Only use `--extern crate_name=path` here, not `--extern crate_name`. + if let Some(mut files) = entry.files() { + if files.any(|l| { + let l = l.canonicalized(); + source.dylib.as_ref().map(|(p, _)| p) == Some(l) + || source.rlib.as_ref().map(|(p, _)| p) == Some(l) + || source.rmeta.as_ref().map(|(p, _)| p) == Some(l) + }) { + return Some(cnum); + } + } + continue; + } + + // Alright, so we've gotten this far which means that `data` has the + // right name, we don't have a hash, and we don't have a --extern + // pointing for ourselves. 
We're still not quite yet done because we + // have to make sure that this crate was found in the crate lookup + // path (this is a top-level dependency) as we don't want to + // implicitly load anything inside the dependency lookup path. + let prev_kind = source + .dylib + .as_ref() + .or(source.rlib.as_ref()) + .or(source.rmeta.as_ref()) + .expect("No sources for crate") + .1; + if kind.matches(prev_kind) { + return Some(cnum); + } else { + debug!( + "failed to load existing crate {}; kind {:?} did not match prev_kind {:?}", + name, kind, prev_kind + ); + } + } + + None + } + + fn verify_no_symbol_conflicts(&self, root: &CrateRoot) -> Result<(), CrateError> { + // Check for (potential) conflicts with the local crate + if self.sess.local_stable_crate_id() == root.stable_crate_id() { + return Err(CrateError::SymbolConflictsCurrent(root.name())); + } + + // Check for conflicts with any crate loaded so far + for (_, other) in self.cstore.iter_crate_data() { + // Same stable crate id but different SVH + if other.stable_crate_id() == root.stable_crate_id() && other.hash() != root.hash() { + return Err(CrateError::SymbolConflictsOthers(root.name())); + } + } + + Ok(()) + } + + fn verify_no_stable_crate_id_hash_conflicts( + &mut self, + root: &CrateRoot, + cnum: CrateNum, + ) -> Result<(), CrateError> { + if let Some(existing) = self.cstore.stable_crate_ids.insert(root.stable_crate_id(), cnum) { + let crate_name0 = root.name(); + let crate_name1 = self.cstore.get_crate_data(existing).name(); + return Err(CrateError::StableCrateIdCollision(crate_name0, crate_name1)); + } + + Ok(()) + } + + fn register_crate( + &mut self, + host_lib: Option<Library>, + root: Option<&CratePaths>, + lib: Library, + dep_kind: CrateDepKind, + name: Symbol, + ) -> Result<CrateNum, CrateError> { + let _prof_timer = self.sess.prof.generic_activity("metadata_register_crate"); + + let Library { source, metadata } = lib; + let crate_root = metadata.get_root(); + let host_hash = 
host_lib.as_ref().map(|lib| lib.metadata.get_root().hash()); + + let private_dep = + self.sess.opts.externs.get(name.as_str()).map_or(false, |e| e.is_private_dep); + + // Claim this crate number and cache it + let cnum = self.cstore.alloc_new_crate_num(); + + info!( + "register crate `{}` (cnum = {}. private_dep = {})", + crate_root.name(), + cnum, + private_dep + ); + + // Maintain a reference to the top most crate. + // Stash paths for top-most crate locally if necessary. + let crate_paths; + let root = if let Some(root) = root { + root + } else { + crate_paths = CratePaths::new(crate_root.name(), source.clone()); + &crate_paths + }; + + let cnum_map = self.resolve_crate_deps(root, &crate_root, &metadata, cnum, dep_kind)?; + + let raw_proc_macros = if crate_root.is_proc_macro_crate() { + let temp_root; + let (dlsym_source, dlsym_root) = match &host_lib { + Some(host_lib) => (&host_lib.source, { + temp_root = host_lib.metadata.get_root(); + &temp_root + }), + None => (&source, &crate_root), + }; + let dlsym_dylib = dlsym_source.dylib.as_ref().expect("no dylib for a proc-macro crate"); + Some(self.dlsym_proc_macros(&dlsym_dylib.0, dlsym_root.stable_crate_id())?) + } else { + None + }; + + // Perform some verification *after* resolve_crate_deps() above is + // known to have been successful. It seems that - in error cases - the + // cstore can be in a temporarily invalid state between cnum allocation + // and dependency resolution and the verification code would produce + // ICEs in that case (see #83045). 
+ self.verify_no_symbol_conflicts(&crate_root)?; + self.verify_no_stable_crate_id_hash_conflicts(&crate_root, cnum)?; + + let crate_metadata = CrateMetadata::new( + self.sess, + &self.cstore, + metadata, + crate_root, + raw_proc_macros, + cnum, + cnum_map, + dep_kind, + source, + private_dep, + host_hash, + ); + + self.cstore.set_crate_data(cnum, crate_metadata); + + Ok(cnum) + } + + fn load_proc_macro<'b>( + &self, + locator: &mut CrateLocator<'b>, + path_kind: PathKind, + host_hash: Option<Svh>, + ) -> Result<Option<(LoadResult, Option<Library>)>, CrateError> + where + 'a: 'b, + { + // Use a new crate locator so trying to load a proc macro doesn't affect the error + // message we emit + let mut proc_macro_locator = locator.clone(); + + // Try to load a proc macro + proc_macro_locator.is_proc_macro = true; + + // Load the proc macro crate for the target + let (locator, target_result) = if self.sess.opts.unstable_opts.dual_proc_macros { + proc_macro_locator.reset(); + let result = match self.load(&mut proc_macro_locator)? { + Some(LoadResult::Previous(cnum)) => { + return Ok(Some((LoadResult::Previous(cnum), None))); + } + Some(LoadResult::Loaded(library)) => Some(LoadResult::Loaded(library)), + None => return Ok(None), + }; + locator.hash = host_hash; + // Use the locator when looking for the host proc macro crate, as that is required + // so we want it to affect the error message + (locator, result) + } else { + (&mut proc_macro_locator, None) + }; + + // Load the proc macro crate for the host + + locator.reset(); + locator.is_proc_macro = true; + locator.target = &self.sess.host; + locator.triple = TargetTriple::from_triple(config::host_triple()); + locator.filesearch = self.sess.host_filesearch(path_kind); + + let Some(host_result) = self.load(locator)? else { + return Ok(None); + }; + + Ok(Some(if self.sess.opts.unstable_opts.dual_proc_macros { + let host_result = match host_result { + LoadResult::Previous(..) 
=> { + panic!("host and target proc macros must be loaded in lock-step") + } + LoadResult::Loaded(library) => library, + }; + (target_result.unwrap(), Some(host_result)) + } else { + (host_result, None) + })) + } + + fn resolve_crate<'b>( + &'b mut self, + name: Symbol, + span: Span, + dep_kind: CrateDepKind, + ) -> Option<CrateNum> { + self.used_extern_options.insert(name); + match self.maybe_resolve_crate(name, dep_kind, None) { + Ok(cnum) => Some(cnum), + Err(err) => { + let missing_core = + self.maybe_resolve_crate(sym::core, CrateDepKind::Explicit, None).is_err(); + err.report(&self.sess, span, missing_core); + None + } + } + } + + fn maybe_resolve_crate<'b>( + &'b mut self, + name: Symbol, + mut dep_kind: CrateDepKind, + dep: Option<(&'b CratePaths, &'b CrateDep)>, + ) -> Result<CrateNum, CrateError> { + info!("resolving crate `{}`", name); + if !name.as_str().is_ascii() { + return Err(CrateError::NonAsciiName(name)); + } + let (root, hash, host_hash, extra_filename, path_kind) = match dep { + Some((root, dep)) => ( + Some(root), + Some(dep.hash), + dep.host_hash, + Some(&dep.extra_filename[..]), + PathKind::Dependency, + ), + None => (None, None, None, None, PathKind::Crate), + }; + let result = if let Some(cnum) = self.existing_match(name, hash, path_kind) { + (LoadResult::Previous(cnum), None) + } else { + info!("falling back to a load"); + let mut locator = CrateLocator::new( + self.sess, + &*self.metadata_loader, + name, + hash, + extra_filename, + false, // is_host + path_kind, + ); + + match self.load(&mut locator)? { + Some(res) => (res, None), + None => { + dep_kind = CrateDepKind::MacrosOnly; + match self.load_proc_macro(&mut locator, path_kind, host_hash)? 
{ + Some(res) => res, + None => return Err(locator.into_error(root.cloned())), + } + } + } + }; + + match result { + (LoadResult::Previous(cnum), None) => { + let data = self.cstore.get_crate_data(cnum); + if data.is_proc_macro_crate() { + dep_kind = CrateDepKind::MacrosOnly; + } + data.update_dep_kind(|data_dep_kind| cmp::max(data_dep_kind, dep_kind)); + Ok(cnum) + } + (LoadResult::Loaded(library), host_library) => { + self.register_crate(host_library, root, library, dep_kind, name) + } + _ => panic!(), + } + } + + fn load(&self, locator: &mut CrateLocator<'_>) -> Result<Option<LoadResult>, CrateError> { + let Some(library) = locator.maybe_load_library_crate()? else { + return Ok(None); + }; + + // In the case that we're loading a crate, but not matching + // against a hash, we could load a crate which has the same hash + // as an already loaded crate. If this is the case prevent + // duplicates by just using the first crate. + // + // Note that we only do this for target triple crates, though, as we + // don't want to match a host crate against an equivalent target one + // already loaded. + let root = library.metadata.get_root(); + // FIXME: why is this condition necessary? It was adding in #33625 but I + // don't know why and the original author doesn't remember ... 
+ let can_reuse_cratenum = + locator.triple == self.sess.opts.target_triple || locator.is_proc_macro; + Ok(Some(if can_reuse_cratenum { + let mut result = LoadResult::Loaded(library); + for (cnum, data) in self.cstore.iter_crate_data() { + if data.name() == root.name() && root.hash() == data.hash() { + assert!(locator.hash.is_none()); + info!("load success, going to previous cnum: {}", cnum); + result = LoadResult::Previous(cnum); + break; + } + } + result + } else { + LoadResult::Loaded(library) + })) + } + + fn update_extern_crate(&self, cnum: CrateNum, extern_crate: ExternCrate) { + let cmeta = self.cstore.get_crate_data(cnum); + if cmeta.update_extern_crate(extern_crate) { + // Propagate the extern crate info to dependencies if it was updated. + let extern_crate = ExternCrate { dependency_of: cnum, ..extern_crate }; + for &dep_cnum in cmeta.dependencies().iter() { + self.update_extern_crate(dep_cnum, extern_crate); + } + } + } + + // Go through the crate metadata and load any crates that it references + fn resolve_crate_deps( + &mut self, + root: &CratePaths, + crate_root: &CrateRoot, + metadata: &MetadataBlob, + krate: CrateNum, + dep_kind: CrateDepKind, + ) -> Result<CrateNumMap, CrateError> { + debug!("resolving deps of external crate"); + if crate_root.is_proc_macro_crate() { + return Ok(CrateNumMap::new()); + } + + // The map from crate numbers in the crate we're resolving to local crate numbers. + // We map 0 and all other holes in the map to our parent crate. The "additional" + // self-dependencies should be harmless. 
+ let deps = crate_root.decode_crate_deps(metadata); + let mut crate_num_map = CrateNumMap::with_capacity(1 + deps.len()); + crate_num_map.push(krate); + for dep in deps { + info!( + "resolving dep crate {} hash: `{}` extra filename: `{}`", + dep.name, dep.hash, dep.extra_filename + ); + let dep_kind = match dep_kind { + CrateDepKind::MacrosOnly => CrateDepKind::MacrosOnly, + _ => dep.kind, + }; + let cnum = self.maybe_resolve_crate(dep.name, dep_kind, Some((root, &dep)))?; + crate_num_map.push(cnum); + } + + debug!("resolve_crate_deps: cnum_map for {:?} is {:?}", krate, crate_num_map); + Ok(crate_num_map) + } + + fn dlsym_proc_macros( + &self, + path: &Path, + stable_crate_id: StableCrateId, + ) -> Result<&'static [ProcMacro], CrateError> { + // Make sure the path contains a / or the linker will search for it. + let path = env::current_dir().unwrap().join(path); + let lib = unsafe { libloading::Library::new(path) } + .map_err(|err| CrateError::DlOpen(err.to_string()))?; + + let sym_name = self.sess.generate_proc_macro_decls_symbol(stable_crate_id); + let sym = unsafe { lib.get::<*const &[ProcMacro]>(sym_name.as_bytes()) } + .map_err(|err| CrateError::DlSym(err.to_string()))?; + + // Intentionally leak the dynamic library. We can't ever unload it + // since the library can make things that will live arbitrarily long. + let sym = unsafe { sym.into_raw() }; + std::mem::forget(lib); + + Ok(unsafe { **sym }) + } + + fn inject_panic_runtime(&mut self, krate: &ast::Crate) { + // If we're only compiling an rlib, then there's no need to select a + // panic runtime, so we just skip this section entirely. + let any_non_rlib = self.sess.crate_types().iter().any(|ct| *ct != CrateType::Rlib); + if !any_non_rlib { + info!("panic runtime injection skipped, only generating rlib"); + return; + } + + // If we need a panic runtime, we try to find an existing one here. 
At + // the same time we perform some general validation of the DAG we've got + // going such as ensuring everything has a compatible panic strategy. + // + // The logic for finding the panic runtime here is pretty much the same + // as the allocator case with the only addition that the panic strategy + // compilation mode also comes into play. + let desired_strategy = self.sess.panic_strategy(); + let mut runtime_found = false; + let mut needs_panic_runtime = + self.sess.contains_name(&krate.attrs, sym::needs_panic_runtime); + + for (cnum, data) in self.cstore.iter_crate_data() { + needs_panic_runtime = needs_panic_runtime || data.needs_panic_runtime(); + if data.is_panic_runtime() { + // Inject a dependency from all #![needs_panic_runtime] to this + // #![panic_runtime] crate. + self.inject_dependency_if(cnum, "a panic runtime", &|data| { + data.needs_panic_runtime() + }); + runtime_found = runtime_found || data.dep_kind() == CrateDepKind::Explicit; + } + } + + // If an explicitly linked and matching panic runtime was found, or if + // we just don't need one at all, then we're done here and there's + // nothing else to do. + if !needs_panic_runtime || runtime_found { + return; + } + + // By this point we know that we (a) need a panic runtime and (b) no + // panic runtime was explicitly linked. Here we just load an appropriate + // default runtime for our panic strategy and then inject the + // dependencies. + // + // We may resolve to an already loaded crate (as the crate may not have + // been explicitly linked prior to this) and we may re-inject + // dependencies again, but both of those situations are fine. + // + // Also note that we have yet to perform validation of the crate graph + // in terms of everyone has a compatible panic runtime format, that's + // performed later as part of the `dependency_format` module. 
+ let name = match desired_strategy { + PanicStrategy::Unwind => sym::panic_unwind, + PanicStrategy::Abort => sym::panic_abort, + }; + info!("panic runtime not found -- loading {}", name); + + let Some(cnum) = self.resolve_crate(name, DUMMY_SP, CrateDepKind::Implicit) else { return; }; + let data = self.cstore.get_crate_data(cnum); + + // Sanity check the loaded crate to ensure it is indeed a panic runtime + // and the panic strategy is indeed what we thought it was. + if !data.is_panic_runtime() { + self.sess.err(&format!("the crate `{}` is not a panic runtime", name)); + } + if data.required_panic_strategy() != Some(desired_strategy) { + self.sess.err(&format!( + "the crate `{}` does not have the panic \ + strategy `{}`", + name, + desired_strategy.desc() + )); + } + + self.cstore.injected_panic_runtime = Some(cnum); + self.inject_dependency_if(cnum, "a panic runtime", &|data| data.needs_panic_runtime()); + } + + fn inject_profiler_runtime(&mut self, krate: &ast::Crate) { + if self.sess.opts.unstable_opts.no_profiler_runtime + || !(self.sess.instrument_coverage() + || self.sess.opts.unstable_opts.profile + || self.sess.opts.cg.profile_generate.enabled()) + { + return; + } + + info!("loading profiler"); + + let name = Symbol::intern(&self.sess.opts.unstable_opts.profiler_runtime); + if name == sym::profiler_builtins && self.sess.contains_name(&krate.attrs, sym::no_core) { + self.sess.err( + "`profiler_builtins` crate (required by compiler options) \ + is not compatible with crate attribute `#![no_core]`", + ); + } + + let Some(cnum) = self.resolve_crate(name, DUMMY_SP, CrateDepKind::Implicit) else { return; }; + let data = self.cstore.get_crate_data(cnum); + + // Sanity check the loaded crate to ensure it is indeed a profiler runtime + if !data.is_profiler_runtime() { + self.sess.err(&format!("the crate `{}` is not a profiler runtime", name)); + } + } + + fn inject_allocator_crate(&mut self, krate: &ast::Crate) { + self.cstore.has_global_allocator = match 
&*global_allocator_spans(&self.sess, krate) { + [span1, span2, ..] => { + self.sess + .struct_span_err(*span2, "cannot define multiple global allocators") + .span_label(*span2, "cannot define a new global allocator") + .span_label(*span1, "previous global allocator defined here") + .emit(); + true + } + spans => !spans.is_empty(), + }; + + // Check to see if we actually need an allocator. This desire comes + // about through the `#![needs_allocator]` attribute and is typically + // written down in liballoc. + if !self.sess.contains_name(&krate.attrs, sym::needs_allocator) + && !self.cstore.iter_crate_data().any(|(_, data)| data.needs_allocator()) + { + return; + } + + // At this point we've determined that we need an allocator. Let's see + // if our compilation session actually needs an allocator based on what + // we're emitting. + let all_rlib = self.sess.crate_types().iter().all(|ct| matches!(*ct, CrateType::Rlib)); + if all_rlib { + return; + } + + // Ok, we need an allocator. Not only that but we're actually going to + // create an artifact that needs one linked in. Let's go find the one + // that we're going to link in. + // + // First up we check for global allocators. Look at the crate graph here + // and see what's a global allocator, including if we ourselves are a + // global allocator. + let mut global_allocator = + self.cstore.has_global_allocator.then(|| Symbol::intern("this crate")); + for (_, data) in self.cstore.iter_crate_data() { + if data.has_global_allocator() { + match global_allocator { + Some(other_crate) => { + self.sess.err(&format!( + "the `#[global_allocator]` in {} conflicts with global allocator in: {}", + other_crate, + data.name() + )); + } + None => global_allocator = Some(data.name()), + } + } + } + + if global_allocator.is_some() { + self.cstore.allocator_kind = Some(AllocatorKind::Global); + return; + } + + // Ok we haven't found a global allocator but we still need an + // allocator. 
At this point our allocator request is typically fulfilled + // by the standard library, denoted by the `#![default_lib_allocator]` + // attribute. + if !self.sess.contains_name(&krate.attrs, sym::default_lib_allocator) + && !self.cstore.iter_crate_data().any(|(_, data)| data.has_default_lib_allocator()) + { + self.sess.err( + "no global memory allocator found but one is required; link to std or add \ + `#[global_allocator]` to a static item that implements the GlobalAlloc trait", + ); + } + self.cstore.allocator_kind = Some(AllocatorKind::Default); + } + + fn inject_dependency_if( + &self, + krate: CrateNum, + what: &str, + needs_dep: &dyn Fn(&CrateMetadata) -> bool, + ) { + // don't perform this validation if the session has errors, as one of + // those errors may indicate a circular dependency which could cause + // this to stack overflow. + if self.sess.has_errors().is_some() { + return; + } + + // Before we inject any dependencies, make sure we don't inject a + // circular dependency by validating that this crate doesn't + // transitively depend on any crates satisfying `needs_dep`. + for dep in self.cstore.crate_dependencies_in_reverse_postorder(krate) { + let data = self.cstore.get_crate_data(dep); + if needs_dep(&data) { + self.sess.err(&format!( + "the crate `{}` cannot depend \ + on a crate that needs {}, but \ + it depends on `{}`", + self.cstore.get_crate_data(krate).name(), + what, + data.name() + )); + } + } + + // All crates satisfying `needs_dep` do not explicitly depend on the + // crate provided for this compile, but in order for this compilation to + // be successfully linked we need to inject a dependency (to order the + // crates on the command line correctly). 
        // (continuation of `inject_dependency_if`) Inject the dependency edge
        // itself into every loaded crate that satisfies `needs_dep`.
        for (cnum, data) in self.cstore.iter_crate_data() {
            if needs_dep(data) {
                info!("injecting a dep from {} to {}", cnum, krate);
                data.add_dependency(krate);
            }
        }
    }

    /// Emits the `unused_crate_dependencies` lint (or records JSON unused
    /// externs) for every `--extern` crate that was never resolved during
    /// this compilation.
    fn report_unused_deps(&mut self, krate: &ast::Crate) {
        // Make a point span rather than covering the whole file
        let span = krate.spans.inner_span.shrink_to_lo();
        // Complain about anything left over
        for (name, entry) in self.sess.opts.externs.iter() {
            if let ExternLocation::FoundInLibrarySearchDirectories = entry.location {
                // Don't worry about pathless `--extern foo` sysroot references
                continue;
            }
            if entry.nounused_dep {
                // We're not worried about this one
                continue;
            }
            let name_interned = Symbol::intern(name);
            if self.used_extern_options.contains(&name_interned) {
                continue;
            }

            // Got a real unused --extern
            if self.sess.opts.json_unused_externs.is_enabled() {
                self.cstore.unused_externs.push(name_interned);
                continue;
            }

            self.sess.parse_sess.buffer_lint(
                lint::builtin::UNUSED_CRATE_DEPENDENCIES,
                span,
                ast::CRATE_NODE_ID,
                &format!(
                    "external crate `{}` unused in `{}`: remove the dependency or add `use {} as _;`",
                    name,
                    self.local_crate_name,
                    name),
            );
        }
    }

    /// Runs the post-expansion crate-loading passes: injects the profiler
    /// runtime, allocator crate, and panic runtime, then reports unused
    /// `--extern` dependencies and dumps the loaded-crate graph at info level.
    pub fn postprocess(&mut self, krate: &ast::Crate) {
        self.inject_profiler_runtime(krate);
        self.inject_allocator_crate(krate);
        self.inject_panic_runtime(krate);

        self.report_unused_deps(krate);

        info!("{:?}", CrateDump(&self.cstore));
    }

    /// Resolves an `extern crate` item to a `CrateNum`, registering the
    /// `ExternCrate` source info. Returns `None` if resolution failed.
    /// Panics (via `bug!`) if called on a non-`ExternCrate` item.
    pub fn process_extern_crate(
        &mut self,
        item: &ast::Item,
        definitions: &Definitions,
        def_id: LocalDefId,
    ) -> Option<CrateNum> {
        match item.kind {
            ast::ItemKind::ExternCrate(orig_name) => {
                debug!(
                    "resolving extern crate stmt. ident: {} orig_name: {:?}",
                    item.ident, orig_name
                );
                // `extern crate foo as bar;` resolves crate `foo`; validate the
                // explicit name only when one was given.
                let name = match orig_name {
                    Some(orig_name) => {
                        validate_crate_name(self.sess, orig_name.as_str(), Some(item.span));
                        orig_name
                    }
                    None => item.ident.name,
                };
                // `#[no_link]` loads the crate for its macros only.
                let dep_kind = if self.sess.contains_name(&item.attrs, sym::no_link) {
                    CrateDepKind::MacrosOnly
                } else {
                    CrateDepKind::Explicit
                };

                let cnum = self.resolve_crate(name, item.span, dep_kind)?;

                let path_len = definitions.def_path(def_id).data.len();
                self.update_extern_crate(
                    cnum,
                    ExternCrate {
                        src: ExternCrateSource::Extern(def_id.to_def_id()),
                        span: item.span,
                        path_len,
                        dependency_of: LOCAL_CRATE,
                    },
                );
                Some(cnum)
            }
            _ => bug!(),
        }
    }

    /// Resolves a crate referenced by path (e.g. `::foo` in 2018+ paths),
    /// recording it as a path-sourced extern crate.
    pub fn process_path_extern(&mut self, name: Symbol, span: Span) -> Option<CrateNum> {
        let cnum = self.resolve_crate(name, span, CrateDepKind::Explicit)?;

        self.update_extern_crate(
            cnum,
            ExternCrate {
                src: ExternCrateSource::Path,
                span,
                // to have the least priority in `update_extern_crate`
                path_len: usize::MAX,
                dependency_of: LOCAL_CRATE,
            },
        );

        Some(cnum)
    }

    /// Like `process_path_extern` but speculative: no errors are reported and
    /// no `ExternCrate` entry is recorded on failure.
    pub fn maybe_process_path_extern(&mut self, name: Symbol) -> Option<CrateNum> {
        self.maybe_resolve_crate(name, CrateDepKind::Explicit, None).ok()
    }
}

/// Collects the spans of all items named like the global allocator shim that
/// carry `#[rustc_std_internal_symbol]`, used to diagnose conflicting
/// `#[global_allocator]` definitions.
fn global_allocator_spans(sess: &Session, krate: &ast::Crate) -> Vec<Span> {
    struct Finder<'a> {
        sess: &'a Session,
        name: Symbol,
        spans: Vec<Span>,
    }
    impl<'ast, 'a> visit::Visitor<'ast> for Finder<'a> {
        fn visit_item(&mut self, item: &'ast ast::Item) {
            if item.ident.name == self.name
                && self.sess.contains_name(&item.attrs, sym::rustc_std_internal_symbol)
            {
                self.spans.push(item.span);
            }
            visit::walk_item(self, item)
        }
    }

    let name = Symbol::intern(&AllocatorKind::Global.fn_name(sym::alloc));
    let mut f = Finder { sess, name, spans: Vec::new() };
    visit::walk_crate(&mut f, krate);
    f.spans
}
diff --git a/compiler/rustc_metadata/src/dependency_format.rs
b/compiler/rustc_metadata/src/dependency_format.rs new file mode 100644 index 000000000..b765c34f8 --- /dev/null +++ b/compiler/rustc_metadata/src/dependency_format.rs @@ -0,0 +1,435 @@ +//! Resolution of mixing rlibs and dylibs +//! +//! When producing a final artifact, such as a dynamic library, the compiler has +//! a choice between linking an rlib or linking a dylib of all upstream +//! dependencies. The linking phase must guarantee, however, that a library only +//! show up once in the object file. For example, it is illegal for library A to +//! be statically linked to B and C in separate dylibs, and then link B and C +//! into a crate D (because library A appears twice). +//! +//! The job of this module is to calculate what format each upstream crate +//! should be used when linking each output type requested in this session. This +//! generally follows this set of rules: +//! +//! 1. Each library must appear exactly once in the output. +//! 2. Each rlib contains only one library (it's just an object file) +//! 3. Each dylib can contain more than one library (due to static linking), +//! and can also bring in many dynamic dependencies. +//! +//! With these constraints in mind, it's generally a very difficult problem to +//! find a solution that's not "all rlibs" or "all dylibs". I have suspicions +//! that NP-ness may come into the picture here... +//! +//! The current selection algorithm below looks mostly similar to: +//! +//! 1. If static linking is required, then require all upstream dependencies +//! to be available as rlibs. If not, generate an error. +//! 2. If static linking is requested (generating an executable), then +//! attempt to use all upstream dependencies as rlibs. If any are not +//! found, bail out and continue to step 3. +//! 3. Static linking has failed, at least one library must be dynamically +//! linked. Apply a heuristic by greedily maximizing the number of +//! dynamically linked libraries. +//! 4. 
Each upstream dependency available as a dynamic library is +//! registered. The dependencies all propagate, adding to a map. It is +//! possible for a dylib to add a static library as a dependency, but it +//! is illegal for two dylibs to add the same static library as a +//! dependency. The same dylib can be added twice. Additionally, it is +//! illegal to add a static dependency when it was previously found as a +//! dylib (and vice versa) +//! 5. After all dynamic dependencies have been traversed, re-traverse the +//! remaining dependencies and add them statically (if they haven't been +//! added already). +//! +//! While not perfect, this algorithm should help support use-cases such as leaf +//! dependencies being static while the larger tree of inner dependencies are +//! all dynamic. This isn't currently very well battle tested, so it will likely +//! fall short in some use cases. +//! +//! Currently, there is no way to specify the preference of linkage with a +//! particular library (other than a global dynamic/static switch). +//! Additionally, the algorithm is geared towards finding *any* solution rather +//! than finding a number of solutions (there are normally quite a few). 
use crate::creader::CStore;

use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def_id::CrateNum;
use rustc_middle::middle::dependency_format::{Dependencies, DependencyList, Linkage};
use rustc_middle::ty::TyCtxt;
use rustc_session::config::CrateType;
use rustc_session::cstore::CrateDepKind;
use rustc_session::cstore::LinkagePreference::{self, RequireDynamic, RequireStatic};

/// Computes, for every crate type requested in this session, how each upstream
/// crate should be linked, validating each resulting list via `verify_ok`.
pub(crate) fn calculate(tcx: TyCtxt<'_>) -> Dependencies {
    tcx.sess
        .crate_types()
        .iter()
        .map(|&ty| {
            let linkage = calculate_type(tcx, ty);
            verify_ok(tcx, &linkage);
            (ty, linkage)
        })
        .collect::<Vec<_>>()
}

/// Decides, for a single output crate type, how each upstream crate will be
/// linked: statically (rlib), dynamically (dylib), already included through a
/// dylib, or not linked at all.
fn calculate_type(tcx: TyCtxt<'_>, ty: CrateType) -> DependencyList {
    let sess = &tcx.sess;

    // If no codegen output was requested (e.g. metadata-only builds), nothing
    // gets linked, so there is no dependency format to compute.
    if !sess.opts.output_types.should_codegen() {
        return Vec::new();
    }

    let preferred_linkage = match ty {
        // Generating a dylib without `-C prefer-dynamic` means that we're going
        // to try to eagerly statically link all dependencies. This is normally
        // done for end-product dylibs, not intermediate products.
        //
        // Treat cdylibs similarly. If `-C prefer-dynamic` is set, the caller may
        // be code-size conscious, but without it, it makes sense to statically
        // link a cdylib.
        CrateType::Dylib | CrateType::Cdylib if !sess.opts.cg.prefer_dynamic => Linkage::Static,
        CrateType::Dylib | CrateType::Cdylib => Linkage::Dynamic,

        // If the global prefer_dynamic switch is turned off, or the final
        // executable will be statically linked, prefer static crate linkage.
        CrateType::Executable if !sess.opts.cg.prefer_dynamic || sess.crt_static(Some(ty)) => {
            Linkage::Static
        }
        CrateType::Executable => Linkage::Dynamic,

        // proc-macro crates are mostly cdylibs, but we also need metadata.
        CrateType::ProcMacro => Linkage::Static,

        // No linkage happens with rlibs, we just needed the metadata (which we
        // got long ago), so don't bother with anything.
        CrateType::Rlib => Linkage::NotLinked,

        // staticlibs must have all static dependencies.
        CrateType::Staticlib => Linkage::Static,
    };

    if preferred_linkage == Linkage::NotLinked {
        // If the crate is not linked, there are no link-time dependencies.
        return Vec::new();
    }

    if preferred_linkage == Linkage::Static {
        // Attempt static linkage first. For dylibs and executables, we may be
        // able to retry below with dynamic linkage.
        if let Some(v) = attempt_static(tcx) {
            return v;
        }

        // Staticlibs and static executables must have all static dependencies.
        // If any are not found, generate some nice pretty errors.
        if ty == CrateType::Staticlib
            || (ty == CrateType::Executable
                && sess.crt_static(Some(ty))
                && !sess.target.crt_static_allows_dylibs)
        {
            for &cnum in tcx.crates(()).iter() {
                if tcx.dep_kind(cnum).macros_only() {
                    continue;
                }
                let src = tcx.used_crate_source(cnum);
                if src.rlib.is_some() {
                    continue;
                }
                sess.err(&format!(
                    "crate `{}` required to be available in rlib format, \
                     but was not found in this form",
                    tcx.crate_name(cnum)
                ));
            }
            return Vec::new();
        }
    }

    // Map from each crate to the linkage preference chosen for it so far.
    let mut formats = FxHashMap::default();

    // Sweep all crates for found dylibs. Add all dylibs, as well as their
    // dependencies, ensuring there are no conflicts. The only valid case for a
    // dependency to be relied upon twice is for both cases to rely on a dylib.
    for &cnum in tcx.crates(()).iter() {
        if tcx.dep_kind(cnum).macros_only() {
            continue;
        }
        let name = tcx.crate_name(cnum);
        let src = tcx.used_crate_source(cnum);
        if src.dylib.is_some() {
            tracing::info!("adding dylib: {}", name);
            add_library(tcx, cnum, RequireDynamic, &mut formats);
            let deps = tcx.dylib_dependency_formats(cnum);
            // Everything a dylib statically embeds or dynamically needs must
            // be recorded too, so conflicts can be detected.
            for &(depnum, style) in deps.iter() {
                tracing::info!("adding {:?}: {}", style, tcx.crate_name(depnum));
                add_library(tcx, depnum, style, &mut formats);
            }
        }
    }

    // Collect what we've got so far in the return vector.
    // (CrateNum 0 is the local crate, so upstream crates are 1-indexed.)
    let last_crate = tcx.crates(()).len();
    let mut ret = (1..last_crate + 1)
        .map(|cnum| match formats.get(&CrateNum::new(cnum)) {
            Some(&RequireDynamic) => Linkage::Dynamic,
            Some(&RequireStatic) => Linkage::IncludedFromDylib,
            None => Linkage::NotLinked,
        })
        .collect::<Vec<_>>();

    // Run through the dependency list again, and add any missing libraries as
    // static libraries.
    //
    // If the crate hasn't been included yet and it's not actually required
    // (e.g., it's an allocator) then we skip it here as well.
    for &cnum in tcx.crates(()).iter() {
        let src = tcx.used_crate_source(cnum);
        if src.dylib.is_none()
            && !formats.contains_key(&cnum)
            && tcx.dep_kind(cnum) == CrateDepKind::Explicit
        {
            assert!(src.rlib.is_some() || src.rmeta.is_some());
            tracing::info!("adding staticlib: {}", tcx.crate_name(cnum));
            add_library(tcx, cnum, RequireStatic, &mut formats);
            ret[cnum.as_usize() - 1] = Linkage::Static;
        }
    }

    // We've gotten this far because we're emitting some form of a final
    // artifact which means that we may need to inject dependencies of some
    // form.
    //
    // Things like allocators and panic runtimes may not have been activated
    // quite yet, so do so here.
    activate_injected_dep(CStore::from_tcx(tcx).injected_panic_runtime(), &mut ret, &|cnum| {
        tcx.is_panic_runtime(cnum)
    });

    // When dylib B links to dylib A, then when using B we must also link to A.
    // It could be the case, however, that the rlib for A is present (hence we
    // found metadata), but the dylib for A has since been removed.
    //
    // For situations like this, we perform one last pass over the dependencies,
    // making sure that everything is available in the requested format.
    for (cnum, kind) in ret.iter().enumerate() {
        let cnum = CrateNum::new(cnum + 1);
        let src = tcx.used_crate_source(cnum);
        match *kind {
            Linkage::NotLinked | Linkage::IncludedFromDylib => {}
            Linkage::Static if src.rlib.is_some() => continue,
            Linkage::Dynamic if src.dylib.is_some() => continue,
            kind => {
                let kind = match kind {
                    Linkage::Static => "rlib",
                    _ => "dylib",
                };
                sess.err(&format!(
                    "crate `{}` required to be available in {} format, \
                     but was not found in this form",
                    tcx.crate_name(cnum),
                    kind
                ));
            }
        }
    }

    ret
}

/// Records the linkage preference `link` for `cnum` in `m`, emitting an error
/// if an incompatible preference was already recorded (which would duplicate
/// the library in the final artifact).
fn add_library(
    tcx: TyCtxt<'_>,
    cnum: CrateNum,
    link: LinkagePreference,
    m: &mut FxHashMap<CrateNum, LinkagePreference>,
) {
    match m.get(&cnum) {
        Some(&link2) => {
            // If the linkages differ, then we'd have two copies of the library
            // if we continued linking. If the linkages are both static, then we
            // would also have two copies of the library (static from two
            // different locations).
            //
            // This error is probably a little obscure, but I imagine that it
            // can be refined over time.
            if link2 != link || link == RequireStatic {
                tcx.sess
                    .struct_err(&format!(
                        "cannot satisfy dependencies so `{}` only \
                         shows up once",
                        tcx.crate_name(cnum)
                    ))
                    .help(
                        "having upstream crates all available in one format \
                         will likely make this go away",
                    )
                    .emit();
            }
        }
        None => {
            m.insert(cnum, link);
        }
    }
}

/// Attempts an all-static linkage plan. Returns `None` if any non-macro-only
/// upstream crate lacks an rlib, in which case the caller may fall back to a
/// dynamic plan.
fn attempt_static(tcx: TyCtxt<'_>) -> Option<DependencyList> {
    let all_crates_available_as_rlib = tcx
        .crates(())
        .iter()
        .copied()
        .filter_map(|cnum| {
            if tcx.dep_kind(cnum).macros_only() {
                return None;
            }
            Some(tcx.used_crate_source(cnum).rlib.is_some())
        })
        .all(|is_rlib| is_rlib);
    if !all_crates_available_as_rlib {
        return None;
    }

    // All crates are available in an rlib format, so we're just going to link
    // everything in explicitly so long as it's actually required.
    // (continuation of `attempt_static`) Link explicit dependencies statically;
    // everything else (e.g. macro-only deps) is left unlinked.
    let mut ret = tcx
        .crates(())
        .iter()
        .map(|&cnum| {
            if tcx.dep_kind(cnum) == CrateDepKind::Explicit {
                Linkage::Static
            } else {
                Linkage::NotLinked
            }
        })
        .collect::<Vec<_>>();

    // Our allocator/panic runtime may not have been linked above if it wasn't
    // explicitly linked, which is the case for any injected dependency. Handle
    // that here and activate them.
    activate_injected_dep(CStore::from_tcx(tcx).injected_panic_runtime(), &mut ret, &|cnum| {
        tcx.is_panic_runtime(cnum)
    });

    Some(ret)
}

// Given a list of how to link upstream dependencies so far, ensure that an
// injected dependency is activated. This will not do anything if one was
// transitively included already (e.g., via a dylib or explicitly so).
//
// If an injected dependency was not found then we're guaranteed the
// metadata::creader module has injected that dependency (not listed as
// a required dependency) in one of the session's field. If this field is not
// set then this compilation doesn't actually need the dependency and we can
// also skip this step entirely.
fn activate_injected_dep(
    injected: Option<CrateNum>,
    list: &mut DependencyList,
    replaces_injected: &dyn Fn(CrateNum) -> bool,
) {
    // If any crate that could substitute for the injected one is already
    // linked, bail out early — nothing needs activating. Note that `list` is
    // 0-indexed while CrateNums for upstream crates start at 1.
    for (i, slot) in list.iter().enumerate() {
        let cnum = CrateNum::new(i + 1);
        if !replaces_injected(cnum) {
            continue;
        }
        if *slot != Linkage::NotLinked {
            return;
        }
    }
    if let Some(injected) = injected {
        let idx = injected.as_usize() - 1;
        // The early-return loop above guarantees this slot is still unlinked.
        assert_eq!(list[idx], Linkage::NotLinked);
        list[idx] = Linkage::Static;
    }
}

// After the linkage for a crate has been determined we need to verify that
// there's only going to be one panic runtime in the output (see `verify_ok`
// below, which validates panic runtimes and panic strategies).
/// Validates a computed linkage list: at most one panic runtime may be linked,
/// and every linked crate's panic strategy must be compatible with the
/// session's chosen strategy.
fn verify_ok(tcx: TyCtxt<'_>, list: &[Linkage]) {
    let sess = &tcx.sess;
    if list.is_empty() {
        return;
    }
    // The single linked panic runtime found so far, with its panic strategy.
    let mut panic_runtime = None;
    for (i, linkage) in list.iter().enumerate() {
        if let Linkage::NotLinked = *linkage {
            continue;
        }
        // `list` is 0-indexed; upstream CrateNums start at 1.
        let cnum = CrateNum::new(i + 1);

        if tcx.is_panic_runtime(cnum) {
            if let Some((prev, _)) = panic_runtime {
                let prev_name = tcx.crate_name(prev);
                let cur_name = tcx.crate_name(cnum);
                sess.err(&format!(
                    "cannot link together two \
                     panic runtimes: {} and {}",
                    prev_name, cur_name
                ));
            }
            // NOTE: on a duplicate, the later runtime overwrites the earlier
            // one after the error is emitted; compilation aborts anyway.
            panic_runtime = Some((
                cnum,
                tcx.required_panic_strategy(cnum).unwrap_or_else(|| {
                    bug!("cannot determine panic strategy of a panic runtime");
                }),
            ));
        }
    }

    // If we found a panic runtime, then we know by this point that it's the
    // only one, but we perform validation here that all the panic strategy
    // compilation modes for the whole DAG are valid.
    if let Some((runtime_cnum, found_strategy)) = panic_runtime {
        let desired_strategy = sess.panic_strategy();

        // First up, validate that our selected panic runtime is indeed exactly
        // our same strategy.
        if found_strategy != desired_strategy {
            sess.err(&format!(
                "the linked panic runtime `{}` is \
                 not compiled with this crate's \
                 panic strategy `{}`",
                tcx.crate_name(runtime_cnum),
                desired_strategy.desc()
            ));
        }

        // Next up, verify that all other crates are compatible with this panic
        // strategy. If the dep isn't linked, we ignore it, and if our strategy
        // is abort then it's compatible with everything. Otherwise all crates'
        // panic strategy must match our own.
+ for (i, linkage) in list.iter().enumerate() { + if let Linkage::NotLinked = *linkage { + continue; + } + let cnum = CrateNum::new(i + 1); + if cnum == runtime_cnum || tcx.is_compiler_builtins(cnum) { + continue; + } + + if let Some(found_strategy) = tcx.required_panic_strategy(cnum) && desired_strategy != found_strategy { + sess.err(&format!( + "the crate `{}` requires \ + panic strategy `{}` which is \ + incompatible with this crate's \ + strategy of `{}`", + tcx.crate_name(cnum), + found_strategy.desc(), + desired_strategy.desc() + )); + } + + let found_drop_strategy = tcx.panic_in_drop_strategy(cnum); + if tcx.sess.opts.unstable_opts.panic_in_drop != found_drop_strategy { + sess.err(&format!( + "the crate `{}` is compiled with the \ + panic-in-drop strategy `{}` which is \ + incompatible with this crate's \ + strategy of `{}`", + tcx.crate_name(cnum), + found_drop_strategy.desc(), + tcx.sess.opts.unstable_opts.panic_in_drop.desc() + )); + } + } + } +} diff --git a/compiler/rustc_metadata/src/foreign_modules.rs b/compiler/rustc_metadata/src/foreign_modules.rs new file mode 100644 index 000000000..2ca4cd17f --- /dev/null +++ b/compiler/rustc_metadata/src/foreign_modules.rs @@ -0,0 +1,19 @@ +use rustc_hir as hir; +use rustc_hir::def::DefKind; +use rustc_middle::ty::TyCtxt; +use rustc_session::cstore::ForeignModule; + +pub(crate) fn collect(tcx: TyCtxt<'_>) -> Vec<ForeignModule> { + let mut modules = Vec::new(); + for id in tcx.hir().items() { + if !matches!(tcx.def_kind(id.def_id), DefKind::ForeignMod) { + continue; + } + let item = tcx.hir().item(id); + if let hir::ItemKind::ForeignMod { items, .. 
} = item.kind { + let foreign_items = items.iter().map(|it| it.id.def_id.to_def_id()).collect(); + modules.push(ForeignModule { foreign_items, def_id: id.def_id.to_def_id() }); + } + } + modules +} diff --git a/compiler/rustc_metadata/src/fs.rs b/compiler/rustc_metadata/src/fs.rs new file mode 100644 index 000000000..e6072901a --- /dev/null +++ b/compiler/rustc_metadata/src/fs.rs @@ -0,0 +1,137 @@ +use crate::{encode_metadata, EncodedMetadata}; + +use rustc_data_structures::temp_dir::MaybeTempDir; +use rustc_hir::def_id::LOCAL_CRATE; +use rustc_middle::ty::TyCtxt; +use rustc_session::config::{CrateType, OutputFilenames, OutputType}; +use rustc_session::output::filename_for_metadata; +use rustc_session::Session; +use tempfile::Builder as TempFileBuilder; + +use std::fs; +use std::path::{Path, PathBuf}; + +// FIXME(eddyb) maybe include the crate name in this? +pub const METADATA_FILENAME: &str = "lib.rmeta"; + +/// We use a temp directory here to avoid races between concurrent rustc processes, +/// such as builds in the same directory using the same filename for metadata while +/// building an `.rlib` (stomping over one another), or writing an `.rmeta` into a +/// directory being searched for `extern crate` (observing an incomplete file). +/// The returned path is the temporary file containing the complete metadata. 
pub fn emit_metadata(sess: &Session, metadata: &[u8], tmpdir: &MaybeTempDir) -> PathBuf {
    let out_filename = tmpdir.as_ref().join(METADATA_FILENAME);
    let result = fs::write(&out_filename, metadata);

    // Any write failure is fatal: downstream linking needs complete metadata.
    if let Err(e) = result {
        sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e));
    }

    out_filename
}

/// Encodes crate metadata (if the requested crate types need it) and writes it
/// to disk. Returns the encoded metadata plus a flag indicating whether codegen
/// must embed a metadata module (true for the compressed dylib/proc-macro case).
pub fn encode_and_write_metadata(
    tcx: TyCtxt<'_>,
    outputs: &OutputFilenames,
) -> (EncodedMetadata, bool) {
    // Ordered by how much metadata each crate type requires; `max()` below
    // picks the strongest requirement across all requested crate types.
    #[derive(PartialEq, Eq, PartialOrd, Ord)]
    enum MetadataKind {
        None,
        Uncompressed,
        Compressed,
    }

    let metadata_kind = tcx
        .sess
        .crate_types()
        .iter()
        .map(|ty| match *ty {
            CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => MetadataKind::None,

            CrateType::Rlib => MetadataKind::Uncompressed,

            CrateType::Dylib | CrateType::ProcMacro => MetadataKind::Compressed,
        })
        .max()
        .unwrap_or(MetadataKind::None);

    let crate_name = tcx.crate_name(LOCAL_CRATE);
    let out_filename = filename_for_metadata(tcx.sess, crate_name.as_str(), outputs);
    // To avoid races with another rustc process scanning the output directory,
    // we need to write the file somewhere else and atomically move it to its
    // final destination, with an `fs::rename` call. In order for the rename to
    // always succeed, the temporary file needs to be on the same filesystem,
    // which is why we create it inside the output directory specifically.
    let metadata_tmpdir = TempFileBuilder::new()
        .prefix("rmeta")
        .tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
        .unwrap_or_else(|err| tcx.sess.fatal(&format!("couldn't create a temp dir: {}", err)));
    let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
    let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);

    // Always create a file at `metadata_filename`, even if we have nothing to write to it.
    // This simplifies the creation of the output `out_filename` when requested.
+ match metadata_kind { + MetadataKind::None => { + std::fs::File::create(&metadata_filename).unwrap_or_else(|e| { + tcx.sess.fatal(&format!( + "failed to create the file {}: {}", + metadata_filename.display(), + e + )) + }); + } + MetadataKind::Uncompressed | MetadataKind::Compressed => { + encode_metadata(tcx, &metadata_filename); + } + }; + + let _prof_timer = tcx.sess.prof.generic_activity("write_crate_metadata"); + + // If the user requests metadata as output, rename `metadata_filename` + // to the expected output `out_filename`. The match above should ensure + // this file always exists. + let need_metadata_file = tcx.sess.opts.output_types.contains_key(&OutputType::Metadata); + let (metadata_filename, metadata_tmpdir) = if need_metadata_file { + if let Err(e) = non_durable_rename(&metadata_filename, &out_filename) { + tcx.sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e)); + } + if tcx.sess.opts.json_artifact_notifications { + tcx.sess + .parse_sess + .span_diagnostic + .emit_artifact_notification(&out_filename, "metadata"); + } + (out_filename, None) + } else { + (metadata_filename, Some(metadata_tmpdir)) + }; + + // Load metadata back to memory: codegen may need to include it in object files. + let metadata = + EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|e| { + tcx.sess.fatal(&format!("failed to create encoded metadata from file: {}", e)) + }); + + let need_metadata_module = metadata_kind == MetadataKind::Compressed; + + (metadata, need_metadata_module) +} + +#[cfg(not(target_os = "linux"))] +pub fn non_durable_rename(src: &Path, dst: &Path) -> std::io::Result<()> { + std::fs::rename(src, dst) +} + +/// This function attempts to bypass the auto_da_alloc heuristic implemented by some filesystems +/// such as btrfs and ext4. 
When renaming over a file that already exists then they will "helpfully" +/// write back the source file before committing the rename in case a developer forgot some of +/// the fsyncs in the open/write/fsync(file)/rename/fsync(dir) dance for atomic file updates. +/// +/// To avoid triggering this heuristic we delete the destination first, if it exists. +/// The cost of an extra syscall is much lower than getting descheduled for the sync IO. +#[cfg(target_os = "linux")] +pub fn non_durable_rename(src: &Path, dst: &Path) -> std::io::Result<()> { + let _ = std::fs::remove_file(dst); + std::fs::rename(src, dst) +} diff --git a/compiler/rustc_metadata/src/lib.rs b/compiler/rustc_metadata/src/lib.rs new file mode 100644 index 000000000..6440f3e39 --- /dev/null +++ b/compiler/rustc_metadata/src/lib.rs @@ -0,0 +1,41 @@ +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(decl_macro)] +#![feature(drain_filter)] +#![feature(generators)] +#![feature(generic_associated_types)] +#![feature(iter_from_generator)] +#![feature(let_chains)] +#![feature(let_else)] +#![feature(once_cell)] +#![feature(proc_macro_internals)] +#![feature(macro_metavar_expr)] +#![feature(min_specialization)] +#![feature(slice_as_chunks)] +#![feature(trusted_len)] +#![feature(try_blocks)] +#![feature(never_type)] +#![recursion_limit = "256"] +#![allow(rustc::potential_query_instability)] + +extern crate proc_macro; + +#[macro_use] +extern crate rustc_macros; +#[macro_use] +extern crate rustc_middle; +#[macro_use] +extern crate rustc_data_structures; + +pub use rmeta::{provide, provide_extern}; + +mod dependency_format; +mod foreign_modules; +mod native_libs; +mod rmeta; + +pub mod creader; +pub mod fs; +pub mod locator; + +pub use fs::{emit_metadata, METADATA_FILENAME}; +pub use rmeta::{encode_metadata, EncodedMetadata, METADATA_HEADER}; diff --git a/compiler/rustc_metadata/src/locator.rs b/compiler/rustc_metadata/src/locator.rs new file mode 100644 index 
000000000..2c1c84b0b --- /dev/null +++ b/compiler/rustc_metadata/src/locator.rs @@ -0,0 +1,1222 @@ +//! Finds crate binaries and loads their metadata +//! +//! Might I be the first to welcome you to a world of platform differences, +//! version requirements, dependency graphs, conflicting desires, and fun! This +//! is the major guts (along with metadata::creader) of the compiler for loading +//! crates and resolving dependencies. Let's take a tour! +//! +//! # The problem +//! +//! Each invocation of the compiler is immediately concerned with one primary +//! problem, to connect a set of crates to resolved crates on the filesystem. +//! Concretely speaking, the compiler follows roughly these steps to get here: +//! +//! 1. Discover a set of `extern crate` statements. +//! 2. Transform these directives into crate names. If the directive does not +//! have an explicit name, then the identifier is the name. +//! 3. For each of these crate names, find a corresponding crate on the +//! filesystem. +//! +//! Sounds easy, right? Let's walk into some of the nuances. +//! +//! ## Transitive Dependencies +//! +//! Let's say we've got three crates: A, B, and C. A depends on B, and B depends +//! on C. When we're compiling A, we primarily need to find and locate B, but we +//! also end up needing to find and locate C as well. +//! +//! The reason for this is that any of B's types could be composed of C's types, +//! any function in B could return a type from C, etc. To be able to guarantee +//! that we can always type-check/translate any function, we have to have +//! complete knowledge of the whole ecosystem, not just our immediate +//! dependencies. +//! +//! So now as part of the "find a corresponding crate on the filesystem" step +//! above, this involves also finding all crates for *all upstream +//! dependencies*. This includes all dependencies transitively. +//! +//! ## Rlibs and Dylibs +//! +//! The compiler has two forms of intermediate dependencies. 
These are dubbed
//! rlibs and dylibs for the static and dynamic variants, respectively. An rlib
//! is a rustc-defined file format (currently just an ar archive) while a dylib
//! is a platform-defined dynamic library. Each library has a metadata somewhere
//! inside of it.
//!
//! A third kind of dependency is an rmeta file. These are metadata files and do
//! not contain any code, etc. To a first approximation, these are treated in the
//! same way as rlibs. Where there is both an rlib and an rmeta file, the rlib
//! gets priority (even if the rmeta file is newer). An rmeta file is only
//! useful for checking a downstream crate; attempting to link one will cause an
//! error.
//!
//! When translating a crate name to a crate on the filesystem, we all of a
//! sudden need to take into account both rlibs and dylibs! Linkage later on may
//! use either one of these files, as each has their pros/cons. The job of crate
//! loading is to discover what's possible by finding all candidates.
//!
//! Most parts of this loading system keep the dylib/rlib as just separate
//! variables.
//!
//! ## Where to look?
//!
//! We can't exactly scan your whole hard drive when looking for dependencies,
//! so we need places to look. Currently the compiler will implicitly add the
//! target lib search path ($prefix/lib/rustlib/$target/lib) to any compilation,
//! and otherwise all -L flags are added to the search paths.
//!
//! ## What criterion to select on?
//!
//! This is a pretty tricky area of loading crates. Given a file, how do we know
//! whether it's the right crate? Currently, the rules look along these lines:
//!
//! 1. Does the filename match an rlib/dylib pattern? That is to say, does the
//!    filename have the right prefix/suffix?
//! 2. Does the filename have the right prefix for the crate name being queried?
//!    This is filtering for files like `libfoo*.rlib` and such. If the crate
//!
we're looking for was originally compiled with -C extra-filename, the +//! extra filename will be included in this prefix to reduce reading +//! metadata from crates that would otherwise share our prefix. +//! 3. Is the file an actual rust library? This is done by loading the metadata +//! from the library and making sure it's actually there. +//! 4. Does the name in the metadata agree with the name of the library? +//! 5. Does the target in the metadata agree with the current target? +//! 6. Does the SVH match? (more on this later) +//! +//! If the file answers `yes` to all these questions, then the file is +//! considered as being *candidate* for being accepted. It is illegal to have +//! more than two candidates as the compiler has no method by which to resolve +//! this conflict. Additionally, rlib/dylib candidates are considered +//! separately. +//! +//! After all this has happened, we have 1 or two files as candidates. These +//! represent the rlib/dylib file found for a library, and they're returned as +//! being found. +//! +//! ### What about versions? +//! +//! A lot of effort has been put forth to remove versioning from the compiler. +//! There have been forays in the past to have versioning baked in, but it was +//! largely always deemed insufficient to the point that it was recognized that +//! it's probably something the compiler shouldn't do anyway due to its +//! complicated nature and the state of the half-baked solutions. +//! +//! With a departure from versioning, the primary criterion for loading crates +//! is just the name of a crate. If we stopped here, it would imply that you +//! could never link two crates of the same name from different sources +//! together, which is clearly a bad state to be in. +//! +//! To resolve this problem, we come to the next section! +//! +//! # Expert Mode +//! +//! A number of flags have been added to the compiler to solve the "version +//! 
problem" in the previous section, as well as generally enabling more +//! powerful usage of the crate loading system of the compiler. The goal of +//! these flags and options are to enable third-party tools to drive the +//! compiler with prior knowledge about how the world should look. +//! +//! ## The `--extern` flag +//! +//! The compiler accepts a flag of this form a number of times: +//! +//! ```text +//! --extern crate-name=path/to/the/crate.rlib +//! ``` +//! +//! This flag is basically the following letter to the compiler: +//! +//! > Dear rustc, +//! > +//! > When you are attempting to load the immediate dependency `crate-name`, I +//! > would like you to assume that the library is located at +//! > `path/to/the/crate.rlib`, and look nowhere else. Also, please do not +//! > assume that the path I specified has the name `crate-name`. +//! +//! This flag basically overrides most matching logic except for validating that +//! the file is indeed a rust library. The same `crate-name` can be specified +//! twice to specify the rlib/dylib pair. +//! +//! ## Enabling "multiple versions" +//! +//! This basically boils down to the ability to specify arbitrary packages to +//! the compiler. For example, if crate A wanted to use Bv1 and Bv2, then it +//! would look something like: +//! +//! ```compile_fail,E0463 +//! extern crate b1; +//! extern crate b2; +//! +//! fn main() {} +//! ``` +//! +//! and the compiler would be invoked as: +//! +//! ```text +//! rustc a.rs --extern b1=path/to/libb1.rlib --extern b2=path/to/libb2.rlib +//! ``` +//! +//! In this scenario there are two crates named `b` and the compiler must be +//! manually driven to be informed where each crate is. +//! +//! ## Frobbing symbols +//! +//! One of the immediate problems with linking the same library together twice +//! in the same problem is dealing with duplicate symbols. The primary way to +//! deal with this in rustc is to add hashes to the end of each symbol. +//! +//! 
In order to force hashes to change between versions of a library, if +//! desired, the compiler exposes an option `-C metadata=foo`, which is used to +//! initially seed each symbol hash. The string `foo` is prepended to each +//! string-to-hash to ensure that symbols change over time. +//! +//! ## Loading transitive dependencies +//! +//! Dealing with same-named-but-distinct crates is not just a local problem, but +//! one that also needs to be dealt with for transitive dependencies. Note that +//! in the letter above `--extern` flags only apply to the *local* set of +//! dependencies, not the upstream transitive dependencies. Consider this +//! dependency graph: +//! +//! ```text +//! A.1 A.2 +//! | | +//! | | +//! B C +//! \ / +//! \ / +//! D +//! ``` +//! +//! In this scenario, when we compile `D`, we need to be able to distinctly +//! resolve `A.1` and `A.2`, but an `--extern` flag cannot apply to these +//! transitive dependencies. +//! +//! Note that the key idea here is that `B` and `C` are both *already compiled*. +//! That is, they have already resolved their dependencies. Due to unrelated +//! technical reasons, when a library is compiled, it is only compatible with +//! the *exact same* version of the upstream libraries it was compiled against. +//! We use the "Strict Version Hash" to identify the exact copy of an upstream +//! library. +//! +//! With this knowledge, we know that `B` and `C` will depend on `A` with +//! different SVH values, so we crawl the normal `-L` paths looking for +//! `liba*.rlib` and filter based on the contained SVH. +//! +//! In the end, this ends up not needing `--extern` to specify upstream +//! transitive dependencies. +//! +//! # Wrapping up +//! +//! That's the general overview of loading crates in the compiler, but it's by +//! no means all of the necessary details. Take a look at the rest of +//! metadata::locator or metadata::creader for all the juicy details! 
+ +use crate::creader::Library; +use crate::rmeta::{rustc_version, MetadataBlob, METADATA_HEADER}; + +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::memmap::Mmap; +use rustc_data_structures::owning_ref::OwningRef; +use rustc_data_structures::svh::Svh; +use rustc_data_structures::sync::MetadataRef; +use rustc_errors::{struct_span_err, FatalError}; +use rustc_session::config::{self, CrateType}; +use rustc_session::cstore::{CrateSource, MetadataLoader}; +use rustc_session::filesearch::FileSearch; +use rustc_session::search_paths::PathKind; +use rustc_session::utils::CanonicalizedPath; +use rustc_session::Session; +use rustc_span::symbol::{sym, Symbol}; +use rustc_span::Span; +use rustc_target::spec::{Target, TargetTriple}; + +use snap::read::FrameDecoder; +use std::fmt::Write as _; +use std::io::{Read, Result as IoResult, Write}; +use std::path::{Path, PathBuf}; +use std::{cmp, fmt, fs}; +use tracing::{debug, info}; + +#[derive(Clone)] +pub(crate) struct CrateLocator<'a> { + // Immutable per-session configuration. + only_needs_metadata: bool, + sysroot: &'a Path, + metadata_loader: &'a dyn MetadataLoader, + + // Immutable per-search configuration. + crate_name: Symbol, + exact_paths: Vec<CanonicalizedPath>, + pub hash: Option<Svh>, + extra_filename: Option<&'a str>, + pub target: &'a Target, + pub triple: TargetTriple, + pub filesearch: FileSearch<'a>, + pub is_proc_macro: bool, + + // Mutable in-progress state or output. 
+    crate_rejections: CrateRejections,
+}
+
+/// The name of an already-loaded crate together with the on-disk sources it
+/// was loaded from; used as the dependency-chain context in error reports.
+#[derive(Clone)]
+pub(crate) struct CratePaths {
+    name: Symbol,
+    source: CrateSource,
+}
+
+impl CratePaths {
+    pub(crate) fn new(name: Symbol, source: CrateSource) -> CratePaths {
+        CratePaths { name, source }
+    }
+}
+
+/// The kind of on-disk artifact a crate candidate was found as.
+#[derive(Copy, Clone, PartialEq)]
+pub(crate) enum CrateFlavor {
+    Rlib,
+    Rmeta,
+    Dylib,
+}
+
+impl fmt::Display for CrateFlavor {
+    // Human-readable flavor name, used in diagnostics such as E0465.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match *self {
+            CrateFlavor::Rlib => "rlib",
+            CrateFlavor::Rmeta => "rmeta",
+            CrateFlavor::Dylib => "dylib",
+        })
+    }
+}
+
+impl<'a> CrateLocator<'a> {
+    /// Creates a locator for a single crate search: `crate_name` with an
+    /// optional expected SVH `hash`, searched for the host or target
+    /// (`is_host`) in the search paths of kind `path_kind`.
+    pub(crate) fn new(
+        sess: &'a Session,
+        metadata_loader: &'a dyn MetadataLoader,
+        crate_name: Symbol,
+        hash: Option<Svh>,
+        extra_filename: Option<&'a str>,
+        is_host: bool,
+        path_kind: PathKind,
+    ) -> CrateLocator<'a> {
+        // The all loop is because `--crate-type=rlib --crate-type=rlib` is
+        // legal and produces both inside this type.
+        let is_rlib = sess.crate_types().iter().all(|c| *c == CrateType::Rlib);
+        let needs_object_code = sess.opts.output_types.should_codegen();
+        // If we're producing an rlib, then we don't need object code.
+        // Or, if we're not producing object code, then we don't need it either
+        // (e.g., if we're a cdylib but emitting just metadata).
+        let only_needs_metadata = is_rlib || !needs_object_code;
+
+        CrateLocator {
+            only_needs_metadata,
+            sysroot: &sess.sysroot,
+            metadata_loader,
+            crate_name,
+            // `--extern` paths only apply to direct dependencies, which are
+            // looked up without a hash.
+            exact_paths: if hash.is_none() {
+                sess.opts
+                    .externs
+                    .get(crate_name.as_str())
+                    .into_iter()
+                    .filter_map(|entry| entry.files())
+                    .flatten()
+                    .cloned()
+                    .collect()
+            } else {
+                // SVH being specified means this is a transitive dependency,
+                // so `--extern` options do not apply.
+ Vec::new() + }, + hash, + extra_filename, + target: if is_host { &sess.host } else { &sess.target }, + triple: if is_host { + TargetTriple::from_triple(config::host_triple()) + } else { + sess.opts.target_triple.clone() + }, + filesearch: if is_host { + sess.host_filesearch(path_kind) + } else { + sess.target_filesearch(path_kind) + }, + is_proc_macro: false, + crate_rejections: CrateRejections::default(), + } + } + + pub(crate) fn reset(&mut self) { + self.crate_rejections.via_hash.clear(); + self.crate_rejections.via_triple.clear(); + self.crate_rejections.via_kind.clear(); + self.crate_rejections.via_version.clear(); + self.crate_rejections.via_filename.clear(); + self.crate_rejections.via_invalid.clear(); + } + + pub(crate) fn maybe_load_library_crate(&mut self) -> Result<Option<Library>, CrateError> { + if !self.exact_paths.is_empty() { + return self.find_commandline_library(); + } + let mut seen_paths = FxHashSet::default(); + if let Some(extra_filename) = self.extra_filename { + if let library @ Some(_) = self.find_library_crate(extra_filename, &mut seen_paths)? 
{ + return Ok(library); + } + } + self.find_library_crate("", &mut seen_paths) + } + + fn find_library_crate( + &mut self, + extra_prefix: &str, + seen_paths: &mut FxHashSet<PathBuf>, + ) -> Result<Option<Library>, CrateError> { + let rmeta_prefix = &format!("lib{}{}", self.crate_name, extra_prefix); + let rlib_prefix = rmeta_prefix; + let dylib_prefix = + &format!("{}{}{}", self.target.dll_prefix, self.crate_name, extra_prefix); + let staticlib_prefix = + &format!("{}{}{}", self.target.staticlib_prefix, self.crate_name, extra_prefix); + + let rmeta_suffix = ".rmeta"; + let rlib_suffix = ".rlib"; + let dylib_suffix = &self.target.dll_suffix; + let staticlib_suffix = &self.target.staticlib_suffix; + + let mut candidates: FxHashMap<_, (FxHashMap<_, _>, FxHashMap<_, _>, FxHashMap<_, _>)> = + Default::default(); + + // First, find all possible candidate rlibs and dylibs purely based on + // the name of the files themselves. We're trying to match against an + // exact crate name and a possibly an exact hash. + // + // During this step, we can filter all found libraries based on the + // name and id found in the crate id (we ignore the path portion for + // filename matching), as well as the exact hash (if specified). If we + // end up having many candidates, we must look at the metadata to + // perform exact matches against hashes/crate ids. Note that opening up + // the metadata is where we do an exact match against the full contents + // of the crate id (path/name/id). + // + // The goal of this step is to look at as little metadata as possible. + // Unfortunately, the prefix-based matching sometimes is over-eager. + // E.g. if `rlib_suffix` is `libstd` it'll match the file + // `libstd_detect-8d6701fb958915ad.rlib` (incorrect) as well as + // `libstd-f3ab5b1dea981f17.rlib` (correct). 
But this is hard to avoid + // given that `extra_filename` comes from the `-C extra-filename` + // option and thus can be anything, and the incorrect match will be + // handled safely in `extract_one`. + for search_path in self.filesearch.search_paths() { + debug!("searching {}", search_path.dir.display()); + for spf in search_path.files.iter() { + debug!("testing {}", spf.path.display()); + + let f = &spf.file_name_str; + let (hash, kind) = if f.starts_with(rlib_prefix) && f.ends_with(rlib_suffix) { + (&f[rlib_prefix.len()..(f.len() - rlib_suffix.len())], CrateFlavor::Rlib) + } else if f.starts_with(rmeta_prefix) && f.ends_with(rmeta_suffix) { + (&f[rmeta_prefix.len()..(f.len() - rmeta_suffix.len())], CrateFlavor::Rmeta) + } else if f.starts_with(dylib_prefix) && f.ends_with(dylib_suffix.as_ref()) { + (&f[dylib_prefix.len()..(f.len() - dylib_suffix.len())], CrateFlavor::Dylib) + } else { + if f.starts_with(staticlib_prefix) && f.ends_with(staticlib_suffix.as_ref()) { + self.crate_rejections.via_kind.push(CrateMismatch { + path: spf.path.clone(), + got: "static".to_string(), + }); + } + continue; + }; + + info!("lib candidate: {}", spf.path.display()); + + let (rlibs, rmetas, dylibs) = candidates.entry(hash.to_string()).or_default(); + let path = fs::canonicalize(&spf.path).unwrap_or_else(|_| spf.path.clone()); + if seen_paths.contains(&path) { + continue; + }; + seen_paths.insert(path.clone()); + match kind { + CrateFlavor::Rlib => rlibs.insert(path, search_path.kind), + CrateFlavor::Rmeta => rmetas.insert(path, search_path.kind), + CrateFlavor::Dylib => dylibs.insert(path, search_path.kind), + }; + } + } + + // We have now collected all known libraries into a set of candidates + // keyed of the filename hash listed. For each filename, we also have a + // list of rlibs/dylibs that apply. Here, we map each of these lists + // (per hash), to a Library candidate for returning. 
+        //
+        // A Library candidate is created if the metadata for the set of
+        // libraries corresponds to the crate id and hash criteria that this
+        // search is being performed for.
+        let mut libraries = FxHashMap::default();
+        for (_hash, (rlibs, rmetas, dylibs)) in candidates {
+            if let Some((svh, lib)) = self.extract_lib(rlibs, rmetas, dylibs)? {
+                libraries.insert(svh, lib);
+            }
+        }
+
+        // Having now translated all relevant found hashes into libraries, see
+        // what we've got and figure out if we found multiple candidates for
+        // libraries or not.
+        match libraries.len() {
+            0 => Ok(None),
+            1 => Ok(Some(libraries.into_iter().next().unwrap().1)),
+            _ => Err(CrateError::MultipleMatchingCrates(self.crate_name, libraries)),
+        }
+    }
+
+    /// Combines the candidate rlib/rmeta/dylib paths for one filename hash
+    /// into a single `Library`, reading metadata at most once (cached in
+    /// `slot`) to confirm the candidates match this search. Returns `None`
+    /// when no candidate had acceptable metadata.
+    fn extract_lib(
+        &mut self,
+        rlibs: FxHashMap<PathBuf, PathKind>,
+        rmetas: FxHashMap<PathBuf, PathKind>,
+        dylibs: FxHashMap<PathBuf, PathKind>,
+    ) -> Result<Option<(Svh, Library)>, CrateError> {
+        let mut slot = None;
+        // Order here matters, rmeta should come first. See comment in
+        // `extract_one` below.
+        let source = CrateSource {
+            rmeta: self.extract_one(rmetas, CrateFlavor::Rmeta, &mut slot)?,
+            rlib: self.extract_one(rlibs, CrateFlavor::Rlib, &mut slot)?,
+            dylib: self.extract_one(dylibs, CrateFlavor::Dylib, &mut slot)?,
+        };
+        Ok(slot.map(|(svh, metadata)| (svh, Library { source, metadata })))
+    }
+
+    /// Whether a candidate of the given `flavor` is still needed once
+    /// metadata has already been loaded: proc macros always need the dylib,
+    /// and metadata-only compilations need only the rmeta.
+    fn needs_crate_flavor(&self, flavor: CrateFlavor) -> bool {
+        if flavor == CrateFlavor::Dylib && self.is_proc_macro {
+            return true;
+        }
+
+        if self.only_needs_metadata {
+            flavor == CrateFlavor::Rmeta
+        } else {
+            // we need all flavors (perhaps not true, but what we do for now)
+            true
+        }
+    }
+
+    // Attempts to extract *one* library from the set `m`. If the set has no
+    // elements, `None` is returned. If the set has more than one element, then
+    // the errors and notes are emitted about the set of libraries.
+ // + // With only one library in the set, this function will extract it, and then + // read the metadata from it if `*slot` is `None`. If the metadata couldn't + // be read, it is assumed that the file isn't a valid rust library (no + // errors are emitted). + fn extract_one( + &mut self, + m: FxHashMap<PathBuf, PathKind>, + flavor: CrateFlavor, + slot: &mut Option<(Svh, MetadataBlob)>, + ) -> Result<Option<(PathBuf, PathKind)>, CrateError> { + // If we are producing an rlib, and we've already loaded metadata, then + // we should not attempt to discover further crate sources (unless we're + // locating a proc macro; exact logic is in needs_crate_flavor). This means + // that under -Zbinary-dep-depinfo we will not emit a dependency edge on + // the *unused* rlib, and by returning `None` here immediately we + // guarantee that we do indeed not use it. + // + // See also #68149 which provides more detail on why emitting the + // dependency on the rlib is a bad thing. + // + // We currently do not verify that these other sources are even in sync, + // and this is arguably a bug (see #10786), but because reading metadata + // is quite slow (especially from dylibs) we currently do not read it + // from the other crate sources. + if slot.is_some() { + if m.is_empty() || !self.needs_crate_flavor(flavor) { + return Ok(None); + } else if m.len() == 1 { + return Ok(Some(m.into_iter().next().unwrap())); + } + } + + let mut ret: Option<(PathBuf, PathKind)> = None; + let mut err_data: Option<Vec<PathBuf>> = None; + for (lib, kind) in m { + info!("{} reading metadata from: {}", flavor, lib.display()); + if flavor == CrateFlavor::Rmeta && lib.metadata().map_or(false, |m| m.len() == 0) { + // Empty files will cause get_metadata_section to fail. Rmeta + // files can be empty, for example with binaries (which can + // often appear with `cargo check` when checking a library as + // a unittest). 
We don't want to emit a user-visible warning + // in this case as it is not a real problem. + debug!("skipping empty file"); + continue; + } + let (hash, metadata) = + match get_metadata_section(self.target, flavor, &lib, self.metadata_loader) { + Ok(blob) => { + if let Some(h) = self.crate_matches(&blob, &lib) { + (h, blob) + } else { + info!("metadata mismatch"); + continue; + } + } + Err(MetadataError::LoadFailure(err)) => { + info!("no metadata found: {}", err); + // The file was present and created by the same compiler version, but we + // couldn't load it for some reason. Give a hard error instead of silently + // ignoring it, but only if we would have given an error anyway. + self.crate_rejections + .via_invalid + .push(CrateMismatch { path: lib, got: err }); + continue; + } + Err(err @ MetadataError::NotPresent(_)) => { + info!("no metadata found: {}", err); + continue; + } + }; + // If we see multiple hashes, emit an error about duplicate candidates. + if slot.as_ref().map_or(false, |s| s.0 != hash) { + if let Some(candidates) = err_data { + return Err(CrateError::MultipleCandidates( + self.crate_name, + flavor, + candidates, + )); + } + err_data = Some(vec![ret.as_ref().unwrap().0.clone()]); + *slot = None; + } + if let Some(candidates) = &mut err_data { + candidates.push(lib); + continue; + } + + // Ok so at this point we've determined that `(lib, kind)` above is + // a candidate crate to load, and that `slot` is either none (this + // is the first crate of its kind) or if some the previous path has + // the exact same hash (e.g., it's the exact same crate). + // + // In principle these two candidate crates are exactly the same so + // we can choose either of them to link. As a stupidly gross hack, + // however, we favor crate in the sysroot. 
+ // + // You can find more info in rust-lang/rust#39518 and various linked + // issues, but the general gist is that during testing libstd the + // compilers has two candidates to choose from: one in the sysroot + // and one in the deps folder. These two crates are the exact same + // crate but if the compiler chooses the one in the deps folder + // it'll cause spurious errors on Windows. + // + // As a result, we favor the sysroot crate here. Note that the + // candidates are all canonicalized, so we canonicalize the sysroot + // as well. + if let Some((prev, _)) = &ret { + let sysroot = self.sysroot; + let sysroot = sysroot.canonicalize().unwrap_or_else(|_| sysroot.to_path_buf()); + if prev.starts_with(&sysroot) { + continue; + } + } + *slot = Some((hash, metadata)); + ret = Some((lib, kind)); + } + + if let Some(candidates) = err_data { + Err(CrateError::MultipleCandidates(self.crate_name, flavor, candidates)) + } else { + Ok(ret) + } + } + + fn crate_matches(&mut self, metadata: &MetadataBlob, libpath: &Path) -> Option<Svh> { + let rustc_version = rustc_version(); + let found_version = metadata.get_rustc_version(); + if found_version != rustc_version { + info!("Rejecting via version: expected {} got {}", rustc_version, found_version); + self.crate_rejections + .via_version + .push(CrateMismatch { path: libpath.to_path_buf(), got: found_version }); + return None; + } + + let root = metadata.get_root(); + if root.is_proc_macro_crate() != self.is_proc_macro { + info!( + "Rejecting via proc macro: expected {} got {}", + self.is_proc_macro, + root.is_proc_macro_crate(), + ); + return None; + } + + if self.exact_paths.is_empty() && self.crate_name != root.name() { + info!("Rejecting via crate name"); + return None; + } + + if root.triple() != &self.triple { + info!("Rejecting via crate triple: expected {} got {}", self.triple, root.triple()); + self.crate_rejections.via_triple.push(CrateMismatch { + path: libpath.to_path_buf(), + got: root.triple().to_string(), + 
}); + return None; + } + + let hash = root.hash(); + if let Some(expected_hash) = self.hash { + if hash != expected_hash { + info!("Rejecting via hash: expected {} got {}", expected_hash, hash); + self.crate_rejections + .via_hash + .push(CrateMismatch { path: libpath.to_path_buf(), got: hash.to_string() }); + return None; + } + } + + Some(hash) + } + + fn find_commandline_library(&mut self) -> Result<Option<Library>, CrateError> { + // First, filter out all libraries that look suspicious. We only accept + // files which actually exist that have the correct naming scheme for + // rlibs/dylibs. + let mut rlibs = FxHashMap::default(); + let mut rmetas = FxHashMap::default(); + let mut dylibs = FxHashMap::default(); + for loc in &self.exact_paths { + if !loc.canonicalized().exists() { + return Err(CrateError::ExternLocationNotExist( + self.crate_name, + loc.original().clone(), + )); + } + let Some(file) = loc.original().file_name().and_then(|s| s.to_str()) else { + return Err(CrateError::ExternLocationNotFile( + self.crate_name, + loc.original().clone(), + )); + }; + + if file.starts_with("lib") && (file.ends_with(".rlib") || file.ends_with(".rmeta")) + || file.starts_with(self.target.dll_prefix.as_ref()) + && file.ends_with(self.target.dll_suffix.as_ref()) + { + // Make sure there's at most one rlib and at most one dylib. + // Note to take care and match against the non-canonicalized name: + // some systems save build artifacts into content-addressed stores + // that do not preserve extensions, and then link to them using + // e.g. symbolic links. If we canonicalize too early, we resolve + // the symlink, the file type is lost and we might treat rlibs and + // rmetas as dylibs. 
+ let loc_canon = loc.canonicalized().clone(); + let loc = loc.original(); + if loc.file_name().unwrap().to_str().unwrap().ends_with(".rlib") { + rlibs.insert(loc_canon, PathKind::ExternFlag); + } else if loc.file_name().unwrap().to_str().unwrap().ends_with(".rmeta") { + rmetas.insert(loc_canon, PathKind::ExternFlag); + } else { + dylibs.insert(loc_canon, PathKind::ExternFlag); + } + } else { + self.crate_rejections + .via_filename + .push(CrateMismatch { path: loc.original().clone(), got: String::new() }); + } + } + + // Extract the dylib/rlib/rmeta triple. + Ok(self.extract_lib(rlibs, rmetas, dylibs)?.map(|(_, lib)| lib)) + } + + pub(crate) fn into_error(self, root: Option<CratePaths>) -> CrateError { + CrateError::LocatorCombined(CombinedLocatorError { + crate_name: self.crate_name, + root, + triple: self.triple, + dll_prefix: self.target.dll_prefix.to_string(), + dll_suffix: self.target.dll_suffix.to_string(), + crate_rejections: self.crate_rejections, + }) + } +} + +fn get_metadata_section<'p>( + target: &Target, + flavor: CrateFlavor, + filename: &'p Path, + loader: &dyn MetadataLoader, +) -> Result<MetadataBlob, MetadataError<'p>> { + if !filename.exists() { + return Err(MetadataError::NotPresent(filename)); + } + let raw_bytes: MetadataRef = match flavor { + CrateFlavor::Rlib => { + loader.get_rlib_metadata(target, filename).map_err(MetadataError::LoadFailure)? 
+ } + CrateFlavor::Dylib => { + let buf = + loader.get_dylib_metadata(target, filename).map_err(MetadataError::LoadFailure)?; + // The header is uncompressed + let header_len = METADATA_HEADER.len(); + debug!("checking {} bytes of metadata-version stamp", header_len); + let header = &buf[..cmp::min(header_len, buf.len())]; + if header != METADATA_HEADER { + return Err(MetadataError::LoadFailure(format!( + "invalid metadata version found: {}", + filename.display() + ))); + } + + // Header is okay -> inflate the actual metadata + let compressed_bytes = &buf[header_len..]; + debug!("inflating {} bytes of compressed metadata", compressed_bytes.len()); + // Assume the decompressed data will be at least the size of the compressed data, so we + // don't have to grow the buffer as much. + let mut inflated = Vec::with_capacity(compressed_bytes.len()); + match FrameDecoder::new(compressed_bytes).read_to_end(&mut inflated) { + Ok(_) => rustc_erase_owner!(OwningRef::new(inflated).map_owner_box()), + Err(_) => { + return Err(MetadataError::LoadFailure(format!( + "failed to decompress metadata: {}", + filename.display() + ))); + } + } + } + CrateFlavor::Rmeta => { + // mmap the file, because only a small fraction of it is read. + let file = std::fs::File::open(filename).map_err(|_| { + MetadataError::LoadFailure(format!( + "failed to open rmeta metadata: '{}'", + filename.display() + )) + })?; + let mmap = unsafe { Mmap::map(file) }; + let mmap = mmap.map_err(|_| { + MetadataError::LoadFailure(format!( + "failed to mmap rmeta metadata: '{}'", + filename.display() + )) + })?; + + rustc_erase_owner!(OwningRef::new(mmap).map_owner_box()) + } + }; + let blob = MetadataBlob::new(raw_bytes); + if blob.is_compatible() { + Ok(blob) + } else { + Err(MetadataError::LoadFailure(format!( + "invalid metadata version found: {}", + filename.display() + ))) + } +} + +/// Look for a plugin registrar. Returns its library path and crate disambiguator. 
+pub fn find_plugin_registrar(
+    sess: &Session,
+    metadata_loader: &dyn MetadataLoader,
+    span: Span,
+    name: Symbol,
+) -> PathBuf {
+    // On failure, report a fatal diagnostic at `span` and abort.
+    find_plugin_registrar_impl(sess, metadata_loader, name).unwrap_or_else(|err| {
+        // `core` is always available if we got as far as loading plugins.
+        err.report(sess, span, false);
+        FatalError.raise()
+    })
+}
+
+/// Locates the plugin crate `name` for the *host* (plugins run inside the
+/// compiler) and returns the path of its dylib; a plugin found only in
+/// rlib form is rejected with `CrateError::NonDylibPlugin`.
+fn find_plugin_registrar_impl<'a>(
+    sess: &'a Session,
+    metadata_loader: &dyn MetadataLoader,
+    name: Symbol,
+) -> Result<PathBuf, CrateError> {
+    info!("find plugin registrar `{}`", name);
+    let mut locator = CrateLocator::new(
+        sess,
+        metadata_loader,
+        name,
+        None, // hash
+        None, // extra_filename
+        true, // is_host
+        PathKind::Crate,
+    );
+
+    match locator.maybe_load_library_crate()? {
+        Some(library) => match library.source.dylib {
+            Some(dylib) => Ok(dylib.0),
+            None => Err(CrateError::NonDylibPlugin(name)),
+        },
+        None => Err(locator.into_error(None)),
+    }
+}
+
+/// A diagnostic function for dumping crate metadata to an output stream.
+pub fn list_file_metadata(
+    target: &Target,
+    path: &Path,
+    metadata_loader: &dyn MetadataLoader,
+    out: &mut dyn Write,
+) -> IoResult<()> {
+    let filename = path.file_name().unwrap().to_str().unwrap();
+    // Guess the flavor from the extension; anything that is neither .rlib
+    // nor .rmeta is assumed to be a dylib.
+    let flavor = if filename.ends_with(".rlib") {
+        CrateFlavor::Rlib
+    } else if filename.ends_with(".rmeta") {
+        CrateFlavor::Rmeta
+    } else {
+        CrateFlavor::Dylib
+    };
+    match get_metadata_section(target, flavor, path, metadata_loader) {
+        Ok(metadata) => metadata.list_crate_metadata(out),
+        Err(msg) => write!(out, "{}\n", msg),
+    }
+}
+
+// ------------------------------------------ Error reporting -------------------------------------
+
+/// A rejected candidate: its path plus what was found instead of what was
+/// expected (`got` — e.g. the mismatching hash, triple, or rustc version).
+#[derive(Clone)]
+struct CrateMismatch {
+    path: PathBuf,
+    got: String,
+}
+
+/// All candidates rejected during one search, grouped by rejection reason;
+/// cleared by `CrateLocator::reset` between searches.
+#[derive(Clone, Default)]
+struct CrateRejections {
+    via_hash: Vec<CrateMismatch>,
+    via_triple: Vec<CrateMismatch>,
+    via_kind: Vec<CrateMismatch>,
+    via_version: Vec<CrateMismatch>,
+    via_filename: Vec<CrateMismatch>,
+    via_invalid: Vec<CrateMismatch>,
+}
+
+/// Candidate rejection reasons collected during crate search.
+/// If no candidate is accepted, then these reasons are presented to the user,
+/// otherwise they are ignored.
+pub(crate) struct CombinedLocatorError {
+    crate_name: Symbol,
+    root: Option<CratePaths>,
+    triple: TargetTriple,
+    dll_prefix: String,
+    dll_suffix: String,
+    crate_rejections: CrateRejections,
+}
+
+/// Every way loading a crate can fail; rendered into diagnostics by
+/// `CrateError::report`.
+pub(crate) enum CrateError {
+    NonAsciiName(Symbol),
+    ExternLocationNotExist(Symbol, PathBuf),
+    ExternLocationNotFile(Symbol, PathBuf),
+    MultipleCandidates(Symbol, CrateFlavor, Vec<PathBuf>),
+    MultipleMatchingCrates(Symbol, FxHashMap<Svh, Library>),
+    SymbolConflictsCurrent(Symbol),
+    SymbolConflictsOthers(Symbol),
+    StableCrateIdCollision(Symbol, Symbol),
+    DlOpen(String),
+    DlSym(String),
+    // The common case: carries everything the locator learned while searching.
+    LocatorCombined(CombinedLocatorError),
+    NonDylibPlugin(Symbol),
+}
+
+/// Why reading the metadata section of a single candidate file failed.
+enum MetadataError<'a> {
+    /// The file was missing.
+    NotPresent(&'a Path),
+    /// The file was present and invalid.
+ LoadFailure(String), +} + +impl fmt::Display for MetadataError<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MetadataError::NotPresent(filename) => { + f.write_str(&format!("no such file: '{}'", filename.display())) + } + MetadataError::LoadFailure(msg) => f.write_str(msg), + } + } +} + +impl CrateError { + pub(crate) fn report(self, sess: &Session, span: Span, missing_core: bool) { + let mut diag = match self { + CrateError::NonAsciiName(crate_name) => sess.struct_span_err( + span, + &format!("cannot load a crate with a non-ascii name `{}`", crate_name), + ), + CrateError::ExternLocationNotExist(crate_name, loc) => sess.struct_span_err( + span, + &format!("extern location for {} does not exist: {}", crate_name, loc.display()), + ), + CrateError::ExternLocationNotFile(crate_name, loc) => sess.struct_span_err( + span, + &format!("extern location for {} is not a file: {}", crate_name, loc.display()), + ), + CrateError::MultipleCandidates(crate_name, flavor, candidates) => { + let mut err = struct_span_err!( + sess, + span, + E0465, + "multiple {} candidates for `{}` found", + flavor, + crate_name, + ); + for (i, candidate) in candidates.iter().enumerate() { + err.span_note(span, &format!("candidate #{}: {}", i + 1, candidate.display())); + } + err + } + CrateError::MultipleMatchingCrates(crate_name, libraries) => { + let mut err = struct_span_err!( + sess, + span, + E0464, + "multiple matching crates for `{}`", + crate_name + ); + let mut libraries: Vec<_> = libraries.into_values().collect(); + // Make ordering of candidates deterministic. + // This has to `clone()` to work around lifetime restrictions with `sort_by_key()`. + // `sort_by()` could be used instead, but this is in the error path, + // so the performance shouldn't matter. 
+ libraries.sort_by_cached_key(|lib| lib.source.paths().next().unwrap().clone()); + let candidates = libraries + .iter() + .map(|lib| { + let crate_name = lib.metadata.get_root().name(); + let crate_name = crate_name.as_str(); + let mut paths = lib.source.paths(); + + // This `unwrap()` should be okay because there has to be at least one + // source file. `CrateSource`'s docs confirm that too. + let mut s = format!( + "\ncrate `{}`: {}", + crate_name, + paths.next().unwrap().display() + ); + let padding = 8 + crate_name.len(); + for path in paths { + write!(s, "\n{:>padding$}", path.display(), padding = padding).unwrap(); + } + s + }) + .collect::<String>(); + err.note(&format!("candidates:{}", candidates)); + err + } + CrateError::SymbolConflictsCurrent(root_name) => struct_span_err!( + sess, + span, + E0519, + "the current crate is indistinguishable from one of its dependencies: it has the \ + same crate-name `{}` and was compiled with the same `-C metadata` arguments. \ + This will result in symbol conflicts between the two.", + root_name, + ), + CrateError::SymbolConflictsOthers(root_name) => struct_span_err!( + sess, + span, + E0523, + "found two different crates with name `{}` that are not distinguished by differing \ + `-C metadata`. 
This will result in symbol conflicts between the two.", + root_name, + ), + CrateError::StableCrateIdCollision(crate_name0, crate_name1) => { + let msg = format!( + "found crates (`{}` and `{}`) with colliding StableCrateId values.", + crate_name0, crate_name1 + ); + sess.struct_span_err(span, &msg) + } + CrateError::DlOpen(s) | CrateError::DlSym(s) => sess.struct_span_err(span, &s), + CrateError::LocatorCombined(locator) => { + let crate_name = locator.crate_name; + let add = match &locator.root { + None => String::new(), + Some(r) => format!(" which `{}` depends on", r.name), + }; + let mut msg = "the following crate versions were found:".to_string(); + let mut err = if !locator.crate_rejections.via_hash.is_empty() { + let mut err = struct_span_err!( + sess, + span, + E0460, + "found possibly newer version of crate `{}`{}", + crate_name, + add, + ); + err.note("perhaps that crate needs to be recompiled?"); + let mismatches = locator.crate_rejections.via_hash.iter(); + for CrateMismatch { path, .. 
} in mismatches { + msg.push_str(&format!("\ncrate `{}`: {}", crate_name, path.display())); + } + if let Some(r) = locator.root { + for path in r.source.paths() { + msg.push_str(&format!("\ncrate `{}`: {}", r.name, path.display())); + } + } + err.note(&msg); + err + } else if !locator.crate_rejections.via_triple.is_empty() { + let mut err = struct_span_err!( + sess, + span, + E0461, + "couldn't find crate `{}` with expected target triple {}{}", + crate_name, + locator.triple, + add, + ); + let mismatches = locator.crate_rejections.via_triple.iter(); + for CrateMismatch { path, got } in mismatches { + msg.push_str(&format!( + "\ncrate `{}`, target triple {}: {}", + crate_name, + got, + path.display(), + )); + } + err.note(&msg); + err + } else if !locator.crate_rejections.via_kind.is_empty() { + let mut err = struct_span_err!( + sess, + span, + E0462, + "found staticlib `{}` instead of rlib or dylib{}", + crate_name, + add, + ); + err.help("please recompile that crate using --crate-type lib"); + let mismatches = locator.crate_rejections.via_kind.iter(); + for CrateMismatch { path, .. 
} in mismatches { + msg.push_str(&format!("\ncrate `{}`: {}", crate_name, path.display())); + } + err.note(&msg); + err + } else if !locator.crate_rejections.via_version.is_empty() { + let mut err = struct_span_err!( + sess, + span, + E0514, + "found crate `{}` compiled by an incompatible version of rustc{}", + crate_name, + add, + ); + err.help(&format!( + "please recompile that crate using this compiler ({}) \ + (consider running `cargo clean` first)", + rustc_version(), + )); + let mismatches = locator.crate_rejections.via_version.iter(); + for CrateMismatch { path, got } in mismatches { + msg.push_str(&format!( + "\ncrate `{}` compiled by {}: {}", + crate_name, + got, + path.display(), + )); + } + err.note(&msg); + err + } else if !locator.crate_rejections.via_invalid.is_empty() { + let mut err = struct_span_err!( + sess, + span, + E0786, + "found invalid metadata files for crate `{}`{}", + crate_name, + add, + ); + for CrateMismatch { path: _, got } in locator.crate_rejections.via_invalid { + err.note(&got); + } + err + } else { + let mut err = struct_span_err!( + sess, + span, + E0463, + "can't find crate for `{}`{}", + crate_name, + add, + ); + + if (crate_name == sym::std || crate_name == sym::core) + && locator.triple != TargetTriple::from_triple(config::host_triple()) + { + if missing_core { + err.note(&format!( + "the `{}` target may not be installed", + locator.triple + )); + } else { + err.note(&format!( + "the `{}` target may not support the standard library", + locator.triple + )); + } + // NOTE: this suggests using rustup, even though the user may not have it installed. + // That's because they could choose to install it; or this may give them a hint which + // target they need to install from their distro. + if missing_core { + err.help(&format!( + "consider downloading the target with `rustup target add {}`", + locator.triple + )); + } + // Suggest using #![no_std]. #[no_core] is unstable and not really supported anyway. 
+ // NOTE: this is a dummy span if `extern crate std` was injected by the compiler. + // If it's not a dummy, that means someone added `extern crate std` explicitly and `#![no_std]` won't help. + if !missing_core && span.is_dummy() { + let current_crate = + sess.opts.crate_name.as_deref().unwrap_or("<unknown>"); + err.note(&format!( + "`std` is required by `{}` because it does not declare `#![no_std]`", + current_crate + )); + } + if sess.is_nightly_build() { + err.help("consider building the standard library from source with `cargo build -Zbuild-std`"); + } + } else if crate_name + == Symbol::intern(&sess.opts.unstable_opts.profiler_runtime) + { + err.note("the compiler may have been built without the profiler runtime"); + } else if crate_name.as_str().starts_with("rustc_") { + err.help( + "maybe you need to install the missing components with: \ + `rustup component add rust-src rustc-dev llvm-tools-preview`", + ); + } + err.span_label(span, "can't find crate"); + err + }; + + if !locator.crate_rejections.via_filename.is_empty() { + let mismatches = locator.crate_rejections.via_filename.iter(); + for CrateMismatch { path, .. 
} in mismatches { + err.note(&format!( + "extern location for {} is of an unknown type: {}", + crate_name, + path.display(), + )) + .help(&format!( + "file name should be lib*.rlib or {}*.{}", + locator.dll_prefix, locator.dll_suffix + )); + } + } + err + } + CrateError::NonDylibPlugin(crate_name) => struct_span_err!( + sess, + span, + E0457, + "plugin `{}` only found in rlib format, but must be available in dylib format", + crate_name, + ), + }; + + diag.emit(); + } +} diff --git a/compiler/rustc_metadata/src/native_libs.rs b/compiler/rustc_metadata/src/native_libs.rs new file mode 100644 index 000000000..9f6079ecb --- /dev/null +++ b/compiler/rustc_metadata/src/native_libs.rs @@ -0,0 +1,504 @@ +use rustc_ast::{NestedMetaItem, CRATE_NODE_ID}; +use rustc_attr as attr; +use rustc_data_structures::fx::FxHashSet; +use rustc_errors::struct_span_err; +use rustc_hir as hir; +use rustc_hir::def::DefKind; +use rustc_middle::ty::{List, ParamEnv, ParamEnvAnd, Ty, TyCtxt}; +use rustc_session::cstore::{DllCallingConvention, DllImport, NativeLib}; +use rustc_session::parse::feature_err; +use rustc_session::utils::NativeLibKind; +use rustc_session::Session; +use rustc_span::symbol::{sym, Symbol}; +use rustc_target::spec::abi::Abi; + +pub(crate) fn collect(tcx: TyCtxt<'_>) -> Vec<NativeLib> { + let mut collector = Collector { tcx, libs: Vec::new() }; + for id in tcx.hir().items() { + collector.process_item(id); + } + collector.process_command_line(); + collector.libs +} + +pub(crate) fn relevant_lib(sess: &Session, lib: &NativeLib) -> bool { + match lib.cfg { + Some(ref cfg) => attr::cfg_matches(cfg, &sess.parse_sess, CRATE_NODE_ID, None), + None => true, + } +} + +struct Collector<'tcx> { + tcx: TyCtxt<'tcx>, + libs: Vec<NativeLib>, +} + +impl<'tcx> Collector<'tcx> { + fn process_item(&mut self, id: rustc_hir::ItemId) { + if !matches!(self.tcx.def_kind(id.def_id), DefKind::ForeignMod) { + return; + } + + let it = self.tcx.hir().item(id); + let hir::ItemKind::ForeignMod { abi, 
items: foreign_mod_items } = it.kind else { + return; + }; + + if abi == Abi::Rust || abi == Abi::RustIntrinsic || abi == Abi::PlatformIntrinsic { + return; + } + + // Process all of the #[link(..)]-style arguments + let sess = &self.tcx.sess; + let features = self.tcx.features(); + for m in self.tcx.hir().attrs(it.hir_id()).iter().filter(|a| a.has_name(sym::link)) { + let Some(items) = m.meta_item_list() else { + continue; + }; + + let mut name = None; + let mut kind = None; + let mut modifiers = None; + let mut cfg = None; + let mut wasm_import_module = None; + for item in items.iter() { + match item.name_or_empty() { + sym::name => { + if name.is_some() { + let msg = "multiple `name` arguments in a single `#[link]` attribute"; + sess.span_err(item.span(), msg); + continue; + } + let Some(link_name) = item.value_str() else { + let msg = "link name must be of the form `name = \"string\"`"; + sess.span_err(item.span(), msg); + continue; + }; + let span = item.name_value_literal_span().unwrap(); + if link_name.is_empty() { + struct_span_err!(sess, span, E0454, "link name must not be empty") + .span_label(span, "empty link name") + .emit(); + } + name = Some((link_name, span)); + } + sym::kind => { + if kind.is_some() { + let msg = "multiple `kind` arguments in a single `#[link]` attribute"; + sess.span_err(item.span(), msg); + continue; + } + let Some(link_kind) = item.value_str() else { + let msg = "link kind must be of the form `kind = \"string\"`"; + sess.span_err(item.span(), msg); + continue; + }; + + let span = item.name_value_literal_span().unwrap(); + let link_kind = match link_kind.as_str() { + "static" => NativeLibKind::Static { bundle: None, whole_archive: None }, + "dylib" => NativeLibKind::Dylib { as_needed: None }, + "framework" => { + if !sess.target.is_like_osx { + struct_span_err!( + sess, + span, + E0455, + "link kind `framework` is only supported on Apple targets" + ) + .emit(); + } + NativeLibKind::Framework { as_needed: None } + } + "raw-dylib" 
=> { + if !sess.target.is_like_windows { + struct_span_err!( + sess, + span, + E0455, + "link kind `raw-dylib` is only supported on Windows targets" + ) + .emit(); + } else if !features.raw_dylib { + feature_err( + &sess.parse_sess, + sym::raw_dylib, + span, + "link kind `raw-dylib` is unstable", + ) + .emit(); + } + NativeLibKind::RawDylib + } + kind => { + let msg = format!( + "unknown link kind `{kind}`, expected one of: \ + static, dylib, framework, raw-dylib" + ); + struct_span_err!(sess, span, E0458, "{}", msg) + .span_label(span, "unknown link kind") + .emit(); + continue; + } + }; + kind = Some(link_kind); + } + sym::modifiers => { + if modifiers.is_some() { + let msg = + "multiple `modifiers` arguments in a single `#[link]` attribute"; + sess.span_err(item.span(), msg); + continue; + } + let Some(link_modifiers) = item.value_str() else { + let msg = "link modifiers must be of the form `modifiers = \"string\"`"; + sess.span_err(item.span(), msg); + continue; + }; + modifiers = Some((link_modifiers, item.name_value_literal_span().unwrap())); + } + sym::cfg => { + if cfg.is_some() { + let msg = "multiple `cfg` arguments in a single `#[link]` attribute"; + sess.span_err(item.span(), msg); + continue; + } + let Some(link_cfg) = item.meta_item_list() else { + let msg = "link cfg must be of the form `cfg(/* predicate */)`"; + sess.span_err(item.span(), msg); + continue; + }; + let [NestedMetaItem::MetaItem(link_cfg)] = link_cfg else { + let msg = "link cfg must have a single predicate argument"; + sess.span_err(item.span(), msg); + continue; + }; + if !features.link_cfg { + feature_err( + &sess.parse_sess, + sym::link_cfg, + item.span(), + "link cfg is unstable", + ) + .emit(); + } + cfg = Some(link_cfg.clone()); + } + sym::wasm_import_module => { + if wasm_import_module.is_some() { + let msg = "multiple `wasm_import_module` arguments \ + in a single `#[link]` attribute"; + sess.span_err(item.span(), msg); + continue; + } + let Some(link_wasm_import_module) = 
item.value_str() else { + let msg = "wasm import module must be of the form \ + `wasm_import_module = \"string\"`"; + sess.span_err(item.span(), msg); + continue; + }; + wasm_import_module = Some((link_wasm_import_module, item.span())); + } + _ => { + let msg = "unexpected `#[link]` argument, expected one of: \ + name, kind, modifiers, cfg, wasm_import_module"; + sess.span_err(item.span(), msg); + } + } + } + + // Do this outside the above loop so we don't depend on modifiers coming after kinds + let mut verbatim = None; + if let Some((modifiers, span)) = modifiers { + for modifier in modifiers.as_str().split(',') { + let (modifier, value) = match modifier.strip_prefix(&['+', '-']) { + Some(m) => (m, modifier.starts_with('+')), + None => { + sess.span_err( + span, + "invalid linking modifier syntax, expected '+' or '-' prefix \ + before one of: bundle, verbatim, whole-archive, as-needed", + ); + continue; + } + }; + + macro report_unstable_modifier($feature: ident) { + if !features.$feature { + feature_err( + &sess.parse_sess, + sym::$feature, + span, + &format!("linking modifier `{modifier}` is unstable"), + ) + .emit(); + } + } + let assign_modifier = |dst: &mut Option<bool>| { + if dst.is_some() { + let msg = format!( + "multiple `{modifier}` modifiers in a single `modifiers` argument" + ); + sess.span_err(span, &msg); + } else { + *dst = Some(value); + } + }; + match (modifier, &mut kind) { + ("bundle", Some(NativeLibKind::Static { bundle, .. })) => { + assign_modifier(bundle) + } + ("bundle", _) => { + sess.span_err( + span, + "linking modifier `bundle` is only compatible with \ + `static` linking kind", + ); + } + + ("verbatim", _) => { + report_unstable_modifier!(native_link_modifiers_verbatim); + assign_modifier(&mut verbatim) + } + + ("whole-archive", Some(NativeLibKind::Static { whole_archive, .. 
})) => { + assign_modifier(whole_archive) + } + ("whole-archive", _) => { + sess.span_err( + span, + "linking modifier `whole-archive` is only compatible with \ + `static` linking kind", + ); + } + + ("as-needed", Some(NativeLibKind::Dylib { as_needed })) + | ("as-needed", Some(NativeLibKind::Framework { as_needed })) => { + report_unstable_modifier!(native_link_modifiers_as_needed); + assign_modifier(as_needed) + } + ("as-needed", _) => { + sess.span_err( + span, + "linking modifier `as-needed` is only compatible with \ + `dylib` and `framework` linking kinds", + ); + } + + _ => { + sess.span_err( + span, + format!( + "unknown linking modifier `{modifier}`, expected one of: \ + bundle, verbatim, whole-archive, as-needed" + ), + ); + } + } + } + } + + if let Some((_, span)) = wasm_import_module { + if name.is_some() || kind.is_some() || modifiers.is_some() || cfg.is_some() { + let msg = "`wasm_import_module` is incompatible with \ + other arguments in `#[link]` attributes"; + sess.span_err(span, msg); + } + } else if name.is_none() { + struct_span_err!( + sess, + m.span, + E0459, + "`#[link]` attribute requires a `name = \"string\"` argument" + ) + .span_label(m.span, "missing `name` argument") + .emit(); + } + + let dll_imports = match kind { + Some(NativeLibKind::RawDylib) => { + if let Some((name, span)) = name && name.as_str().contains('\0') { + sess.span_err( + span, + "link name must not contain NUL characters if link kind is `raw-dylib`", + ); + } + foreign_mod_items + .iter() + .map(|child_item| self.build_dll_import(abi, child_item)) + .collect() + } + _ => Vec::new(), + }; + self.libs.push(NativeLib { + name: name.map(|(name, _)| name), + kind: kind.unwrap_or(NativeLibKind::Unspecified), + cfg, + foreign_module: Some(it.def_id.to_def_id()), + wasm_import_module: wasm_import_module.map(|(name, _)| name), + verbatim, + dll_imports, + }); + } + } + + // Process libs passed on the command line + fn process_command_line(&mut self) { + // First, check for 
errors + let mut renames = FxHashSet::default(); + for lib in &self.tcx.sess.opts.libs { + if let NativeLibKind::Framework { .. } = lib.kind && !self.tcx.sess.target.is_like_osx { + // Cannot check this when parsing options because the target is not yet available. + self.tcx.sess.err("library kind `framework` is only supported on Apple targets"); + } + if let Some(ref new_name) = lib.new_name { + let any_duplicate = self + .libs + .iter() + .filter_map(|lib| lib.name.as_ref()) + .any(|n| n.as_str() == lib.name); + if new_name.is_empty() { + self.tcx.sess.err(format!( + "an empty renaming target was specified for library `{}`", + lib.name + )); + } else if !any_duplicate { + self.tcx.sess.err(format!( + "renaming of the library `{}` was specified, \ + however this crate contains no `#[link(...)]` \ + attributes referencing this library", + lib.name + )); + } else if !renames.insert(&lib.name) { + self.tcx.sess.err(format!( + "multiple renamings were \ + specified for library `{}`", + lib.name + )); + } + } + } + + // Update kind and, optionally, the name of all native libraries + // (there may be more than one) with the specified name. If any + // library is mentioned more than once, keep the latest mention + // of it, so that any possible dependent libraries appear before + // it. (This ensures that the linker is able to see symbols from + // all possible dependent libraries before linking in the library + // in question.) + for passed_lib in &self.tcx.sess.opts.libs { + // If we've already added any native libraries with the same + // name, they will be pulled out into `existing`, so that we + // can move them to the end of the list below. + let mut existing = self + .libs + .drain_filter(|lib| { + if let Some(lib_name) = lib.name { + if lib_name.as_str() == passed_lib.name { + // FIXME: This whole logic is questionable, whether modifiers are + // involved or not, library reordering and kind overriding without + // explicit `:rename` in particular. 
+ if lib.has_modifiers() || passed_lib.has_modifiers() { + let msg = "overriding linking modifiers from command line is not supported"; + match lib.foreign_module { + Some(def_id) => self.tcx.sess.span_err(self.tcx.def_span(def_id), msg), + None => self.tcx.sess.err(msg), + }; + } + if passed_lib.kind != NativeLibKind::Unspecified { + lib.kind = passed_lib.kind; + } + if let Some(new_name) = &passed_lib.new_name { + lib.name = Some(Symbol::intern(new_name)); + } + lib.verbatim = passed_lib.verbatim; + return true; + } + } + false + }) + .collect::<Vec<_>>(); + if existing.is_empty() { + // Add if not found + let new_name: Option<&str> = passed_lib.new_name.as_deref(); + self.libs.push(NativeLib { + name: Some(Symbol::intern(new_name.unwrap_or(&passed_lib.name))), + kind: passed_lib.kind, + cfg: None, + foreign_module: None, + wasm_import_module: None, + verbatim: passed_lib.verbatim, + dll_imports: Vec::new(), + }); + } else { + // Move all existing libraries with the same name to the + // end of the command line. + self.libs.append(&mut existing); + } + } + } + + fn i686_arg_list_size(&self, item: &hir::ForeignItemRef) -> usize { + let argument_types: &List<Ty<'_>> = self.tcx.erase_late_bound_regions( + self.tcx + .type_of(item.id.def_id) + .fn_sig(self.tcx) + .inputs() + .map_bound(|slice| self.tcx.mk_type_list(slice.iter())), + ); + + argument_types + .iter() + .map(|ty| { + let layout = self + .tcx + .layout_of(ParamEnvAnd { param_env: ParamEnv::empty(), value: ty }) + .expect("layout") + .layout; + // In both stdcall and fastcall, we always round up the argument size to the + // nearest multiple of 4 bytes. + (layout.size().bytes_usize() + 3) & !3 + }) + .sum() + } + + fn build_dll_import(&self, abi: Abi, item: &hir::ForeignItemRef) -> DllImport { + let calling_convention = if self.tcx.sess.target.arch == "x86" { + match abi { + Abi::C { .. } | Abi::Cdecl { .. } => DllCallingConvention::C, + Abi::Stdcall { .. } | Abi::System { .. 
} => { + DllCallingConvention::Stdcall(self.i686_arg_list_size(item)) + } + Abi::Fastcall { .. } => { + DllCallingConvention::Fastcall(self.i686_arg_list_size(item)) + } + Abi::Vectorcall { .. } => { + DllCallingConvention::Vectorcall(self.i686_arg_list_size(item)) + } + _ => { + self.tcx.sess.span_fatal( + item.span, + r#"ABI not supported by `#[link(kind = "raw-dylib")]` on i686"#, + ); + } + } + } else { + match abi { + Abi::C { .. } | Abi::Win64 { .. } | Abi::System { .. } => DllCallingConvention::C, + _ => { + self.tcx.sess.span_fatal( + item.span, + r#"ABI not supported by `#[link(kind = "raw-dylib")]` on this architecture"#, + ); + } + } + }; + + DllImport { + name: item.ident.name, + ordinal: self.tcx.codegen_fn_attrs(item.id.def_id).link_ordinal, + calling_convention, + span: item.span, + } + } +} diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs new file mode 100644 index 000000000..40dc4fb05 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -0,0 +1,1820 @@ +// Decoding metadata from a single crate's metadata + +use crate::creader::{CStore, CrateMetadataRef}; +use crate::rmeta::*; + +use rustc_ast as ast; +use rustc_ast::ptr::P; +use rustc_data_structures::captures::Captures; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::svh::Svh; +use rustc_data_structures::sync::{Lock, LockGuard, Lrc, OnceCell}; +use rustc_data_structures::unhash::UnhashMap; +use rustc_expand::base::{SyntaxExtension, SyntaxExtensionKind}; +use rustc_expand::proc_macro::{AttrProcMacro, BangProcMacro, DeriveProcMacro}; +use rustc_hir::def::{CtorKind, CtorOf, DefKind, Res}; +use rustc_hir::def_id::{CrateNum, DefId, DefIndex, CRATE_DEF_INDEX, LOCAL_CRATE}; +use rustc_hir::definitions::{DefKey, DefPath, DefPathData, DefPathHash}; +use rustc_hir::diagnostic_items::DiagnosticItems; +use rustc_hir::lang_items; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::metadata::ModChild; +use 
rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo}; +use rustc_middle::mir::interpret::{AllocDecodingSession, AllocDecodingState}; +use rustc_middle::ty::codec::TyDecoder; +use rustc_middle::ty::fast_reject::SimplifiedType; +use rustc_middle::ty::GeneratorDiagnosticData; +use rustc_middle::ty::{self, ParameterizedOverTcx, Ty, TyCtxt, Visibility}; +use rustc_serialize::opaque::MemDecoder; +use rustc_serialize::{Decodable, Decoder}; +use rustc_session::cstore::{ + CrateSource, ExternCrate, ForeignModule, LinkagePreference, NativeLib, +}; +use rustc_session::Session; +use rustc_span::hygiene::{ExpnIndex, MacroKind}; +use rustc_span::source_map::{respan, Spanned}; +use rustc_span::symbol::{sym, Ident, Symbol}; +use rustc_span::{self, BytePos, ExpnId, Pos, Span, SyntaxContext, DUMMY_SP}; + +use proc_macro::bridge::client::ProcMacro; +use std::io; +use std::iter::TrustedLen; +use std::mem; +use std::num::NonZeroUsize; +use std::path::Path; +use tracing::debug; + +pub(super) use cstore_impl::provide; +pub use cstore_impl::provide_extern; +use rustc_span::hygiene::HygieneDecodeContext; + +mod cstore_impl; + +/// A reference to the raw binary version of crate metadata. +/// A `MetadataBlob` internally is just a reference counted pointer to +/// the actual data, so cloning it is cheap. +#[derive(Clone)] +pub(crate) struct MetadataBlob(Lrc<MetadataRef>); + +// This is needed so we can create an OwningRef into the blob. +// The data behind a `MetadataBlob` has a stable address because it is +// contained within an Rc/Arc. +unsafe impl rustc_data_structures::owning_ref::StableAddress for MetadataBlob {} + +// This is needed so we can create an OwningRef into the blob. +impl std::ops::Deref for MetadataBlob { + type Target = [u8]; + + #[inline] + fn deref(&self) -> &[u8] { + &self.0[..] + } +} + +// A map from external crate numbers (as decoded from some crate file) to +// local crate numbers (as generated during this session). 
Each external +// crate may refer to types in other external crates, and each has their +// own crate numbers. +pub(crate) type CrateNumMap = IndexVec<CrateNum, CrateNum>; + +pub(crate) struct CrateMetadata { + /// The primary crate data - binary metadata blob. + blob: MetadataBlob, + + // --- Some data pre-decoded from the metadata blob, usually for performance --- + /// NOTE(eddyb) we pass `'static` to a `'tcx` parameter because this + /// lifetime is only used behind `LazyValue`, `LazyArray`, or `LazyTable`, and therefore acts like a + /// universal (`for<'tcx>`), that is paired up with whichever `TyCtxt` + /// is being used to decode those values. + root: CrateRoot, + /// Trait impl data. + /// FIXME: Used only from queries and can use query cache, + /// so pre-decoding can probably be avoided. + trait_impls: FxHashMap<(u32, DefIndex), LazyArray<(DefIndex, Option<SimplifiedType>)>>, + /// Inherent impls which do not follow the normal coherence rules. + /// + /// These can be introduced using either `#![rustc_coherence_is_core]` + /// or `#[rustc_allow_incoherent_impl]`. + incoherent_impls: FxHashMap<SimplifiedType, LazyArray<DefIndex>>, + /// Proc macro descriptions for this crate, if it's a proc macro crate. + raw_proc_macros: Option<&'static [ProcMacro]>, + /// Source maps for code from the crate. + source_map_import_info: OnceCell<Vec<ImportedSourceFile>>, + /// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`. + def_path_hash_map: DefPathHashMapRef<'static>, + /// Likewise for ExpnHash. + expn_hash_map: OnceCell<UnhashMap<ExpnHash, ExpnIndex>>, + /// Used for decoding interpret::AllocIds in a cached & thread-safe manner. + alloc_decoding_state: AllocDecodingState, + /// Caches decoded `DefKey`s. + def_key_cache: Lock<FxHashMap<DefIndex, DefKey>>, + /// Caches decoded `DefPathHash`es. 
+ def_path_hash_cache: Lock<FxHashMap<DefIndex, DefPathHash>>, + + // --- Other significant crate properties --- + /// ID of this crate, from the current compilation session's point of view. + cnum: CrateNum, + /// Maps crate IDs as they are were seen from this crate's compilation sessions into + /// IDs as they are seen from the current compilation session. + cnum_map: CrateNumMap, + /// Same ID set as `cnum_map` plus maybe some injected crates like panic runtime. + dependencies: Lock<Vec<CrateNum>>, + /// How to link (or not link) this crate to the currently compiled crate. + dep_kind: Lock<CrateDepKind>, + /// Filesystem location of this crate. + source: Lrc<CrateSource>, + /// Whether or not this crate should be consider a private dependency + /// for purposes of the 'exported_private_dependencies' lint + private_dep: bool, + /// The hash for the host proc macro. Used to support `-Z dual-proc-macro`. + host_hash: Option<Svh>, + + /// Additional data used for decoding `HygieneData` (e.g. `SyntaxContext` + /// and `ExpnId`). + /// Note that we store a `HygieneDecodeContext` for each `CrateMetadat`. This is + /// because `SyntaxContext` ids are not globally unique, so we need + /// to track which ids we've decoded on a per-crate basis. + hygiene_context: HygieneDecodeContext, + + // --- Data used only for improving diagnostics --- + /// Information about the `extern crate` item or path that caused this crate to be loaded. + /// If this is `None`, then the crate was injected (e.g., by the allocator). + extern_crate: Lock<Option<ExternCrate>>, +} + +/// Holds information about a rustc_span::SourceFile imported from another crate. +/// See `imported_source_files()` for more information. 
+struct ImportedSourceFile { + /// This SourceFile's byte-offset within the source_map of its original crate + original_start_pos: rustc_span::BytePos, + /// The end of this SourceFile within the source_map of its original crate + original_end_pos: rustc_span::BytePos, + /// The imported SourceFile's representation within the local source_map + translated_source_file: Lrc<rustc_span::SourceFile>, +} + +pub(super) struct DecodeContext<'a, 'tcx> { + opaque: MemDecoder<'a>, + cdata: Option<CrateMetadataRef<'a>>, + blob: &'a MetadataBlob, + sess: Option<&'tcx Session>, + tcx: Option<TyCtxt<'tcx>>, + + // Cache the last used source_file for translating spans as an optimization. + last_source_file_index: usize, + + lazy_state: LazyState, + + // Used for decoding interpret::AllocIds in a cached & thread-safe manner. + alloc_decoding_session: Option<AllocDecodingSession<'a>>, +} + +/// Abstract over the various ways one can create metadata decoders. +pub(super) trait Metadata<'a, 'tcx>: Copy { + fn blob(self) -> &'a MetadataBlob; + + fn cdata(self) -> Option<CrateMetadataRef<'a>> { + None + } + fn sess(self) -> Option<&'tcx Session> { + None + } + fn tcx(self) -> Option<TyCtxt<'tcx>> { + None + } + + fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> { + let tcx = self.tcx(); + DecodeContext { + opaque: MemDecoder::new(self.blob(), pos), + cdata: self.cdata(), + blob: self.blob(), + sess: self.sess().or(tcx.map(|tcx| tcx.sess)), + tcx, + last_source_file_index: 0, + lazy_state: LazyState::NoNode, + alloc_decoding_session: self + .cdata() + .map(|cdata| cdata.cdata.alloc_decoding_state.new_decoding_session()), + } + } +} + +impl<'a, 'tcx> Metadata<'a, 'tcx> for &'a MetadataBlob { + #[inline] + fn blob(self) -> &'a MetadataBlob { + self + } +} + +impl<'a, 'tcx> Metadata<'a, 'tcx> for (&'a MetadataBlob, &'tcx Session) { + #[inline] + fn blob(self) -> &'a MetadataBlob { + self.0 + } + + #[inline] + fn sess(self) -> Option<&'tcx Session> { + let (_, sess) = self; + 
Some(sess) + } +} + +impl<'a, 'tcx> Metadata<'a, 'tcx> for CrateMetadataRef<'a> { + #[inline] + fn blob(self) -> &'a MetadataBlob { + &self.cdata.blob + } + #[inline] + fn cdata(self) -> Option<CrateMetadataRef<'a>> { + Some(self) + } +} + +impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, &'tcx Session) { + #[inline] + fn blob(self) -> &'a MetadataBlob { + &self.0.cdata.blob + } + #[inline] + fn cdata(self) -> Option<CrateMetadataRef<'a>> { + Some(self.0) + } + #[inline] + fn sess(self) -> Option<&'tcx Session> { + Some(self.1) + } +} + +impl<'a, 'tcx> Metadata<'a, 'tcx> for (CrateMetadataRef<'a>, TyCtxt<'tcx>) { + #[inline] + fn blob(self) -> &'a MetadataBlob { + &self.0.cdata.blob + } + #[inline] + fn cdata(self) -> Option<CrateMetadataRef<'a>> { + Some(self.0) + } + #[inline] + fn tcx(self) -> Option<TyCtxt<'tcx>> { + Some(self.1) + } +} + +impl<T: ParameterizedOverTcx> LazyValue<T> { + fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>(self, metadata: M) -> T::Value<'tcx> + where + T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>, + { + let mut dcx = metadata.decoder(self.position.get()); + dcx.lazy_state = LazyState::NodeStart(self.position); + T::Value::decode(&mut dcx) + } +} + +struct DecodeIterator<'a, 'tcx, T> { + elem_counter: std::ops::Range<usize>, + dcx: DecodeContext<'a, 'tcx>, + _phantom: PhantomData<fn() -> T>, +} + +impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> Iterator for DecodeIterator<'a, 'tcx, T> { + type Item = T; + + #[inline(always)] + fn next(&mut self) -> Option<Self::Item> { + self.elem_counter.next().map(|_| T::decode(&mut self.dcx)) + } + + #[inline(always)] + fn size_hint(&self) -> (usize, Option<usize>) { + self.elem_counter.size_hint() + } +} + +impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> ExactSizeIterator + for DecodeIterator<'a, 'tcx, T> +{ + fn len(&self) -> usize { + self.elem_counter.len() + } +} + +unsafe impl<'a, 'tcx, T: Decodable<DecodeContext<'a, 'tcx>>> TrustedLen + for DecodeIterator<'a, 'tcx, T> 
+{ +} + +impl<T: ParameterizedOverTcx> LazyArray<T> { + fn decode<'a, 'tcx, M: Metadata<'a, 'tcx>>( + self, + metadata: M, + ) -> DecodeIterator<'a, 'tcx, T::Value<'tcx>> + where + T::Value<'tcx>: Decodable<DecodeContext<'a, 'tcx>>, + { + let mut dcx = metadata.decoder(self.position.get()); + dcx.lazy_state = LazyState::NodeStart(self.position); + DecodeIterator { elem_counter: (0..self.num_elems), dcx, _phantom: PhantomData } + } +} + +impl<'a, 'tcx> DecodeContext<'a, 'tcx> { + #[inline] + fn tcx(&self) -> TyCtxt<'tcx> { + debug_assert!(self.tcx.is_some(), "missing TyCtxt in DecodeContext"); + self.tcx.unwrap() + } + + #[inline] + pub fn blob(&self) -> &'a MetadataBlob { + self.blob + } + + #[inline] + pub fn cdata(&self) -> CrateMetadataRef<'a> { + debug_assert!(self.cdata.is_some(), "missing CrateMetadata in DecodeContext"); + self.cdata.unwrap() + } + + #[inline] + fn map_encoded_cnum_to_current(&self, cnum: CrateNum) -> CrateNum { + self.cdata().map_encoded_cnum_to_current(cnum) + } + + #[inline] + fn read_lazy_offset_then<T>(&mut self, f: impl Fn(NonZeroUsize) -> T) -> T { + let distance = self.read_usize(); + let position = match self.lazy_state { + LazyState::NoNode => bug!("read_lazy_with_meta: outside of a metadata node"), + LazyState::NodeStart(start) => { + let start = start.get(); + assert!(distance <= start); + start - distance + } + LazyState::Previous(last_pos) => last_pos.get() + distance, + }; + let position = NonZeroUsize::new(position).unwrap(); + self.lazy_state = LazyState::Previous(position); + f(position) + } + + fn read_lazy<T>(&mut self) -> LazyValue<T> { + self.read_lazy_offset_then(|pos| LazyValue::from_position(pos)) + } + + fn read_lazy_array<T>(&mut self, len: usize) -> LazyArray<T> { + self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len)) + } + + fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> { + self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len)) + } 
+ + #[inline] + pub fn read_raw_bytes(&mut self, len: usize) -> &[u8] { + self.opaque.read_raw_bytes(len) + } +} + +impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> { + const CLEAR_CROSS_CRATE: bool = true; + + type I = TyCtxt<'tcx>; + + #[inline] + fn interner(&self) -> Self::I { + self.tcx() + } + + #[inline] + fn peek_byte(&self) -> u8 { + self.opaque.data[self.opaque.position()] + } + + #[inline] + fn position(&self) -> usize { + self.opaque.position() + } + + fn cached_ty_for_shorthand<F>(&mut self, shorthand: usize, or_insert_with: F) -> Ty<'tcx> + where + F: FnOnce(&mut Self) -> Ty<'tcx>, + { + let tcx = self.tcx(); + + let key = ty::CReaderCacheKey { cnum: Some(self.cdata().cnum), pos: shorthand }; + + if let Some(&ty) = tcx.ty_rcache.borrow().get(&key) { + return ty; + } + + let ty = or_insert_with(self); + tcx.ty_rcache.borrow_mut().insert(key, ty); + ty + } + + fn with_position<F, R>(&mut self, pos: usize, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + let new_opaque = MemDecoder::new(self.opaque.data, pos); + let old_opaque = mem::replace(&mut self.opaque, new_opaque); + let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode); + let r = f(self); + self.opaque = old_opaque; + self.lazy_state = old_state; + r + } + + fn decode_alloc_id(&mut self) -> rustc_middle::mir::interpret::AllocId { + if let Some(alloc_decoding_session) = self.alloc_decoding_session { + alloc_decoding_session.decode_alloc_id(self) + } else { + bug!("Attempting to decode interpret::AllocId without CrateMetadata") + } + } +} + +impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for CrateNum { + fn decode(d: &mut DecodeContext<'a, 'tcx>) -> CrateNum { + let cnum = CrateNum::from_u32(d.read_u32()); + d.map_encoded_cnum_to_current(cnum) + } +} + +impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefIndex { + fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefIndex { + DefIndex::from_u32(d.read_u32()) + } +} + +impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> 
for ExpnIndex {
    // NOTE(review): tail of `impl Decodable<DecodeContext<'_, '_>> for ExpnIndex`;
    // the impl header is above this excerpt. Raw u32 round-trip of the index.
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> ExpnIndex {
        ExpnIndex::from_u32(d.read_u32())
    }
}

/// Decodes a `SyntaxContext` by delegating to `rustc_span`'s hygiene machinery.
/// The closure we pass is invoked (by `decode_syntax_context`) with a raw
/// context id and must produce the serialized `SyntaxContextData` for it,
/// which we look up in this crate's `syntax_contexts` table. A missing entry
/// is an encoder/decoder mismatch and therefore a hard panic, not an error.
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SyntaxContext {
    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SyntaxContext {
        let cdata = decoder.cdata();
        // Hygiene decoding requires a `Session`; `sess` is `None` only in
        // contexts (e.g. early blob inspection) where spans never appear.
        let sess = decoder.sess.unwrap();
        // Copied out up front so the panic message below doesn't borrow `cdata`.
        let cname = cdata.root.name;
        rustc_span::hygiene::decode_syntax_context(decoder, &cdata.hygiene_context, |_, id| {
            debug!("SpecializedDecoder<SyntaxContext>: decoding {}", id);
            cdata
                .root
                .syntax_contexts
                .get(cdata, id)
                .unwrap_or_else(|| panic!("Missing SyntaxContext {:?} for crate {:?}", id, cname))
                .decode((cdata, sess))
        })
    }
}

/// Decodes an `ExpnId`, which is serialized as a `(CrateNum, u32 index)` pair.
/// The heavy lifting (deduplication/registration) happens in
/// `rustc_span::hygiene::decode_expn_id`; our closure is only called when the
/// expansion data actually needs to be loaded from crate metadata.
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> ExpnId {
        let local_cdata = decoder.cdata();
        let sess = decoder.sess.unwrap();

        let cnum = CrateNum::decode(decoder);
        let index = u32::decode(decoder);

        let expn_id = rustc_span::hygiene::decode_expn_id(cnum, index, |expn_id| {
            let ExpnId { krate: cnum, local_id: index } = expn_id;
            // Lookup local `ExpnData`s in our own crate data. Foreign `ExpnData`s
            // are stored in the owning crate, to avoid duplication.
            debug_assert_ne!(cnum, LOCAL_CRATE);
            // Pick the crate data that owns this expansion: ourselves if the
            // decoded `cnum` refers to the crate we're reading, otherwise the
            // already-loaded metadata of that other crate.
            let crate_data = if cnum == local_cdata.cnum {
                local_cdata
            } else {
                local_cdata.cstore.get_crate_data(cnum)
            };
            // Both tables are indexed by the same `ExpnIndex`; a missing entry
            // here would mean the encoder wrote an id without its data.
            let expn_data = crate_data
                .root
                .expn_data
                .get(crate_data, index)
                .unwrap()
                .decode((crate_data, sess));
            let expn_hash = crate_data
                .root
                .expn_hashes
                .get(crate_data, index)
                .unwrap()
                .decode((crate_data, sess));
            (expn_data, expn_hash)
        });
        expn_id
    }
}

/// Decodes a `Span`. Wire format (written by the encoder): a `SyntaxContext`,
/// a one-byte tag, and — for valid spans — `lo`, `len`, and for foreign spans
/// additionally the owning `CrateNum`. Partial spans decode to `DUMMY_SP`
/// with the decoded context.
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
        let ctxt = SyntaxContext::decode(decoder);
        let tag = u8::decode(decoder);

        if tag == TAG_PARTIAL_SPAN {
            return DUMMY_SP.with_ctxt(ctxt);
        }

        debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);

        // `len` rather than `hi` is what the encoder wrote; reconstruct `hi`.
        let lo = BytePos::decode(decoder);
        let len = BytePos::decode(decoder);
        let hi = lo + len;

        let Some(sess) = decoder.sess else {
            bug!("Cannot decode Span without Session.")
        };

        // There are two possibilities here:
        // 1. This is a 'local span', which is located inside a `SourceFile`
        // that came from this crate. In this case, we use the source map data
        // encoded in this crate. This branch should be taken nearly all of the time.
        // 2. This is a 'foreign span', which is located inside a `SourceFile`
        // that came from a *different* crate (some crate upstream of the one
        // whose metadata we're looking at). For example, consider this dependency graph:
        //
        // A -> B -> C
        //
        // Suppose that we're currently compiling crate A, and start deserializing
        // metadata from crate B. When we deserialize a Span from crate B's metadata,
        // there are two possibilities:
        //
        // 1. The span references a file from crate B. This makes it a 'local' span,
        // which means that we can use crate B's serialized source map information.
        // 2. The span references a file from crate C. This makes it a 'foreign' span,
        // which means we need to use Crate *C* (not crate B) to determine the source
        // map information. We only record source map information for a file in the
        // crate that 'owns' it, so deserializing a Span may require us to look at
        // a transitive dependency.
        //
        // When we encode a foreign span, we adjust its 'lo' and 'high' values
        // to be based on the *foreign* crate (e.g. crate C), not the crate
        // we are writing metadata for (e.g. crate B). This allows us to
        // treat the 'local' and 'foreign' cases almost identically during deserialization:
        // we can call `imported_source_files` for the proper crate, and binary search
        // through the returned slice using our span.
        let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
            decoder.cdata().imported_source_files(sess)
        } else {
            // When we encode a proc-macro crate, all `Span`s should be encoded
            // with `TAG_VALID_SPAN_LOCAL`
            if decoder.cdata().root.is_proc_macro_crate() {
                // Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
                // since we don't have `cnum_map` populated.
                let cnum = u32::decode(decoder);
                panic!(
                    "Decoding of crate {:?} tried to access proc-macro dep {:?}",
                    decoder.cdata().root.name,
                    cnum
                );
            }
            // tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
            let cnum = CrateNum::decode(decoder);
            debug!(
                "SpecializedDecoder<Span>::specialized_decode: loading source files from cnum {:?}",
                cnum
            );

            // Decoding 'foreign' spans should be rare enough that it's
            // not worth it to maintain a per-CrateNum cache for `last_source_file_index`.
            // We just set it to 0, to ensure that we don't try to access something out
            // of bounds for our initial 'guess'
            decoder.last_source_file_index = 0;

            let foreign_data = decoder.cdata().cstore.get_crate_data(cnum);
            foreign_data.imported_source_files(sess)
        };

        let source_file = {
            // Optimize for the case that most spans within a translated item
            // originate from the same source_file.
            let last_source_file = &imported_source_files[decoder.last_source_file_index];

            if lo >= last_source_file.original_start_pos && lo <= last_source_file.original_end_pos
            {
                last_source_file
            } else {
                // `binary_search_by_key` returns Err(insertion_point) when `lo`
                // is not exactly a file's start; the containing file is the one
                // just before that insertion point, hence `index - 1`.
                let index = imported_source_files
                    .binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
                    .unwrap_or_else(|index| index - 1);

                // Don't try to cache the index for foreign spans,
                // as this would require a map from CrateNums to indices
                if tag == TAG_VALID_SPAN_LOCAL {
                    decoder.last_source_file_index = index;
                }
                &imported_source_files[index]
            }
        };

        // Make sure our binary search above is correct.
        debug_assert!(
            lo >= source_file.original_start_pos && lo <= source_file.original_end_pos,
            "Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
            lo,
            source_file.original_start_pos,
            source_file.original_end_pos
        );

        // Make sure we correctly filtered out invalid spans during encoding
        debug_assert!(
            hi >= source_file.original_start_pos && hi <= source_file.original_end_pos,
            "Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
            hi,
            source_file.original_start_pos,
            source_file.original_end_pos
        );

        // Rebase the positions from the owning crate's coordinate space into
        // the local source map's coordinate space for the imported copy.
        let lo =
            (lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
        let hi =
            (hi + source_file.translated_source_file.start_pos) - source_file.original_start_pos;

        // Do not try to decode parent for foreign spans.
        Span::new(lo, hi, ctxt, None)
    }
}

// Slice types interned in the `TyCtxt` arena are decoded through the
// `RefDecodable` machinery rather than the plain `Decodable` path.
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for &'tcx [ty::abstract_const::Node<'tcx>] {
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
        ty::codec::RefDecodable::decode(d)
    }
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for &'tcx [(ty::Predicate<'tcx>, Span)] {
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
        ty::codec::RefDecodable::decode(d)
    }
}

// Lazy wrappers decode only their *position* information here; the payload is
// decoded on demand later via the table/value accessors.
impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyValue<T> {
    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
        decoder.read_lazy()
    }
}

impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
        // A length of 0 is the sentinel for an empty array; no position follows it.
        let len = decoder.read_usize();
        if len == 0 { LazyArray::empty() } else { decoder.read_lazy_array(len) }
    }
}

impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
    fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
        let len = decoder.read_usize();
        decoder.read_lazy_table(len)
    }
}

// Generates the boilerplate `Decoder`/`TyDecoder` plumbing for `DecodeContext`.
implement_ty_decoder!(DecodeContext<'a, 'tcx>);

impl MetadataBlob {
    /// Wraps a raw, memory-mapped (or otherwise loaded) metadata buffer.
    pub(crate) fn new(metadata_ref: MetadataRef) -> MetadataBlob {
        MetadataBlob(Lrc::new(metadata_ref))
    }

    /// Cheap magic-bytes check: does this blob start with the expected
    /// metadata header for this compiler version?
    pub(crate) fn is_compatible(&self) -> bool {
        self.blob().starts_with(METADATA_HEADER)
    }

    /// Reads the rustc version string stored in the blob. The position skips
    /// the header plus the 4-byte root offset written after it (see `get_root`).
    pub(crate) fn get_rustc_version(&self) -> String {
        LazyValue::<String>::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 4).unwrap())
            .decode(self)
    }

    /// Decodes the `CrateRoot`. The four bytes immediately after the header
    /// hold the root's absolute position in the blob as a big-endian u32.
    pub(crate) fn get_root(&self) -> CrateRoot {
        let slice = &self.blob()[..];
        let offset = METADATA_HEADER.len();
        let pos = (((slice[offset + 0] as u32) << 24)
            | ((slice[offset + 1] as u32) << 16)
            | ((slice[offset + 2] as u32) << 8)
            | ((slice[offset + 3] as u32) << 0)) as usize;
        LazyValue::<CrateRoot>::from_position(NonZeroUsize::new(pos).unwrap()).decode(self)
    }

    /// Writes a human-readable summary of this crate's metadata (name, hash,
    /// proc-macro flag, and the external dependency list) to `out`.
    /// Used by `--ls`-style metadata listing.
    pub(crate) fn list_crate_metadata(&self, out: &mut dyn io::Write) -> io::Result<()> {
        let root = self.get_root();
        writeln!(out, "Crate info:")?;
        writeln!(out, "name {}{}", root.name, root.extra_filename)?;
        writeln!(out, "hash {} stable_crate_id {:?}", root.hash, root.stable_crate_id)?;
        writeln!(out, "proc_macro {:?}", root.proc_macro_data.is_some())?;
        writeln!(out, "=External Dependencies=")?;
        // Dependency numbering is 1-based to match `CrateNum` conventions
        // (0 is the local crate).
        for (i, dep) in root.crate_deps.decode(self).enumerate() {
            writeln!(
                out,
                "{} {}{} hash {} host_hash {:?} kind {:?}",
                i + 1,
                dep.name,
                dep.extra_filename,
                dep.hash,
                dep.host_hash,
                dep.kind
            )?;
        }
        write!(out, "\n")?;
        Ok(())
    }
}

// Plain field accessors for the decoded crate root.
impl CrateRoot {
    pub(crate) fn is_proc_macro_crate(&self) -> bool {
        self.proc_macro_data.is_some()
    }

    pub(crate) fn name(&self) -> Symbol {
        self.name
    }

    pub(crate) fn hash(&self) -> Svh {
        self.hash
    }

    pub(crate) fn stable_crate_id(&self) -> StableCrateId {
        self.stable_crate_id
    }

    pub(crate) fn triple(&self) -> &TargetTriple {
        &self.triple
    }

    /// Lazily decodes this crate's declared dependencies from `metadata`.
    pub(crate) fn decode_crate_deps<'a>(
        &self,
        metadata: &'a MetadataBlob,
    ) -> impl ExactSizeIterator<Item = CrateDep> + Captures<'a> {
        self.crate_deps.decode(metadata)
    }
}

impl<'a, 'tcx> CrateMetadataRef<'a> {
    /// Returns the raw `ProcMacro` entry for `id`. Linear scan over the
    /// proc-macro table; panics if this crate is not a proc-macro crate or
    /// `id` is not one of its macros.
    fn raw_proc_macro(self, id: DefIndex) -> &'a ProcMacro {
        // DefIndex's in root.proc_macro_data have a one-to-one correspondence
        // with items in 'raw_proc_macros'.
        let pos = self
            .root
            .proc_macro_data
            .as_ref()
            .unwrap()
            .macros
            .decode(self)
            .position(|i| i == id)
            .unwrap();
        &self.raw_proc_macros.unwrap()[pos]
    }

    /// Name of the item, if its `DefKey` carries one (not all def-path data
    /// variants have names, e.g. constructors).
    fn opt_item_name(self, item_index: DefIndex) -> Option<Symbol> {
        self.def_key(item_index).disambiguated_data.data.get_opt_name()
    }

    /// Like `opt_item_name`, but panics for unnamed items.
    fn item_name(self, item_index: DefIndex) -> Symbol {
        self.opt_item_name(item_index).expect("no encoded ident for item")
    }

    /// Name plus its definition-site span, as an `Ident`.
    fn opt_item_ident(self, item_index: DefIndex, sess: &Session) -> Option<Ident> {
        let name = self.opt_item_name(item_index)?;
        let span =
            self.root.tables.def_ident_span.get(self, item_index).unwrap().decode((self, sess));
        Some(Ident::new(name, span))
    }

    fn item_ident(self, item_index: DefIndex, sess: &Session) -> Ident {
        self.opt_item_ident(item_index, sess).expect("no encoded ident for item")
    }

    fn maybe_kind(self, item_id: DefIndex) -> Option<EntryKind> {
        self.root.tables.kind.get(self, item_id).map(|k| k.decode(self))
    }

    /// Translates a `CrateNum` as encoded in *this* crate's metadata into the
    /// current session's numbering via `cnum_map`. `LOCAL_CRATE` in the
    /// metadata means "the crate the metadata belongs to", i.e. `self.cnum`.
    #[inline]
    pub(super) fn map_encoded_cnum_to_current(self, cnum: CrateNum) -> CrateNum {
        if cnum == LOCAL_CRATE { self.cnum } else { self.cnum_map[cnum] }
    }

    /// Infallible version of `maybe_kind`; a missing entry is a metadata bug.
    fn kind(self, item_id: DefIndex) -> EntryKind {
        self.maybe_kind(item_id).unwrap_or_else(|| {
            bug!(
                "CrateMetadata::kind({:?}): id not found, in crate {:?} with number {}",
                item_id,
                self.root.name,
                self.cnum,
            )
        })
    }

    fn def_kind(self, item_id: DefIndex) -> DefKind {
        self.root.tables.opt_def_kind.get(self, item_id).unwrap_or_else(|| {
            bug!(
                "CrateMetadata::def_kind({:?}): id not found, in crate {:?} with number {}",
                item_id,
                self.root.name,
                self.cnum,
            )
        })
    }

    fn get_span(self, index: DefIndex, sess: &Session) -> Span {
        self.root
            .tables
            .def_span
            .get(self, index)
            .unwrap_or_else(|| panic!("Missing span for {:?}", index))
            .decode((self, sess))
    }

    /// Materializes a `SyntaxExtension` for the proc macro with index `id`,
    /// wiring its client object into the matching extension kind and
    /// collecting its helper attributes (for derives) and item attributes.
    fn load_proc_macro(self, id: DefIndex, sess: &Session) -> SyntaxExtension {
        let (name, kind, helper_attrs) = match *self.raw_proc_macro(id) {
            ProcMacro::CustomDerive { trait_name, attributes, client } => {
                let helper_attrs =
                    attributes.iter().cloned().map(Symbol::intern).collect::<Vec<_>>();
                (
                    trait_name,
                    SyntaxExtensionKind::Derive(Box::new(DeriveProcMacro { client })),
                    helper_attrs,
                )
            }
            ProcMacro::Attr { name, client } => {
                (name, SyntaxExtensionKind::Attr(Box::new(AttrProcMacro { client })), Vec::new())
            }
            ProcMacro::Bang { name, client } => {
                (name, SyntaxExtensionKind::Bang(Box::new(BangProcMacro { client })), Vec::new())
            }
        };

        let attrs: Vec<_> = self.get_item_attrs(id, sess).collect();
        SyntaxExtension::new(
            sess,
            kind,
            self.get_span(id, sess),
            helper_attrs,
            self.root.edition,
            Symbol::intern(name),
            &attrs,
        )
    }

    /// Builds a `ty::VariantDef` for a variant/struct/union entry. For enums,
    /// `index` is the variant's own index and `parent_did` the enum's `DefId`;
    /// for structs/unions the "variant" is the type itself.
    fn get_variant(self, kind: &EntryKind, index: DefIndex, parent_did: DefId) -> ty::VariantDef {
        let data = match kind {
            EntryKind::Variant(data) | EntryKind::Struct(data) | EntryKind::Union(data) => {
                data.decode(self)
            }
            _ => bug!(),
        };

        let adt_kind = match kind {
            EntryKind::Variant(_) => ty::AdtKind::Enum,
            EntryKind::Struct(..) => ty::AdtKind::Struct,
            EntryKind::Union(..) => ty::AdtKind::Union,
            _ => bug!(),
        };

        // Only enum variants get their own `DefId` distinct from the parent.
        let variant_did =
            if adt_kind == ty::AdtKind::Enum { Some(self.local_def_id(index)) } else { None };
        let ctor_did = data.ctor.map(|index| self.local_def_id(index));

        ty::VariantDef::new(
            self.item_name(index),
            variant_did,
            ctor_did,
            data.discr,
            // The children of a variant entry are its fields.
            self.root
                .tables
                .children
                .get(self, index)
                .unwrap_or_else(LazyArray::empty)
                .decode(self)
                .map(|index| ty::FieldDef {
                    did: self.local_def_id(index),
                    name: self.item_name(index),
                    vis: self.get_visibility(index),
                })
                .collect(),
            data.ctor_kind,
            adt_kind,
            parent_did,
            false,
            data.is_non_exhaustive,
        )
    }

    /// Reconstructs the full `AdtDef` (allocated in `tcx`) for `item_id`.
    /// Enums collect one variant per child; structs/unions are a single
    /// synthetic variant.
    fn get_adt_def(self, item_id: DefIndex, tcx: TyCtxt<'tcx>) -> ty::AdtDef<'tcx> {
        let kind = self.kind(item_id);
        let did = self.local_def_id(item_id);

        let adt_kind = match kind {
            EntryKind::Enum => ty::AdtKind::Enum,
            EntryKind::Struct(_) => ty::AdtKind::Struct,
            EntryKind::Union(_) => ty::AdtKind::Union,
            _ => bug!("get_adt_def called on a non-ADT {:?}", did),
        };
        let repr = self.root.tables.repr_options.get(self, item_id).unwrap().decode(self);

        let variants = if let ty::AdtKind::Enum = adt_kind {
            self.root
                .tables
                .children
                .get(self, item_id)
                .unwrap_or_else(LazyArray::empty)
                .decode(self)
                .map(|index| self.get_variant(&self.kind(index), index, did))
                .collect()
        } else {
            std::iter::once(self.get_variant(&kind, item_id, did)).collect()
        };

        tcx.alloc_adt_def(did, adt_kind, variants, repr)
    }

    fn get_generics(self, item_id: DefIndex, sess: &Session) -> ty::Generics {
        self.root.tables.generics_of.get(self, item_id).unwrap().decode((self, sess))
    }

    fn get_visibility(self, id: DefIndex) -> ty::Visibility {
        self.root.tables.visibility.get(self, id).unwrap().decode(self)
    }

    /// For an impl item, the `DefId` of the trait item it implements, if any.
    fn get_trait_item_def_id(self, id: DefIndex) -> Option<DefId> {
        self.root.tables.trait_item_def_id.get(self, id).map(|d| d.decode_from_cdata(self))
    }

    fn get_expn_that_defined(self, id: DefIndex, sess: &Session) -> ExpnId {
        self.root.tables.expn_that_defined.get(self, id).unwrap().decode((self, sess))
    }

    fn get_debugger_visualizers(self) -> Vec<rustc_span::DebuggerVisualizerFile> {
        self.root.debugger_visualizers.decode(self).collect::<Vec<_>>()
    }

    /// Iterates over all the stability attributes in the given crate.
    fn get_lib_features(self, tcx: TyCtxt<'tcx>) -> &'tcx [(Symbol, Option<Symbol>)] {
        tcx.arena.alloc_from_iter(self.root.lib_features.decode(self))
    }

    /// Iterates over the stability implications in the given crate (when a `#[unstable]` attribute
    /// has an `implied_by` meta item, then the mapping from the implied feature to the actual
    /// feature is a stability implication).
    fn get_stability_implications(self, tcx: TyCtxt<'tcx>) -> &'tcx [(Symbol, Symbol)] {
        tcx.arena.alloc_from_iter(self.root.stability_implications.decode(self))
    }

    /// Iterates over the language items in the given crate.
    fn get_lang_items(self, tcx: TyCtxt<'tcx>) -> &'tcx [(DefId, usize)] {
        tcx.arena.alloc_from_iter(
            self.root
                .lang_items
                .decode(self)
                .map(move |(def_index, index)| (self.local_def_id(def_index), index)),
        )
    }

    /// Iterates over the diagnostic items in the given crate.
    fn get_diagnostic_items(self) -> DiagnosticItems {
        // Build both directions of the name<->id mapping in one pass.
        let mut id_to_name = FxHashMap::default();
        let name_to_id = self
            .root
            .diagnostic_items
            .decode(self)
            .map(|(name, def_index)| {
                let id = self.local_def_id(def_index);
                id_to_name.insert(id, name);
                (name, id)
            })
            .collect();
        DiagnosticItems { id_to_name, name_to_id }
    }

    /// Iterates over all named children of the given module,
    /// including both proper items and reexports.
    /// Module here is understood in name resolution sense - it can be a `mod` item,
    /// or a crate root, or an enum, or a trait.
    fn for_each_module_child(
        self,
        id: DefIndex,
        mut callback: impl FnMut(ModChild),
        sess: &Session,
    ) {
        if let Some(data) = &self.root.proc_macro_data {
            // If we are loading as a proc macro, we want to return
            // the view of this crate as a proc macro crate.
            if id == CRATE_DEF_INDEX {
                for def_index in data.macros.decode(self) {
                    let raw_macro = self.raw_proc_macro(def_index);
                    let res = Res::Def(
                        DefKind::Macro(macro_kind(raw_macro)),
                        self.local_def_id(def_index),
                    );
                    let ident = self.item_ident(def_index, sess);
                    callback(ModChild {
                        ident,
                        res,
                        vis: ty::Visibility::Public,
                        span: ident.span,
                        macro_rules: false,
                    });
                }
            }
            return;
        }

        // Iterate over all children.
        if let Some(children) = self.root.tables.children.get(self, id) {
            for child_index in children.decode((self, sess)) {
                let ident = self.item_ident(child_index, sess);
                let kind = self.def_kind(child_index);
                let def_id = self.local_def_id(child_index);
                let res = Res::Def(kind, def_id);
                let vis = self.get_visibility(child_index);
                let span = self.get_span(child_index, sess);
                let macro_rules = match kind {
                    DefKind::Macro(..) => match self.kind(child_index) {
                        EntryKind::MacroDef(_, macro_rules) => macro_rules,
                        _ => unreachable!(),
                    },
                    _ => false,
                };

                callback(ModChild { ident, res, vis, span, macro_rules });

                // For non-re-export structs and variants add their constructors to children.
                // Re-export lists automatically contain constructors when necessary.
                match kind {
                    DefKind::Struct => {
                        if let Some((ctor_def_id, ctor_kind)) =
                            self.get_ctor_def_id_and_kind(child_index)
                        {
                            let ctor_res =
                                Res::Def(DefKind::Ctor(CtorOf::Struct, ctor_kind), ctor_def_id);
                            let vis = self.get_visibility(ctor_def_id.index);
                            callback(ModChild {
                                ident,
                                res: ctor_res,
                                vis,
                                span,
                                macro_rules: false,
                            });
                        }
                    }
                    DefKind::Variant => {
                        // Braced variants, unlike structs, generate unusable names in
                        // value namespace, they are reserved for possible future use.
                        // It's ok to use the variant's id as a ctor id since an
                        // error will be reported on any use of such resolution anyway.
                        let (ctor_def_id, ctor_kind) = self
                            .get_ctor_def_id_and_kind(child_index)
                            .unwrap_or((def_id, CtorKind::Fictive));
                        let ctor_res =
                            Res::Def(DefKind::Ctor(CtorOf::Variant, ctor_kind), ctor_def_id);
                        let mut vis = self.get_visibility(ctor_def_id.index);
                        if ctor_def_id == def_id && vis.is_public() {
                            // For non-exhaustive variants lower the constructor visibility to
                            // within the crate. We only need this for fictive constructors,
                            // for other constructors correct visibilities
                            // were already encoded in metadata.
                            let mut attrs = self.get_item_attrs(def_id.index, sess);
                            if attrs.any(|item| item.has_name(sym::non_exhaustive)) {
                                let crate_def_id = self.local_def_id(CRATE_DEF_INDEX);
                                vis = ty::Visibility::Restricted(crate_def_id);
                            }
                        }
                        callback(ModChild { ident, res: ctor_res, vis, span, macro_rules: false });
                    }
                    _ => {}
                }
            }
        }

        // `mod` items additionally carry an explicit re-export list.
        match self.kind(id) {
            EntryKind::Mod(exports) => {
                for exp in exports.decode((self, sess)) {
                    callback(exp);
                }
            }
            EntryKind::Enum | EntryKind::Trait => {}
            _ => bug!("`for_each_module_child` is called on a non-module: {:?}", self.def_kind(id)),
        }
    }

    /// Whether CTFE MIR was encoded for this item.
    fn is_ctfe_mir_available(self, id: DefIndex) -> bool {
        self.root.tables.mir_for_ctfe.get(self, id).is_some()
    }

    /// Whether optimized MIR was encoded for this item.
    fn is_item_mir_available(self, id: DefIndex) -> bool {
        self.root.tables.optimized_mir.get(self, id).is_some()
    }

    /// The expansion that produced this module-like item (mod/enum/trait).
    fn module_expansion(self, id: DefIndex, sess: &Session) -> ExpnId {
        match self.kind(id) {
            EntryKind::Mod(_) | EntryKind::Enum | EntryKind::Trait => {
                self.get_expn_that_defined(id, sess)
            }
            _ => panic!("Expected module, found {:?}", self.local_def_id(id)),
        }
    }

    /// `true` iff the item is an associated fn taking `self` (a method).
    fn get_fn_has_self_parameter(self, id: DefIndex) -> bool {
        match self.kind(id) {
            EntryKind::AssocFn { has_self, .. } => has_self,
            _ => false,
        }
    }

    fn get_associated_item_def_ids(
        self,
        id: DefIndex,
        sess: &'a Session,
    ) -> impl Iterator<Item = DefId> + 'a {
        self.root
            .tables
            .children
            .get(self, id)
            .unwrap_or_else(LazyArray::empty)
            .decode((self, sess))
            .map(move |child_index| self.local_def_id(child_index))
    }

    /// Reconstructs a `ty::AssocItem` (const/fn/type) from its entry kind.
    fn get_associated_item(self, id: DefIndex) -> ty::AssocItem {
        let name = self.item_name(id);

        let (kind, container, has_self) = match self.kind(id) {
            EntryKind::AssocConst(container) => (ty::AssocKind::Const, container, false),
            EntryKind::AssocFn { container, has_self } => (ty::AssocKind::Fn, container, has_self),
            EntryKind::AssocType(container) => (ty::AssocKind::Type, container, false),
            _ => bug!("cannot get associated-item of `{:?}`", id),
        };

        ty::AssocItem {
            name,
            kind,
            def_id: self.local_def_id(id),
            trait_item_def_id: self.get_trait_item_def_id(id),
            container,
            fn_has_self_parameter: has_self,
        }
    }

    /// For struct/variant entries that have a constructor, its `DefId` and kind.
    fn get_ctor_def_id_and_kind(self, node_id: DefIndex) -> Option<(DefId, CtorKind)> {
        match self.kind(node_id) {
            EntryKind::Struct(data) | EntryKind::Variant(data) => {
                let vdata = data.decode(self);
                vdata.ctor.map(|index| (self.local_def_id(index), vdata.ctor_kind))
            }
            _ => None,
        }
    }

    fn get_item_attrs(
        self,
        id: DefIndex,
        sess: &'a Session,
    ) -> impl Iterator<Item = ast::Attribute> + 'a {
        self.root
            .tables
            .attributes
            .get(self, id)
            .unwrap_or_else(|| {
                // Structure and variant constructors don't have any attributes encoded for them,
                // but we assume that someone passing a constructor ID actually wants to look at
                // the attributes on the corresponding struct or variant.
                let def_key = self.def_key(id);
                assert_eq!(def_key.disambiguated_data.data, DefPathData::Ctor);
                let parent_id = def_key.parent.expect("no parent for a constructor");
                self.root
                    .tables
                    .attributes
                    .get(self, parent_id)
                    .expect("no encoded attributes for a structure or variant")
            })
            .decode((self, sess))
    }

    /// Field names of a struct-like item, each paired with its span.
    fn get_struct_field_names(
        self,
        id: DefIndex,
        sess: &'a Session,
    ) -> impl Iterator<Item = Spanned<Symbol>> + 'a {
        self.root
            .tables
            .children
            .get(self, id)
            .unwrap_or_else(LazyArray::empty)
            .decode(self)
            .map(move |index| respan(self.get_span(index, sess), self.item_name(index)))
    }

    fn get_struct_field_visibilities(self, id: DefIndex) -> impl Iterator<Item = Visibility> + 'a {
        self.root
            .tables
            .children
            .get(self, id)
            .unwrap_or_else(LazyArray::empty)
            .decode(self)
            .map(move |field_index| self.get_visibility(field_index))
    }

    /// Inherent impls attached to the type at `id`, allocated in `tcx`.
    fn get_inherent_implementations_for_type(
        self,
        tcx: TyCtxt<'tcx>,
        id: DefIndex,
    ) -> &'tcx [DefId] {
        tcx.arena.alloc_from_iter(
            self.root
                .tables
                .inherent_impls
                .get(self, id)
                .unwrap_or_else(LazyArray::empty)
                .decode(self)
                .map(|index| self.local_def_id(index)),
        )
    }

    /// Decodes all inherent impls in the crate (for rustdoc).
    fn get_inherent_impls(self) -> impl Iterator<Item = (DefId, DefId)> + 'a {
        // The table is dense in `DefIndex`, so walk every slot.
        (0..self.root.tables.inherent_impls.size()).flat_map(move |i| {
            let ty_index = DefIndex::from_usize(i);
            let ty_def_id = self.local_def_id(ty_index);
            self.root
                .tables
                .inherent_impls
                .get(self, ty_index)
                .unwrap_or_else(LazyArray::empty)
                .decode(self)
                .map(move |impl_index| (ty_def_id, self.local_def_id(impl_index)))
        })
    }

    /// Decodes all traits in the crate (for rustdoc and rustc diagnostics).
    fn get_traits(self) -> impl Iterator<Item = DefId> + 'a {
        self.root.traits.decode(self).map(move |index| self.local_def_id(index))
    }

    /// Decodes all trait impls in the crate (for rustdoc).
    fn get_trait_impls(self) -> impl Iterator<Item = (DefId, DefId, Option<SimplifiedType>)> + 'a {
        self.cdata.trait_impls.iter().flat_map(move |(&(trait_cnum_raw, trait_index), impls)| {
            // Keys store the trait's crate in the *encoded* numbering; map it
            // into the current session's numbering.
            let trait_def_id = DefId {
                krate: self.cnum_map[CrateNum::from_u32(trait_cnum_raw)],
                index: trait_index,
            };
            impls.decode(self).map(move |(impl_index, simplified_self_ty)| {
                (trait_def_id, self.local_def_id(impl_index), simplified_self_ty)
            })
        })
    }

    fn get_all_incoherent_impls(self) -> impl Iterator<Item = DefId> + 'a {
        self.cdata
            .incoherent_impls
            .values()
            .flat_map(move |impls| impls.decode(self).map(move |idx| self.local_def_id(idx)))
    }

    fn get_incoherent_impls(self, tcx: TyCtxt<'tcx>, simp: SimplifiedType) -> &'tcx [DefId] {
        if let Some(impls) = self.cdata.incoherent_impls.get(&simp) {
            tcx.arena.alloc_from_iter(impls.decode(self).map(|idx| self.local_def_id(idx)))
        } else {
            &[]
        }
    }

    /// All impls of `trait_def_id` found in this crate, with their simplified
    /// self types. Returns `&[]` when the trait cannot even be expressed in
    /// this crate's numbering (so it cannot have impls here).
    fn get_implementations_of_trait(
        self,
        tcx: TyCtxt<'tcx>,
        trait_def_id: DefId,
    ) -> &'tcx [(DefId, Option<SimplifiedType>)] {
        if self.trait_impls.is_empty() {
            return &[];
        }

        // Do a reverse lookup beforehand to avoid touching the crate_num
        // hash map in the loop below.
        let key = match self.reverse_translate_def_id(trait_def_id) {
            Some(def_id) => (def_id.krate.as_u32(), def_id.index),
            None => return &[],
        };

        if let Some(impls) = self.trait_impls.get(&key) {
            tcx.arena.alloc_from_iter(
                impls
                    .decode(self)
                    .map(|(idx, simplified_self_ty)| (self.local_def_id(idx), simplified_self_ty)),
            )
        } else {
            &[]
        }
    }

    fn get_native_libraries(self, sess: &'a Session) -> impl Iterator<Item = NativeLib> + 'a {
        self.root.native_libraries.decode((self, sess))
    }

    /// Span captured at proc-macro definition time for `quote!` diagnostics.
    fn get_proc_macro_quoted_span(self, index: usize, sess: &Session) -> Span {
        self.root
            .tables
            .proc_macro_quoted_spans
            .get(self, index)
            .unwrap_or_else(|| panic!("Missing proc macro quoted span: {:?}", index))
            .decode((self, sess))
    }

    fn get_foreign_modules(self, sess: &'a Session) -> impl Iterator<Item = ForeignModule> + '_ {
        self.root.foreign_modules.decode((self, sess))
    }

    /// Linkage preferences recorded when this crate was built as a dylib.
    /// The encoded list is positional: entry `i` describes `CrateNum(i + 1)`.
    fn get_dylib_dependency_formats(
        self,
        tcx: TyCtxt<'tcx>,
    ) -> &'tcx [(CrateNum, LinkagePreference)] {
        tcx.arena.alloc_from_iter(
            self.root.dylib_dependency_formats.decode(self).enumerate().flat_map(|(i, link)| {
                let cnum = CrateNum::new(i + 1);
                link.map(|link| (self.cnum_map[cnum], link))
            }),
        )
    }

    fn get_missing_lang_items(self, tcx: TyCtxt<'tcx>) -> &'tcx [lang_items::LangItem] {
        tcx.arena.alloc_from_iter(self.root.lang_items_missing.decode(self))
    }

    fn exported_symbols(
        self,
        tcx: TyCtxt<'tcx>,
    ) -> &'tcx [(ExportedSymbol<'tcx>, SymbolExportInfo)] {
        tcx.arena.alloc_from_iter(self.root.exported_symbols.decode((self, tcx)))
    }

    fn get_macro(self, id: DefIndex, sess: &Session) -> ast::MacroDef {
        match self.kind(id) {
            EntryKind::MacroDef(mac_args, macro_rules) => {
                ast::MacroDef { body: P(mac_args.decode((self, sess))), macro_rules }
            }
            _ => bug!(),
        }
    }

    fn is_foreign_item(self, id: DefIndex) -> bool {
        match self.kind(id) {
            EntryKind::ForeignStatic | EntryKind::ForeignFn => true,
            _ => false,
        }
    }

    /// Decodes (and memoizes, via `def_key_cache`) the `DefKey` for `index`.
    #[inline]
    fn def_key(self, index: DefIndex) -> DefKey {
        *self
            .def_key_cache
            .lock()
            .entry(index)
            .or_insert_with(|| self.root.tables.def_keys.get(self, index).unwrap().decode(self))
    }

    // Returns the path leading to the thing with this `id`.
    fn def_path(self, id: DefIndex) -> DefPath {
        debug!("def_path(cnum={:?}, id={:?})", self.cnum, id);
        DefPath::make(self.cnum, id, |parent| self.def_key(parent))
    }

    /// Cache-filling helper for `def_path_hash`; the caller holds the lock on
    /// `def_path_hashes` so this takes the map by `&mut`.
    fn def_path_hash_unlocked(
        self,
        index: DefIndex,
        def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>,
    ) -> DefPathHash {
        *def_path_hashes
            .entry(index)
            .or_insert_with(|| self.root.tables.def_path_hashes.get(self, index).unwrap())
    }

    #[inline]
    fn def_path_hash(self, index: DefIndex) -> DefPathHash {
        let mut def_path_hashes = self.def_path_hash_cache.lock();
        self.def_path_hash_unlocked(index, &mut def_path_hashes)
    }

    #[inline]
    fn def_path_hash_to_def_index(self, hash: DefPathHash) -> DefIndex {
        self.def_path_hash_map.def_path_hash_to_def_index(&hash)
    }

    /// Resolves an `ExpnHash` to an `ExpnId`, using `index_guess` (typically
    /// the index from a previous compilation session) as a fast path before
    /// falling back to a lazily-built hash -> index map over all expansions.
    fn expn_hash_to_expn_id(self, sess: &Session, index_guess: u32, hash: ExpnHash) -> ExpnId {
        debug_assert_eq!(ExpnId::from_hash(hash), None);
        let index_guess = ExpnIndex::from_u32(index_guess);
        let old_hash = self.root.expn_hashes.get(self, index_guess).map(|lazy| lazy.decode(self));

        let index = if old_hash == Some(hash) {
            // Fast path: the expn and its index is unchanged from the
            // previous compilation session. There is no need to decode anything
            // else.
            index_guess
        } else {
            // Slow path: We need to find out the new `DefIndex` of the provided
            // `DefPathHash`, if its still exists. This requires decoding every `DefPathHash`
            // stored in this crate.
            let map = self.cdata.expn_hash_map.get_or_init(|| {
                let end_id = self.root.expn_hashes.size() as u32;
                let mut map =
                    UnhashMap::with_capacity_and_hasher(end_id as usize, Default::default());
                for i in 0..end_id {
                    let i = ExpnIndex::from_u32(i);
                    if let Some(hash) = self.root.expn_hashes.get(self, i) {
                        map.insert(hash.decode(self), i);
                    }
                }
                map
            });
            map[&hash]
        };

        let data = self.root.expn_data.get(self, index).unwrap().decode((self, sess));
        rustc_span::hygiene::register_expn_id(self.cnum, index, data, hash)
    }

    /// Imports the source_map from an external crate into the source_map of the crate
    /// currently being compiled (the "local crate").
    ///
    /// The import algorithm works analogous to how AST items are inlined from an
    /// external crate's metadata:
    /// For every SourceFile in the external source_map an 'inline' copy is created in the
    /// local source_map. The correspondence relation between external and local
    /// SourceFiles is recorded in the `ImportedSourceFile` objects returned from this
    /// function. When an item from an external crate is later inlined into this
    /// crate, this correspondence information is used to translate the span
    /// information of the inlined item so that it refers the correct positions in
    /// the local source_map (see `<decoder::DecodeContext as SpecializedDecoder<Span>>`).
    ///
    /// The import algorithm in the function below will reuse SourceFiles already
    /// existing in the local source_map. For example, even if the SourceFile of some
    /// source file of libstd gets imported many times, there will only ever be
    /// one SourceFile object for the corresponding file in the local source_map.
    ///
    /// Note that imported SourceFiles do not actually contain the source code of the
    /// file they represent, just information about length, line breaks, and
    /// multibyte characters. This information is enough to generate valid debuginfo
    /// for items inlined from other crates.
    ///
    /// Proc macro crates don't currently export spans, so this function does not have
    /// to work for them.
    fn imported_source_files(self, sess: &Session) -> &'a [ImportedSourceFile] {
        // Helper: keep a candidate virtual dir only if path translation is both
        // possible (we know a real dir to translate *to*) and desired.
        fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> {
            path.filter(|_| {
                // Only spend time on further checks if we have what to translate *to*.
                sess.opts.real_rust_source_base_dir.is_some()
                // Some tests need the translation to be always skipped.
                && sess.opts.unstable_opts.translate_remapped_path_to_local_path
            })
            .filter(|virtual_dir| {
                // Don't translate away `/rustc/$hash` if we're still remapping to it,
                // since that means we're still building `std`/`rustc` that need it,
                // and we don't want the real path to leak into codegen/debuginfo.
                !sess.opts.remap_path_prefix.iter().any(|(_from, to)| to == virtual_dir)
            })
        }

        // Translate the virtual `/rustc/$hash` prefix back to a real directory
        // that should hold actual sources, where possible.
        //
        // NOTE: if you update this, you might need to also update bootstrap's code for generating
        // the `rust-src` component in `Src::run` in `src/bootstrap/dist.rs`.
        let virtual_rust_source_base_dir = [
            filter(sess, option_env!("CFG_VIRTUAL_RUST_SOURCE_BASE_DIR").map(Path::new)),
            filter(sess, sess.opts.unstable_opts.simulate_remapped_rust_src_base.as_deref()),
        ];

        let try_to_translate_virtual_to_real = |name: &mut rustc_span::FileName| {
            debug!(
                "try_to_translate_virtual_to_real(name={:?}): \
                 virtual_rust_source_base_dir={:?}, real_rust_source_base_dir={:?}",
                name, virtual_rust_source_base_dir, sess.opts.real_rust_source_base_dir,
            );

            for virtual_dir in virtual_rust_source_base_dir.iter().flatten() {
                if let Some(real_dir) = &sess.opts.real_rust_source_base_dir {
                    if let rustc_span::FileName::Real(old_name) = name {
                        if let rustc_span::RealFileName::Remapped { local_path: _, virtual_name } =
                            old_name
                        {
                            if let Ok(rest) = virtual_name.strip_prefix(virtual_dir) {
                                let virtual_name = virtual_name.clone();

                                // The std library crates are in
                                // `$sysroot/lib/rustlib/src/rust/library`, whereas other crates
                                // may be in `$sysroot/lib/rustlib/src/rust/` directly. So we
                                // detect crates from the std libs and handle them specially.
                                const STD_LIBS: &[&str] = &[
                                    "core",
                                    "alloc",
                                    "std",
                                    "test",
                                    "term",
                                    "unwind",
                                    "proc_macro",
                                    "panic_abort",
                                    "panic_unwind",
                                    "profiler_builtins",
                                    "rtstartup",
                                    "rustc-std-workspace-core",
                                    "rustc-std-workspace-alloc",
                                    "rustc-std-workspace-std",
                                    "backtrace",
                                ];
                                let is_std_lib = STD_LIBS.iter().any(|l| rest.starts_with(l));

                                let new_path = if is_std_lib {
                                    real_dir.join("library").join(rest)
                                } else {
                                    real_dir.join(rest)
                                };

                                debug!(
                                    "try_to_translate_virtual_to_real: `{}` -> `{}`",
                                    virtual_name.display(),
                                    new_path.display(),
                                );
                                let new_name = rustc_span::RealFileName::Remapped {
                                    local_path: Some(new_path),
                                    virtual_name,
                                };
                                *old_name = new_name;
                            }
                        }
                    }
                }
            }
        };

        // The imported file list is computed once per crate and cached.
        self.cdata.source_map_import_info.get_or_init(|| {
            let external_source_map = self.root.source_map.decode(self);

            external_source_map
                .map(|source_file_to_import| {
                    // We can't reuse an existing SourceFile, so allocate a new one
                    // containing the information we need.
                    let rustc_span::SourceFile {
                        mut name,
                        src_hash,
                        start_pos,
                        end_pos,
                        lines,
                        multibyte_chars,
                        non_narrow_chars,
                        normalized_pos,
                        name_hash,
                        ..
                    } = source_file_to_import;

                    // If this file is under $sysroot/lib/rustlib/src/ but has not been remapped
                    // during rust bootstrapping by `remap-debuginfo = true`, and the user
                    // wish to simulate that behaviour by -Z simulate-remapped-rust-src-base,
                    // then we change `name` to a similar state as if the rust was bootstrapped
                    // with `remap-debuginfo = true`.
                    // This is useful for testing so that tests about the effects of
                    // `try_to_translate_virtual_to_real` don't have to worry about how the
                    // compiler is bootstrapped.
                    if let Some(virtual_dir) =
                        &sess.opts.unstable_opts.simulate_remapped_rust_src_base
                    {
                        if let Some(real_dir) = &sess.opts.real_rust_source_base_dir {
                            if let rustc_span::FileName::Real(ref mut old_name) = name {
                                if let rustc_span::RealFileName::LocalPath(local) = old_name {
                                    if let Ok(rest) = local.strip_prefix(real_dir) {
                                        *old_name = rustc_span::RealFileName::Remapped {
                                            local_path: None,
                                            virtual_name: virtual_dir.join(rest),
                                        };
                                    }
                                }
                            }
                        }
                    }

                    // If this file's path has been remapped to `/rustc/$hash`,
                    // we might be able to reverse that (also see comments above,
                    // on `try_to_translate_virtual_to_real`).
                    try_to_translate_virtual_to_real(&mut name);

                    let source_length = (end_pos - start_pos).to_usize();

                    let local_version = sess.source_map().new_imported_source_file(
                        name,
                        src_hash,
                        name_hash,
                        source_length,
                        self.cnum,
                        lines,
                        multibyte_chars,
                        non_narrow_chars,
                        normalized_pos,
                        start_pos,
                        end_pos,
                    );
                    debug!(
                        "CrateMetaData::imported_source_files alloc \
                         source_file {:?} original (start_pos {:?} end_pos {:?}) \
                         translated (start_pos {:?} end_pos {:?})",
                        local_version.name,
                        start_pos,
                        end_pos,
                        local_version.start_pos,
                        local_version.end_pos
                    );

                    // Record original <-> translated position correspondence;
                    // this is what `Decodable for Span` uses to rebase spans.
                    ImportedSourceFile {
                        original_start_pos: start_pos,
                        original_end_pos: end_pos,
                        translated_source_file: local_version,
                    }
                })
                .collect()
        })
    }

    /// Decodes diagnostic data captured for a generator (used to explain
    /// generator-related type errors across crates).
    fn get_generator_diagnostic_data(
        self,
        tcx: TyCtxt<'tcx>,
        id: DefIndex,
    ) -> Option<GeneratorDiagnosticData<'tcx>> {
        self.root
            .tables
            .generator_diagnostic_data
            .get(self, id)
            .map(|param| param.decode((self, tcx)))
            .map(|generator_data| GeneratorDiagnosticData {
                generator_interior_types: generator_data.generator_interior_types,
                hir_owner: generator_data.hir_owner,
                nodes_types: generator_data.nodes_types,
                adjustments: generator_data.adjustments,
            })
    }

    // These two tables only record presence; the value itself carries no data.
    fn get_may_have_doc_links(self, index: DefIndex) -> bool {
        self.root.tables.may_have_doc_links.get(self, index).is_some()
    }

    fn get_is_intrinsic(self, index: DefIndex) -> bool {
        self.root.tables.is_intrinsic.get(self, index).is_some()
    }
}

impl CrateMetadata {
    // NOTE(review): this constructor continues past the end of this excerpt;
    // only the visible prefix is documented here.
    pub(crate) fn new(
        sess: &Session,
        cstore: &CStore,
        blob: MetadataBlob,
        root: CrateRoot,
        raw_proc_macros: Option<&'static [ProcMacro]>,
        cnum: CrateNum,
        cnum_map: CrateNumMap,
        dep_kind: CrateDepKind,
        source: CrateSource,
        private_dep: bool,
        host_hash: Option<Svh>,
    ) -> CrateMetadata {
        // Eagerly index trait impls by (crate, trait index) for fast lookup.
        let trait_impls = root
            .impls
            .decode((&blob, sess))
            .map(|trait_impls| (trait_impls.trait_id, trait_impls.impls))
            .collect();
        let alloc_decoding_state =
            AllocDecodingState::new(root.interpret_alloc_index.decode(&blob).collect());
        let dependencies = Lock::new(cnum_map.iter().cloned().collect());

        // Pre-decode the DefPathHash->DefIndex table. This is a cheap operation
        // that does not copy any data. It just does some data verification.
        let def_path_hash_map = root.def_path_hash_map.decode(&blob);

        let mut cdata = CrateMetadata {
            blob,
            root,
            trait_impls,
            incoherent_impls: Default::default(),
            raw_proc_macros,
            source_map_import_info: OnceCell::new(),
            def_path_hash_map,
            expn_hash_map: Default::default(),
            alloc_decoding_state,
            cnum,
            cnum_map,
            dependencies,
            dep_kind: Lock::new(dep_kind),
            source: Lrc::new(source),
            private_dep,
            host_hash,
            extern_crate: Lock::new(None),
            hygiene_context: Default::default(),
            def_key_cache: Default::default(),
            def_path_hash_cache: Default::default(),
        };

        // Need `CrateMetadataRef` to decode `DefId`s in simplified types.
+ cdata.incoherent_impls = cdata
+ .root
+ .incoherent_impls
+ .decode(CrateMetadataRef { cdata: &cdata, cstore })
+ .map(|incoherent_impls| (incoherent_impls.self_ty, incoherent_impls.impls))
+ .collect();
+
+ cdata
+ }
+
+ /// Borrows the lock-guarded list of crates this crate depends on.
+ /// Seeded from `cnum_map` in `new`; may grow via `add_dependency`.
+ pub(crate) fn dependencies(&self) -> LockGuard<'_, Vec<CrateNum>> {
+ self.dependencies.borrow()
+ }
+
+ /// Records an additional dependency edge discovered after this metadata
+ /// was created.
+ pub(crate) fn add_dependency(&self, cnum: CrateNum) {
+ self.dependencies.borrow_mut().push(cnum);
+ }
+
+ /// Replaces the stored `ExternCrate` only when the candidate has a
+ /// strictly higher `rank()` (or nothing was stored yet). Returns whether
+ /// the replacement happened.
+ pub(crate) fn update_extern_crate(&self, new_extern_crate: ExternCrate) -> bool {
+ let mut extern_crate = self.extern_crate.borrow_mut();
+ // `Some(_) > None`, so any ranked candidate beats an empty slot.
+ let update = Some(new_extern_crate.rank()) > extern_crate.as_ref().map(ExternCrate::rank);
+ if update {
+ *extern_crate = Some(new_extern_crate);
+ }
+ update
+ }
+
+ pub(crate) fn source(&self) -> &CrateSource {
+ &*self.source
+ }
+
+ pub(crate) fn dep_kind(&self) -> CrateDepKind {
+ *self.dep_kind.lock()
+ }
+
+ /// Rewrites the dependency kind while holding its lock, so readers never
+ /// observe an intermediate state.
+ pub(crate) fn update_dep_kind(&self, f: impl FnOnce(CrateDepKind) -> CrateDepKind) {
+ self.dep_kind.with_lock(|dep_kind| *dep_kind = f(*dep_kind))
+ }
+
+ // The accessors below expose flags and values recorded verbatim in the
+ // crate root of the metadata blob.
+ pub(crate) fn required_panic_strategy(&self) -> Option<PanicStrategy> {
+ self.root.required_panic_strategy
+ }
+
+ pub(crate) fn needs_panic_runtime(&self) -> bool {
+ self.root.needs_panic_runtime
+ }
+
+ pub(crate) fn is_panic_runtime(&self) -> bool {
+ self.root.panic_runtime
+ }
+
+ pub(crate) fn is_profiler_runtime(&self) -> bool {
+ self.root.profiler_runtime
+ }
+
+ pub(crate) fn needs_allocator(&self) -> bool {
+ self.root.needs_allocator
+ }
+
+ pub(crate) fn has_global_allocator(&self) -> bool {
+ self.root.has_global_allocator
+ }
+
+ pub(crate) fn has_default_lib_allocator(&self) -> bool {
+ self.root.has_default_lib_allocator
+ }
+
+ pub(crate) fn is_proc_macro_crate(&self) -> bool {
+ self.root.is_proc_macro_crate()
+ }
+
+ pub(crate) fn name(&self) -> Symbol {
+ self.root.name
+ }
+
+ pub(crate) fn stable_crate_id(&self) -> StableCrateId {
+ self.root.stable_crate_id
+ }
+
+ pub(crate) fn hash(&self) -> Svh {
+ 
self.root.hash
+ }
+
+ /// Number of entries in this crate's def-key table, i.e. how many
+ /// `DefIndex`es the metadata defines.
+ fn num_def_ids(&self) -> usize {
+ self.root.tables.def_keys.size()
+ }
+
+ /// Builds a full `DefId` in this (external) crate from one of its
+ /// `DefIndex`es.
+ fn local_def_id(&self, index: DefIndex) -> DefId {
+ DefId { krate: self.cnum, index }
+ }
+
+ // Translate a DefId from the current compilation environment to a DefId
+ // for an external crate.
+ // Linear scan over `cnum_map`; returns `None` when `did.krate` is not
+ // among this crate's dependencies.
+ fn reverse_translate_def_id(&self, did: DefId) -> Option<DefId> {
+ for (local, &global) in self.cnum_map.iter_enumerated() {
+ if global == did.krate {
+ return Some(DefId { krate: local, index: did.index });
+ }
+ }
+
+ None
+ }
+}
+
+// Cannot be implemented on 'ProcMacro', as libproc_macro
+// does not depend on librustc_ast
+fn macro_kind(raw: &ProcMacro) -> MacroKind {
+ match raw {
+ ProcMacro::CustomDerive { .. } => MacroKind::Derive,
+ ProcMacro::Attr { .. } => MacroKind::Attr,
+ ProcMacro::Bang { .. } => MacroKind::Bang,
+ }
+}
diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs
new file mode 100644
index 000000000..38ce50e83
--- /dev/null
+++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs
@@ -0,0 +1,680 @@
+use crate::creader::{CStore, LoadedMacro};
+use crate::foreign_modules;
+use crate::native_libs;
+
+use rustc_ast as ast;
+use rustc_attr::Deprecation;
+use rustc_hir::def::{CtorKind, DefKind, Res};
+use rustc_hir::def_id::{CrateNum, DefId, DefIdMap, LOCAL_CRATE};
+use rustc_hir::definitions::{DefKey, DefPath, DefPathHash};
+use rustc_middle::arena::ArenaAllocatable;
+use rustc_middle::metadata::ModChild;
+use rustc_middle::middle::exported_symbols::ExportedSymbol;
+use rustc_middle::middle::stability::DeprecationEntry;
+use rustc_middle::ty::fast_reject::SimplifiedType;
+use rustc_middle::ty::query::{ExternProviders, Providers};
+use rustc_middle::ty::{self, TyCtxt, Visibility};
+use rustc_session::cstore::{CrateSource, CrateStore};
+use rustc_session::utils::NativeLibKind;
+use rustc_session::{Session, StableCrateId};
+use rustc_span::hygiene::{ExpnHash, ExpnId};
+use 
rustc_span::source_map::{Span, Spanned};
+use rustc_span::symbol::{kw, Symbol};
+
+use rustc_data_structures::sync::Lrc;
+use smallvec::SmallVec;
+use std::any::Any;
+
+use super::{Decodable, DecodeContext, DecodeIterator};
+
+/// Converts a value freshly decoded from metadata into the exact type a
+/// query provider must return. `err` is a diverging closure invoked when a
+/// value that must be present turned out to be absent.
+trait ProcessQueryValue<'tcx, T> {
+ fn process_decoded(self, _tcx: TyCtxt<'tcx>, _err: impl Fn() -> !) -> T;
+}
+
+// Optional table entry feeding an optional query result: pass through as-is.
+impl<T> ProcessQueryValue<'_, Option<T>> for Option<T> {
+ #[inline(always)]
+ fn process_decoded(self, _tcx: TyCtxt<'_>, _err: impl Fn() -> !) -> Option<T> {
+ self
+ }
+}
+
+// Mandatory value: a missing table entry diverges via `err`.
+impl<T> ProcessQueryValue<'_, T> for Option<T> {
+ #[inline(always)]
+ fn process_decoded(self, _tcx: TyCtxt<'_>, err: impl Fn() -> !) -> T {
+ if let Some(value) = self { value } else { err() }
+ }
+}
+
+// Mandatory value that the query returns by reference: move it into the
+// `'tcx` arena so the reference lives long enough.
+impl<'tcx, T: ArenaAllocatable<'tcx>> ProcessQueryValue<'tcx, &'tcx T> for Option<T> {
+ #[inline(always)]
+ fn process_decoded(self, tcx: TyCtxt<'tcx>, err: impl Fn() -> !) -> &'tcx T {
+ if let Some(value) = self { tcx.arena.alloc(value) } else { err() }
+ }
+}
+
+// Here a missing entry is a legitimate result: wrap it as `Ok(None)`.
+impl<T, E> ProcessQueryValue<'_, Result<Option<T>, E>> for Option<T> {
+ #[inline(always)]
+ fn process_decoded(self, _tcx: TyCtxt<'_>, _err: impl Fn() -> !) -> Result<Option<T>, E> {
+ Ok(self)
+ }
+}
+
+// Lazily-decoded sequence: materialize into an arena-allocated slice; an
+// absent entry becomes the empty slice rather than an error.
+impl<'a, 'tcx, T: Copy + Decodable<DecodeContext<'a, 'tcx>>> ProcessQueryValue<'tcx, &'tcx [T]>
+ for Option<DecodeIterator<'a, 'tcx, T>>
+{
+ #[inline(always)]
+ fn process_decoded(self, tcx: TyCtxt<'tcx>, _err: impl Fn() -> !) -> &'tcx [T] {
+ if let Some(iter) = self { tcx.arena.alloc_from_iter(iter) } else { &[] }
+ }
+}
+
+// Decoded `Deprecation` attributes become "external" deprecation entries.
+impl ProcessQueryValue<'_, Option<DeprecationEntry>> for Option<Deprecation> {
+ #[inline(always)]
+ fn process_decoded(self, _tcx: TyCtxt<'_>, _err: impl Fn() -> !) -> Option<DeprecationEntry> {
+ self.map(DeprecationEntry::external)
+ }
+}
+
+macro_rules! provide_one {
+ (<$lt:tt> $tcx:ident, $def_id:ident, $other:ident, $cdata:ident, $name:ident => { table }) => {
+ provide_one!
{ + <$lt> $tcx, $def_id, $other, $cdata, $name => { + $cdata + .root + .tables + .$name + .get($cdata, $def_id.index) + .map(|lazy| lazy.decode(($cdata, $tcx))) + .process_decoded($tcx, || panic!("{:?} does not have a {:?}", $def_id, stringify!($name))) + } + } + }; + (<$lt:tt> $tcx:ident, $def_id:ident, $other:ident, $cdata:ident, $name:ident => { table_direct }) => { + provide_one! { + <$lt> $tcx, $def_id, $other, $cdata, $name => { + // We don't decode `table_direct`, since it's not a Lazy, but an actual value + $cdata + .root + .tables + .$name + .get($cdata, $def_id.index) + .process_decoded($tcx, || panic!("{:?} does not have a {:?}", $def_id, stringify!($name))) + } + } + }; + (<$lt:tt> $tcx:ident, $def_id:ident, $other:ident, $cdata:ident, $name:ident => $compute:block) => { + fn $name<$lt>( + $tcx: TyCtxt<$lt>, + def_id_arg: ty::query::query_keys::$name<$lt>, + ) -> ty::query::query_values::$name<$lt> { + let _prof_timer = + $tcx.prof.generic_activity(concat!("metadata_decode_entry_", stringify!($name))); + + #[allow(unused_variables)] + let ($def_id, $other) = def_id_arg.into_args(); + assert!(!$def_id.is_local()); + + // External query providers call `crate_hash` in order to register a dependency + // on the crate metadata. The exception is `crate_hash` itself, which obviously + // doesn't need to do this (and can't, as it would cause a query cycle). + use rustc_middle::dep_graph::DepKind; + if DepKind::$name != DepKind::crate_hash && $tcx.dep_graph.is_fully_enabled() { + $tcx.ensure().crate_hash($def_id.krate); + } + + let $cdata = CStore::from_tcx($tcx).get_crate_data($def_id.krate); + + $compute + } + }; +} + +macro_rules! provide { + (<$lt:tt> $tcx:ident, $def_id:ident, $other:ident, $cdata:ident, + $($name:ident => { $($compute:tt)* })*) => { + pub fn provide_extern(providers: &mut ExternProviders) { + $(provide_one! 
{
+ <$lt> $tcx, $def_id, $other, $cdata, $name => { $($compute)* }
+ })*
+
+ *providers = ExternProviders {
+ $($name,)*
+ ..*providers
+ };
+ }
+ }
+}
+
+// small trait to work around different signature queries all being defined via
+// the macro above.
+// Every query key is normalized to a `(DefId, Other)` pair; keys carrying no
+// extra payload use `Other = ()`.
+trait IntoArgs {
+ type Other;
+ fn into_args(self) -> (DefId, Self::Other);
+}
+
+impl IntoArgs for DefId {
+ type Other = ();
+ fn into_args(self) -> (DefId, ()) {
+ (self, ())
+ }
+}
+
+// Crate-level queries are keyed by the crate root's `DefId`.
+impl IntoArgs for CrateNum {
+ type Other = ();
+ fn into_args(self) -> (DefId, ()) {
+ (self.as_def_id(), ())
+ }
+}
+
+impl IntoArgs for (CrateNum, DefId) {
+ type Other = DefId;
+ fn into_args(self) -> (DefId, DefId) {
+ (self.0.as_def_id(), self.1)
+ }
+}
+
+// Instance-keyed queries dispatch on the instance's underlying `DefId`.
+impl<'tcx> IntoArgs for ty::InstanceDef<'tcx> {
+ type Other = ();
+ fn into_args(self) -> (DefId, ()) {
+ (self.def_id(), ())
+ }
+}
+
+impl IntoArgs for (CrateNum, SimplifiedType) {
+ type Other = SimplifiedType;
+ fn into_args(self) -> (DefId, SimplifiedType) {
+ (self.0.as_def_id(), self.1)
+ }
+}
+
+provide!
{ <'tcx> tcx, def_id, other, cdata, + explicit_item_bounds => { table } + explicit_predicates_of => { table } + generics_of => { table } + inferred_outlives_of => { table } + super_predicates_of => { table } + type_of => { table } + variances_of => { table } + fn_sig => { table } + codegen_fn_attrs => { table } + impl_trait_ref => { table } + const_param_default => { table } + thir_abstract_const => { table } + optimized_mir => { table } + mir_for_ctfe => { table } + promoted_mir => { table } + def_span => { table } + def_ident_span => { table } + lookup_stability => { table } + lookup_const_stability => { table } + lookup_deprecation_entry => { table } + visibility => { table } + unused_generic_params => { table } + opt_def_kind => { table_direct } + impl_parent => { table } + impl_polarity => { table_direct } + impl_defaultness => { table_direct } + constness => { table_direct } + coerce_unsized_info => { table } + mir_const_qualif => { table } + rendered_const => { table } + asyncness => { table_direct } + fn_arg_names => { table } + generator_kind => { table } + trait_def => { table } + + adt_def => { cdata.get_adt_def(def_id.index, tcx) } + adt_destructor => { + let _ = cdata; + tcx.calculate_dtor(def_id, |_,_| Ok(())) + } + associated_item_def_ids => { + tcx.arena.alloc_from_iter(cdata.get_associated_item_def_ids(def_id.index, tcx.sess)) + } + associated_item => { cdata.get_associated_item(def_id.index) } + inherent_impls => { cdata.get_inherent_implementations_for_type(tcx, def_id.index) } + is_foreign_item => { cdata.is_foreign_item(def_id.index) } + item_attrs => { tcx.arena.alloc_from_iter(cdata.get_item_attrs(def_id.index, tcx.sess)) } + is_mir_available => { cdata.is_item_mir_available(def_id.index) } + is_ctfe_mir_available => { cdata.is_ctfe_mir_available(def_id.index) } + + dylib_dependency_formats => { cdata.get_dylib_dependency_formats(tcx) } + is_private_dep => { cdata.private_dep } + is_panic_runtime => { cdata.root.panic_runtime } + 
is_compiler_builtins => { cdata.root.compiler_builtins } + has_global_allocator => { cdata.root.has_global_allocator } + has_panic_handler => { cdata.root.has_panic_handler } + is_profiler_runtime => { cdata.root.profiler_runtime } + required_panic_strategy => { cdata.root.required_panic_strategy } + panic_in_drop_strategy => { cdata.root.panic_in_drop_strategy } + extern_crate => { + let r = *cdata.extern_crate.lock(); + r.map(|c| &*tcx.arena.alloc(c)) + } + is_no_builtins => { cdata.root.no_builtins } + symbol_mangling_version => { cdata.root.symbol_mangling_version } + reachable_non_generics => { + let reachable_non_generics = tcx + .exported_symbols(cdata.cnum) + .iter() + .filter_map(|&(exported_symbol, export_info)| { + if let ExportedSymbol::NonGeneric(def_id) = exported_symbol { + Some((def_id, export_info)) + } else { + None + } + }) + .collect(); + + reachable_non_generics + } + native_libraries => { cdata.get_native_libraries(tcx.sess).collect() } + foreign_modules => { cdata.get_foreign_modules(tcx.sess).map(|m| (m.def_id, m)).collect() } + crate_hash => { cdata.root.hash } + crate_host_hash => { cdata.host_hash } + crate_name => { cdata.root.name } + + extra_filename => { cdata.root.extra_filename.clone() } + + traits_in_crate => { tcx.arena.alloc_from_iter(cdata.get_traits()) } + implementations_of_trait => { cdata.get_implementations_of_trait(tcx, other) } + crate_incoherent_impls => { cdata.get_incoherent_impls(tcx, other) } + + dep_kind => { + let r = *cdata.dep_kind.lock(); + r + } + module_children => { + let mut result = SmallVec::<[_; 8]>::new(); + cdata.for_each_module_child(def_id.index, |child| result.push(child), tcx.sess); + tcx.arena.alloc_slice(&result) + } + defined_lib_features => { cdata.get_lib_features(tcx) } + stability_implications => { + cdata.get_stability_implications(tcx).iter().copied().collect() + } + is_intrinsic => { cdata.get_is_intrinsic(def_id.index) } + defined_lang_items => { cdata.get_lang_items(tcx) } + 
diagnostic_items => { cdata.get_diagnostic_items() } + missing_lang_items => { cdata.get_missing_lang_items(tcx) } + + missing_extern_crate_item => { + let r = matches!(*cdata.extern_crate.borrow(), Some(extern_crate) if !extern_crate.is_direct()); + r + } + + used_crate_source => { Lrc::clone(&cdata.source) } + debugger_visualizers => { cdata.get_debugger_visualizers() } + + exported_symbols => { + let syms = cdata.exported_symbols(tcx); + + // FIXME rust-lang/rust#64319, rust-lang/rust#64872: We want + // to block export of generics from dylibs, but we must fix + // rust-lang/rust#65890 before we can do that robustly. + + syms + } + + crate_extern_paths => { cdata.source().paths().cloned().collect() } + expn_that_defined => { cdata.get_expn_that_defined(def_id.index, tcx.sess) } + generator_diagnostic_data => { cdata.get_generator_diagnostic_data(tcx, def_id.index) } +} + +pub(in crate::rmeta) fn provide(providers: &mut Providers) { + // FIXME(#44234) - almost all of these queries have no sub-queries and + // therefore no actual inputs, they're just reading tables calculated in + // resolve! Does this work? Unsure! That's what the issue is about + *providers = Providers { + allocator_kind: |tcx, ()| CStore::from_tcx(tcx).allocator_kind(), + is_dllimport_foreign_item: |tcx, id| match tcx.native_library_kind(id) { + Some( + NativeLibKind::Dylib { .. } | NativeLibKind::RawDylib | NativeLibKind::Unspecified, + ) => true, + _ => false, + }, + is_statically_included_foreign_item: |tcx, id| { + matches!(tcx.native_library_kind(id), Some(NativeLibKind::Static { .. 
})) + }, + is_private_dep: |_tcx, cnum| { + assert_eq!(cnum, LOCAL_CRATE); + false + }, + native_library_kind: |tcx, id| { + tcx.native_libraries(id.krate) + .iter() + .filter(|lib| native_libs::relevant_lib(&tcx.sess, lib)) + .find(|lib| { + let Some(fm_id) = lib.foreign_module else { + return false; + }; + let map = tcx.foreign_modules(id.krate); + map.get(&fm_id) + .expect("failed to find foreign module") + .foreign_items + .contains(&id) + }) + .map(|l| l.kind) + }, + native_libraries: |tcx, cnum| { + assert_eq!(cnum, LOCAL_CRATE); + native_libs::collect(tcx) + }, + foreign_modules: |tcx, cnum| { + assert_eq!(cnum, LOCAL_CRATE); + foreign_modules::collect(tcx).into_iter().map(|m| (m.def_id, m)).collect() + }, + + // Returns a map from a sufficiently visible external item (i.e., an + // external item that is visible from at least one local module) to a + // sufficiently visible parent (considering modules that re-export the + // external item to be parents). + visible_parent_map: |tcx, ()| { + use std::collections::hash_map::Entry; + use std::collections::vec_deque::VecDeque; + + let mut visible_parent_map: DefIdMap<DefId> = Default::default(); + // This is a secondary visible_parent_map, storing the DefId of + // parents that re-export the child as `_` or module parents + // which are `#[doc(hidden)]`. Since we prefer paths that don't + // do this, merge this map at the end, only if we're missing + // keys from the former. + // This is a rudimentary check that does not catch all cases, + // just the easiest. + let mut fallback_map: DefIdMap<DefId> = Default::default(); + + // Issue 46112: We want the map to prefer the shortest + // paths when reporting the path to an item. Therefore we + // build up the map via a breadth-first search (BFS), + // which naturally yields minimal-length paths. 
+ // + // Note that it needs to be a BFS over the whole forest of + // crates, not just each individual crate; otherwise you + // only get paths that are locally minimal with respect to + // whatever crate we happened to encounter first in this + // traversal, but not globally minimal across all crates. + let bfs_queue = &mut VecDeque::new(); + + for &cnum in tcx.crates(()) { + // Ignore crates without a corresponding local `extern crate` item. + if tcx.missing_extern_crate_item(cnum) { + continue; + } + + bfs_queue.push_back(cnum.as_def_id()); + } + + let mut add_child = |bfs_queue: &mut VecDeque<_>, child: &ModChild, parent: DefId| { + if !child.vis.is_public() { + return; + } + + if let Some(def_id) = child.res.opt_def_id() { + if child.ident.name == kw::Underscore { + fallback_map.insert(def_id, parent); + return; + } + + if ty::util::is_doc_hidden(tcx, parent) { + fallback_map.insert(def_id, parent); + return; + } + + match visible_parent_map.entry(def_id) { + Entry::Occupied(mut entry) => { + // If `child` is defined in crate `cnum`, ensure + // that it is mapped to a parent in `cnum`. + if def_id.is_local() && entry.get().is_local() { + entry.insert(parent); + } + } + Entry::Vacant(entry) => { + entry.insert(parent); + if matches!( + child.res, + Res::Def(DefKind::Mod | DefKind::Enum | DefKind::Trait, _) + ) { + bfs_queue.push_back(def_id); + } + } + } + } + }; + + while let Some(def) = bfs_queue.pop_front() { + for child in tcx.module_children(def).iter() { + add_child(bfs_queue, child, def); + } + } + + // Fill in any missing entries with the less preferable path. + // If this path re-exports the child as `_`, we still use this + // path in a diagnostic that suggests importing `::*`. 
+ for (child, parent) in fallback_map { + visible_parent_map.entry(child).or_insert(parent); + } + + visible_parent_map + }, + + dependency_formats: |tcx, ()| Lrc::new(crate::dependency_format::calculate(tcx)), + has_global_allocator: |tcx, cnum| { + assert_eq!(cnum, LOCAL_CRATE); + CStore::from_tcx(tcx).has_global_allocator() + }, + postorder_cnums: |tcx, ()| { + tcx.arena + .alloc_slice(&CStore::from_tcx(tcx).crate_dependencies_in_postorder(LOCAL_CRATE)) + }, + crates: |tcx, ()| tcx.arena.alloc_from_iter(CStore::from_tcx(tcx).crates_untracked()), + ..*providers + }; +} + +impl CStore { + pub fn struct_field_names_untracked<'a>( + &'a self, + def: DefId, + sess: &'a Session, + ) -> impl Iterator<Item = Spanned<Symbol>> + 'a { + self.get_crate_data(def.krate).get_struct_field_names(def.index, sess) + } + + pub fn struct_field_visibilities_untracked( + &self, + def: DefId, + ) -> impl Iterator<Item = Visibility> + '_ { + self.get_crate_data(def.krate).get_struct_field_visibilities(def.index) + } + + pub fn ctor_def_id_and_kind_untracked(&self, def: DefId) -> Option<(DefId, CtorKind)> { + self.get_crate_data(def.krate).get_ctor_def_id_and_kind(def.index) + } + + pub fn visibility_untracked(&self, def: DefId) -> Visibility { + self.get_crate_data(def.krate).get_visibility(def.index) + } + + pub fn module_children_untracked(&self, def_id: DefId, sess: &Session) -> Vec<ModChild> { + let mut result = vec![]; + self.get_crate_data(def_id.krate).for_each_module_child( + def_id.index, + |child| result.push(child), + sess, + ); + result + } + + pub fn load_macro_untracked(&self, id: DefId, sess: &Session) -> LoadedMacro { + let _prof_timer = sess.prof.generic_activity("metadata_load_macro"); + + let data = self.get_crate_data(id.krate); + if data.root.is_proc_macro_crate() { + return LoadedMacro::ProcMacro(data.load_proc_macro(id.index, sess)); + } + + let span = data.get_span(id.index, sess); + + LoadedMacro::MacroDef( + ast::Item { + ident: data.item_ident(id.index, sess), 
+ id: ast::DUMMY_NODE_ID, + span, + attrs: data.get_item_attrs(id.index, sess).collect(), + kind: ast::ItemKind::MacroDef(data.get_macro(id.index, sess)), + vis: ast::Visibility { + span: span.shrink_to_lo(), + kind: ast::VisibilityKind::Inherited, + tokens: None, + }, + tokens: None, + }, + data.root.edition, + ) + } + + pub fn fn_has_self_parameter_untracked(&self, def: DefId) -> bool { + self.get_crate_data(def.krate).get_fn_has_self_parameter(def.index) + } + + pub fn crate_source_untracked(&self, cnum: CrateNum) -> Lrc<CrateSource> { + self.get_crate_data(cnum).source.clone() + } + + pub fn get_span_untracked(&self, def_id: DefId, sess: &Session) -> Span { + self.get_crate_data(def_id.krate).get_span(def_id.index, sess) + } + + pub fn def_kind(&self, def: DefId) -> DefKind { + self.get_crate_data(def.krate).def_kind(def.index) + } + + pub fn crates_untracked(&self) -> impl Iterator<Item = CrateNum> + '_ { + self.iter_crate_data().map(|(cnum, _)| cnum) + } + + pub fn item_generics_num_lifetimes(&self, def_id: DefId, sess: &Session) -> usize { + self.get_crate_data(def_id.krate).get_generics(def_id.index, sess).own_counts().lifetimes + } + + pub fn module_expansion_untracked(&self, def_id: DefId, sess: &Session) -> ExpnId { + self.get_crate_data(def_id.krate).module_expansion(def_id.index, sess) + } + + /// Only public-facing way to traverse all the definitions in a non-local crate. + /// Critically useful for this third-party project: <https://github.com/hacspec/hacspec>. + /// See <https://github.com/rust-lang/rust/pull/85889> for context. 
+ pub fn num_def_ids_untracked(&self, cnum: CrateNum) -> usize { + self.get_crate_data(cnum).num_def_ids() + } + + pub fn item_attrs_untracked<'a>( + &'a self, + def_id: DefId, + sess: &'a Session, + ) -> impl Iterator<Item = ast::Attribute> + 'a { + self.get_crate_data(def_id.krate).get_item_attrs(def_id.index, sess) + } + + pub fn get_proc_macro_quoted_span_untracked( + &self, + cnum: CrateNum, + id: usize, + sess: &Session, + ) -> Span { + self.get_crate_data(cnum).get_proc_macro_quoted_span(id, sess) + } + + /// Decodes all traits in the crate (for rustdoc). + pub fn traits_in_crate_untracked(&self, cnum: CrateNum) -> impl Iterator<Item = DefId> + '_ { + self.get_crate_data(cnum).get_traits() + } + + /// Decodes all trait impls in the crate (for rustdoc). + pub fn trait_impls_in_crate_untracked( + &self, + cnum: CrateNum, + ) -> impl Iterator<Item = (DefId, DefId, Option<SimplifiedType>)> + '_ { + self.get_crate_data(cnum).get_trait_impls() + } + + /// Decodes all inherent impls in the crate (for rustdoc). + pub fn inherent_impls_in_crate_untracked( + &self, + cnum: CrateNum, + ) -> impl Iterator<Item = (DefId, DefId)> + '_ { + self.get_crate_data(cnum).get_inherent_impls() + } + + /// Decodes all incoherent inherent impls in the crate (for rustdoc). 
+ pub fn incoherent_impls_in_crate_untracked( + &self, + cnum: CrateNum, + ) -> impl Iterator<Item = DefId> + '_ { + self.get_crate_data(cnum).get_all_incoherent_impls() + } + + pub fn associated_item_def_ids_untracked<'a>( + &'a self, + def_id: DefId, + sess: &'a Session, + ) -> impl Iterator<Item = DefId> + 'a { + self.get_crate_data(def_id.krate).get_associated_item_def_ids(def_id.index, sess) + } + + pub fn may_have_doc_links_untracked(&self, def_id: DefId) -> bool { + self.get_crate_data(def_id.krate).get_may_have_doc_links(def_id.index) + } +} + +impl CrateStore for CStore { + fn as_any(&self) -> &dyn Any { + self + } + + fn crate_name(&self, cnum: CrateNum) -> Symbol { + self.get_crate_data(cnum).root.name + } + + fn stable_crate_id(&self, cnum: CrateNum) -> StableCrateId { + self.get_crate_data(cnum).root.stable_crate_id + } + + fn stable_crate_id_to_crate_num(&self, stable_crate_id: StableCrateId) -> CrateNum { + self.stable_crate_ids[&stable_crate_id] + } + + /// Returns the `DefKey` for a given `DefId`. This indicates the + /// parent `DefId` as well as some idea of what kind of data the + /// `DefId` refers to. 
+ fn def_key(&self, def: DefId) -> DefKey { + self.get_crate_data(def.krate).def_key(def.index) + } + + fn def_path(&self, def: DefId) -> DefPath { + self.get_crate_data(def.krate).def_path(def.index) + } + + fn def_path_hash(&self, def: DefId) -> DefPathHash { + self.get_crate_data(def.krate).def_path_hash(def.index) + } + + fn def_path_hash_to_def_id(&self, cnum: CrateNum, hash: DefPathHash) -> DefId { + let def_index = self.get_crate_data(cnum).def_path_hash_to_def_index(hash); + DefId { krate: cnum, index: def_index } + } + + fn expn_hash_to_expn_id( + &self, + sess: &Session, + cnum: CrateNum, + index_guess: u32, + hash: ExpnHash, + ) -> ExpnId { + self.get_crate_data(cnum).expn_hash_to_expn_id(sess, index_guess, hash) + } + + fn import_source_files(&self, sess: &Session, cnum: CrateNum) { + self.get_crate_data(cnum).imported_source_files(sess); + } +} diff --git a/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs new file mode 100644 index 000000000..40c94b372 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs @@ -0,0 +1,65 @@ +use crate::rmeta::DecodeContext; +use crate::rmeta::EncodeContext; +use crate::rmeta::MetadataBlob; +use rustc_data_structures::owning_ref::OwningRef; +use rustc_hir::def_path_hash_map::{Config as HashMapConfig, DefPathHashMap}; +use rustc_middle::parameterized_over_tcx; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use rustc_span::def_id::{DefIndex, DefPathHash}; + +pub(crate) enum DefPathHashMapRef<'tcx> { + OwnedFromMetadata(odht::HashTable<HashMapConfig, OwningRef<MetadataBlob, [u8]>>), + BorrowedFromTcx(&'tcx DefPathHashMap), +} + +parameterized_over_tcx! 
{
+ DefPathHashMapRef,
+}
+
+impl DefPathHashMapRef<'_> {
+ /// Looks up the `DefIndex` for a `DefPathHash` in the table decoded from
+ /// metadata. Only valid on the `OwnedFromMetadata` variant.
+ // NOTE(review): `.unwrap()` assumes the hash is always present in the
+ // table; an absent hash would panic here.
+ #[inline]
+ pub fn def_path_hash_to_def_index(&self, def_path_hash: &DefPathHash) -> DefIndex {
+ match *self {
+ DefPathHashMapRef::OwnedFromMetadata(ref map) => map.get(def_path_hash).unwrap(),
+ DefPathHashMapRef::BorrowedFromTcx(_) => {
+ panic!("DefPathHashMap::BorrowedFromTcx variant only exists for serialization")
+ }
+ }
+ }
+}
+
+// Serialization side: write the table's raw bytes, length-prefixed, so the
+// decoder below can slice them straight out of the blob.
+impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for DefPathHashMapRef<'tcx> {
+ fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
+ match *self {
+ DefPathHashMapRef::BorrowedFromTcx(def_path_hash_map) => {
+ let bytes = def_path_hash_map.raw_bytes();
+ e.emit_usize(bytes.len());
+ e.emit_raw_bytes(bytes);
+ }
+ DefPathHashMapRef::OwnedFromMetadata(_) => {
+ panic!("DefPathHashMap::OwnedFromMetadata variant only exists for deserialization")
+ }
+ }
+ }
+}
+
+impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMapRef<'static> {
+ fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> {
+ // Import TyDecoder so we can access the DecodeContext::position() method
+ use crate::rustc_middle::ty::codec::TyDecoder;
+
+ let len = d.read_usize();
+ let pos = d.position();
+ // Zero-copy: the hash table borrows its bytes directly out of the
+ // metadata blob via `OwningRef` instead of copying them.
+ let o = OwningRef::new(d.blob().clone()).map(|x| &x[pos..pos + len]);
+
+ // Although we already have the data we need via the OwningRef, we still need
+ // to advance the DecodeContext's position so it's in a valid state after
+ // the method. We use read_raw_bytes() for that.
+ let _ = d.read_raw_bytes(len); + + let inner = odht::HashTable::from_raw_bytes(o).unwrap_or_else(|e| { + panic!("decode error: {}", e); + }); + DefPathHashMapRef::OwnedFromMetadata(inner) + } +} diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs new file mode 100644 index 000000000..33278367c --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -0,0 +1,2302 @@ +use crate::rmeta::def_path_hash_map::DefPathHashMapRef; +use crate::rmeta::table::TableBuilder; +use crate::rmeta::*; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; +use rustc_data_structures::memmap::{Mmap, MmapMut}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::{join, par_iter, Lrc, ParallelIterator}; +use rustc_data_structures::temp_dir::MaybeTempDir; +use rustc_hir as hir; +use rustc_hir::def::DefKind; +use rustc_hir::def_id::{ + CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_ID, CRATE_DEF_INDEX, LOCAL_CRATE, +}; +use rustc_hir::definitions::DefPathData; +use rustc_hir::intravisit::{self, Visitor}; +use rustc_hir::lang_items; +use rustc_hir::{AnonConst, GenericParamKind}; +use rustc_index::bit_set::GrowableBitSet; +use rustc_middle::hir::nested_filter; +use rustc_middle::middle::dependency_format::Linkage; +use rustc_middle::middle::exported_symbols::{ + metadata_symbol_name, ExportedSymbol, SymbolExportInfo, +}; +use rustc_middle::mir::interpret; +use rustc_middle::traits::specialization_graph; +use rustc_middle::ty::codec::TyEncoder; +use rustc_middle::ty::fast_reject::{self, SimplifiedType, TreatParams}; +use rustc_middle::ty::query::Providers; +use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt}; +use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder}; +use rustc_session::config::CrateType; +use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib}; +use 
/// All state threaded through crate-metadata encoding: the output stream
/// (`opaque`), interning/shorthand caches, and the bookkeeping used to make
/// `Span` and hygiene encoding cheap and proc-macro-safe.
pub(super) struct EncodeContext<'a, 'tcx> {
    opaque: opaque::FileEncoder,
    tcx: TyCtxt<'tcx>,
    feat: &'tcx rustc_feature::Features,

    tables: TableBuilders,

    // State for the `LazyValue`/`LazyArray` protocol: lazy positions are
    // emitted as relative distances, so we track where the last node started.
    lazy_state: LazyState,
    type_shorthands: FxHashMap<Ty<'tcx>, usize>,
    predicate_shorthands: FxHashMap<ty::PredicateKind<'tcx>, usize>,

    // Every `AllocId` referenced while encoding; the allocations themselves
    // are serialized by a fixpoint loop in `encode_crate_root`.
    interpret_allocs: FxIndexSet<interpret::AllocId>,

    // This is used to speed up Span encoding.
    // The `usize` is an index into the `MonotonicVec`
    // that stores the `SourceFile`
    source_file_cache: (Lrc<SourceFile>, usize),
    // The indices (into the `SourceMap`'s `MonotonicVec`)
    // of all of the `SourceFiles` that we need to serialize.
    // When we serialize a `Span`, we insert the index of its
    // `SourceFile` into the `GrowableBitSet`.
    //
    // This needs to be a `GrowableBitSet` and not a
    // regular `BitSet` because we may actually import new `SourceFiles`
    // during metadata encoding, due to executing a query
    // with a result containing a foreign `Span`.
    required_source_files: Option<GrowableBitSet<usize>>,
    is_proc_macro: bool,
    hygiene_ctxt: &'a HygieneEncodeContext,
}

/// If the current crate is a proc-macro, returns early with `LazyArray::empty()`.
/// This is useful for skipping the encoding of things that aren't needed
/// for proc-macro crates.
macro_rules! empty_proc_macro {
    ($self:ident) => {
        if $self.is_proc_macro {
            return LazyArray::empty();
        }
    };
}
// Forwards each primitive-emitting `Encoder` method to the underlying opaque
// file encoder, avoiding the hand-written boilerplate.
macro_rules! encoder_methods {
    ($($name:ident($ty:ty);)*) => {
        $(fn $name(&mut self, value: $ty) {
            self.opaque.$name(value)
        })*
    }
}

impl<'a, 'tcx> Encoder for EncodeContext<'a, 'tcx> {
    encoder_methods! {
        emit_usize(usize);
        emit_u128(u128);
        emit_u64(u64);
        emit_u32(u32);
        emit_u16(u16);
        emit_u8(u8);

        emit_isize(isize);
        emit_i128(i128);
        emit_i64(i64);
        emit_i32(i32);
        emit_i16(i16);
        emit_i8(i8);

        emit_bool(bool);
        emit_f64(f64);
        emit_f32(f32);
        emit_char(char);
        emit_str(&str);
        emit_raw_bytes(&[u8]);
    }
}

// A `LazyValue` is serialized as a relative distance to its payload.
impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyValue<T> {
    fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
        e.emit_lazy_distance(self.position);
    }
}

// A `LazyArray` stores its element count inline; its position is only
// meaningful (and only emitted) when the array is non-empty.
impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {
    fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
        e.emit_usize(self.num_elems);
        if self.num_elems > 0 {
            e.emit_lazy_distance(self.position)
        }
    }
}

// A `LazyTable` records its encoded byte size followed by its position.
impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
    fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
        e.emit_usize(self.encoded_size);
        e.emit_lazy_distance(self.position);
    }
}

impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for CrateNum {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        // Proc-macro crates are loaded without their dependencies, so their
        // metadata must never reference a foreign crate.
        if *self != LOCAL_CRATE && s.is_proc_macro {
            panic!("Attempted to encode non-local CrateNum {:?} for proc-macro crate", self);
        }
        s.emit_u32(self.as_u32());
    }
}

impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for DefIndex {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        s.emit_u32(self.as_u32());
    }
}

impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnIndex {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        s.emit_u32(self.as_u32());
    }
}

// Delegates to the hygiene machinery, which interns the context and encodes
// a reference to it.
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for SyntaxContext {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        rustc_span::hygiene::raw_encode_syntax_context(*self, &s.hygiene_ctxt, s);
    }
}
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnId {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        if self.krate == LOCAL_CRATE {
            // We will only write details for local expansions. Non-local expansions will fetch
            // data from the corresponding crate's metadata.
            // FIXME(#43047) FIXME(#74731) We may eventually want to avoid relying on external
            // metadata from proc-macro crates.
            s.hygiene_ctxt.schedule_expn_data_for_encoding(*self);
        }
        self.krate.encode(s);
        self.local_id.encode(s);
    }
}

/// A `Span` is written as: syntax context, then a tag
/// (`TAG_PARTIAL_SPAN`, `TAG_VALID_SPAN_LOCAL`, or `TAG_VALID_SPAN_FOREIGN`),
/// then for valid spans `lo`, the length, and — for foreign spans — the
/// originating `CrateNum`.
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        let span = self.data();

        // Don't serialize any `SyntaxContext`s from a proc-macro crate,
        // since we don't load proc-macro dependencies during serialization.
        // This means that any hygiene information from macros used *within*
        // a proc-macro crate (e.g. invoking a macro that expands to a proc-macro
        // definition) will be lost.
        //
        // This can show up in two ways:
        //
        // 1. Any hygiene information associated with identifier of
        // a proc macro (e.g. `#[proc_macro] pub fn $name`) will be lost.
        // Since proc-macros can only be invoked from a different crate,
        // real code should never need to care about this.
        //
        // 2. Using `Span::def_site` or `Span::mixed_site` will not
        // include any hygiene information associated with the definition
        // site. This means that a proc-macro cannot emit a `$crate`
        // identifier which resolves to one of its dependencies,
        // which also should never come up in practice.
        //
        // Additionally, this affects `Span::parent`, and any other
        // span inspection APIs that would otherwise allow traversing
        // the `SyntaxContexts` associated with a span.
        //
        // None of these user-visible effects should result in any
        // cross-crate inconsistencies (getting one behavior in the same
        // crate, and a different behavior in another crate) due to the
        // limited surface that proc-macros can expose.
        //
        // IMPORTANT: If this is ever changed, be sure to update
        // `rustc_span::hygiene::raw_encode_expn_id` to handle
        // encoding `ExpnData` for proc-macro crates.
        if s.is_proc_macro {
            SyntaxContext::root().encode(s);
        } else {
            span.ctxt.encode(s);
        }

        if self.is_dummy() {
            return TAG_PARTIAL_SPAN.encode(s);
        }

        // The Span infrastructure should make sure that this invariant holds:
        debug_assert!(span.lo <= span.hi);

        // Refresh the cached `SourceFile` if the current one doesn't cover `lo`.
        if !s.source_file_cache.0.contains(span.lo) {
            let source_map = s.tcx.sess.source_map();
            let source_file_index = source_map.lookup_source_file_idx(span.lo);
            s.source_file_cache =
                (source_map.files()[source_file_index].clone(), source_file_index);
        }

        if !s.source_file_cache.0.contains(span.hi) {
            // Unfortunately, macro expansion still sometimes generates Spans
            // that are malformed in this way.
            return TAG_PARTIAL_SPAN.encode(s);
        }

        let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!");
        // Record the fact that we need to encode the data for this `SourceFile`
        source_files.insert(s.source_file_cache.1);

        // There are two possible cases here:
        // 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
        // crate we are writing metadata for. When the metadata for *this* crate gets
        // deserialized, the deserializer will need to know which crate it originally came
        // from. We use `TAG_VALID_SPAN_FOREIGN` to indicate that a `CrateNum` should
        // be deserialized after the rest of the span data, which tells the deserializer
        // which crate contains the source map information.
        // 2. This span comes from our own crate. No special handling is needed - we just
        // write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
        // our own source map information.
        //
        // If we're a proc-macro crate, we always treat this as a local `Span`.
        // In `encode_source_map`, we serialize foreign `SourceFile`s into our metadata
        // if we're a proc-macro crate.
        // This allows us to avoid loading the dependencies of proc-macro crates: all of
        // the information we need to decode `Span`s is stored in the proc-macro crate.
        let (tag, lo, hi) = if s.source_file_cache.0.is_imported() && !s.is_proc_macro {
            // To simplify deserialization, we 'rebase' this span onto the crate it originally came from
            // (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values
            // are relative to the source map information for the 'foreign' crate whose CrateNum
            // we write into the metadata. This allows `imported_source_files` to binary
            // search through the 'foreign' crate's source map information, using the
            // deserialized 'lo' and 'hi' values directly.
            //
            // All of this logic ensures that the final result of deserialization is a 'normal'
            // Span that can be used without any additional trouble.
            let external_start_pos = {
                // Introduce a new scope so that we drop the 'lock()' temporary
                match &*s.source_file_cache.0.external_src.lock() {
                    ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
                    src => panic!("Unexpected external source {:?}", src),
                }
            };
            let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos;
            let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos;

            (TAG_VALID_SPAN_FOREIGN, lo, hi)
        } else {
            (TAG_VALID_SPAN_LOCAL, span.lo, span.hi)
        };

        tag.encode(s);
        lo.encode(s);

        // Encode length which is usually less than span.hi and profits more
        // from the variable-length integer encoding that we use.
        let len = hi - lo;
        len.encode(s);

        if tag == TAG_VALID_SPAN_FOREIGN {
            // This needs to be two lines to avoid holding the `s.source_file_cache`
            // while calling `cnum.encode(s)`
            let cnum = s.source_file_cache.0.cnum;
            cnum.encode(s);
        }
    }
}

impl<'a, 'tcx> TyEncoder for EncodeContext<'a, 'tcx> {
    // Local-only data (e.g. `ty::Unevaluated` promoted info) is cleared when
    // crossing the crate boundary.
    const CLEAR_CROSS_CRATE: bool = true;

    type I = TyCtxt<'tcx>;

    fn position(&self) -> usize {
        self.opaque.position()
    }

    fn type_shorthands(&mut self) -> &mut FxHashMap<Ty<'tcx>, usize> {
        &mut self.type_shorthands
    }

    fn predicate_shorthands(&mut self) -> &mut FxHashMap<ty::PredicateKind<'tcx>, usize> {
        &mut self.predicate_shorthands
    }

    // Allocations are serialized as an index; the payload is written later by
    // the fixpoint loop in `encode_crate_root`.
    fn encode_alloc_id(&mut self, alloc_id: &rustc_middle::mir::interpret::AllocId) {
        let (index, _) = self.interpret_allocs.insert_full(*alloc_id);

        index.encode(self);
    }
}

// Shorthand for `$self.$tables.$table.set($def_id.index, $self.lazy_value($value))`, which would
// normally need extra variables to avoid errors about multiple mutable borrows.
macro_rules! record {
    ($self:ident.$tables:ident.$table:ident[$def_id:expr] <- $value:expr) => {{
        {
            let value = $value;
            let lazy = $self.lazy(value);
            $self.$tables.$table.set($def_id.index, lazy);
        }
    }};
}
// Shorthand for `$self.$tables.$table.set($def_id.index, $self.lazy_value($value))`, which would
// normally need extra variables to avoid errors about multiple mutable borrows.
macro_rules! record_array {
    ($self:ident.$tables:ident.$table:ident[$def_id:expr] <- $value:expr) => {{
        {
            let value = $value;
            let lazy = $self.lazy_array(value);
            $self.$tables.$table.set($def_id.index, lazy);
        }
    }};
}

impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
    /// Emits the distance from the current position back to `position`,
    /// relative either to the enclosing node's start or to the previous lazy
    /// entry — whichever `lazy_state` says applies.
    fn emit_lazy_distance(&mut self, position: NonZeroUsize) {
        let pos = position.get();
        let distance = match self.lazy_state {
            LazyState::NoNode => bug!("emit_lazy_distance: outside of a metadata node"),
            LazyState::NodeStart(start) => {
                let start = start.get();
                assert!(pos <= start);
                start - pos
            }
            LazyState::Previous(last_pos) => {
                assert!(
                    last_pos <= position,
                    "make sure that the calls to `lazy*` \
                     are in the same order as the metadata fields",
                );
                position.get() - last_pos.get()
            }
        };
        self.lazy_state = LazyState::Previous(NonZeroUsize::new(pos).unwrap());
        self.emit_usize(distance);
    }

    /// Encodes `value` at the current position and returns a `LazyValue`
    /// pointing at it. Must not be called re-entrantly (asserts `NoNode`).
    fn lazy<T: ParameterizedOverTcx, B: Borrow<T::Value<'tcx>>>(&mut self, value: B) -> LazyValue<T>
    where
        T::Value<'tcx>: Encodable<EncodeContext<'a, 'tcx>>,
    {
        let pos = NonZeroUsize::new(self.position()).unwrap();

        assert_eq!(self.lazy_state, LazyState::NoNode);
        self.lazy_state = LazyState::NodeStart(pos);
        value.borrow().encode(self);
        self.lazy_state = LazyState::NoNode;

        assert!(pos.get() <= self.position());

        LazyValue::from_position(pos)
    }

    /// Encodes every element of `values` back-to-back and returns a
    /// `LazyArray` recording the start position and element count.
    fn lazy_array<T: ParameterizedOverTcx, I: IntoIterator<Item = B>, B: Borrow<T::Value<'tcx>>>(
        &mut self,
        values: I,
    ) -> LazyArray<T>
    where
        T::Value<'tcx>: Encodable<EncodeContext<'a, 'tcx>>,
    {
        let pos = NonZeroUsize::new(self.position()).unwrap();

        assert_eq!(self.lazy_state, LazyState::NoNode);
        self.lazy_state = LazyState::NodeStart(pos);
        let len = values.into_iter().map(|value| value.borrow().encode(self)).count();
        self.lazy_state = LazyState::NoNode;

        assert!(pos.get() <= self.position());

        LazyArray::from_position_and_num_elems(pos, len)
    }
    /// Encodes the crate root module, then (for non-proc-macro crates) walks
    /// every item in the crate via the HIR visitor.
    fn encode_info_for_items(&mut self) {
        self.encode_info_for_mod(CRATE_DEF_ID, self.tcx.hir().root_module());

        // Proc-macro crates only export proc-macro items, which are looked
        // up using `proc_macro_data`
        if self.is_proc_macro {
            return;
        }

        self.tcx.hir().visit_all_item_likes_in_crate(self);
    }

    /// Records the `DefKey` and `DefPathHash` for every definition — or, for
    /// proc-macro crates, only the crate root and the exported proc-macros.
    fn encode_def_path_table(&mut self) {
        let table = self.tcx.def_path_table();
        if self.is_proc_macro {
            for def_index in std::iter::once(CRATE_DEF_INDEX)
                .chain(self.tcx.resolutions(()).proc_macros.iter().map(|p| p.local_def_index))
            {
                let def_key = self.lazy(table.def_key(def_index));
                let def_path_hash = table.def_path_hash(def_index);
                self.tables.def_keys.set(def_index, def_key);
                self.tables.def_path_hashes.set(def_index, def_path_hash);
            }
        } else {
            for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
                let def_key = self.lazy(def_key);
                self.tables.def_keys.set(def_index, def_key);
                self.tables.def_path_hashes.set(def_index, *def_path_hash);
            }
        }
    }

    /// Serializes the tcx's hash→index map by borrowing its raw bytes.
    fn encode_def_path_hash_map(&mut self) -> LazyValue<DefPathHashMapRef<'static>> {
        self.lazy(DefPathHashMapRef::BorrowedFromTcx(self.tcx.def_path_hash_to_def_index_map()))
    }

    /// Serializes exactly the `SourceFile`s that were referenced by some
    /// encoded `Span`, with paths rewritten to embeddable absolute form.
    fn encode_source_map(&mut self) -> LazyArray<rustc_span::SourceFile> {
        let source_map = self.tcx.sess.source_map();
        let all_source_files = source_map.files();

        // By replacing the `Option` with `None`, we ensure that we can't
        // accidentally serialize any more `Span`s after the source map encoding
        // is done.
        let required_source_files = self.required_source_files.take().unwrap();

        let working_directory = &self.tcx.sess.opts.working_dir;

        let adapted = all_source_files
            .iter()
            .enumerate()
            .filter(|(idx, source_file)| {
                // Only serialize `SourceFile`s that were used
                // during the encoding of a `Span`
                required_source_files.contains(*idx) &&
                    // Don't serialize imported `SourceFile`s, unless
                    // we're in a proc-macro crate.
                    (!source_file.is_imported() || self.is_proc_macro)
            })
            .map(|(_, source_file)| {
                // At export time we expand all source file paths to absolute paths because
                // downstream compilation sessions can have a different compiler working
                // directory, so relative paths from this or any other upstream crate
                // won't be valid anymore.
                //
                // At this point we also erase the actual on-disk path and only keep
                // the remapped version -- as is necessary for reproducible builds.
                match source_file.name {
                    FileName::Real(ref original_file_name) => {
                        let adapted_file_name =
                            source_map.path_mapping().to_embeddable_absolute_path(
                                original_file_name.clone(),
                                working_directory,
                            );

                        if adapted_file_name != *original_file_name {
                            let mut adapted: SourceFile = (**source_file).clone();
                            adapted.name = FileName::Real(adapted_file_name);
                            adapted.name_hash = {
                                let mut hasher: StableHasher = StableHasher::new();
                                adapted.name.hash(&mut hasher);
                                hasher.finish::<u128>()
                            };
                            Lrc::new(adapted)
                        } else {
                            // Nothing to adapt
                            source_file.clone()
                        }
                    }
                    // expanded code, not from a file
                    _ => source_file.clone(),
                }
            })
            .map(|mut source_file| {
                // We're serializing this `SourceFile` into our crate metadata,
                // so mark it as coming from this crate.
                // This also ensures that we don't try to deserialize the
                // `CrateNum` for a proc-macro dependency - since proc macro
                // dependencies aren't loaded when we deserialize a proc-macro,
                // trying to remap the `CrateNum` would fail.
                if self.is_proc_macro {
                    Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE;
                }
                source_file
            })
            .collect::<Vec<_>>();

        self.lazy_array(adapted.iter().map(|rc| &**rc))
    }
+ if self.is_proc_macro { + Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE; + } + source_file + }) + .collect::<Vec<_>>(); + + self.lazy_array(adapted.iter().map(|rc| &**rc)) + } + + fn encode_crate_root(&mut self) -> LazyValue<CrateRoot> { + let tcx = self.tcx; + let mut i = 0; + let preamble_bytes = self.position() - i; + + // Encode the crate deps + i = self.position(); + let crate_deps = self.encode_crate_deps(); + let dylib_dependency_formats = self.encode_dylib_dependency_formats(); + let dep_bytes = self.position() - i; + + // Encode the lib features. + i = self.position(); + let lib_features = self.encode_lib_features(); + let lib_feature_bytes = self.position() - i; + + // Encode the stability implications. + i = self.position(); + let stability_implications = self.encode_stability_implications(); + let stability_implications_bytes = self.position() - i; + + // Encode the language items. + i = self.position(); + let lang_items = self.encode_lang_items(); + let lang_items_missing = self.encode_lang_items_missing(); + let lang_item_bytes = self.position() - i; + + // Encode the diagnostic items. + i = self.position(); + let diagnostic_items = self.encode_diagnostic_items(); + let diagnostic_item_bytes = self.position() - i; + + // Encode the native libraries used + i = self.position(); + let native_libraries = self.encode_native_libraries(); + let native_lib_bytes = self.position() - i; + + i = self.position(); + let foreign_modules = self.encode_foreign_modules(); + let foreign_modules_bytes = self.position() - i; + + // Encode DefPathTable + i = self.position(); + self.encode_def_path_table(); + let def_path_table_bytes = self.position() - i; + + // Encode the def IDs of traits, for rustdoc and diagnostics. + i = self.position(); + let traits = self.encode_traits(); + let traits_bytes = self.position() - i; + + // Encode the def IDs of impls, for coherence checking. 
+ i = self.position(); + let impls = self.encode_impls(); + let impls_bytes = self.position() - i; + + i = self.position(); + let incoherent_impls = self.encode_incoherent_impls(); + let incoherent_impls_bytes = self.position() - i; + + // Encode MIR. + i = self.position(); + self.encode_mir(); + let mir_bytes = self.position() - i; + + // Encode the items. + i = self.position(); + self.encode_def_ids(); + self.encode_info_for_items(); + let item_bytes = self.position() - i; + + // Encode the allocation index + i = self.position(); + let interpret_alloc_index = { + let mut interpret_alloc_index = Vec::new(); + let mut n = 0; + trace!("beginning to encode alloc ids"); + loop { + let new_n = self.interpret_allocs.len(); + // if we have found new ids, serialize those, too + if n == new_n { + // otherwise, abort + break; + } + trace!("encoding {} further alloc ids", new_n - n); + for idx in n..new_n { + let id = self.interpret_allocs[idx]; + let pos = self.position() as u32; + interpret_alloc_index.push(pos); + interpret::specialized_encode_alloc_id(self, tcx, id); + } + n = new_n; + } + self.lazy_array(interpret_alloc_index) + }; + let interpret_alloc_index_bytes = self.position() - i; + + // Encode the proc macro data. This affects 'tables', + // so we need to do this before we encode the tables. + // This overwrites def_keys, so it must happen after encode_def_path_table. + i = self.position(); + let proc_macro_data = self.encode_proc_macros(); + let proc_macro_data_bytes = self.position() - i; + + i = self.position(); + let tables = self.tables.encode(&mut self.opaque); + let tables_bytes = self.position() - i; + + i = self.position(); + let debugger_visualizers = self.encode_debugger_visualizers(); + let debugger_visualizers_bytes = self.position() - i; + + // Encode exported symbols info. This is prefetched in `encode_metadata` so we encode + // this as late as possible to give the prefetching as much time as possible to complete. 
+ i = self.position(); + let exported_symbols = tcx.exported_symbols(LOCAL_CRATE); + let exported_symbols = self.encode_exported_symbols(&exported_symbols); + let exported_symbols_bytes = self.position() - i; + + // Encode the hygiene data, + // IMPORTANT: this *must* be the last thing that we encode (other than `SourceMap`). The process + // of encoding other items (e.g. `optimized_mir`) may cause us to load + // data from the incremental cache. If this causes us to deserialize a `Span`, + // then we may load additional `SyntaxContext`s into the global `HygieneData`. + // Therefore, we need to encode the hygiene data last to ensure that we encode + // any `SyntaxContext`s that might be used. + i = self.position(); + let (syntax_contexts, expn_data, expn_hashes) = self.encode_hygiene(); + let hygiene_bytes = self.position() - i; + + i = self.position(); + let def_path_hash_map = self.encode_def_path_hash_map(); + let def_path_hash_map_bytes = self.position() - i; + + // Encode source_map. This needs to be done last, + // since encoding `Span`s tells us which `SourceFiles` we actually + // need to encode. 
+ i = self.position(); + let source_map = self.encode_source_map(); + let source_map_bytes = self.position() - i; + + i = self.position(); + let attrs = tcx.hir().krate_attrs(); + let has_default_lib_allocator = tcx.sess.contains_name(&attrs, sym::default_lib_allocator); + let root = self.lazy(CrateRoot { + name: tcx.crate_name(LOCAL_CRATE), + extra_filename: tcx.sess.opts.cg.extra_filename.clone(), + triple: tcx.sess.opts.target_triple.clone(), + hash: tcx.crate_hash(LOCAL_CRATE), + stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(), + required_panic_strategy: tcx.required_panic_strategy(LOCAL_CRATE), + panic_in_drop_strategy: tcx.sess.opts.unstable_opts.panic_in_drop, + edition: tcx.sess.edition(), + has_global_allocator: tcx.has_global_allocator(LOCAL_CRATE), + has_panic_handler: tcx.has_panic_handler(LOCAL_CRATE), + has_default_lib_allocator, + proc_macro_data, + debugger_visualizers, + compiler_builtins: tcx.sess.contains_name(&attrs, sym::compiler_builtins), + needs_allocator: tcx.sess.contains_name(&attrs, sym::needs_allocator), + needs_panic_runtime: tcx.sess.contains_name(&attrs, sym::needs_panic_runtime), + no_builtins: tcx.sess.contains_name(&attrs, sym::no_builtins), + panic_runtime: tcx.sess.contains_name(&attrs, sym::panic_runtime), + profiler_runtime: tcx.sess.contains_name(&attrs, sym::profiler_runtime), + symbol_mangling_version: tcx.sess.opts.get_symbol_mangling_version(), + + crate_deps, + dylib_dependency_formats, + lib_features, + stability_implications, + lang_items, + diagnostic_items, + lang_items_missing, + native_libraries, + foreign_modules, + source_map, + traits, + impls, + incoherent_impls, + exported_symbols, + interpret_alloc_index, + tables, + syntax_contexts, + expn_data, + expn_hashes, + def_path_hash_map, + }); + let final_bytes = self.position() - i; + + let total_bytes = self.position(); + + let computed_total_bytes = preamble_bytes + + dep_bytes + + lib_feature_bytes + + stability_implications_bytes 
+ + lang_item_bytes + + diagnostic_item_bytes + + native_lib_bytes + + foreign_modules_bytes + + def_path_table_bytes + + traits_bytes + + impls_bytes + + incoherent_impls_bytes + + mir_bytes + + item_bytes + + interpret_alloc_index_bytes + + proc_macro_data_bytes + + tables_bytes + + debugger_visualizers_bytes + + exported_symbols_bytes + + hygiene_bytes + + def_path_hash_map_bytes + + source_map_bytes + + final_bytes; + assert_eq!(total_bytes, computed_total_bytes); + + if tcx.sess.meta_stats() { + self.opaque.flush(); + + // Rewind and re-read all the metadata to count the zero bytes we wrote. + let pos_before_rewind = self.opaque.file().stream_position().unwrap(); + let mut zero_bytes = 0; + self.opaque.file().rewind().unwrap(); + let file = std::io::BufReader::new(self.opaque.file()); + for e in file.bytes() { + if e.unwrap() == 0 { + zero_bytes += 1; + } + } + assert_eq!(self.opaque.file().stream_position().unwrap(), pos_before_rewind); + + let perc = |bytes| (bytes * 100) as f64 / total_bytes as f64; + let p = |label, bytes| { + eprintln!("{:>21}: {:>8} bytes ({:4.1}%)", label, bytes, perc(bytes)); + }; + + eprintln!(""); + eprintln!( + "{} metadata bytes, of which {} bytes ({:.1}%) are zero", + total_bytes, + zero_bytes, + perc(zero_bytes) + ); + p("preamble", preamble_bytes); + p("dep", dep_bytes); + p("lib feature", lib_feature_bytes); + p("stability_implications", stability_implications_bytes); + p("lang item", lang_item_bytes); + p("diagnostic item", diagnostic_item_bytes); + p("native lib", native_lib_bytes); + p("foreign modules", foreign_modules_bytes); + p("def-path table", def_path_table_bytes); + p("traits", traits_bytes); + p("impls", impls_bytes); + p("incoherent_impls", incoherent_impls_bytes); + p("mir", mir_bytes); + p("item", item_bytes); + p("interpret_alloc_index", interpret_alloc_index_bytes); + p("proc-macro-data", proc_macro_data_bytes); + p("tables", tables_bytes); + p("debugger visualizers", debugger_visualizers_bytes); + p("exported 
symbols", exported_symbols_bytes); + p("hygiene", hygiene_bytes); + p("def-path hashes", def_path_hash_map_bytes); + p("source_map", source_map_bytes); + p("final", final_bytes); + eprintln!(""); + } + + root + } +} + +fn should_encode_visibility(def_kind: DefKind) -> bool { + match def_kind { + DefKind::Mod + | DefKind::Struct + | DefKind::Union + | DefKind::Enum + | DefKind::Variant + | DefKind::Trait + | DefKind::TyAlias + | DefKind::ForeignTy + | DefKind::TraitAlias + | DefKind::AssocTy + | DefKind::Fn + | DefKind::Const + | DefKind::Static(..) + | DefKind::Ctor(..) + | DefKind::AssocFn + | DefKind::AssocConst + | DefKind::Macro(..) + | DefKind::Use + | DefKind::ForeignMod + | DefKind::OpaqueTy + | DefKind::Impl + | DefKind::Field => true, + DefKind::TyParam + | DefKind::ConstParam + | DefKind::LifetimeParam + | DefKind::AnonConst + | DefKind::InlineConst + | DefKind::GlobalAsm + | DefKind::Closure + | DefKind::Generator + | DefKind::ExternCrate => false, + } +} + +fn should_encode_stability(def_kind: DefKind) -> bool { + match def_kind { + DefKind::Mod + | DefKind::Ctor(..) + | DefKind::Variant + | DefKind::Field + | DefKind::Struct + | DefKind::AssocTy + | DefKind::AssocFn + | DefKind::AssocConst + | DefKind::TyParam + | DefKind::ConstParam + | DefKind::Static(..) + | DefKind::Const + | DefKind::Fn + | DefKind::ForeignMod + | DefKind::TyAlias + | DefKind::OpaqueTy + | DefKind::Enum + | DefKind::Union + | DefKind::Impl + | DefKind::Trait + | DefKind::TraitAlias + | DefKind::Macro(..) + | DefKind::ForeignTy => true, + DefKind::Use + | DefKind::LifetimeParam + | DefKind::AnonConst + | DefKind::InlineConst + | DefKind::GlobalAsm + | DefKind::Closure + | DefKind::Generator + | DefKind::ExternCrate => false, + } +} + +/// Whether we should encode MIR. +/// +/// Computing, optimizing and encoding the MIR is a relatively expensive operation. +/// We want to avoid this work when not required. 
/// Whether we should encode MIR.
///
/// Computing, optimizing and encoding the MIR is a relatively expensive operation.
/// We want to avoid this work when not required. Therefore:
/// - we only compute `mir_for_ctfe` on items with const-eval semantics;
/// - we skip `optimized_mir` for check runs.
///
/// Return a pair, resp. for CTFE and for LLVM.
fn should_encode_mir(tcx: TyCtxt<'_>, def_id: LocalDefId) -> (bool, bool) {
    match tcx.def_kind(def_id) {
        // Constructors
        DefKind::Ctor(_, _) => {
            let mir_opt_base = tcx.sess.opts.output_types.should_codegen()
                || tcx.sess.opts.unstable_opts.always_encode_mir;
            (true, mir_opt_base)
        }
        // Constants
        DefKind::AnonConst
        | DefKind::InlineConst
        | DefKind::AssocConst
        | DefKind::Static(..)
        | DefKind::Const => (true, false),
        // Full-fledged functions
        DefKind::AssocFn | DefKind::Fn => {
            let generics = tcx.generics_of(def_id);
            // Generic or `#[inline]` functions must ship MIR so downstream
            // crates can monomorphize/inline them — but only if we codegen.
            let needs_inline = (generics.requires_monomorphization(tcx)
                || tcx.codegen_fn_attrs(def_id).requests_inline())
                && tcx.sess.opts.output_types.should_codegen();
            // The function has a `const` modifier or is in a `#[const_trait]`.
            let is_const_fn = tcx.is_const_fn_raw(def_id.to_def_id())
                || tcx.is_const_default_method(def_id.to_def_id());
            let always_encode_mir = tcx.sess.opts.unstable_opts.always_encode_mir;
            (is_const_fn, needs_inline || always_encode_mir)
        }
        // Closures can't be const fn.
        DefKind::Closure => {
            let generics = tcx.generics_of(def_id);
            let needs_inline = (generics.requires_monomorphization(tcx)
                || tcx.codegen_fn_attrs(def_id).requests_inline())
                && tcx.sess.opts.output_types.should_codegen();
            let always_encode_mir = tcx.sess.opts.unstable_opts.always_encode_mir;
            (false, needs_inline || always_encode_mir)
        }
        // Generators require optimized MIR to compute layout.
        DefKind::Generator => (false, true),
        // The others don't have MIR.
        _ => (false, false),
    }
}
+ | DefKind::AssocFn => true, + DefKind::Mod + | DefKind::Field + | DefKind::AssocTy + | DefKind::AssocConst + | DefKind::TyParam + | DefKind::ConstParam + | DefKind::Static(..) + | DefKind::Const + | DefKind::ForeignMod + | DefKind::TyAlias + | DefKind::OpaqueTy + | DefKind::Impl + | DefKind::Trait + | DefKind::TraitAlias + | DefKind::Macro(..) + | DefKind::ForeignTy + | DefKind::Use + | DefKind::LifetimeParam + | DefKind::AnonConst + | DefKind::InlineConst + | DefKind::GlobalAsm + | DefKind::Closure + | DefKind::Generator + | DefKind::ExternCrate => false, + } +} + +fn should_encode_generics(def_kind: DefKind) -> bool { + match def_kind { + DefKind::Struct + | DefKind::Union + | DefKind::Enum + | DefKind::Variant + | DefKind::Trait + | DefKind::TyAlias + | DefKind::ForeignTy + | DefKind::TraitAlias + | DefKind::AssocTy + | DefKind::Fn + | DefKind::Const + | DefKind::Static(..) + | DefKind::Ctor(..) + | DefKind::AssocFn + | DefKind::AssocConst + | DefKind::AnonConst + | DefKind::InlineConst + | DefKind::OpaqueTy + | DefKind::Impl + | DefKind::Field + | DefKind::TyParam + | DefKind::Closure + | DefKind::Generator => true, + DefKind::Mod + | DefKind::ForeignMod + | DefKind::ConstParam + | DefKind::Macro(..) 
+ | DefKind::Use + | DefKind::LifetimeParam + | DefKind::GlobalAsm + | DefKind::ExternCrate => false, + } +} + +impl<'a, 'tcx> EncodeContext<'a, 'tcx> { + fn encode_attrs(&mut self, def_id: LocalDefId) { + let mut attrs = self + .tcx + .hir() + .attrs(self.tcx.hir().local_def_id_to_hir_id(def_id)) + .iter() + .filter(|attr| !rustc_feature::is_builtin_only_local(attr.name_or_empty())); + + record_array!(self.tables.attributes[def_id.to_def_id()] <- attrs.clone()); + if attrs.any(|attr| attr.may_have_doc_links()) { + self.tables.may_have_doc_links.set(def_id.local_def_index, ()); + } + } + + fn encode_def_ids(&mut self) { + if self.is_proc_macro { + return; + } + let tcx = self.tcx; + for local_id in tcx.iter_local_def_id() { + let def_id = local_id.to_def_id(); + let def_kind = tcx.opt_def_kind(local_id); + let Some(def_kind) = def_kind else { continue }; + self.tables.opt_def_kind.set(def_id.index, def_kind); + record!(self.tables.def_span[def_id] <- tcx.def_span(def_id)); + self.encode_attrs(local_id); + record!(self.tables.expn_that_defined[def_id] <- self.tcx.expn_that_defined(def_id)); + if let Some(ident_span) = tcx.def_ident_span(def_id) { + record!(self.tables.def_ident_span[def_id] <- ident_span); + } + if def_kind.has_codegen_attrs() { + record!(self.tables.codegen_fn_attrs[def_id] <- self.tcx.codegen_fn_attrs(def_id)); + } + if should_encode_visibility(def_kind) { + record!(self.tables.visibility[def_id] <- self.tcx.visibility(def_id)); + } + if should_encode_stability(def_kind) { + self.encode_stability(def_id); + self.encode_const_stability(def_id); + self.encode_deprecation(def_id); + } + if should_encode_variances(def_kind) { + let v = self.tcx.variances_of(def_id); + record_array!(self.tables.variances_of[def_id] <- v); + } + if should_encode_generics(def_kind) { + let g = tcx.generics_of(def_id); + record!(self.tables.generics_of[def_id] <- g); + record!(self.tables.explicit_predicates_of[def_id] <- self.tcx.explicit_predicates_of(def_id)); + let 
                inferred_outlives = self.tcx.inferred_outlives_of(def_id);
                // Empty outlives sets are the common case; skip the table
                // entry entirely for them.
                if !inferred_outlives.is_empty() {
                    record_array!(self.tables.inferred_outlives_of[def_id] <- inferred_outlives);
                }
            }
            if let DefKind::Trait | DefKind::TraitAlias = def_kind {
                record!(self.tables.super_predicates_of[def_id] <- self.tcx.super_predicates_of(def_id));
            }
        }
        // Map each local type to the list of its inherent impls.
        let inherent_impls = tcx.crate_inherent_impls(());
        for (def_id, implementations) in inherent_impls.inherent_impls.iter() {
            if implementations.is_empty() {
                continue;
            }
            record_array!(self.tables.inherent_impls[def_id.to_def_id()] <- implementations.iter().map(|&def_id| {
                assert!(def_id.is_local());
                def_id.index
            }));
        }
    }

    /// Records `type_of(def_id)` into the `type_of` table.
    fn encode_item_type(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_item_type({:?})", def_id);
        record!(self.tables.type_of[def_id] <- self.tcx.type_of(def_id));
    }

    /// Encodes one enum variant: its `VariantData`, constness, field
    /// children, type, and (for tuple variants) the constructor signature.
    fn encode_enum_variant_info(&mut self, def: ty::AdtDef<'tcx>, index: VariantIdx) {
        let tcx = self.tcx;
        let variant = &def.variant(index);
        let def_id = variant.def_id;
        debug!("EncodeContext::encode_enum_variant_info({:?})", def_id);

        let data = VariantData {
            ctor_kind: variant.ctor_kind,
            discr: variant.discr,
            ctor: variant.ctor_def_id.map(|did| did.index),
            is_non_exhaustive: variant.is_field_list_non_exhaustive(),
        };

        record!(self.tables.kind[def_id] <- EntryKind::Variant(self.lazy(data)));
        self.tables.constness.set(def_id.index, hir::Constness::Const);
        record_array!(self.tables.children[def_id] <- variant.fields.iter().map(|f| {
            assert!(f.did.is_local());
            f.did.index
        }));
        self.encode_item_type(def_id);
        if variant.ctor_kind == CtorKind::Fn {
            // FIXME(eddyb) encode signature only in `encode_enum_variant_ctor`.
            if let Some(ctor_def_id) = variant.ctor_def_id {
                record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(ctor_def_id));
            }
        }
    }

    /// Encodes the constructor item of an enum variant; largely mirrors
    /// `encode_enum_variant_info`, but keyed by the ctor's own `DefId`.
    fn encode_enum_variant_ctor(&mut self, def: ty::AdtDef<'tcx>, index: VariantIdx) {
        let tcx = self.tcx;
        let variant = &def.variant(index);
        // Only called for variants that actually have a constructor.
        let def_id = variant.ctor_def_id.unwrap();
        debug!("EncodeContext::encode_enum_variant_ctor({:?})", def_id);

        // FIXME(eddyb) encode only the `CtorKind` for constructors.
        let data = VariantData {
            ctor_kind: variant.ctor_kind,
            discr: variant.discr,
            ctor: Some(def_id.index),
            is_non_exhaustive: variant.is_field_list_non_exhaustive(),
        };

        record!(self.tables.kind[def_id] <- EntryKind::Variant(self.lazy(data)));
        self.tables.constness.set(def_id.index, hir::Constness::Const);
        self.encode_item_type(def_id);
        if variant.ctor_kind == CtorKind::Fn {
            record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(def_id));
        }
    }

    /// Encodes a module: its reexport list and its named children.
    fn encode_info_for_mod(&mut self, local_def_id: LocalDefId, md: &hir::Mod<'_>) {
        let tcx = self.tcx;
        let def_id = local_def_id.to_def_id();
        debug!("EncodeContext::encode_info_for_mod({:?})", def_id);

        // If we are encoding a proc-macro crates, `encode_info_for_mod` will
        // only ever get called for the crate root. We still want to encode
        // the crate root for consistency with other crates (some of the resolver
        // code uses it). However, we skip encoding anything relating to child
        // items - we encode information about proc-macros later on.
        let reexports = if !self.is_proc_macro {
            match tcx.module_reexports(local_def_id) {
                Some(exports) => self.lazy_array(exports),
                _ => LazyArray::empty(),
            }
        } else {
            LazyArray::empty()
        };

        record!(self.tables.kind[def_id] <- EntryKind::Mod(reexports));
        if self.is_proc_macro {
            // Encode this here because we don't do it in encode_def_ids.
            record!(self.tables.expn_that_defined[def_id] <- tcx.expn_that_defined(local_def_id));
        } else {
            record_array!(self.tables.children[def_id] <- iter::from_generator(|| {
                for item_id in md.item_ids {
                    match tcx.hir().item(*item_id).kind {
                        // Foreign items are planted into their parent modules
                        // from name resolution point of view.
                        hir::ItemKind::ForeignMod { items, .. } => {
                            for foreign_item in items {
                                yield foreign_item.id.def_id.local_def_index;
                            }
                        }
                        // Only encode named non-reexport children, reexports are encoded
                        // separately and unnamed items are not used by name resolution.
                        hir::ItemKind::ExternCrate(..) => continue,
                        _ if tcx.def_key(item_id.def_id.to_def_id()).get_opt_name().is_some() => {
                            yield item_id.def_id.local_def_index;
                        }
                        _ => continue,
                    }
                }
            }));
        }
    }

    /// Records a field's `EntryKind::Field` entry and its type.
    fn encode_field(
        &mut self,
        adt_def: ty::AdtDef<'tcx>,
        variant_index: VariantIdx,
        field_index: usize,
    ) {
        let variant = &adt_def.variant(variant_index);
        let field = &variant.fields[field_index];

        let def_id = field.did;
        debug!("EncodeContext::encode_field({:?})", def_id);

        record!(self.tables.kind[def_id] <- EntryKind::Field);
        self.encode_item_type(def_id);
    }

    /// Encodes a (non-enum) struct constructor: repr options, variant data,
    /// type, and — for tuple structs — the ctor's function signature.
    fn encode_struct_ctor(&mut self, adt_def: ty::AdtDef<'tcx>, def_id: DefId) {
        debug!("EncodeContext::encode_struct_ctor({:?})", def_id);
        let tcx = self.tcx;
        let variant = adt_def.non_enum_variant();

        let data = VariantData {
            ctor_kind: variant.ctor_kind,
            discr: variant.discr,
            ctor: Some(def_id.index),
            is_non_exhaustive: variant.is_field_list_non_exhaustive(),
        };

        record!(self.tables.repr_options[def_id] <- adt_def.repr());
        self.tables.constness.set(def_id.index, hir::Constness::Const);
        record!(self.tables.kind[def_id] <- EntryKind::Struct(self.lazy(data)));
        self.encode_item_type(def_id);
        if variant.ctor_kind == CtorKind::Fn {
            record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(def_id));
        }
    }

    /// Records the `explicit_item_bounds` of `def_id`; empty bound lists are
    /// skipped to keep the table small.
    fn encode_explicit_item_bounds(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_explicit_item_bounds({:?})", def_id);
        let bounds = self.tcx.explicit_item_bounds(def_id);
        if !bounds.is_empty() {
            record_array!(self.tables.explicit_item_bounds[def_id] <- bounds);
        }
    }

    /// Encodes a trait's associated item (const, fn, or type): entry kind,
    /// defaultness, argument names / rendered const text, and the item type
    /// where one exists.
    fn encode_info_for_trait_item(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_info_for_trait_item({:?})", def_id);
        let tcx = self.tcx;

        let ast_item = tcx.hir().expect_trait_item(def_id.expect_local());
        self.tables.impl_defaultness.set(def_id.index, ast_item.defaultness);
        let trait_item = tcx.associated_item(def_id);

        match trait_item.kind {
            ty::AssocKind::Const => {
                // Pretty-print the whole trait item so downstream tooling can
                // show the const's source text.
                let rendered = rustc_hir_pretty::to_string(
                    &(&self.tcx.hir() as &dyn intravisit::Map<'_>),
                    |s| s.print_trait_item(ast_item),
                );

                record!(self.tables.kind[def_id] <- EntryKind::AssocConst(ty::AssocItemContainer::TraitContainer));
                record!(self.tables.mir_const_qualif[def_id] <- mir::ConstQualifs::default());
                record!(self.tables.rendered_const[def_id] <- rendered);
            }
            ty::AssocKind::Fn => {
                let hir::TraitItemKind::Fn(m_sig, m) = &ast_item.kind else { bug!() };
                match *m {
                    // Required methods carry only parameter names;
                    // provided methods have a body to pull names from.
                    hir::TraitFn::Required(ref names) => {
                        record_array!(self.tables.fn_arg_names[def_id] <- *names)
                    }
                    hir::TraitFn::Provided(body) => {
                        record_array!(self.tables.fn_arg_names[def_id] <- self.tcx.hir().body_param_names(body))
                    }
                };
                self.tables.asyncness.set(def_id.index, m_sig.header.asyncness);
                self.tables.constness.set(def_id.index, hir::Constness::NotConst);
                record!(self.tables.kind[def_id] <- EntryKind::AssocFn {
                    container: ty::AssocItemContainer::TraitContainer,
                    has_self: trait_item.fn_has_self_parameter,
                });
            }
            ty::AssocKind::Type => {
                self.encode_explicit_item_bounds(def_id);
                record!(self.tables.kind[def_id] <- EntryKind::AssocType(ty::AssocItemContainer::TraitContainer));
            }
        }
        match trait_item.kind {
            ty::AssocKind::Const | ty::AssocKind::Fn => {
                self.encode_item_type(def_id);
            }
            ty::AssocKind::Type => {
                // Associated types only have a type to encode when they have
                // a default value.
                if
                ast_item.defaultness.has_value() {
                    self.encode_item_type(def_id);
                }
            }
        }
        if trait_item.kind == ty::AssocKind::Fn {
            record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(def_id));
        }
    }

    /// Encodes an impl's associated item (const, fn, or type): entry kind,
    /// defaultness, constness, the link back to the trait item it implements
    /// (if any), and the intrinsic flag for functions.
    fn encode_info_for_impl_item(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_info_for_impl_item({:?})", def_id);
        let tcx = self.tcx;

        let ast_item = self.tcx.hir().expect_impl_item(def_id.expect_local());
        self.tables.impl_defaultness.set(def_id.index, ast_item.defaultness);
        let impl_item = self.tcx.associated_item(def_id);

        match impl_item.kind {
            ty::AssocKind::Const => {
                if let hir::ImplItemKind::Const(_, body_id) = ast_item.kind {
                    let qualifs = self.tcx.at(ast_item.span).mir_const_qualif(def_id);
                    let const_data = self.encode_rendered_const_for_body(body_id);

                    record!(self.tables.kind[def_id] <- EntryKind::AssocConst(ty::AssocItemContainer::ImplContainer));
                    record!(self.tables.mir_const_qualif[def_id] <- qualifs);
                    record!(self.tables.rendered_const[def_id] <- const_data);
                } else {
                    bug!()
                }
            }
            ty::AssocKind::Fn => {
                let hir::ImplItemKind::Fn(ref sig, body) = ast_item.kind else { bug!() };
                self.tables.asyncness.set(def_id.index, sig.header.asyncness);
                record_array!(self.tables.fn_arg_names[def_id] <- self.tcx.hir().body_param_names(body));
                // Can be inside `impl const Trait`, so using sig.header.constness is not reliable
                let constness = if self.tcx.is_const_fn_raw(def_id) {
                    hir::Constness::Const
                } else {
                    hir::Constness::NotConst
                };
                self.tables.constness.set(def_id.index, constness);
                record!(self.tables.kind[def_id] <- EntryKind::AssocFn {
                    container: ty::AssocItemContainer::ImplContainer,
                    has_self: impl_item.fn_has_self_parameter,
                });
            }
            ty::AssocKind::Type => {
                record!(self.tables.kind[def_id] <- EntryKind::AssocType(ty::AssocItemContainer::ImplContainer));
            }
        }
        self.encode_item_type(def_id);
        if let Some(trait_item_def_id) = impl_item.trait_item_def_id {
            self.tables.trait_item_def_id.set(def_id.index, trait_item_def_id.into());
        }
        if impl_item.kind == ty::AssocKind::Fn {
            record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(def_id));
            if tcx.is_intrinsic(def_id) {
                self.tables.is_intrinsic.set(def_id.index, ());
            }
        }
    }

    /// Encodes MIR bodies for every key that needs them: optimized MIR,
    /// CTFE MIR, abstract consts, promoted MIR, and unused generic params.
    /// Proc-macro crates have no MIR worth encoding.
    fn encode_mir(&mut self) {
        if self.is_proc_macro {
            return;
        }

        // Decide up-front which bodies need which flavour of MIR.
        let keys_and_jobs = self
            .tcx
            .mir_keys(())
            .iter()
            .filter_map(|&def_id| {
                let (encode_const, encode_opt) = should_encode_mir(self.tcx, def_id);
                if encode_const || encode_opt {
                    Some((def_id, encode_const, encode_opt))
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();
        for (def_id, encode_const, encode_opt) in keys_and_jobs.into_iter() {
            debug_assert!(encode_const || encode_opt);

            debug!("EntryBuilder::encode_mir({:?})", def_id);
            if encode_opt {
                record!(self.tables.optimized_mir[def_id.to_def_id()] <- self.tcx.optimized_mir(def_id));
            }
            if encode_const {
                record!(self.tables.mir_for_ctfe[def_id.to_def_id()] <- self.tcx.mir_for_ctfe(def_id));

                // FIXME(generic_const_exprs): this feels wrong to have in `encode_mir`
                let abstract_const = self.tcx.thir_abstract_const(def_id);
                if let Ok(Some(abstract_const)) = abstract_const {
                    record!(self.tables.thir_abstract_const[def_id.to_def_id()] <- abstract_const);
                }
            }
            record!(self.tables.promoted_mir[def_id.to_def_id()] <- self.tcx.promoted_mir(def_id));

            let instance =
                ty::InstanceDef::Item(ty::WithOptConstParam::unknown(def_id.to_def_id()));
            let unused = self.tcx.unused_generic_params(instance);
            if !unused.is_empty() {
                record!(self.tables.unused_generic_params[def_id.to_def_id()] <- unused);
            }
        }
    }

    /// Records the stability attribute of `def_id`, if any.
    fn encode_stability(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_stability({:?})", def_id);

        // The query lookup can take a measurable amount of time in crates with many items. Check if
        // the stability attributes are even enabled before using their queries.
        if self.feat.staged_api || self.tcx.sess.opts.unstable_opts.force_unstable_if_unmarked {
            if let Some(stab) = self.tcx.lookup_stability(def_id) {
                record!(self.tables.lookup_stability[def_id] <- stab)
            }
        }
    }

    /// Records the const-stability attribute of `def_id`, if any.
    fn encode_const_stability(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_const_stability({:?})", def_id);

        // The query lookup can take a measurable amount of time in crates with many items. Check if
        // the stability attributes are even enabled before using their queries.
        if self.feat.staged_api || self.tcx.sess.opts.unstable_opts.force_unstable_if_unmarked {
            if let Some(stab) = self.tcx.lookup_const_stability(def_id) {
                record!(self.tables.lookup_const_stability[def_id] <- stab)
            }
        }
    }

    /// Records the deprecation attribute of `def_id`, if any.
    fn encode_deprecation(&mut self, def_id: DefId) {
        debug!("EncodeContext::encode_deprecation({:?})", def_id);
        if let Some(depr) = self.tcx.lookup_deprecation(def_id) {
            record!(self.tables.lookup_deprecation_entry[def_id] <- depr);
        }
    }

    /// Pretty-prints the expression of `body_id` for use as a rendered const.
    fn encode_rendered_const_for_body(&mut self, body_id: hir::BodyId) -> String {
        let hir = self.tcx.hir();
        let body = hir.body(body_id);
        rustc_hir_pretty::to_string(&(&hir as &dyn intravisit::Map<'_>), |s| {
            s.print_expr(&body.value)
        })
    }

    /// Encodes one top-level HIR item: computes its `EntryKind` (recording
    /// kind-specific tables along the way), then the item's children, type,
    /// and signature where applicable.
    fn encode_info_for_item(&mut self, def_id: DefId, item: &'tcx hir::Item<'tcx>) {
        let tcx = self.tcx;

        debug!("EncodeContext::encode_info_for_item({:?})", def_id);

        let entry_kind = match item.kind {
            hir::ItemKind::Static(..) => EntryKind::Static,
            hir::ItemKind::Const(_, body_id) => {
                let qualifs = self.tcx.at(item.span).mir_const_qualif(def_id);
                let const_data = self.encode_rendered_const_for_body(body_id);
                record!(self.tables.mir_const_qualif[def_id] <- qualifs);
                record!(self.tables.rendered_const[def_id] <- const_data);
                EntryKind::Const
            }
            hir::ItemKind::Fn(ref sig, .., body) => {
                self.tables.asyncness.set(def_id.index, sig.header.asyncness);
                record_array!(self.tables.fn_arg_names[def_id] <- self.tcx.hir().body_param_names(body));
                self.tables.constness.set(def_id.index, sig.header.constness);
                EntryKind::Fn
            }
            hir::ItemKind::Macro(ref macro_def, _) => {
                EntryKind::MacroDef(self.lazy(&*macro_def.body), macro_def.macro_rules)
            }
            hir::ItemKind::Mod(ref m) => {
                // Modules are fully handled by `encode_info_for_mod`.
                return self.encode_info_for_mod(item.def_id, m);
            }
            hir::ItemKind::ForeignMod { .. } => EntryKind::ForeignMod,
            hir::ItemKind::GlobalAsm(..) => EntryKind::GlobalAsm,
            hir::ItemKind::TyAlias(..) => EntryKind::Type,
            hir::ItemKind::OpaqueTy(..) => {
                self.encode_explicit_item_bounds(def_id);
                EntryKind::OpaqueTy
            }
            hir::ItemKind::Enum(..) => {
                let adt_def = self.tcx.adt_def(def_id);
                record!(self.tables.repr_options[def_id] <- adt_def.repr());
                EntryKind::Enum
            }
            hir::ItemKind::Struct(ref struct_def, _) => {
                let adt_def = self.tcx.adt_def(def_id);
                record!(self.tables.repr_options[def_id] <- adt_def.repr());
                self.tables.constness.set(def_id.index, hir::Constness::Const);

                // Encode def_ids for each field and method
                // for methods, write all the stuff get_trait_method
                // needs to know
                let ctor = struct_def
                    .ctor_hir_id()
                    .map(|ctor_hir_id| self.tcx.hir().local_def_id(ctor_hir_id).local_def_index);

                let variant = adt_def.non_enum_variant();
                EntryKind::Struct(self.lazy(VariantData {
                    ctor_kind: variant.ctor_kind,
                    discr: variant.discr,
                    ctor,
                    is_non_exhaustive: variant.is_field_list_non_exhaustive(),
                }))
            }
            hir::ItemKind::Union(..) => {
                let adt_def = self.tcx.adt_def(def_id);
                record!(self.tables.repr_options[def_id] <- adt_def.repr());

                let variant = adt_def.non_enum_variant();
                EntryKind::Union(self.lazy(VariantData {
                    ctor_kind: variant.ctor_kind,
                    discr: variant.discr,
                    // Unions never have a separate constructor item.
                    ctor: None,
                    is_non_exhaustive: variant.is_field_list_non_exhaustive(),
                }))
            }
            hir::ItemKind::Impl(hir::Impl { defaultness, constness, .. }) => {
                self.tables.impl_defaultness.set(def_id.index, *defaultness);
                self.tables.constness.set(def_id.index, *constness);

                let trait_ref = self.tcx.impl_trait_ref(def_id);
                if let Some(trait_ref) = trait_ref {
                    let trait_def = self.tcx.trait_def(trait_ref.def_id);
                    // `nth(1)` skips the impl itself and yields its
                    // specialization parent, if there is one.
                    if let Some(mut an) = trait_def.ancestors(self.tcx, def_id).ok() {
                        if let Some(specialization_graph::Node::Impl(parent)) = an.nth(1) {
                            self.tables.impl_parent.set(def_id.index, parent.into());
                        }
                    }

                    // if this is an impl of `CoerceUnsized`, create its
                    // "unsized info", else just store None
                    if Some(trait_ref.def_id) == self.tcx.lang_items().coerce_unsized_trait() {
                        let coerce_unsized_info =
                            self.tcx.at(item.span).coerce_unsized_info(def_id);
                        record!(self.tables.coerce_unsized_info[def_id] <- coerce_unsized_info);
                    }
                }

                let polarity = self.tcx.impl_polarity(def_id);
                self.tables.impl_polarity.set(def_id.index, polarity);

                EntryKind::Impl
            }
            hir::ItemKind::Trait(..) => {
                let trait_def = self.tcx.trait_def(def_id);
                record!(self.tables.trait_def[def_id] <- trait_def);

                EntryKind::Trait
            }
            hir::ItemKind::TraitAlias(..) => {
                let trait_def = self.tcx.trait_def(def_id);
                record!(self.tables.trait_def[def_id] <- trait_def);

                EntryKind::TraitAlias
            }
            hir::ItemKind::ExternCrate(_) | hir::ItemKind::Use(..) => {
                // These are filtered out by the caller (`visit_item`).
                bug!("cannot encode info for item {:?}", item)
            }
        };
        record!(self.tables.kind[def_id] <- entry_kind);
        // FIXME(eddyb) there should be a nicer way to do this.
        match item.kind {
            hir::ItemKind::Enum(..) => record_array!(self.tables.children[def_id] <-
                self.tcx.adt_def(def_id).variants().iter().map(|v| {
                    assert!(v.def_id.is_local());
                    v.def_id.index
                })
            ),
            hir::ItemKind::Struct(..) | hir::ItemKind::Union(..) => {
                record_array!(self.tables.children[def_id] <-
                    self.tcx.adt_def(def_id).non_enum_variant().fields.iter().map(|f| {
                        assert!(f.did.is_local());
                        f.did.index
                    })
                )
            }
            hir::ItemKind::Impl { .. } | hir::ItemKind::Trait(..) => {
                let associated_item_def_ids = self.tcx.associated_item_def_ids(def_id);
                record_array!(self.tables.children[def_id] <-
                    associated_item_def_ids.iter().map(|&def_id| {
                        assert!(def_id.is_local());
                        def_id.index
                    })
                );
            }
            _ => {}
        }
        match item.kind {
            hir::ItemKind::Static(..)
            | hir::ItemKind::Const(..)
            | hir::ItemKind::Fn(..)
            | hir::ItemKind::TyAlias(..)
            | hir::ItemKind::OpaqueTy(..)
            | hir::ItemKind::Enum(..)
            | hir::ItemKind::Struct(..)
            | hir::ItemKind::Union(..)
            | hir::ItemKind::Impl { .. } => self.encode_item_type(def_id),
            _ => {}
        }
        if let hir::ItemKind::Fn(..) = item.kind {
            record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(def_id));
            if tcx.is_intrinsic(def_id) {
                self.tables.is_intrinsic.set(def_id.index, ());
            }
        }
        if let hir::ItemKind::Impl { .. } = item.kind {
            if let Some(trait_ref) = self.tcx.impl_trait_ref(def_id) {
                record!(self.tables.impl_trait_ref[def_id] <- trait_ref);
            }
        }
    }

    /// Records the entry kind for a generic parameter and, when requested,
    /// its type (e.g. const params and defaulted type params).
    fn encode_info_for_generic_param(&mut self, def_id: DefId, kind: EntryKind, encode_type: bool) {
        record!(self.tables.kind[def_id] <- kind);
        if encode_type {
            self.encode_item_type(def_id);
        }
    }

    /// Encodes a closure or generator expression: entry kind, type, and
    /// (for closures) the inferred signature.
    fn encode_info_for_closure(&mut self, hir_id: hir::HirId) {
        let def_id = self.tcx.hir().local_def_id(hir_id);
        debug!("EncodeContext::encode_info_for_closure({:?})", def_id);
        // NOTE(eddyb) `tcx.type_of(def_id)` isn't used because it's fully generic,
        // including on the signature, which is inferred in `typeck`.
        let typeck_result: &'tcx ty::TypeckResults<'tcx> = self.tcx.typeck(def_id);
        let ty = typeck_result.node_type(hir_id);
        match ty.kind() {
            ty::Generator(..) => {
                let data = self.tcx.generator_kind(def_id).unwrap();
                let generator_diagnostic_data = typeck_result.get_generator_diagnostic_data();
                record!(self.tables.kind[def_id.to_def_id()] <- EntryKind::Generator);
                record!(self.tables.generator_kind[def_id.to_def_id()] <- data);
                record!(self.tables.generator_diagnostic_data[def_id.to_def_id()]  <- generator_diagnostic_data);
            }

            ty::Closure(..) => {
                record!(self.tables.kind[def_id.to_def_id()] <- EntryKind::Closure);
            }

            _ => bug!("closure that is neither generator nor closure"),
        }
        self.encode_item_type(def_id.to_def_id());
        // Note: this `def_id` shadows the outer one — it is the closure's
        // own `DefId` extracted from the type.
        if let ty::Closure(def_id, substs) = *ty.kind() {
            record!(self.tables.fn_sig[def_id] <- substs.as_closure().sig());
        }
    }

    /// Encodes an anonymous constant: entry kind, const qualifs, rendered
    /// source text, and type.
    fn encode_info_for_anon_const(&mut self, id: hir::HirId) {
        let def_id = self.tcx.hir().local_def_id(id);
        debug!("EncodeContext::encode_info_for_anon_const({:?})", def_id);
        let body_id = self.tcx.hir().body_owned_by(def_id);
        let const_data = self.encode_rendered_const_for_body(body_id);
        let qualifs = self.tcx.mir_const_qualif(def_id);

        record!(self.tables.kind[def_id.to_def_id()] <- EntryKind::AnonConst);
        record!(self.tables.mir_const_qualif[def_id.to_def_id()] <- qualifs);
        record!(self.tables.rendered_const[def_id.to_def_id()] <- const_data);
        self.encode_item_type(def_id.to_def_id());
    }

    /// Encodes the native libraries linked by this crate.
    fn encode_native_libraries(&mut self) -> LazyArray<NativeLib> {
        empty_proc_macro!(self);
        let used_libraries = self.tcx.native_libraries(LOCAL_CRATE);
        self.lazy_array(used_libraries.iter())
    }

    /// Encodes this crate's `extern` blocks (foreign modules).
    fn encode_foreign_modules(&mut self) -> LazyArray<ForeignModule> {
        empty_proc_macro!(self);
        let foreign_modules = self.tcx.foreign_modules(LOCAL_CRATE);
        self.lazy_array(foreign_modules.iter().map(|(_, m)| m).cloned())
    }

    /// Serializes hygiene data: syntax contexts plus the expansion data and
    /// hashes for locally-defined expansions.
    fn encode_hygiene(&mut self) -> (SyntaxContextTable, ExpnDataTable, ExpnHashTable) {
        let mut syntax_contexts: TableBuilder<_, _> = Default::default();
        let mut expn_data_table: TableBuilder<_, _> = Default::default();
        let mut expn_hash_table: TableBuilder<_, _> = Default::default();

        self.hygiene_ctxt.encode(
            &mut (&mut *self, &mut syntax_contexts, &mut expn_data_table, &mut expn_hash_table),
            |(this, syntax_contexts, _, _), index, ctxt_data| {
                syntax_contexts.set(index, this.lazy(ctxt_data));
            },
            |(this, _, expn_data_table, expn_hash_table), index, expn_data, hash| {
                // Only local expansions are encoded; foreign ones are looked
                // up in their defining crate.
                if let Some(index) = index.as_local() {
                    expn_data_table.set(index.as_raw(), this.lazy(expn_data));
                    expn_hash_table.set(index.as_raw(), this.lazy(hash));
                }
            },
        );

        (
            syntax_contexts.encode(&mut self.opaque),
            expn_data_table.encode(&mut self.opaque),
            expn_hash_table.encode(&mut self.opaque),
        )
    }

    /// For proc-macro crates, encodes the crate root plus one synthesized
    /// macro entry per exported proc-macro; returns `None` otherwise.
    fn encode_proc_macros(&mut self) -> Option<ProcMacroData> {
        let is_proc_macro = self.tcx.sess.crate_types().contains(&CrateType::ProcMacro);
        if is_proc_macro {
            let tcx = self.tcx;
            let hir = tcx.hir();

            let proc_macro_decls_static = tcx.proc_macro_decls_static(()).unwrap().local_def_index;
            let stability = tcx.lookup_stability(CRATE_DEF_ID);
            let macros =
                self.lazy_array(tcx.resolutions(()).proc_macros.iter().map(|p| p.local_def_index));
            let spans = self.tcx.sess.parse_sess.proc_macro_quoted_spans();
            for (i, span) in spans.into_iter().enumerate() {
                let span = self.lazy(span);
                self.tables.proc_macro_quoted_spans.set(i, span);
            }

            self.tables.opt_def_kind.set(LOCAL_CRATE.as_def_id().index, DefKind::Mod);
            record!(self.tables.def_span[LOCAL_CRATE.as_def_id()] <- tcx.def_span(LOCAL_CRATE.as_def_id()));
            self.encode_attrs(LOCAL_CRATE.as_def_id().expect_local());
            record!(self.tables.visibility[LOCAL_CRATE.as_def_id()] <- tcx.visibility(LOCAL_CRATE.as_def_id()));
            if let Some(stability) = stability {
                record!(self.tables.lookup_stability[LOCAL_CRATE.as_def_id()] <- stability);
            }
            self.encode_deprecation(LOCAL_CRATE.as_def_id());

            // Normally, this information is encoded when we walk the items
            // defined in this crate. However, we skip doing that for proc-macro crates,
            // so we manually encode just the information that we need
            for &proc_macro in &tcx.resolutions(()).proc_macros {
                let id = proc_macro;
                let proc_macro = hir.local_def_id_to_hir_id(proc_macro);
                let mut name = hir.name(proc_macro);
                let span = hir.span(proc_macro);
                // Proc-macros may have attributes like `#[allow_internal_unstable]`,
                // so downstream crates need access to them.
                let attrs = hir.attrs(proc_macro);
                let macro_kind = if tcx.sess.contains_name(attrs, sym::proc_macro) {
                    MacroKind::Bang
                } else if tcx.sess.contains_name(attrs, sym::proc_macro_attribute) {
                    MacroKind::Attr
                } else if let Some(attr) = tcx.sess.find_by_name(attrs, sym::proc_macro_derive) {
                    // This unwrap chain should have been checked by the proc-macro harness.
                    name = attr.meta_item_list().unwrap()[0]
                        .meta_item()
                        .unwrap()
                        .ident()
                        .unwrap()
                        .name;
                    MacroKind::Derive
                } else {
                    bug!("Unknown proc-macro type for item {:?}", id);
                };

                let mut def_key = self.tcx.hir().def_key(id);
                def_key.disambiguated_data.data = DefPathData::MacroNs(name);

                let def_id = id.to_def_id();
                self.tables.opt_def_kind.set(def_id.index, DefKind::Macro(macro_kind));
                record!(self.tables.kind[def_id] <- EntryKind::ProcMacro(macro_kind));
                self.encode_attrs(id);
                record!(self.tables.def_keys[def_id] <- def_key);
                record!(self.tables.def_ident_span[def_id] <- span);
                record!(self.tables.def_span[def_id] <- span);
                record!(self.tables.visibility[def_id] <- ty::Visibility::Public);
                if let Some(stability) = stability {
                    record!(self.tables.lookup_stability[def_id] <- stability);
                }
            }

            Some(ProcMacroData { proc_macro_decls_static, stability, macros })
        } else {
            None
        }
    }

    /// Encodes the crate's debugger visualizer files (e.g. natvis/pretty-printers).
    fn encode_debugger_visualizers(&mut self) -> LazyArray<DebuggerVisualizerFile> {
        empty_proc_macro!(self);
        self.lazy_array(self.tcx.debugger_visualizers(LOCAL_CRATE).iter())
    }

    /// Encodes this crate's direct dependencies, in `CrateNum` order.
    fn encode_crate_deps(&mut self) -> LazyArray<CrateDep> {
        empty_proc_macro!(self);

        let deps = self
            .tcx
            .crates(())
            .iter()
            .map(|&cnum| {
                let dep = CrateDep {
                    name: self.tcx.crate_name(cnum),
                    hash: self.tcx.crate_hash(cnum),
                    host_hash: self.tcx.crate_host_hash(cnum),
                    kind: self.tcx.dep_kind(cnum),
                    extra_filename: self.tcx.extra_filename(cnum).clone(),
                };
                (cnum, dep)
            })
            .collect::<Vec<_>>();

        {
            // Sanity-check the crate numbers
            let mut expected_cnum = 1;
            for &(n, _) in &deps {
                assert_eq!(n, CrateNum::new(expected_cnum));
                expected_cnum += 1;
            }
        }

        // We're just going to write a list of crate 'name-hash-version's, with
        // the assumption that they are numbered 1 to n.
        // FIXME (#2166): This is not nearly enough to support correct versioning
        // but is enough to get transitive crate dependencies working.
        self.lazy_array(deps.iter().map(|&(_, ref dep)| dep))
    }

    /// Encodes the `#[feature]`-gated library features declared by this crate.
    fn encode_lib_features(&mut self) -> LazyArray<(Symbol, Option<Symbol>)> {
        empty_proc_macro!(self);
        let tcx = self.tcx;
        let lib_features = tcx.lib_features(());
        self.lazy_array(lib_features.to_vec())
    }

    /// Encodes stability implications (`#[rustc_implied_by]` pairs).
    fn encode_stability_implications(&mut self) -> LazyArray<(Symbol, Symbol)> {
        empty_proc_macro!(self);
        let tcx = self.tcx;
        let implications = tcx.stability_implications(LOCAL_CRATE);
        self.lazy_array(implications.iter().map(|(k, v)| (*k, *v)))
    }

    /// Encodes the diagnostic items defined in this crate (name -> def index).
    fn encode_diagnostic_items(&mut self) -> LazyArray<(Symbol, DefIndex)> {
        empty_proc_macro!(self);
        let tcx = self.tcx;
        let diagnostic_items = &tcx.diagnostic_items(LOCAL_CRATE).name_to_id;
        self.lazy_array(diagnostic_items.iter().map(|(&name, def_id)| (name, def_id.index)))
    }

    /// Encodes the lang items defined locally, as (def index, lang-item index).
    fn encode_lang_items(&mut self) -> LazyArray<(DefIndex, usize)> {
        empty_proc_macro!(self);
        let tcx = self.tcx;
        let lang_items = tcx.lang_items();
        let lang_items = lang_items.items().iter();
        self.lazy_array(lang_items.enumerate().filter_map(|(i, &opt_def_id)| {
            if let Some(def_id) = opt_def_id {
                if def_id.is_local() {
                    return Some((def_id.index, i));
                }
            }
            None
        }))
    }

    /// Encodes the lang items this crate requires but does not define.
    fn encode_lang_items_missing(&mut self) -> LazyArray<lang_items::LangItem> {
        empty_proc_macro!(self);
        let tcx = self.tcx;
        self.lazy_array(&tcx.lang_items().missing)
    }

    /// Encodes the def indices of all traits defined in this crate.
    fn encode_traits(&mut self) -> LazyArray<DefIndex> {
        empty_proc_macro!(self);
        self.lazy_array(self.tcx.traits_in_crate(LOCAL_CRATE).iter().map(|def_id| def_id.index))
    }

    /// Encodes an index, mapping each trait to its (local) implementations.
    fn encode_impls(&mut self) -> LazyArray<TraitImpls> {
        debug!("EncodeContext::encode_traits_and_impls()");
        empty_proc_macro!(self);
        let tcx = self.tcx;
        let mut fx_hash_map: FxHashMap<DefId, Vec<(DefIndex, Option<SimplifiedType>)>> =
            FxHashMap::default();

        for id in tcx.hir().items() {
            if matches!(tcx.def_kind(id.def_id), DefKind::Impl) {
                if let Some(trait_ref) = tcx.impl_trait_ref(id.def_id.to_def_id()) {
                    let simplified_self_ty = fast_reject::simplify_type(
                        self.tcx,
                        trait_ref.self_ty(),
                        TreatParams::AsInfer,
                    );

                    fx_hash_map
                        .entry(trait_ref.def_id)
                        .or_default()
                        .push((id.def_id.local_def_index, simplified_self_ty));
                }
            }
        }

        let mut all_impls: Vec<_> = fx_hash_map.into_iter().collect();

        // Bring everything into deterministic order for hashing
        all_impls.sort_by_cached_key(|&(trait_def_id, _)| tcx.def_path_hash(trait_def_id));

        let all_impls: Vec<_> = all_impls
            .into_iter()
            .map(|(trait_def_id, mut impls)| {
                // Bring everything into deterministic order for hashing
                impls.sort_by_cached_key(|&(index, _)| {
                    tcx.hir().def_path_hash(LocalDefId { local_def_index: index })
                });

                TraitImpls {
                    trait_id: (trait_def_id.krate.as_u32(), trait_def_id.index),
                    impls: self.lazy_array(&impls),
                }
            })
            .collect();

        self.lazy_array(&all_impls)
    }

    /// Encodes the crate's incoherent inherent impls, keyed by simplified
    /// self type, in a stable (hash-based) order.
    fn encode_incoherent_impls(&mut self) -> LazyArray<IncoherentImpls> {
        debug!("EncodeContext::encode_traits_and_impls()");
        empty_proc_macro!(self);
        let tcx = self.tcx;
        let mut all_impls: Vec<_> = tcx.crate_inherent_impls(()).incoherent_impls.iter().collect();
        // Sort by stable hash so the output is deterministic across sessions.
        tcx.with_stable_hashing_context(|mut ctx| {
            all_impls.sort_by_cached_key(|&(&simp, _)| {
                let mut hasher = StableHasher::new();
                simp.hash_stable(&mut ctx, &mut hasher);
                hasher.finish::<Fingerprint>()
            })
        });
        let all_impls: Vec<_> = all_impls
            .into_iter()
            .map(|(&simp, impls)| {
                let mut impls: Vec<_> =
                    impls.into_iter().map(|def_id| def_id.local_def_index).collect();
                impls.sort_by_cached_key(|&local_def_index| {
                    tcx.hir().def_path_hash(LocalDefId { local_def_index })
                });

                IncoherentImpls { self_ty: simp, impls: self.lazy_array(impls) }
            })
            .collect();

        self.lazy_array(&all_impls)
    }

    // Encodes all symbols exported from this crate into the metadata.
    //
    // This pass is seeded off the reachability list calculated in the
    // middle::reachable module but filters out items that either don't have a
    // symbol associated with them (they weren't translated) or if they're an FFI
    // definition (as that's not defined in this crate).
    fn encode_exported_symbols(
        &mut self,
        exported_symbols: &[(ExportedSymbol<'tcx>, SymbolExportInfo)],
    ) -> LazyArray<(ExportedSymbol<'static>, SymbolExportInfo)> {
        empty_proc_macro!(self);
        // The metadata symbol name is special. It should not show up in
        // downstream crates.
        let metadata_symbol_name = SymbolName::new(self.tcx, &metadata_symbol_name(self.tcx));

        self.lazy_array(
            exported_symbols
                .iter()
                .filter(|&&(ref exported_symbol, _)| match *exported_symbol {
                    ExportedSymbol::NoDefId(symbol_name) => symbol_name != metadata_symbol_name,
                    _ => true,
                })
                .cloned(),
        )
    }

    /// For dylib builds, encodes the linkage preference chosen for each
    /// dependency; empty for all other crate types.
    fn encode_dylib_dependency_formats(&mut self) -> LazyArray<Option<LinkagePreference>> {
        empty_proc_macro!(self);
        let formats = self.tcx.dependency_formats(());
        for (ty, arr) in formats.iter() {
            if *ty != CrateType::Dylib {
                continue;
            }
            return self.lazy_array(arr.iter().map(|slot| match *slot {
                Linkage::NotLinked | Linkage::IncludedFromDylib => None,

                Linkage::Dynamic => Some(LinkagePreference::RequireDynamic),
                Linkage::Static => Some(LinkagePreference::RequireStatic),
            }));
        }
        LazyArray::empty()
    }

    /// Encodes an item from an `extern` block (foreign fn/static/type).
    fn encode_info_for_foreign_item(&mut self, def_id: DefId, nitem: &hir::ForeignItem<'_>) {
        let tcx = self.tcx;

        debug!("EncodeContext::encode_info_for_foreign_item({:?})", def_id);

        match nitem.kind {
            hir::ForeignItemKind::Fn(_, ref names, _) => {
                self.tables.asyncness.set(def_id.index, hir::IsAsync::NotAsync);
                record_array!(self.tables.fn_arg_names[def_id] <- *names);
                let constness = if self.tcx.is_const_fn_raw(def_id) {
                    hir::Constness::Const
                } else {
                    hir::Constness::NotConst
                };
                self.tables.constness.set(def_id.index, constness);
                record!(self.tables.kind[def_id] <- EntryKind::ForeignFn);
            }
            hir::ForeignItemKind::Static(..) => {
                record!(self.tables.kind[def_id] <- EntryKind::ForeignStatic);
            }
            hir::ForeignItemKind::Type => {
                record!(self.tables.kind[def_id] <- EntryKind::ForeignType);
            }
        }
        self.encode_item_type(def_id);
        if let hir::ForeignItemKind::Fn(..) = nitem.kind {
            record!(self.tables.fn_sig[def_id] <- tcx.fn_sig(def_id));
            if tcx.is_intrinsic(def_id) {
                self.tables.is_intrinsic.set(def_id.index, ());
            }
        }
    }
}

// FIXME(eddyb) make metadata encoding walk over all definitions, instead of HIR.
impl<'a, 'tcx> Visitor<'tcx> for EncodeContext<'a, 'tcx> {
    type NestedFilter = nested_filter::OnlyBodies;

    fn nested_visit_map(&mut self) -> Self::Map {
        self.tcx.hir()
    }
    fn visit_expr(&mut self, ex: &'tcx hir::Expr<'tcx>) {
        intravisit::walk_expr(self, ex);
        self.encode_info_for_expr(ex);
    }
    fn visit_anon_const(&mut self, c: &'tcx AnonConst) {
        intravisit::walk_anon_const(self, c);
        self.encode_info_for_anon_const(c.hir_id);
    }
    fn visit_item(&mut self, item: &'tcx hir::Item<'tcx>) {
        intravisit::walk_item(self, item);
        match item.kind {
            hir::ItemKind::ExternCrate(_) | hir::ItemKind::Use(..) => {} // ignore these
            _ => self.encode_info_for_item(item.def_id.to_def_id(), item),
        }
        self.encode_addl_info_for_item(item);
    }
    fn visit_foreign_item(&mut self, ni: &'tcx hir::ForeignItem<'tcx>) {
        intravisit::walk_foreign_item(self, ni);
        self.encode_info_for_foreign_item(ni.def_id.to_def_id(), ni);
    }
    fn visit_generics(&mut self, generics: &'tcx hir::Generics<'tcx>) {
        intravisit::walk_generics(self, generics);
        self.encode_info_for_generics(generics);
    }
}

impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
    /// Encodes every field of every variant of `adt_def`.
    fn encode_fields(&mut self, adt_def: ty::AdtDef<'tcx>) {
        for (variant_index, variant) in adt_def.variants().iter_enumerated() {
            for (field_index, _field) in variant.fields.iter().enumerate() {
                self.encode_field(adt_def, variant_index, field_index);
            }
        }
    }

    /// Encodes entries for type and const generic parameters (lifetimes
    /// carry no metadata of their own and are skipped).
    fn encode_info_for_generics(&mut self, generics: &hir::Generics<'tcx>) {
        for param in generics.params {
            let def_id = self.tcx.hir().local_def_id(param.hir_id);
            match param.kind {
                GenericParamKind::Lifetime { .. } => continue,
                GenericParamKind::Type { default, .. } => {
                    self.encode_info_for_generic_param(
                        def_id.to_def_id(),
                        EntryKind::TypeParam,
                        // Only defaulted type params have a type to record.
                        default.is_some(),
                    );
                }
                GenericParamKind::Const { ref default, .. } => {
                    let def_id = def_id.to_def_id();
                    self.encode_info_for_generic_param(def_id, EntryKind::ConstParam, true);
                    if default.is_some() {
                        record!(self.tables.const_param_default[def_id] <- self.tcx.const_param_default(def_id))
                    }
                }
            }
        }
    }

    /// Dispatches closure/generator expressions to `encode_info_for_closure`.
    fn encode_info_for_expr(&mut self, expr: &hir::Expr<'_>) {
        if let hir::ExprKind::Closure { .. } = expr.kind {
            self.encode_info_for_closure(expr.hir_id);
        }
    }

    /// In some cases, along with the item itself, we also
    /// encode some sub-items. Usually we want some info from the item
    /// so it's easier to do that here then to wait until we would encounter
    /// normally in the visitor walk.
    fn encode_addl_info_for_item(&mut self, item: &hir::Item<'_>) {
        match item.kind {
            hir::ItemKind::Static(..)
            | hir::ItemKind::Const(..)
            | hir::ItemKind::Fn(..)
            | hir::ItemKind::Macro(..)
            | hir::ItemKind::Mod(..)
            | hir::ItemKind::ForeignMod { .. }
            | hir::ItemKind::GlobalAsm(..)
            | hir::ItemKind::ExternCrate(..)
            | hir::ItemKind::Use(..)
            | hir::ItemKind::TyAlias(..)
            | hir::ItemKind::OpaqueTy(..)
            | hir::ItemKind::TraitAlias(..) => {
                // no sub-item recording needed in these cases
            }
            hir::ItemKind::Enum(..) => {
                let def = self.tcx.adt_def(item.def_id.to_def_id());
                self.encode_fields(def);

                for (i, variant) in def.variants().iter_enumerated() {
                    self.encode_enum_variant_info(def, i);

                    if let Some(_ctor_def_id) = variant.ctor_def_id {
                        self.encode_enum_variant_ctor(def, i);
                    }
                }
            }
            hir::ItemKind::Struct(ref struct_def, _) => {
                let def = self.tcx.adt_def(item.def_id.to_def_id());
                self.encode_fields(def);

                // If the struct has a constructor, encode it.
+ if let Some(ctor_hir_id) = struct_def.ctor_hir_id() { + let ctor_def_id = self.tcx.hir().local_def_id(ctor_hir_id); + self.encode_struct_ctor(def, ctor_def_id.to_def_id()); + } + } + hir::ItemKind::Union(..) => { + let def = self.tcx.adt_def(item.def_id.to_def_id()); + self.encode_fields(def); + } + hir::ItemKind::Impl { .. } => { + for &trait_item_def_id in + self.tcx.associated_item_def_ids(item.def_id.to_def_id()).iter() + { + self.encode_info_for_impl_item(trait_item_def_id); + } + } + hir::ItemKind::Trait(..) => { + for &item_def_id in self.tcx.associated_item_def_ids(item.def_id.to_def_id()).iter() + { + self.encode_info_for_trait_item(item_def_id); + } + } + } + } +} + +/// Used to prefetch queries which will be needed later by metadata encoding. +/// Only a subset of the queries are actually prefetched to keep this code smaller. +fn prefetch_mir(tcx: TyCtxt<'_>) { + if !tcx.sess.opts.output_types.should_codegen() { + // We won't emit MIR, so don't prefetch it. + return; + } + + par_iter(tcx.mir_keys(())).for_each(|&def_id| { + let (encode_const, encode_opt) = should_encode_mir(tcx, def_id); + + if encode_const { + tcx.ensure().mir_for_ctfe(def_id); + } + if encode_opt { + tcx.ensure().optimized_mir(def_id); + } + if encode_opt || encode_const { + tcx.ensure().promoted_mir(def_id); + } + }) +} + +// NOTE(eddyb) The following comment was preserved for posterity, even +// though it's no longer relevant as EBML (which uses nested & tagged +// "documents") was replaced with a scheme that can't go out of bounds. +// +// And here we run into yet another obscure archive bug: in which metadata +// loaded from archives may have trailing garbage bytes. Awhile back one of +// our tests was failing sporadically on the macOS 64-bit builders (both nopt +// and opt) by having ebml generate an out-of-bounds panic when looking at +// metadata. +// +// Upon investigation it turned out that the metadata file inside of an rlib +// (and ar archive) was being corrupted. 
Some compilations would generate a +// metadata file which would end in a few extra bytes, while other +// compilations would not have these extra bytes appended to the end. These +// extra bytes were interpreted by ebml as an extra tag, so they ended up +// being interpreted causing the out-of-bounds. +// +// The root cause of why these extra bytes were appearing was never +// discovered, and in the meantime the solution we're employing is to insert +// the length of the metadata to the start of the metadata. Later on this +// will allow us to slice the metadata to the precise length that we just +// generated regardless of trailing bytes that end up in it. + +pub struct EncodedMetadata { + // The declaration order matters because `mmap` should be dropped before `_temp_dir`. + mmap: Option<Mmap>, + // We need to carry MaybeTempDir to avoid deleting the temporary + // directory while accessing the Mmap. + _temp_dir: Option<MaybeTempDir>, +} + +impl EncodedMetadata { + #[inline] + pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> { + let file = std::fs::File::open(&path)?; + let file_metadata = file.metadata()?; + if file_metadata.len() == 0 { + return Ok(Self { mmap: None, _temp_dir: None }); + } + let mmap = unsafe { Some(Mmap::map(file)?) 
}; + Ok(Self { mmap, _temp_dir: temp_dir }) + } + + #[inline] + pub fn raw_data(&self) -> &[u8] { + self.mmap.as_ref().map(|mmap| mmap.as_ref()).unwrap_or_default() + } +} + +impl<S: Encoder> Encodable<S> for EncodedMetadata { + fn encode(&self, s: &mut S) { + let slice = self.raw_data(); + slice.encode(s) + } +} + +impl<D: Decoder> Decodable<D> for EncodedMetadata { + fn decode(d: &mut D) -> Self { + let len = d.read_usize(); + let mmap = if len > 0 { + let mut mmap = MmapMut::map_anon(len).unwrap(); + for _ in 0..len { + (&mut mmap[..]).write(&[d.read_u8()]).unwrap(); + } + mmap.flush().unwrap(); + Some(mmap.make_read_only().unwrap()) + } else { + None + }; + + Self { mmap, _temp_dir: None } + } +} + +pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) { + let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata"); + + // Since encoding metadata is not in a query, and nothing is cached, + // there's no need to do dep-graph tracking for any of it. + tcx.dep_graph.assert_ignored(); + + join( + || encode_metadata_impl(tcx, path), + || { + if tcx.sess.threads() == 1 { + return; + } + // Prefetch some queries used by metadata encoding. + // This is not necessary for correctness, but is only done for performance reasons. + // It can be removed if it turns out to cause trouble or be detrimental to performance. + join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE)); + }, + ); +} + +fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) { + let mut encoder = opaque::FileEncoder::new(path) + .unwrap_or_else(|err| tcx.sess.fatal(&format!("failed to create file encoder: {}", err))); + encoder.emit_raw_bytes(METADATA_HEADER); + + // Will be filled with the root position after encoding everything. 
    encoder.emit_raw_bytes(&[0, 0, 0, 0]);

    let source_map_files = tcx.sess.source_map().files();
    // Seed the cache with the first source file; the `0` is presumably its
    // index within `source_map_files` — confirm against the span encoder.
    let source_file_cache = (source_map_files[0].clone(), 0);
    let required_source_files = Some(GrowableBitSet::with_capacity(source_map_files.len()));
    // Release the borrow of the source map before handing `tcx` around.
    drop(source_map_files);

    let hygiene_ctxt = HygieneEncodeContext::default();

    let mut ecx = EncodeContext {
        opaque: encoder,
        tcx,
        feat: tcx.features(),
        tables: Default::default(),
        lazy_state: LazyState::NoNode,
        type_shorthands: Default::default(),
        predicate_shorthands: Default::default(),
        source_file_cache,
        interpret_allocs: Default::default(),
        required_source_files,
        is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
        hygiene_ctxt: &hygiene_ctxt,
    };

    // Encode the rustc version string in a predictable location.
    rustc_version().encode(&mut ecx);

    // Encode all the entries and extra information in the crate,
    // culminating in the `CrateRoot` which points to all of it.
    let root = ecx.encode_crate_root();

    ecx.opaque.flush();

    let mut file = ecx.opaque.file();
    // We will return to this position after writing the root position.
    let pos_before_seek = file.stream_position().unwrap();

    // Encode the root position.
    let header = METADATA_HEADER.len();
    file.seek(std::io::SeekFrom::Start(header as u64))
        .unwrap_or_else(|err| tcx.sess.fatal(&format!("failed to seek the file: {}", err)));
    let pos = root.position.get();
    // Most-significant byte first: this is the 32-bit big-endian integer
    // promised by the `METADATA_HEADER` docs, overwriting the four
    // placeholder zero bytes emitted at the top of this function.
    file.write_all(&[(pos >> 24) as u8, (pos >> 16) as u8, (pos >> 8) as u8, (pos >> 0) as u8])
        .unwrap_or_else(|err| tcx.sess.fatal(&format!("failed to write to the file: {}", err)));

    // Return to the position where we are before writing the root position.
+ file.seek(std::io::SeekFrom::Start(pos_before_seek)).unwrap(); + + // Record metadata size for self-profiling + tcx.prof.artifact_size( + "crate_metadata", + "crate_metadata", + file.metadata().unwrap().len() as u64, + ); +} + +pub fn provide(providers: &mut Providers) { + *providers = Providers { + traits_in_crate: |tcx, cnum| { + assert_eq!(cnum, LOCAL_CRATE); + + let mut traits = Vec::new(); + for id in tcx.hir().items() { + if matches!(tcx.def_kind(id.def_id), DefKind::Trait | DefKind::TraitAlias) { + traits.push(id.def_id.to_def_id()) + } + } + + // Bring everything into deterministic order. + traits.sort_by_cached_key(|&def_id| tcx.def_path_hash(def_id)); + tcx.arena.alloc_slice(&traits) + }, + + ..*providers + } +} diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs new file mode 100644 index 000000000..66bdecc30 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -0,0 +1,460 @@ +use crate::creader::CrateMetadataRef; +use decoder::Metadata; +use def_path_hash_map::DefPathHashMapRef; +use table::TableBuilder; + +use rustc_ast as ast; +use rustc_attr as attr; +use rustc_data_structures::svh::Svh; +use rustc_data_structures::sync::MetadataRef; +use rustc_hir as hir; +use rustc_hir::def::{CtorKind, DefKind}; +use rustc_hir::def_id::{CrateNum, DefId, DefIndex, DefPathHash, StableCrateId}; +use rustc_hir::definitions::DefKey; +use rustc_hir::lang_items; +use rustc_index::{bit_set::FiniteBitSet, vec::IndexVec}; +use rustc_middle::metadata::ModChild; +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; +use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo}; +use rustc_middle::mir; +use rustc_middle::ty::fast_reject::SimplifiedType; +use rustc_middle::ty::query::Providers; +use rustc_middle::ty::{self, ReprOptions, Ty}; +use rustc_middle::ty::{GeneratorDiagnosticData, ParameterizedOverTcx, TyCtxt}; +use rustc_serialize::opaque::FileEncoder; +use 
rustc_session::config::SymbolManglingVersion;
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
use rustc_span::edition::Edition;
use rustc_span::hygiene::{ExpnIndex, MacroKind};
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{self, ExpnData, ExpnHash, ExpnId, Span};
use rustc_target::spec::{PanicStrategy, TargetTriple};

use std::marker::PhantomData;
use std::num::NonZeroUsize;

pub use decoder::provide_extern;
use decoder::DecodeContext;
pub(crate) use decoder::{CrateMetadata, CrateNumMap, MetadataBlob};
use encoder::EncodeContext;
pub use encoder::{encode_metadata, EncodedMetadata};
use rustc_span::hygiene::SyntaxContextData;

mod decoder;
mod def_path_hash_map;
mod encoder;
mod table;

/// The version string written into every crate's metadata (and compared
/// against when loading it; see `METADATA_VERSION` below). Falls back to
/// "unknown version" when `CFG_VERSION` was not set at build time.
pub(crate) fn rustc_version() -> String {
    format!("rustc {}", option_env!("CFG_VERSION").unwrap_or("unknown version"))
}

/// Metadata encoding version.
/// N.B., increment this if you change the format of metadata such that
/// the rustc version can't be found to compare with `rustc_version()`.
const METADATA_VERSION: u8 = 6;

/// Metadata header which includes `METADATA_VERSION`.
///
/// This header is followed by the position of the `CrateRoot`,
/// which is encoded as a 32-bit big-endian unsigned integer,
/// and further followed by the rustc version string.
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];

/// A value of type T referred to by its absolute position
/// in the metadata, and which can be decoded lazily.
///
/// Metadata is effectively a tree, encoded in post-order,
/// and with the root's position written next to the header.
/// That means every single `LazyValue` points to some previous
/// location in the metadata and is part of a larger node.
///
/// The first `LazyValue` in a node is encoded as the backwards
/// distance from the position where the containing node
/// starts and where the `LazyValue` points to, while the rest
/// use the forward distance from the previous `LazyValue`.
/// Distances start at 1, as 0-byte nodes are invalid.
/// Also invalid are nodes being referred to in a different
/// order than they were encoded in.
#[must_use]
struct LazyValue<T> {
    // Absolute position of the encoded value within the metadata blob.
    position: NonZeroUsize,
    // `fn() -> T` makes the marker covariant in `T` without owning a `T`.
    _marker: PhantomData<fn() -> T>,
}

impl<T: ParameterizedOverTcx> ParameterizedOverTcx for LazyValue<T> {
    type Value<'tcx> = LazyValue<T::Value<'tcx>>;
}

impl<T> LazyValue<T> {
    fn from_position(position: NonZeroUsize) -> LazyValue<T> {
        LazyValue { position, _marker: PhantomData }
    }
}

/// A list of lazily-decoded values.
///
/// Unlike `LazyValue<Vec<T>>`, the length is encoded next to the
/// position, not at the position, which means that the length
/// doesn't need to be known before encoding all the elements.
///
/// If the length is 0, no position is encoded, but otherwise,
/// the encoding is that of `LazyArray`, with the distinction that
/// the minimal distance is the length of the sequence, i.e.
/// it's assumed there's no 0-byte element in the sequence.
struct LazyArray<T> {
    // Absolute position of the first encoded element.
    position: NonZeroUsize,
    num_elems: usize,
    _marker: PhantomData<fn() -> T>,
}

impl<T: ParameterizedOverTcx> ParameterizedOverTcx for LazyArray<T> {
    type Value<'tcx> = LazyArray<T::Value<'tcx>>;
}

impl<T> LazyArray<T> {
    fn from_position_and_num_elems(position: NonZeroUsize, num_elems: usize) -> LazyArray<T> {
        LazyArray { position, num_elems, _marker: PhantomData }
    }

    // An empty array never dereferences its position, so any non-zero
    // value (here 1) works as a placeholder.
    fn empty() -> LazyArray<T> {
        LazyArray::from_position_and_num_elems(NonZeroUsize::new(1).unwrap(), 0)
    }
}

/// A list of lazily-decoded values, with the added capability of random access.
///
/// Random-access table (i.e.
offering constant-time `get`/`set`), similar to
/// `LazyArray<T>`, but without requiring encoding or decoding all the values
/// eagerly and in-order.
struct LazyTable<I, T> {
    position: NonZeroUsize,
    // Total size in bytes of the encoded table, used to bound slicing.
    encoded_size: usize,
    _marker: PhantomData<fn(I) -> T>,
}

impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I, T> {
    type Value<'tcx> = LazyTable<I, T::Value<'tcx>>;
}

impl<I, T> LazyTable<I, T> {
    fn from_position_and_encoded_size(
        position: NonZeroUsize,
        encoded_size: usize,
    ) -> LazyTable<I, T> {
        LazyTable { position, encoded_size, _marker: PhantomData }
    }
}

// Manual `Copy`/`Clone` impls: deriving them would add unnecessary
// `T: Copy`/`T: Clone` bounds even though only positions are stored.
impl<T> Copy for LazyValue<T> {}
impl<T> Clone for LazyValue<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for LazyArray<T> {}
impl<T> Clone for LazyArray<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<I, T> Copy for LazyTable<I, T> {}
impl<I, T> Clone for LazyTable<I, T> {
    fn clone(&self) -> Self {
        *self
    }
}

/// Encoding / decoding state for `Lazy`s (`LazyValue`, `LazyArray`, and `LazyTable`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum LazyState {
    /// Outside of a metadata node.
    NoNode,

    /// Inside a metadata node, and before any `Lazy`s.
    /// The position is that of the node itself.
    NodeStart(NonZeroUsize),

    /// Inside a metadata node, after at least one previous `Lazy`.
    /// The position is where that previous `Lazy` would start.
    Previous(NonZeroUsize),
}

type SyntaxContextTable = LazyTable<u32, LazyValue<SyntaxContextData>>;
type ExpnDataTable = LazyTable<ExpnIndex, LazyValue<ExpnData>>;
type ExpnHashTable = LazyTable<ExpnIndex, LazyValue<ExpnHash>>;

#[derive(MetadataEncodable, MetadataDecodable)]
pub(crate) struct ProcMacroData {
    proc_macro_decls_static: DefIndex,
    stability: Option<attr::Stability>,
    macros: LazyArray<DefIndex>,
}

/// Serialized metadata for a crate.
/// When compiling a proc-macro crate, we encode many of
/// the `LazyArray<T>` fields as `Lazy::empty()`.
This serves two purposes: +/// +/// 1. We avoid performing unnecessary work. Proc-macro crates can only +/// export proc-macros functions, which are compiled into a shared library. +/// As a result, a large amount of the information we normally store +/// (e.g. optimized MIR) is unneeded by downstream crates. +/// 2. We avoid serializing invalid `CrateNum`s. When we deserialize +/// a proc-macro crate, we don't load any of its dependencies (since we +/// just need to invoke a native function from the shared library). +/// This means that any foreign `CrateNum`s that we serialize cannot be +/// deserialized, since we will not know how to map them into the current +/// compilation session. If we were to serialize a proc-macro crate like +/// a normal crate, much of what we serialized would be unusable in addition +/// to being unused. +#[derive(MetadataEncodable, MetadataDecodable)] +pub(crate) struct CrateRoot { + name: Symbol, + triple: TargetTriple, + extra_filename: String, + hash: Svh, + stable_crate_id: StableCrateId, + required_panic_strategy: Option<PanicStrategy>, + panic_in_drop_strategy: PanicStrategy, + edition: Edition, + has_global_allocator: bool, + has_panic_handler: bool, + has_default_lib_allocator: bool, + + crate_deps: LazyArray<CrateDep>, + dylib_dependency_formats: LazyArray<Option<LinkagePreference>>, + lib_features: LazyArray<(Symbol, Option<Symbol>)>, + stability_implications: LazyArray<(Symbol, Symbol)>, + lang_items: LazyArray<(DefIndex, usize)>, + lang_items_missing: LazyArray<lang_items::LangItem>, + diagnostic_items: LazyArray<(Symbol, DefIndex)>, + native_libraries: LazyArray<NativeLib>, + foreign_modules: LazyArray<ForeignModule>, + traits: LazyArray<DefIndex>, + impls: LazyArray<TraitImpls>, + incoherent_impls: LazyArray<IncoherentImpls>, + interpret_alloc_index: LazyArray<u32>, + proc_macro_data: Option<ProcMacroData>, + + tables: LazyTables, + debugger_visualizers: LazyArray<rustc_span::DebuggerVisualizerFile>, + + exported_symbols: 
LazyArray<(ExportedSymbol<'static>, SymbolExportInfo)>, + + syntax_contexts: SyntaxContextTable, + expn_data: ExpnDataTable, + expn_hashes: ExpnHashTable, + + def_path_hash_map: LazyValue<DefPathHashMapRef<'static>>, + + source_map: LazyArray<rustc_span::SourceFile>, + + compiler_builtins: bool, + needs_allocator: bool, + needs_panic_runtime: bool, + no_builtins: bool, + panic_runtime: bool, + profiler_runtime: bool, + symbol_mangling_version: SymbolManglingVersion, +} + +/// On-disk representation of `DefId`. +/// This creates a type-safe way to enforce that we remap the CrateNum between the on-disk +/// representation and the compilation session. +#[derive(Copy, Clone)] +pub(crate) struct RawDefId { + krate: u32, + index: u32, +} + +impl Into<RawDefId> for DefId { + fn into(self) -> RawDefId { + RawDefId { krate: self.krate.as_u32(), index: self.index.as_u32() } + } +} + +impl RawDefId { + /// This exists so that `provide_one!` is happy + fn decode(self, meta: (CrateMetadataRef<'_>, TyCtxt<'_>)) -> DefId { + self.decode_from_cdata(meta.0) + } + + fn decode_from_cdata(self, cdata: CrateMetadataRef<'_>) -> DefId { + let krate = CrateNum::from_u32(self.krate); + let krate = cdata.map_encoded_cnum_to_current(krate); + DefId { krate, index: DefIndex::from_u32(self.index) } + } +} + +#[derive(Encodable, Decodable)] +pub(crate) struct CrateDep { + pub name: Symbol, + pub hash: Svh, + pub host_hash: Option<Svh>, + pub kind: CrateDepKind, + pub extra_filename: String, +} + +#[derive(MetadataEncodable, MetadataDecodable)] +pub(crate) struct TraitImpls { + trait_id: (u32, DefIndex), + impls: LazyArray<(DefIndex, Option<SimplifiedType>)>, +} + +#[derive(MetadataEncodable, MetadataDecodable)] +pub(crate) struct IncoherentImpls { + self_ty: SimplifiedType, + impls: LazyArray<DefIndex>, +} + +/// Define `LazyTables` and `TableBuilders` at the same time. +macro_rules! define_tables { + ($($name:ident: Table<$IDX:ty, $T:ty>),+ $(,)?) 
=> { + #[derive(MetadataEncodable, MetadataDecodable)] + pub(crate) struct LazyTables { + $($name: LazyTable<$IDX, $T>),+ + } + + #[derive(Default)] + struct TableBuilders { + $($name: TableBuilder<$IDX, $T>),+ + } + + impl TableBuilders { + fn encode(&self, buf: &mut FileEncoder) -> LazyTables { + LazyTables { + $($name: self.$name.encode(buf)),+ + } + } + } + } +} + +define_tables! { + kind: Table<DefIndex, LazyValue<EntryKind>>, + attributes: Table<DefIndex, LazyArray<ast::Attribute>>, + children: Table<DefIndex, LazyArray<DefIndex>>, + + opt_def_kind: Table<DefIndex, DefKind>, + visibility: Table<DefIndex, LazyValue<ty::Visibility>>, + def_span: Table<DefIndex, LazyValue<Span>>, + def_ident_span: Table<DefIndex, LazyValue<Span>>, + lookup_stability: Table<DefIndex, LazyValue<attr::Stability>>, + lookup_const_stability: Table<DefIndex, LazyValue<attr::ConstStability>>, + lookup_deprecation_entry: Table<DefIndex, LazyValue<attr::Deprecation>>, + // As an optimization, a missing entry indicates an empty `&[]`. + explicit_item_bounds: Table<DefIndex, LazyArray<(ty::Predicate<'static>, Span)>>, + explicit_predicates_of: Table<DefIndex, LazyValue<ty::GenericPredicates<'static>>>, + generics_of: Table<DefIndex, LazyValue<ty::Generics>>, + // As an optimization, a missing entry indicates an empty `&[]`. 
+ inferred_outlives_of: Table<DefIndex, LazyArray<(ty::Predicate<'static>, Span)>>, + super_predicates_of: Table<DefIndex, LazyValue<ty::GenericPredicates<'static>>>, + type_of: Table<DefIndex, LazyValue<Ty<'static>>>, + variances_of: Table<DefIndex, LazyArray<ty::Variance>>, + fn_sig: Table<DefIndex, LazyValue<ty::PolyFnSig<'static>>>, + codegen_fn_attrs: Table<DefIndex, LazyValue<CodegenFnAttrs>>, + impl_trait_ref: Table<DefIndex, LazyValue<ty::TraitRef<'static>>>, + const_param_default: Table<DefIndex, LazyValue<rustc_middle::ty::Const<'static>>>, + optimized_mir: Table<DefIndex, LazyValue<mir::Body<'static>>>, + mir_for_ctfe: Table<DefIndex, LazyValue<mir::Body<'static>>>, + promoted_mir: Table<DefIndex, LazyValue<IndexVec<mir::Promoted, mir::Body<'static>>>>, + // FIXME(compiler-errors): Why isn't this a LazyArray? + thir_abstract_const: Table<DefIndex, LazyValue<&'static [ty::abstract_const::Node<'static>]>>, + impl_parent: Table<DefIndex, RawDefId>, + impl_polarity: Table<DefIndex, ty::ImplPolarity>, + constness: Table<DefIndex, hir::Constness>, + is_intrinsic: Table<DefIndex, ()>, + impl_defaultness: Table<DefIndex, hir::Defaultness>, + // FIXME(eddyb) perhaps compute this on the fly if cheap enough? 
+ coerce_unsized_info: Table<DefIndex, LazyValue<ty::adjustment::CoerceUnsizedInfo>>, + mir_const_qualif: Table<DefIndex, LazyValue<mir::ConstQualifs>>, + rendered_const: Table<DefIndex, LazyValue<String>>, + asyncness: Table<DefIndex, hir::IsAsync>, + fn_arg_names: Table<DefIndex, LazyArray<Ident>>, + generator_kind: Table<DefIndex, LazyValue<hir::GeneratorKind>>, + trait_def: Table<DefIndex, LazyValue<ty::TraitDef>>, + + trait_item_def_id: Table<DefIndex, RawDefId>, + inherent_impls: Table<DefIndex, LazyArray<DefIndex>>, + expn_that_defined: Table<DefIndex, LazyValue<ExpnId>>, + unused_generic_params: Table<DefIndex, LazyValue<FiniteBitSet<u32>>>, + repr_options: Table<DefIndex, LazyValue<ReprOptions>>, + // `def_keys` and `def_path_hashes` represent a lazy version of a + // `DefPathTable`. This allows us to avoid deserializing an entire + // `DefPathTable` up front, since we may only ever use a few + // definitions from any given crate. + def_keys: Table<DefIndex, LazyValue<DefKey>>, + def_path_hashes: Table<DefIndex, DefPathHash>, + proc_macro_quoted_spans: Table<usize, LazyValue<Span>>, + generator_diagnostic_data: Table<DefIndex, LazyValue<GeneratorDiagnosticData<'static>>>, + may_have_doc_links: Table<DefIndex, ()>, +} + +#[derive(Copy, Clone, MetadataEncodable, MetadataDecodable)] +enum EntryKind { + AnonConst, + Const, + Static, + ForeignStatic, + ForeignMod, + ForeignType, + GlobalAsm, + Type, + TypeParam, + ConstParam, + OpaqueTy, + Enum, + Field, + Variant(LazyValue<VariantData>), + Struct(LazyValue<VariantData>), + Union(LazyValue<VariantData>), + Fn, + ForeignFn, + Mod(LazyArray<ModChild>), + MacroDef(LazyValue<ast::MacArgs>, /*macro_rules*/ bool), + ProcMacro(MacroKind), + Closure, + Generator, + Trait, + Impl, + AssocFn { container: ty::AssocItemContainer, has_self: bool }, + AssocType(ty::AssocItemContainer), + AssocConst(ty::AssocItemContainer), + TraitAlias, +} + +#[derive(TyEncodable, TyDecodable)] +struct VariantData { + ctor_kind: CtorKind, + 
discr: ty::VariantDiscr, + /// If this is unit or tuple-variant/struct, then this is the index of the ctor id. + ctor: Option<DefIndex>, + is_non_exhaustive: bool, +} + +#[derive(TyEncodable, TyDecodable)] +struct GeneratorData<'tcx> { + layout: mir::GeneratorLayout<'tcx>, +} + +// Tags used for encoding Spans: +const TAG_VALID_SPAN_LOCAL: u8 = 0; +const TAG_VALID_SPAN_FOREIGN: u8 = 1; +const TAG_PARTIAL_SPAN: u8 = 2; + +pub fn provide(providers: &mut Providers) { + encoder::provide(providers); + decoder::provide(providers); +} + +trivially_parameterized_over_tcx! { + VariantData, + EntryKind, + RawDefId, + TraitImpls, + IncoherentImpls, + CrateRoot, + CrateDep, +} diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs new file mode 100644 index 000000000..21841ae25 --- /dev/null +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -0,0 +1,330 @@ +use crate::rmeta::*; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_hir::def::{CtorKind, CtorOf}; +use rustc_index::vec::Idx; +use rustc_middle::ty::ParameterizedOverTcx; +use rustc_serialize::opaque::FileEncoder; +use rustc_serialize::Encoder as _; +use rustc_span::hygiene::MacroKind; +use std::convert::TryInto; +use std::marker::PhantomData; +use std::num::NonZeroUsize; +use tracing::debug; + +/// Helper trait, for encoding to, and decoding from, a fixed number of bytes. +/// Used mainly for Lazy positions and lengths. +/// Unchecked invariant: `Self::default()` should encode as `[0; BYTE_LEN]`, +/// but this has no impact on safety. 
+pub(super) trait FixedSizeEncoding: Default { + /// This should be `[u8; BYTE_LEN]`; + type ByteArray; + + fn from_bytes(b: &Self::ByteArray) -> Self; + fn write_to_bytes(self, b: &mut Self::ByteArray); +} + +impl FixedSizeEncoding for u32 { + type ByteArray = [u8; 4]; + + #[inline] + fn from_bytes(b: &[u8; 4]) -> Self { + Self::from_le_bytes(*b) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 4]) { + *b = self.to_le_bytes(); + } +} + +macro_rules! fixed_size_enum { + ($ty:ty { $(($($pat:tt)*))* }) => { + impl FixedSizeEncoding for Option<$ty> { + type ByteArray = [u8;1]; + + #[inline] + fn from_bytes(b: &[u8;1]) -> Self { + use $ty::*; + if b[0] == 0 { + return None; + } + match b[0] - 1 { + $(${index()} => Some($($pat)*),)* + _ => panic!("Unexpected ImplPolarity code: {:?}", b[0]), + } + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8;1]) { + use $ty::*; + b[0] = match self { + None => 0, + $(Some($($pat)*) => 1 + ${index()},)* + } + } + } + } +} + +fixed_size_enum! { + DefKind { + ( Mod ) + ( Struct ) + ( Union ) + ( Enum ) + ( Variant ) + ( Trait ) + ( TyAlias ) + ( ForeignTy ) + ( TraitAlias ) + ( AssocTy ) + ( TyParam ) + ( Fn ) + ( Const ) + ( ConstParam ) + ( AssocFn ) + ( AssocConst ) + ( ExternCrate ) + ( Use ) + ( ForeignMod ) + ( AnonConst ) + ( InlineConst ) + ( OpaqueTy ) + ( Field ) + ( LifetimeParam ) + ( GlobalAsm ) + ( Impl ) + ( Closure ) + ( Generator ) + ( Static(ast::Mutability::Not) ) + ( Static(ast::Mutability::Mut) ) + ( Ctor(CtorOf::Struct, CtorKind::Fn) ) + ( Ctor(CtorOf::Struct, CtorKind::Const) ) + ( Ctor(CtorOf::Struct, CtorKind::Fictive) ) + ( Ctor(CtorOf::Variant, CtorKind::Fn) ) + ( Ctor(CtorOf::Variant, CtorKind::Const) ) + ( Ctor(CtorOf::Variant, CtorKind::Fictive) ) + ( Macro(MacroKind::Bang) ) + ( Macro(MacroKind::Attr) ) + ( Macro(MacroKind::Derive) ) + } +} + +fixed_size_enum! { + ty::ImplPolarity { + ( Positive ) + ( Negative ) + ( Reservation ) + } +} + +fixed_size_enum! 
{ + hir::Constness { + ( NotConst ) + ( Const ) + } +} + +fixed_size_enum! { + hir::Defaultness { + ( Final ) + ( Default { has_value: false } ) + ( Default { has_value: true } ) + } +} + +fixed_size_enum! { + hir::IsAsync { + ( NotAsync ) + ( Async ) + } +} + +// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost. +impl FixedSizeEncoding for Option<DefPathHash> { + type ByteArray = [u8; 16]; + + #[inline] + fn from_bytes(b: &[u8; 16]) -> Self { + Some(DefPathHash(Fingerprint::from_le_bytes(*b))) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 16]) { + let Some(DefPathHash(fingerprint)) = self else { + panic!("Trying to encode absent DefPathHash.") + }; + *b = fingerprint.to_le_bytes(); + } +} + +// We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case. +impl FixedSizeEncoding for Option<RawDefId> { + type ByteArray = [u8; 8]; + + #[inline] + fn from_bytes(b: &[u8; 8]) -> Self { + let krate = u32::from_le_bytes(b[0..4].try_into().unwrap()); + let index = u32::from_le_bytes(b[4..8].try_into().unwrap()); + if krate == 0 { + return None; + } + Some(RawDefId { krate: krate - 1, index }) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 8]) { + match self { + None => *b = [0; 8], + Some(RawDefId { krate, index }) => { + // CrateNum is less than `CrateNum::MAX_AS_U32`. + debug_assert!(krate < u32::MAX); + b[0..4].copy_from_slice(&(1 + krate).to_le_bytes()); + b[4..8].copy_from_slice(&index.to_le_bytes()); + } + } + } +} + +impl FixedSizeEncoding for Option<()> { + type ByteArray = [u8; 1]; + + #[inline] + fn from_bytes(b: &[u8; 1]) -> Self { + (b[0] != 0).then(|| ()) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 1]) { + b[0] = self.is_some() as u8 + } +} + +// NOTE(eddyb) there could be an impl for `usize`, which would enable a more +// generic `LazyValue<T>` impl, but in the general case we might not need / want +// to fit every `usize` in `u32`. 
+impl<T> FixedSizeEncoding for Option<LazyValue<T>> { + type ByteArray = [u8; 4]; + + #[inline] + fn from_bytes(b: &[u8; 4]) -> Self { + let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?; + Some(LazyValue::from_position(position)) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 4]) { + let position = self.map_or(0, |lazy| lazy.position.get()); + let position: u32 = position.try_into().unwrap(); + position.write_to_bytes(b) + } +} + +impl<T> FixedSizeEncoding for Option<LazyArray<T>> { + type ByteArray = [u8; 8]; + + #[inline] + fn from_bytes(b: &[u8; 8]) -> Self { + let ([ref position_bytes, ref meta_bytes],[])= b.as_chunks::<4>() else { panic!() }; + let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?; + let len = u32::from_bytes(meta_bytes) as usize; + Some(LazyArray::from_position_and_num_elems(position, len)) + } + + #[inline] + fn write_to_bytes(self, b: &mut [u8; 8]) { + let ([ref mut position_bytes, ref mut meta_bytes],[])= b.as_chunks_mut::<4>() else { panic!() }; + + let position = self.map_or(0, |lazy| lazy.position.get()); + let position: u32 = position.try_into().unwrap(); + position.write_to_bytes(position_bytes); + + let len = self.map_or(0, |lazy| lazy.num_elems); + let len: u32 = len.try_into().unwrap(); + len.write_to_bytes(meta_bytes); + } +} + +/// Helper for constructing a table's serialization (also see `Table`). 
/// Accumulates fixed-size encoded entries, indexed by `I`, before they are
/// written out as one contiguous run of byte arrays (see `encode`).
pub(super) struct TableBuilder<I: Idx, T>
where
    Option<T>: FixedSizeEncoding,
{
    // One fixed-size byte array per index; gaps are zero-filled, which the
    // `Option<T>` encodings in this file decode as `None`.
    blocks: IndexVec<I, <Option<T> as FixedSizeEncoding>::ByteArray>,
    _marker: PhantomData<T>,
}

impl<I: Idx, T> Default for TableBuilder<I, T>
where
    Option<T>: FixedSizeEncoding,
{
    // Manual impl because `derive(Default)` would require `T: Default`.
    fn default() -> Self {
        TableBuilder { blocks: Default::default(), _marker: PhantomData }
    }
}

impl<I: Idx, T> TableBuilder<I, T>
where
    Option<T>: FixedSizeEncoding,
{
    /// Encodes `value` at index `i`, growing the table with zeroed (absent)
    /// entries as needed to make `i` addressable.
    pub(crate) fn set<const N: usize>(&mut self, i: I, value: T)
    where
        Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
    {
        // FIXME(eddyb) investigate more compact encodings for sparse tables.
        // On the PR @michaelwoerister mentioned:
        // > Space requirements could perhaps be optimized by using the HAMT `popcnt`
        // > trick (i.e. divide things into buckets of 32 or 64 items and then
        // > store bit-masks of which item in each bucket is actually serialized).
        self.blocks.ensure_contains_elem(i, || [0; N]);
        Some(value).write_to_bytes(&mut self.blocks[i]);
    }

    /// Writes every block (including zeroed gaps) to `buf` back-to-back and
    /// returns a `LazyTable` recording the start position and byte length.
    pub(crate) fn encode<const N: usize>(&self, buf: &mut FileEncoder) -> LazyTable<I, T>
    where
        Option<T>: FixedSizeEncoding<ByteArray = [u8; N]>,
    {
        let pos = buf.position();
        for block in &self.blocks {
            buf.emit_raw_bytes(block);
        }
        let num_bytes = self.blocks.len() * N;
        // `pos` is nonzero here — presumably the metadata header precedes any
        // table in the blob; the unwrap would catch a table at offset 0.
        LazyTable::from_position_and_encoded_size(
            NonZeroUsize::new(pos as usize).unwrap(),
            num_bytes,
        )
    }
}

impl<I: Idx, T: ParameterizedOverTcx> LazyTable<I, T>
where
    Option<T>: FixedSizeEncoding,
{
    /// Given the metadata, extract out the value at a particular index (if any).
    ///
    /// Returns `None` both for out-of-range indices and for in-range entries
    /// whose bytes decode to `None` (e.g. zero-filled gaps left by `set`).
    #[inline(never)]
    pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>, const N: usize>(
        &self,
        metadata: M,
        i: I,
    ) -> Option<T::Value<'tcx>>
    where
        Option<T::Value<'tcx>>: FixedSizeEncoding<ByteArray = [u8; N]>,
    {
        // NOTE(review): message still says "lookup" from before the rename to
        // `get`; harmless but worth updating alongside a functional change.
        debug!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);

        let start = self.position.get();
        let bytes = &metadata.blob()[start..start + self.encoded_size];
        // The table's byte length is always a multiple of N, so the remainder
        // chunk must be empty; anything else indicates corrupt metadata.
        let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
        let bytes = bytes.get(i.index())?;
        FixedSizeEncoding::from_bytes(bytes)
    }

    /// Size of the table in entries, including possible gaps.
    pub(super) fn size<const N: usize>(&self) -> usize
    where
        for<'tcx> Option<T::Value<'tcx>>: FixedSizeEncoding<ByteArray = [u8; N]>,
    {
        self.encoded_size / N
    }
}