From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_query_system/src/cache.rs | 53 + .../rustc_query_system/src/dep_graph/README.md | 4 + compiler/rustc_query_system/src/dep_graph/debug.rs | 63 + .../rustc_query_system/src/dep_graph/dep_node.rs | 176 +++ compiler/rustc_query_system/src/dep_graph/graph.rs | 1288 ++++++++++++++++++++ compiler/rustc_query_system/src/dep_graph/mod.rs | 106 ++ compiler/rustc_query_system/src/dep_graph/query.rs | 68 ++ .../rustc_query_system/src/dep_graph/serialized.rs | 330 +++++ compiler/rustc_query_system/src/ich/hcx.rs | 223 ++++ compiler/rustc_query_system/src/ich/impls_hir.rs | 42 + .../rustc_query_system/src/ich/impls_syntax.rs | 150 +++ compiler/rustc_query_system/src/ich/mod.rs | 19 + compiler/rustc_query_system/src/lib.rs | 19 + compiler/rustc_query_system/src/query/README.md | 3 + compiler/rustc_query_system/src/query/caches.rs | 226 ++++ compiler/rustc_query_system/src/query/config.rs | 75 ++ compiler/rustc_query_system/src/query/job.rs | 612 ++++++++++ compiler/rustc_query_system/src/query/mod.rs | 125 ++ compiler/rustc_query_system/src/query/plumbing.rs | 742 +++++++++++ 19 files changed, 4324 insertions(+) create mode 100644 compiler/rustc_query_system/src/cache.rs create mode 100644 compiler/rustc_query_system/src/dep_graph/README.md create mode 100644 compiler/rustc_query_system/src/dep_graph/debug.rs create mode 100644 compiler/rustc_query_system/src/dep_graph/dep_node.rs create mode 100644 compiler/rustc_query_system/src/dep_graph/graph.rs create mode 100644 compiler/rustc_query_system/src/dep_graph/mod.rs create mode 100644 compiler/rustc_query_system/src/dep_graph/query.rs create mode 100644 compiler/rustc_query_system/src/dep_graph/serialized.rs create mode 100644 compiler/rustc_query_system/src/ich/hcx.rs create mode 100644 compiler/rustc_query_system/src/ich/impls_hir.rs create mode 100644 compiler/rustc_query_system/src/ich/impls_syntax.rs create mode 100644 compiler/rustc_query_system/src/ich/mod.rs create mode 100644 compiler/rustc_query_system/src/lib.rs create mode 100644 compiler/rustc_query_system/src/query/README.md create mode 100644 compiler/rustc_query_system/src/query/caches.rs create mode 100644 compiler/rustc_query_system/src/query/config.rs create mode 100644 compiler/rustc_query_system/src/query/job.rs create mode 100644 compiler/rustc_query_system/src/query/mod.rs create mode 100644 compiler/rustc_query_system/src/query/plumbing.rs (limited to 'compiler/rustc_query_system/src') diff --git a/compiler/rustc_query_system/src/cache.rs b/compiler/rustc_query_system/src/cache.rs new file mode 100644 index 000000000..d592812f7 --- /dev/null +++ b/compiler/rustc_query_system/src/cache.rs @@ -0,0 +1,53 @@ +//! Cache for candidate selection. + +use crate::dep_graph::{DepContext, DepNodeIndex}; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::Lock; + +use std::hash::Hash; + +#[derive(Clone)] +pub struct Cache { + hashmap: Lock>>, +} + +impl Default for Cache { + fn default() -> Self { + Self { hashmap: Default::default() } + } +} + +impl Cache { + /// Actually frees the underlying memory in contrast to what stdlib containers do on `clear` + pub fn clear(&self) { + *self.hashmap.borrow_mut() = Default::default(); + } +} + +impl Cache { + pub fn get(&self, key: &Key, tcx: CTX) -> Option { + Some(self.hashmap.borrow().get(key)?.get(tcx)) + } + + pub fn insert(&self, key: Key, dep_node: DepNodeIndex, value: Value) { + self.hashmap.borrow_mut().insert(key, WithDepNode::new(dep_node, value)); + } +} + +#[derive(Clone, Eq, PartialEq)] +pub struct WithDepNode { + dep_node: DepNodeIndex, + cached_value: T, +} + +impl WithDepNode { + pub fn new(dep_node: DepNodeIndex, cached_value: T) -> Self { + WithDepNode { dep_node, cached_value } + } + + pub fn get(&self, tcx: CTX) -> T { + tcx.dep_graph().read_index(self.dep_node); + self.cached_value.clone() + } +} diff --git a/compiler/rustc_query_system/src/dep_graph/README.md b/compiler/rustc_query_system/src/dep_graph/README.md new file mode 100644 index 000000000..b9d91cd35 --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/README.md @@ -0,0 +1,4 @@ +To learn more about how dependency tracking works in rustc, see the [rustc +guide]. + +[rustc dev guide]: https://rustc-dev-guide.rust-lang.org/query.html diff --git a/compiler/rustc_query_system/src/dep_graph/debug.rs b/compiler/rustc_query_system/src/dep_graph/debug.rs new file mode 100644 index 000000000..f9f3169af --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/debug.rs @@ -0,0 +1,63 @@ +//! Code for debugging the dep-graph. + +use super::{DepKind, DepNode, DepNodeIndex}; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::Lock; +use std::error::Error; + +/// A dep-node filter goes from a user-defined string to a query over +/// nodes. Right now the format is like this: +/// ```ignore (illustrative) +/// x & y & z +/// ``` +/// where the format-string of the dep-node must contain `x`, `y`, and +/// `z`. +#[derive(Debug)] +pub struct DepNodeFilter { + text: String, +} + +impl DepNodeFilter { + pub fn new(text: &str) -> Self { + DepNodeFilter { text: text.trim().to_string() } + } + + /// Returns `true` if all nodes always pass the filter. + pub fn accepts_all(&self) -> bool { + self.text.is_empty() + } + + /// Tests whether `node` meets the filter, returning true if so. + pub fn test(&self, node: &DepNode) -> bool { + let debug_str = format!("{:?}", node); + self.text.split('&').map(|s| s.trim()).all(|f| debug_str.contains(f)) + } +} + +/// A filter like `F -> G` where `F` and `G` are valid dep-node +/// filters. This can be used to test the source/target independently. +pub struct EdgeFilter { + pub source: DepNodeFilter, + pub target: DepNodeFilter, + pub index_to_node: Lock>>, +} + +impl EdgeFilter { + pub fn new(test: &str) -> Result, Box> { + let parts: Vec<_> = test.split("->").collect(); + if parts.len() != 2 { + Err(format!("expected a filter like `a&b -> c&d`, not `{}`", test).into()) + } else { + Ok(EdgeFilter { + source: DepNodeFilter::new(parts[0]), + target: DepNodeFilter::new(parts[1]), + index_to_node: Lock::new(FxHashMap::default()), + }) + } + } + + #[cfg(debug_assertions)] + pub fn test(&self, source: &DepNode, target: &DepNode) -> bool { + self.source.test(source) && self.target.test(target) + } +} diff --git a/compiler/rustc_query_system/src/dep_graph/dep_node.rs b/compiler/rustc_query_system/src/dep_graph/dep_node.rs new file mode 100644 index 000000000..162c274d8 --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/dep_node.rs @@ -0,0 +1,176 @@ +//! This module defines the `DepNode` type which the compiler uses to represent +//! nodes in the dependency graph. A `DepNode` consists of a `DepKind` (which +//! specifies the kind of thing it represents, like a piece of HIR, MIR, etc) +//! and a `Fingerprint`, a 128 bit hash value the exact meaning of which +//! depends on the node's `DepKind`. Together, the kind and the fingerprint +//! fully identify a dependency node, even across multiple compilation sessions. +//! In other words, the value of the fingerprint does not depend on anything +//! that is specific to a given compilation session, like an unpredictable +//! interning key (e.g., NodeId, DefId, Symbol) or the numeric value of a +//! pointer. The concept behind this could be compared to how git commit hashes +//! uniquely identify a given commit and has a few advantages: +//! +//! * A `DepNode` can simply be serialized to disk and loaded in another session +//! without the need to do any "rebasing (like we have to do for Spans and +//! NodeIds) or "retracing" like we had to do for `DefId` in earlier +//! implementations of the dependency graph. +//! * A `Fingerprint` is just a bunch of bits, which allows `DepNode` to +//! implement `Copy`, `Sync`, `Send`, `Freeze`, etc. +//! * Since we just have a bit pattern, `DepNode` can be mapped from disk into +//! memory without any post-processing (e.g., "abomination-style" pointer +//! reconstruction). +//! * Because a `DepNode` is self-contained, we can instantiate `DepNodes` that +//! refer to things that do not exist anymore. In previous implementations +//! `DepNode` contained a `DefId`. A `DepNode` referring to something that +//! had been removed between the previous and the current compilation session +//! could not be instantiated because the current compilation session +//! contained no `DefId` for thing that had been removed. +//! +//! `DepNode` definition happens in `rustc_middle` with the `define_dep_nodes!()` macro. +//! This macro defines the `DepKind` enum and a corresponding `DepConstructor` enum. The +//! `DepConstructor` enum links a `DepKind` to the parameters that are needed at runtime in order +//! to construct a valid `DepNode` fingerprint. +//! +//! Because the macro sees what parameters a given `DepKind` requires, it can +//! "infer" some properties for each kind of `DepNode`: +//! +//! * Whether a `DepNode` of a given kind has any parameters at all. Some +//! `DepNode`s could represent global concepts with only one value. +//! * Whether it is possible, in principle, to reconstruct a query key from a +//! given `DepNode`. Many `DepKind`s only require a single `DefId` parameter, +//! in which case it is possible to map the node's fingerprint back to the +//! `DefId` it was computed from. In other cases, too much information gets +//! lost during fingerprint computation. + +use super::{DepContext, DepKind, FingerprintStyle}; +use crate::ich::StableHashingContext; + +use rustc_data_structures::fingerprint::{Fingerprint, PackedFingerprint}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use std::fmt; +use std::hash::Hash; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)] +pub struct DepNode { + pub kind: K, + pub hash: PackedFingerprint, +} + +impl DepNode { + /// Creates a new, parameterless DepNode. This method will assert + /// that the DepNode corresponding to the given DepKind actually + /// does not require any parameters. + pub fn new_no_params(tcx: Ctxt, kind: K) -> DepNode + where + Ctxt: super::DepContext, + { + debug_assert_eq!(tcx.fingerprint_style(kind), FingerprintStyle::Unit); + DepNode { kind, hash: Fingerprint::ZERO.into() } + } + + pub fn construct(tcx: Ctxt, kind: K, arg: &Key) -> DepNode + where + Ctxt: super::DepContext, + Key: DepNodeParams, + { + let hash = arg.to_fingerprint(tcx); + let dep_node = DepNode { kind, hash: hash.into() }; + + #[cfg(debug_assertions)] + { + if !tcx.fingerprint_style(kind).reconstructible() + && (tcx.sess().opts.unstable_opts.incremental_info + || tcx.sess().opts.unstable_opts.query_dep_graph) + { + tcx.dep_graph().register_dep_node_debug_str(dep_node, || arg.to_debug_str(tcx)); + } + } + + dep_node + } +} + +impl fmt::Debug for DepNode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + K::debug_node(self, f) + } +} + +pub trait DepNodeParams: fmt::Debug + Sized { + fn fingerprint_style() -> FingerprintStyle; + + /// This method turns the parameters of a DepNodeConstructor into an opaque + /// Fingerprint to be used in DepNode. + /// Not all DepNodeParams support being turned into a Fingerprint (they + /// don't need to if the corresponding DepNode is anonymous). + fn to_fingerprint(&self, _: Ctxt) -> Fingerprint { + panic!("Not implemented. Accidentally called on anonymous node?") + } + + fn to_debug_str(&self, _: Ctxt) -> String { + format!("{:?}", self) + } + + /// This method tries to recover the query key from the given `DepNode`, + /// something which is needed when forcing `DepNode`s during red-green + /// evaluation. The query system will only call this method if + /// `fingerprint_style()` is not `FingerprintStyle::Opaque`. + /// It is always valid to return `None` here, in which case incremental + /// compilation will treat the query as having changed instead of forcing it. + fn recover(tcx: Ctxt, dep_node: &DepNode) -> Option; +} + +impl DepNodeParams for T +where + T: for<'a> HashStable> + fmt::Debug, +{ + #[inline(always)] + default fn fingerprint_style() -> FingerprintStyle { + FingerprintStyle::Opaque + } + + #[inline(always)] + default fn to_fingerprint(&self, tcx: Ctxt) -> Fingerprint { + tcx.with_stable_hashing_context(|mut hcx| { + let mut hasher = StableHasher::new(); + self.hash_stable(&mut hcx, &mut hasher); + hasher.finish() + }) + } + + #[inline(always)] + default fn to_debug_str(&self, _: Ctxt) -> String { + format!("{:?}", *self) + } + + #[inline(always)] + default fn recover(_: Ctxt, _: &DepNode) -> Option { + None + } +} + +/// A "work product" corresponds to a `.o` (or other) file that we +/// save in between runs. These IDs do not have a `DefId` but rather +/// some independent path or string that persists between runs without +/// the need to be mapped or unmapped. (This ensures we can serialize +/// them even in the absence of a tcx.) +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Encodable, Decodable)] +pub struct WorkProductId { + hash: Fingerprint, +} + +impl WorkProductId { + pub fn from_cgu_name(cgu_name: &str) -> WorkProductId { + let mut hasher = StableHasher::new(); + cgu_name.hash(&mut hasher); + WorkProductId { hash: hasher.finish() } + } +} + +impl HashStable for WorkProductId { + #[inline] + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + self.hash.hash_stable(hcx, hasher) + } +} diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs new file mode 100644 index 000000000..8ff561327 --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -0,0 +1,1288 @@ +use parking_lot::Mutex; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::profiling::{EventId, QueryInvocationId, SelfProfilerRef}; +use rustc_data_structures::sharded::{self, Sharded}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::steal::Steal; +use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering}; +use rustc_index::vec::IndexVec; +use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; +use smallvec::{smallvec, SmallVec}; +use std::assert_matches::assert_matches; +use std::collections::hash_map::Entry; +use std::fmt::Debug; +use std::hash::Hash; +use std::marker::PhantomData; +use std::sync::atomic::Ordering::Relaxed; + +use super::query::DepGraphQuery; +use super::serialized::{GraphEncoder, SerializedDepGraph, SerializedDepNodeIndex}; +use super::{DepContext, DepKind, DepNode, HasDepContext, WorkProductId}; +use crate::ich::StableHashingContext; +use crate::query::{QueryContext, QuerySideEffects}; + +#[cfg(debug_assertions)] +use {super::debug::EdgeFilter, std::env}; + +#[derive(Clone)] +pub struct DepGraph { + data: Option>>, + + /// This field is used for assigning DepNodeIndices when running in + /// non-incremental mode. Even in non-incremental mode we make sure that + /// each task has a `DepNodeIndex` that uniquely identifies it. This unique + /// ID is used for self-profiling. + virtual_dep_node_index: Lrc, +} + +rustc_index::newtype_index! { + pub struct DepNodeIndex { .. } +} + +impl DepNodeIndex { + pub const INVALID: DepNodeIndex = DepNodeIndex::MAX; + pub const SINGLETON_DEPENDENCYLESS_ANON_NODE: DepNodeIndex = DepNodeIndex::from_u32(0); + pub const FOREVER_RED_NODE: DepNodeIndex = DepNodeIndex::from_u32(1); +} + +impl std::convert::From for QueryInvocationId { + #[inline] + fn from(dep_node_index: DepNodeIndex) -> Self { + QueryInvocationId(dep_node_index.as_u32()) + } +} + +#[derive(PartialEq)] +pub enum DepNodeColor { + Red, + Green(DepNodeIndex), +} + +impl DepNodeColor { + #[inline] + pub fn is_green(self) -> bool { + match self { + DepNodeColor::Red => false, + DepNodeColor::Green(_) => true, + } + } +} + +struct DepGraphData { + /// The new encoding of the dependency graph, optimized for red/green + /// tracking. The `current` field is the dependency graph of only the + /// current compilation session: We don't merge the previous dep-graph into + /// current one anymore, but we do reference shared data to save space. + current: CurrentDepGraph, + + /// The dep-graph from the previous compilation session. It contains all + /// nodes and edges as well as all fingerprints of nodes that have them. + previous: SerializedDepGraph, + + colors: DepNodeColorMap, + + processed_side_effects: Mutex>, + + /// When we load, there may be `.o` files, cached MIR, or other such + /// things available to us. If we find that they are not dirty, we + /// load the path to the file storing those work-products here into + /// this map. We can later look for and extract that data. + previous_work_products: FxHashMap, + + dep_node_debug: Lock, String>>, + + /// Used by incremental compilation tests to assert that + /// a particular query result was decoded from disk + /// (not just marked green) + debug_loaded_from_disk: Lock>>, +} + +pub fn hash_result(hcx: &mut StableHashingContext<'_>, result: &R) -> Fingerprint +where + R: for<'a> HashStable>, +{ + let mut stable_hasher = StableHasher::new(); + result.hash_stable(hcx, &mut stable_hasher); + stable_hasher.finish() +} + +impl DepGraph { + pub fn new( + profiler: &SelfProfilerRef, + prev_graph: SerializedDepGraph, + prev_work_products: FxHashMap, + encoder: FileEncoder, + record_graph: bool, + record_stats: bool, + ) -> DepGraph { + let prev_graph_node_count = prev_graph.node_count(); + + let current = CurrentDepGraph::new( + profiler, + prev_graph_node_count, + encoder, + record_graph, + record_stats, + ); + + let colors = DepNodeColorMap::new(prev_graph_node_count); + + // Instantiate a dependy-less node only once for anonymous queries. + let _green_node_index = current.intern_new_node( + profiler, + DepNode { kind: DepKind::NULL, hash: current.anon_id_seed.into() }, + smallvec![], + Fingerprint::ZERO, + ); + assert_eq!(_green_node_index, DepNodeIndex::SINGLETON_DEPENDENCYLESS_ANON_NODE); + + // Instantiate a dependy-less red node only once for anonymous queries. + let (_red_node_index, _prev_and_index) = current.intern_node( + profiler, + &prev_graph, + DepNode { kind: DepKind::RED, hash: Fingerprint::ZERO.into() }, + smallvec![], + None, + false, + ); + assert_eq!(_red_node_index, DepNodeIndex::FOREVER_RED_NODE); + assert!(matches!(_prev_and_index, None | Some((_, DepNodeColor::Red)))); + + DepGraph { + data: Some(Lrc::new(DepGraphData { + previous_work_products: prev_work_products, + dep_node_debug: Default::default(), + current, + processed_side_effects: Default::default(), + previous: prev_graph, + colors, + debug_loaded_from_disk: Default::default(), + })), + virtual_dep_node_index: Lrc::new(AtomicU32::new(0)), + } + } + + pub fn new_disabled() -> DepGraph { + DepGraph { data: None, virtual_dep_node_index: Lrc::new(AtomicU32::new(0)) } + } + + /// Returns `true` if we are actually building the full dep-graph, and `false` otherwise. + #[inline] + pub fn is_fully_enabled(&self) -> bool { + self.data.is_some() + } + + pub fn with_query(&self, f: impl Fn(&DepGraphQuery)) { + if let Some(data) = &self.data { + data.current.encoder.borrow().with_query(f) + } + } + + pub fn assert_ignored(&self) { + if let Some(..) = self.data { + K::read_deps(|task_deps| { + assert_matches!( + task_deps, + TaskDepsRef::Ignore, + "expected no task dependency tracking" + ); + }) + } + } + + pub fn with_ignore(&self, op: OP) -> R + where + OP: FnOnce() -> R, + { + K::with_deps(TaskDepsRef::Ignore, op) + } + + /// Used to wrap the deserialization of a query result from disk, + /// This method enforces that no new `DepNodes` are created during + /// query result deserialization. + /// + /// Enforcing this makes the query dep graph simpler - all nodes + /// must be created during the query execution, and should be + /// created from inside the 'body' of a query (the implementation + /// provided by a particular compiler crate). + /// + /// Consider the case of three queries `A`, `B`, and `C`, where + /// `A` invokes `B` and `B` invokes `C`: + /// + /// `A -> B -> C` + /// + /// Suppose that decoding the result of query `B` required re-computing + /// the query `C`. If we did not create a fresh `TaskDeps` when + /// decoding `B`, we would still be using the `TaskDeps` for query `A` + /// (if we needed to re-execute `A`). This would cause us to create + /// a new edge `A -> C`. If this edge did not previously + /// exist in the `DepGraph`, then we could end up with a different + /// `DepGraph` at the end of compilation, even if there were no + /// meaningful changes to the overall program (e.g. a newline was added). + /// In addition, this edge might cause a subsequent compilation run + /// to try to force `C` before marking other necessary nodes green. If + /// `C` did not exist in the new compilation session, then we could + /// get an ICE. Normally, we would have tried (and failed) to mark + /// some other query green (e.g. `item_children`) which was used + /// to obtain `C`, which would prevent us from ever trying to force + /// a non-existent `D`. + /// + /// It might be possible to enforce that all `DepNode`s read during + /// deserialization already exist in the previous `DepGraph`. In + /// the above example, we would invoke `D` during the deserialization + /// of `B`. Since we correctly create a new `TaskDeps` from the decoding + /// of `B`, this would result in an edge `B -> D`. If that edge already + /// existed (with the same `DepPathHash`es), then it should be correct + /// to allow the invocation of the query to proceed during deserialization + /// of a query result. We would merely assert that the dep-graph fragment + /// that would have been added by invoking `C` while decoding `B` + /// is equivalent to the dep-graph fragment that we already instantiated for B + /// (at the point where we successfully marked B as green). + /// + /// However, this would require additional complexity + /// in the query infrastructure, and is not currently needed by the + /// decoding of any query results. Should the need arise in the future, + /// we should consider extending the query system with this functionality. + pub fn with_query_deserialization(&self, op: OP) -> R + where + OP: FnOnce() -> R, + { + K::with_deps(TaskDepsRef::Forbid, op) + } + + /// Starts a new dep-graph task. Dep-graph tasks are specified + /// using a free function (`task`) and **not** a closure -- this + /// is intentional because we want to exercise tight control over + /// what state they have access to. In particular, we want to + /// prevent implicit 'leaks' of tracked state into the task (which + /// could then be read without generating correct edges in the + /// dep-graph -- see the [rustc dev guide] for more details on + /// the dep-graph). To this end, the task function gets exactly two + /// pieces of state: the context `cx` and an argument `arg`. Both + /// of these bits of state must be of some type that implements + /// `DepGraphSafe` and hence does not leak. + /// + /// The choice of two arguments is not fundamental. One argument + /// would work just as well, since multiple values can be + /// collected using tuples. However, using two arguments works out + /// to be quite convenient, since it is common to need a context + /// (`cx`) and some argument (e.g., a `DefId` identifying what + /// item to process). + /// + /// For cases where you need some other number of arguments: + /// + /// - If you only need one argument, just use `()` for the `arg` + /// parameter. + /// - If you need 3+ arguments, use a tuple for the + /// `arg` parameter. + /// + /// [rustc dev guide]: https://rustc-dev-guide.rust-lang.org/incremental-compilation.html + pub fn with_task, A: Debug, R>( + &self, + key: DepNode, + cx: Ctxt, + arg: A, + task: fn(Ctxt, A) -> R, + hash_result: Option, &R) -> Fingerprint>, + ) -> (R, DepNodeIndex) { + if self.is_fully_enabled() { + self.with_task_impl(key, cx, arg, task, hash_result) + } else { + // Incremental compilation is turned off. We just execute the task + // without tracking. We still provide a dep-node index that uniquely + // identifies the task so that we have a cheap way of referring to + // the query for self-profiling. + (task(cx, arg), self.next_virtual_depnode_index()) + } + } + + fn with_task_impl, A: Debug, R>( + &self, + key: DepNode, + cx: Ctxt, + arg: A, + task: fn(Ctxt, A) -> R, + hash_result: Option, &R) -> Fingerprint>, + ) -> (R, DepNodeIndex) { + // This function is only called when the graph is enabled. + let data = self.data.as_ref().unwrap(); + + // If the following assertion triggers, it can have two reasons: + // 1. Something is wrong with DepNode creation, either here or + // in `DepGraph::try_mark_green()`. + // 2. Two distinct query keys get mapped to the same `DepNode` + // (see for example #48923). + assert!( + !self.dep_node_exists(&key), + "forcing query with already existing `DepNode`\n\ + - query-key: {:?}\n\ + - dep-node: {:?}", + arg, + key + ); + + let task_deps = if cx.dep_context().is_eval_always(key.kind) { + None + } else { + Some(Lock::new(TaskDeps { + #[cfg(debug_assertions)] + node: Some(key), + reads: SmallVec::new(), + read_set: Default::default(), + phantom_data: PhantomData, + })) + }; + + let task_deps_ref = match &task_deps { + Some(deps) => TaskDepsRef::Allow(deps), + None => TaskDepsRef::Ignore, + }; + + let result = K::with_deps(task_deps_ref, || task(cx, arg)); + let edges = task_deps.map_or_else(|| smallvec![], |lock| lock.into_inner().reads); + + let dcx = cx.dep_context(); + let hashing_timer = dcx.profiler().incr_result_hashing(); + let current_fingerprint = + hash_result.map(|f| dcx.with_stable_hashing_context(|mut hcx| f(&mut hcx, &result))); + + let print_status = cfg!(debug_assertions) && dcx.sess().opts.unstable_opts.dep_tasks; + + // Intern the new `DepNode`. + let (dep_node_index, prev_and_color) = data.current.intern_node( + dcx.profiler(), + &data.previous, + key, + edges, + current_fingerprint, + print_status, + ); + + hashing_timer.finish_with_query_invocation_id(dep_node_index.into()); + + if let Some((prev_index, color)) = prev_and_color { + debug_assert!( + data.colors.get(prev_index).is_none(), + "DepGraph::with_task() - Duplicate DepNodeColor \ + insertion for {:?}", + key + ); + + data.colors.insert(prev_index, color); + } + + (result, dep_node_index) + } + + /// Executes something within an "anonymous" task, that is, a task the + /// `DepNode` of which is determined by the list of inputs it read from. + pub fn with_anon_task, OP, R>( + &self, + cx: Ctxt, + dep_kind: K, + op: OP, + ) -> (R, DepNodeIndex) + where + OP: FnOnce() -> R, + { + debug_assert!(!cx.is_eval_always(dep_kind)); + + if let Some(ref data) = self.data { + let task_deps = Lock::new(TaskDeps::default()); + let result = K::with_deps(TaskDepsRef::Allow(&task_deps), op); + let task_deps = task_deps.into_inner(); + let task_deps = task_deps.reads; + + let dep_node_index = match task_deps.len() { + 0 => { + // Because the dep-node id of anon nodes is computed from the sets of its + // dependencies we already know what the ID of this dependency-less node is + // going to be (i.e. equal to the precomputed + // `SINGLETON_DEPENDENCYLESS_ANON_NODE`). As a consequence we can skip creating + // a `StableHasher` and sending the node through interning. + DepNodeIndex::SINGLETON_DEPENDENCYLESS_ANON_NODE + } + 1 => { + // When there is only one dependency, don't bother creating a node. + task_deps[0] + } + _ => { + // The dep node indices are hashed here instead of hashing the dep nodes of the + // dependencies. These indices may refer to different nodes per session, but this isn't + // a problem here because we that ensure the final dep node hash is per session only by + // combining it with the per session random number `anon_id_seed`. This hash only need + // to map the dependencies to a single value on a per session basis. + let mut hasher = StableHasher::new(); + task_deps.hash(&mut hasher); + + let target_dep_node = DepNode { + kind: dep_kind, + // Fingerprint::combine() is faster than sending Fingerprint + // through the StableHasher (at least as long as StableHasher + // is so slow). + hash: data.current.anon_id_seed.combine(hasher.finish()).into(), + }; + + data.current.intern_new_node( + cx.profiler(), + target_dep_node, + task_deps, + Fingerprint::ZERO, + ) + } + }; + + (result, dep_node_index) + } else { + (op(), self.next_virtual_depnode_index()) + } + } + + #[inline] + pub fn read_index(&self, dep_node_index: DepNodeIndex) { + if let Some(ref data) = self.data { + K::read_deps(|task_deps| { + let mut task_deps = match task_deps { + TaskDepsRef::Allow(deps) => deps.lock(), + TaskDepsRef::Ignore => return, + TaskDepsRef::Forbid => { + panic!("Illegal read of: {:?}", dep_node_index) + } + }; + let task_deps = &mut *task_deps; + + if cfg!(debug_assertions) { + data.current.total_read_count.fetch_add(1, Relaxed); + } + + // As long as we only have a low number of reads we can avoid doing a hash + // insert and potentially allocating/reallocating the hashmap + let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP { + task_deps.reads.iter().all(|other| *other != dep_node_index) + } else { + task_deps.read_set.insert(dep_node_index) + }; + if new_read { + task_deps.reads.push(dep_node_index); + if task_deps.reads.len() == TASK_DEPS_READS_CAP { + // Fill `read_set` with what we have so far so we can use the hashset + // next time + task_deps.read_set.extend(task_deps.reads.iter().copied()); + } + + #[cfg(debug_assertions)] + { + if let Some(target) = task_deps.node { + if let Some(ref forbidden_edge) = data.current.forbidden_edge { + let src = forbidden_edge.index_to_node.lock()[&dep_node_index]; + if forbidden_edge.test(&src, &target) { + panic!("forbidden edge {:?} -> {:?} created", src, target) + } + } + } + } + } else if cfg!(debug_assertions) { + data.current.total_duplicate_read_count.fetch_add(1, Relaxed); + } + }) + } + } + + #[inline] + pub fn dep_node_index_of(&self, dep_node: &DepNode) -> DepNodeIndex { + self.dep_node_index_of_opt(dep_node).unwrap() + } + + #[inline] + pub fn dep_node_index_of_opt(&self, dep_node: &DepNode) -> Option { + let data = self.data.as_ref().unwrap(); + let current = &data.current; + + if let Some(prev_index) = data.previous.node_to_index_opt(dep_node) { + current.prev_index_to_index.lock()[prev_index] + } else { + current.new_node_to_index.get_shard_by_value(dep_node).lock().get(dep_node).copied() + } + } + + #[inline] + pub fn dep_node_exists(&self, dep_node: &DepNode) -> bool { + self.data.is_some() && self.dep_node_index_of_opt(dep_node).is_some() + } + + pub fn prev_fingerprint_of(&self, dep_node: &DepNode) -> Option { + self.data.as_ref().unwrap().previous.fingerprint_of(dep_node) + } + + /// Checks whether a previous work product exists for `v` and, if + /// so, return the path that leads to it. Used to skip doing work. + pub fn previous_work_product(&self, v: &WorkProductId) -> Option { + self.data.as_ref().and_then(|data| data.previous_work_products.get(v).cloned()) + } + + /// Access the map of work-products created during the cached run. Only + /// used during saving of the dep-graph. + pub fn previous_work_products(&self) -> &FxHashMap { + &self.data.as_ref().unwrap().previous_work_products + } + + pub fn mark_debug_loaded_from_disk(&self, dep_node: DepNode) { + self.data.as_ref().unwrap().debug_loaded_from_disk.lock().insert(dep_node); + } + + pub fn debug_was_loaded_from_disk(&self, dep_node: DepNode) -> bool { + self.data.as_ref().unwrap().debug_loaded_from_disk.lock().contains(&dep_node) + } + + #[inline(always)] + pub fn register_dep_node_debug_str(&self, dep_node: DepNode, debug_str_gen: F) + where + F: FnOnce() -> String, + { + let dep_node_debug = &self.data.as_ref().unwrap().dep_node_debug; + + if dep_node_debug.borrow().contains_key(&dep_node) { + return; + } + let debug_str = debug_str_gen(); + dep_node_debug.borrow_mut().insert(dep_node, debug_str); + } + + pub fn dep_node_debug_str(&self, dep_node: DepNode) -> Option { + self.data.as_ref()?.dep_node_debug.borrow().get(&dep_node).cloned() + } + + fn node_color(&self, dep_node: &DepNode) -> Option { + if let Some(ref data) = self.data { + if let Some(prev_index) = data.previous.node_to_index_opt(dep_node) { + return data.colors.get(prev_index); + } else { + // This is a node that did not exist in the previous compilation session. + return None; + } + } + + None + } + + /// Try to mark a node index for the node dep_node. + /// + /// A node will have an index, when it's already been marked green, or when we can mark it + /// green. This function will mark the current task as a reader of the specified node, when + /// a node index can be found for that node. + pub fn try_mark_green>( + &self, + tcx: Ctxt, + dep_node: &DepNode, + ) -> Option<(SerializedDepNodeIndex, DepNodeIndex)> { + debug_assert!(!tcx.dep_context().is_eval_always(dep_node.kind)); + + // Return None if the dep graph is disabled + let data = self.data.as_ref()?; + + // Return None if the dep node didn't exist in the previous session + let prev_index = data.previous.node_to_index_opt(dep_node)?; + + match data.colors.get(prev_index) { + Some(DepNodeColor::Green(dep_node_index)) => Some((prev_index, dep_node_index)), + Some(DepNodeColor::Red) => None, + None => { + // This DepNode and the corresponding query invocation existed + // in the previous compilation session too, so we can try to + // mark it as green by recursively marking all of its + // dependencies green. + self.try_mark_previous_green(tcx, data, prev_index, &dep_node) + .map(|dep_node_index| (prev_index, dep_node_index)) + } + } + } + + fn try_mark_parent_green>( + &self, + tcx: Ctxt, + data: &DepGraphData, + parent_dep_node_index: SerializedDepNodeIndex, + dep_node: &DepNode, + ) -> Option<()> { + let dep_dep_node_color = data.colors.get(parent_dep_node_index); + let dep_dep_node = &data.previous.index_to_node(parent_dep_node_index); + + match dep_dep_node_color { + Some(DepNodeColor::Green(_)) => { + // This dependency has been marked as green before, we are + // still fine and can continue with checking the other + // dependencies. + debug!( + "try_mark_previous_green({:?}) --- found dependency {:?} to \ + be immediately green", + dep_node, dep_dep_node, + ); + return Some(()); + } + Some(DepNodeColor::Red) => { + // We found a dependency the value of which has changed + // compared to the previous compilation session. We cannot + // mark the DepNode as green and also don't need to bother + // with checking any of the other dependencies. + debug!( + "try_mark_previous_green({:?}) - END - dependency {:?} was immediately red", + dep_node, dep_dep_node, + ); + return None; + } + None => {} + } + + // We don't know the state of this dependency. If it isn't + // an eval_always node, let's try to mark it green recursively. + if !tcx.dep_context().is_eval_always(dep_dep_node.kind) { + debug!( + "try_mark_previous_green({:?}) --- state of dependency {:?} ({}) \ + is unknown, trying to mark it green", + dep_node, dep_dep_node, dep_dep_node.hash, + ); + + let node_index = + self.try_mark_previous_green(tcx, data, parent_dep_node_index, dep_dep_node); + if node_index.is_some() { + debug!( + "try_mark_previous_green({:?}) --- managed to MARK dependency {:?} as green", + dep_node, dep_dep_node + ); + return Some(()); + } + } + + // We failed to mark it green, so we try to force the query. + debug!( + "try_mark_previous_green({:?}) --- trying to force dependency {:?}", + dep_node, dep_dep_node + ); + if !tcx.dep_context().try_force_from_dep_node(*dep_dep_node) { + // The DepNode could not be forced. + debug!( + "try_mark_previous_green({:?}) - END - dependency {:?} could not be forced", + dep_node, dep_dep_node + ); + return None; + } + + let dep_dep_node_color = data.colors.get(parent_dep_node_index); + + match dep_dep_node_color { + Some(DepNodeColor::Green(_)) => { + debug!( + "try_mark_previous_green({:?}) --- managed to FORCE dependency {:?} to green", + dep_node, dep_dep_node + ); + return Some(()); + } + Some(DepNodeColor::Red) => { + debug!( + "try_mark_previous_green({:?}) - END - dependency {:?} was red after forcing", + dep_node, dep_dep_node + ); + return None; + } + None => {} + } + + if !tcx.dep_context().sess().has_errors_or_delayed_span_bugs() { + panic!("try_mark_previous_green() - Forcing the DepNode should have set its color") + } + + // If the query we just forced has resulted in + // some kind of compilation error, we cannot rely on + // the dep-node color having been properly updated. + // This means that the query system has reached an + // invalid state. We let the compiler continue (by + // returning `None`) so it can emit error messages + // and wind down, but rely on the fact that this + // invalid state will not be persisted to the + // incremental compilation cache because of + // compilation errors being present. + debug!( + "try_mark_previous_green({:?}) - END - dependency {:?} resulted in compilation error", + dep_node, dep_dep_node + ); + return None; + } + + /// Try to mark a dep-node which existed in the previous compilation session as green. + fn try_mark_previous_green>( + &self, + tcx: Ctxt, + data: &DepGraphData, + prev_dep_node_index: SerializedDepNodeIndex, + dep_node: &DepNode, + ) -> Option { + debug!("try_mark_previous_green({:?}) - BEGIN", dep_node); + + #[cfg(not(parallel_compiler))] + { + debug_assert!(!self.dep_node_exists(dep_node)); + debug_assert!(data.colors.get(prev_dep_node_index).is_none()); + } + + // We never try to mark eval_always nodes as green + debug_assert!(!tcx.dep_context().is_eval_always(dep_node.kind)); + + debug_assert_eq!(data.previous.index_to_node(prev_dep_node_index), *dep_node); + + let prev_deps = data.previous.edge_targets_from(prev_dep_node_index); + + for &dep_dep_node_index in prev_deps { + self.try_mark_parent_green(tcx, data, dep_dep_node_index, dep_node)? + } + + // If we got here without hitting a `return` that means that all + // dependencies of this DepNode could be marked as green. Therefore we + // can also mark this DepNode as green. + + // There may be multiple threads trying to mark the same dep node green concurrently + + // We allocating an entry for the node in the current dependency graph and + // adding all the appropriate edges imported from the previous graph + let dep_node_index = data.current.promote_node_and_deps_to_current( + tcx.dep_context().profiler(), + &data.previous, + prev_dep_node_index, + ); + + // ... emitting any stored diagnostic ... + + // FIXME: Store the fact that a node has diagnostics in a bit in the dep graph somewhere + // Maybe store a list on disk and encode this fact in the DepNodeState + let side_effects = tcx.load_side_effects(prev_dep_node_index); + + #[cfg(not(parallel_compiler))] + debug_assert!( + data.colors.get(prev_dep_node_index).is_none(), + "DepGraph::try_mark_previous_green() - Duplicate DepNodeColor \ + insertion for {:?}", + dep_node + ); + + if !side_effects.is_empty() { + self.emit_side_effects(tcx, data, dep_node_index, side_effects); + } + + // ... and finally storing a "Green" entry in the color map. + // Multiple threads can all write the same color here + data.colors.insert(prev_dep_node_index, DepNodeColor::Green(dep_node_index)); + + debug!("try_mark_previous_green({:?}) - END - successfully marked as green", dep_node); + Some(dep_node_index) + } + + /// Atomically emits some loaded diagnostics. + /// This may be called concurrently on multiple threads for the same dep node. + #[cold] + #[inline(never)] + fn emit_side_effects>( + &self, + tcx: Ctxt, + data: &DepGraphData, + dep_node_index: DepNodeIndex, + side_effects: QuerySideEffects, + ) { + let mut processed = data.processed_side_effects.lock(); + + if processed.insert(dep_node_index) { + // We were the first to insert the node in the set so this thread + // must process side effects + + // Promote the previous diagnostics to the current session. + tcx.store_side_effects(dep_node_index, side_effects.clone()); + + let handle = tcx.dep_context().sess().diagnostic(); + + for mut diagnostic in side_effects.diagnostics { + handle.emit_diagnostic(&mut diagnostic); + } + } + } + + // Returns true if the given node has been marked as red during the + // current compilation session. Used in various assertions + pub fn is_red(&self, dep_node: &DepNode) -> bool { + self.node_color(dep_node) == Some(DepNodeColor::Red) + } + + // Returns true if the given node has been marked as green during the + // current compilation session. Used in various assertions + pub fn is_green(&self, dep_node: &DepNode) -> bool { + self.node_color(dep_node).map_or(false, |c| c.is_green()) + } + + // This method loads all on-disk cacheable query results into memory, so + // they can be written out to the new cache file again. Most query results + // will already be in memory but in the case where we marked something as + // green but then did not need the value, that value will never have been + // loaded from disk. + // + // This method will only load queries that will end up in the disk cache. + // Other queries will not be executed. + pub fn exec_cache_promotions>(&self, tcx: Ctxt) { + let _prof_timer = tcx.profiler().generic_activity("incr_comp_query_cache_promotion"); + + let data = self.data.as_ref().unwrap(); + for prev_index in data.colors.values.indices() { + match data.colors.get(prev_index) { + Some(DepNodeColor::Green(_)) => { + let dep_node = data.previous.index_to_node(prev_index); + tcx.try_load_from_on_disk_cache(dep_node); + } + None | Some(DepNodeColor::Red) => { + // We can skip red nodes because a node can only be marked + // as red if the query result was recomputed and thus is + // already in memory. + } + } + } + } + + pub fn print_incremental_info(&self) { + if let Some(data) = &self.data { + data.current.encoder.borrow().print_incremental_info( + data.current.total_read_count.load(Relaxed), + data.current.total_duplicate_read_count.load(Relaxed), + ) + } + } + + pub fn encode(&self, profiler: &SelfProfilerRef) -> FileEncodeResult { + if let Some(data) = &self.data { + data.current.encoder.steal().finish(profiler) + } else { + Ok(0) + } + } + + pub(crate) fn next_virtual_depnode_index(&self) -> DepNodeIndex { + let index = self.virtual_dep_node_index.fetch_add(1, Relaxed); + DepNodeIndex::from_u32(index) + } +} + +/// A "work product" is an intermediate result that we save into the +/// incremental directory for later re-use. The primary example are +/// the object files that we save for each partition at code +/// generation time. +/// +/// Each work product is associated with a dep-node, representing the +/// process that produced the work-product. If that dep-node is found +/// to be dirty when we load up, then we will delete the work-product +/// at load time. If the work-product is found to be clean, then we +/// will keep a record in the `previous_work_products` list. +/// +/// In addition, work products have an associated hash. This hash is +/// an extra hash that can be used to decide if the work-product from +/// a previous compilation can be re-used (in addition to the dirty +/// edges check). +/// +/// As the primary example, consider the object files we generate for +/// each partition. In the first run, we create partitions based on +/// the symbols that need to be compiled. For each partition P, we +/// hash the symbols in P and create a `WorkProduct` record associated +/// with `DepNode::CodegenUnit(P)`; the hash is the set of symbols +/// in P. +/// +/// The next time we compile, if the `DepNode::CodegenUnit(P)` is +/// judged to be clean (which means none of the things we read to +/// generate the partition were found to be dirty), it will be loaded +/// into previous work products. We will then regenerate the set of +/// symbols in the partition P and hash them (note that new symbols +/// may be added -- for example, new monomorphizations -- even if +/// nothing in P changed!). We will compare that hash against the +/// previous hash. If it matches up, we can reuse the object file. +#[derive(Clone, Debug, Encodable, Decodable)] +pub struct WorkProduct { + pub cgu_name: String, + /// Saved files associated with this CGU. In each key/value pair, the value is the path to the + /// saved file and the key is some identifier for the type of file being saved. + /// + /// By convention, file extensions are currently used as identifiers, i.e. the key "o" maps to + /// the object file's path, and "dwo" to the dwarf object file's path. + pub saved_files: FxHashMap, +} + +// Index type for `DepNodeData`'s edges. +rustc_index::newtype_index! { + struct EdgeIndex { .. } +} + +/// `CurrentDepGraph` stores the dependency graph for the current session. It +/// will be populated as we run queries or tasks. We never remove nodes from the +/// graph: they are only added. +/// +/// The nodes in it are identified by a `DepNodeIndex`. We avoid keeping the nodes +/// in memory. This is important, because these graph structures are some of the +/// largest in the compiler. +/// +/// For this reason, we avoid storing `DepNode`s more than once as map +/// keys. The `new_node_to_index` map only contains nodes not in the previous +/// graph, and we map nodes in the previous graph to indices via a two-step +/// mapping. `SerializedDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`, +/// and the `prev_index_to_index` vector (which is more compact and faster than +/// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`. +/// +/// This struct uses three locks internally. The `data`, `new_node_to_index`, +/// and `prev_index_to_index` fields are locked separately. Operations that take +/// a `DepNodeIndex` typically just access the `data` field. +/// +/// We only need to manipulate at most two locks simultaneously: +/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. When +/// manipulating both, we acquire `new_node_to_index` or `prev_index_to_index` +/// first, and `data` second. +pub(super) struct CurrentDepGraph { + encoder: Steal>, + new_node_to_index: Sharded, DepNodeIndex>>, + prev_index_to_index: Lock>>, + + /// Used to trap when a specific edge is added to the graph. + /// This is used for debug purposes and is only active with `debug_assertions`. + #[cfg(debug_assertions)] + forbidden_edge: Option>, + + /// Anonymous `DepNode`s are nodes whose IDs we compute from the list of + /// their edges. This has the beneficial side-effect that multiple anonymous + /// nodes can be coalesced into one without changing the semantics of the + /// dependency graph. However, the merging of nodes can lead to a subtle + /// problem during red-green marking: The color of an anonymous node from + /// the current session might "shadow" the color of the node with the same + /// ID from the previous session. In order to side-step this problem, we make + /// sure that anonymous `NodeId`s allocated in different sessions don't overlap. + /// This is implemented by mixing a session-key into the ID fingerprint of + /// each anon node. The session-key is just a random number generated when + /// the `DepGraph` is created. + anon_id_seed: Fingerprint, + + /// These are simple counters that are for profiling and + /// debugging and only active with `debug_assertions`. + total_read_count: AtomicU64, + total_duplicate_read_count: AtomicU64, + + /// The cached event id for profiling node interning. This saves us + /// from having to look up the event id every time we intern a node + /// which may incur too much overhead. + /// This will be None if self-profiling is disabled. + node_intern_event_id: Option, +} + +impl CurrentDepGraph { + fn new( + profiler: &SelfProfilerRef, + prev_graph_node_count: usize, + encoder: FileEncoder, + record_graph: bool, + record_stats: bool, + ) -> CurrentDepGraph { + use std::time::{SystemTime, UNIX_EPOCH}; + + let duration = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); + let nanos = duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64; + let mut stable_hasher = StableHasher::new(); + nanos.hash(&mut stable_hasher); + let anon_id_seed = stable_hasher.finish(); + + #[cfg(debug_assertions)] + let forbidden_edge = match env::var("RUST_FORBID_DEP_GRAPH_EDGE") { + Ok(s) => match EdgeFilter::new(&s) { + Ok(f) => Some(f), + Err(err) => panic!("RUST_FORBID_DEP_GRAPH_EDGE invalid: {}", err), + }, + Err(_) => None, + }; + + // We store a large collection of these in `prev_index_to_index` during + // non-full incremental builds, and want to ensure that the element size + // doesn't inadvertently increase. + static_assert_size!(Option, 4); + + let new_node_count_estimate = 102 * prev_graph_node_count / 100 + 200; + + let node_intern_event_id = profiler + .get_or_alloc_cached_string("incr_comp_intern_dep_graph_node") + .map(EventId::from_label); + + CurrentDepGraph { + encoder: Steal::new(GraphEncoder::new( + encoder, + prev_graph_node_count, + record_graph, + record_stats, + )), + new_node_to_index: Sharded::new(|| { + FxHashMap::with_capacity_and_hasher( + new_node_count_estimate / sharded::SHARDS, + Default::default(), + ) + }), + prev_index_to_index: Lock::new(IndexVec::from_elem_n(None, prev_graph_node_count)), + anon_id_seed, + #[cfg(debug_assertions)] + forbidden_edge, + total_read_count: AtomicU64::new(0), + total_duplicate_read_count: AtomicU64::new(0), + node_intern_event_id, + } + } + + #[cfg(debug_assertions)] + fn record_edge(&self, dep_node_index: DepNodeIndex, key: DepNode) { + if let Some(forbidden_edge) = &self.forbidden_edge { + forbidden_edge.index_to_node.lock().insert(dep_node_index, key); + } + } + + /// Writes the node to the current dep-graph and allocates a `DepNodeIndex` for it. + /// Assumes that this is a node that has no equivalent in the previous dep-graph. + fn intern_new_node( + &self, + profiler: &SelfProfilerRef, + key: DepNode, + edges: EdgesVec, + current_fingerprint: Fingerprint, + ) -> DepNodeIndex { + match self.new_node_to_index.get_shard_by_value(&key).lock().entry(key) { + Entry::Occupied(entry) => *entry.get(), + Entry::Vacant(entry) => { + let dep_node_index = + self.encoder.borrow().send(profiler, key, current_fingerprint, edges); + entry.insert(dep_node_index); + #[cfg(debug_assertions)] + self.record_edge(dep_node_index, key); + dep_node_index + } + } + } + + fn intern_node( + &self, + profiler: &SelfProfilerRef, + prev_graph: &SerializedDepGraph, + key: DepNode, + edges: EdgesVec, + fingerprint: Option, + print_status: bool, + ) -> (DepNodeIndex, Option<(SerializedDepNodeIndex, DepNodeColor)>) { + let print_status = cfg!(debug_assertions) && print_status; + + // Get timer for profiling `DepNode` interning + let _node_intern_timer = + self.node_intern_event_id.map(|eid| profiler.generic_activity_with_event_id(eid)); + + if let Some(prev_index) = prev_graph.node_to_index_opt(&key) { + // Determine the color and index of the new `DepNode`. + if let Some(fingerprint) = fingerprint { + if fingerprint == prev_graph.fingerprint_by_index(prev_index) { + if print_status { + eprintln!("[task::green] {:?}", key); + } + + // This is a green node: it existed in the previous compilation, + // its query was re-executed, and it has the same result as before. + let mut prev_index_to_index = self.prev_index_to_index.lock(); + + let dep_node_index = match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let dep_node_index = + self.encoder.borrow().send(profiler, key, fingerprint, edges); + prev_index_to_index[prev_index] = Some(dep_node_index); + dep_node_index + } + }; + + #[cfg(debug_assertions)] + self.record_edge(dep_node_index, key); + (dep_node_index, Some((prev_index, DepNodeColor::Green(dep_node_index)))) + } else { + if print_status { + eprintln!("[task::red] {:?}", key); + } + + // This is a red node: it existed in the previous compilation, its query + // was re-executed, but it has a different result from before. + let mut prev_index_to_index = self.prev_index_to_index.lock(); + + let dep_node_index = match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let dep_node_index = + self.encoder.borrow().send(profiler, key, fingerprint, edges); + prev_index_to_index[prev_index] = Some(dep_node_index); + dep_node_index + } + }; + + #[cfg(debug_assertions)] + self.record_edge(dep_node_index, key); + (dep_node_index, Some((prev_index, DepNodeColor::Red))) + } + } else { + if print_status { + eprintln!("[task::unknown] {:?}", key); + } + + // This is a red node, effectively: it existed in the previous compilation + // session, its query was re-executed, but it doesn't compute a result hash + // (i.e. it represents a `no_hash` query), so we have no way of determining + // whether or not the result was the same as before. + let mut prev_index_to_index = self.prev_index_to_index.lock(); + + let dep_node_index = match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let dep_node_index = + self.encoder.borrow().send(profiler, key, Fingerprint::ZERO, edges); + prev_index_to_index[prev_index] = Some(dep_node_index); + dep_node_index + } + }; + + #[cfg(debug_assertions)] + self.record_edge(dep_node_index, key); + (dep_node_index, Some((prev_index, DepNodeColor::Red))) + } + } else { + if print_status { + eprintln!("[task::new] {:?}", key); + } + + let fingerprint = fingerprint.unwrap_or(Fingerprint::ZERO); + + // This is a new node: it didn't exist in the previous compilation session. + let dep_node_index = self.intern_new_node(profiler, key, edges, fingerprint); + + (dep_node_index, None) + } + } + + fn promote_node_and_deps_to_current( + &self, + profiler: &SelfProfilerRef, + prev_graph: &SerializedDepGraph, + prev_index: SerializedDepNodeIndex, + ) -> DepNodeIndex { + self.debug_assert_not_in_new_nodes(prev_graph, prev_index); + + let mut prev_index_to_index = self.prev_index_to_index.lock(); + + match prev_index_to_index[prev_index] { + Some(dep_node_index) => dep_node_index, + None => { + let key = prev_graph.index_to_node(prev_index); + let dep_node_index = self.encoder.borrow().send( + profiler, + key, + prev_graph.fingerprint_by_index(prev_index), + prev_graph + .edge_targets_from(prev_index) + .iter() + .map(|i| prev_index_to_index[*i].unwrap()) + .collect(), + ); + prev_index_to_index[prev_index] = Some(dep_node_index); + #[cfg(debug_assertions)] + self.record_edge(dep_node_index, key); + dep_node_index + } + } + } + + #[inline] + fn debug_assert_not_in_new_nodes( + &self, + prev_graph: &SerializedDepGraph, + prev_index: SerializedDepNodeIndex, + ) { + let node = &prev_graph.index_to_node(prev_index); + debug_assert!( + !self.new_node_to_index.get_shard_by_value(node).lock().contains_key(node), + "node from previous graph present in new node collection" + ); + } +} + +/// The capacity of the `reads` field `SmallVec` +const TASK_DEPS_READS_CAP: usize = 8; +type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>; + +#[derive(Debug, Clone, Copy)] +pub enum TaskDepsRef<'a, K: DepKind> { + /// New dependencies can be added to the + /// `TaskDeps`. This is used when executing a 'normal' query + /// (no `eval_always` modifier) + Allow(&'a Lock>), + /// New dependencies are ignored. This is used when + /// executing an `eval_always` query, since there's no + /// need to track dependencies for a query that's always + /// re-executed. This is also used for `dep_graph.with_ignore` + Ignore, + /// Any attempt to add new dependencies will cause a panic. + /// This is used when decoding a query result from disk, + /// to ensure that the decoding process doesn't itself + /// require the execution of any queries. + Forbid, +} + +#[derive(Debug)] +pub struct TaskDeps { + #[cfg(debug_assertions)] + node: Option>, + reads: EdgesVec, + read_set: FxHashSet, + phantom_data: PhantomData>, +} + +impl Default for TaskDeps { + fn default() -> Self { + Self { + #[cfg(debug_assertions)] + node: None, + reads: EdgesVec::new(), + read_set: FxHashSet::default(), + phantom_data: PhantomData, + } + } +} + +// A data structure that stores Option values as a contiguous +// array, using one u32 per entry. +struct DepNodeColorMap { + values: IndexVec, +} + +const COMPRESSED_NONE: u32 = 0; +const COMPRESSED_RED: u32 = 1; +const COMPRESSED_FIRST_GREEN: u32 = 2; + +impl DepNodeColorMap { + fn new(size: usize) -> DepNodeColorMap { + DepNodeColorMap { values: (0..size).map(|_| AtomicU32::new(COMPRESSED_NONE)).collect() } + } + + #[inline] + fn get(&self, index: SerializedDepNodeIndex) -> Option { + match self.values[index].load(Ordering::Acquire) { + COMPRESSED_NONE => None, + COMPRESSED_RED => Some(DepNodeColor::Red), + value => { + Some(DepNodeColor::Green(DepNodeIndex::from_u32(value - COMPRESSED_FIRST_GREEN))) + } + } + } + + fn insert(&self, index: SerializedDepNodeIndex, color: DepNodeColor) { + self.values[index].store( + match color { + DepNodeColor::Red => COMPRESSED_RED, + DepNodeColor::Green(index) => index.as_u32() + COMPRESSED_FIRST_GREEN, + }, + Ordering::Release, + ) + } +} diff --git a/compiler/rustc_query_system/src/dep_graph/mod.rs b/compiler/rustc_query_system/src/dep_graph/mod.rs new file mode 100644 index 000000000..342d95ca4 --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/mod.rs @@ -0,0 +1,106 @@ +pub mod debug; +mod dep_node; +mod graph; +mod query; +mod serialized; + +pub use dep_node::{DepNode, DepNodeParams, WorkProductId}; +pub use graph::{ + hash_result, DepGraph, DepNodeColor, DepNodeIndex, TaskDeps, TaskDepsRef, WorkProduct, +}; +pub use query::DepGraphQuery; +pub use serialized::{SerializedDepGraph, SerializedDepNodeIndex}; + +use crate::ich::StableHashingContext; +use rustc_data_structures::profiling::SelfProfilerRef; +use rustc_serialize::{opaque::FileEncoder, Encodable}; +use rustc_session::Session; + +use std::fmt; +use std::hash::Hash; + +pub trait DepContext: Copy { + type DepKind: self::DepKind; + + /// Create a hashing context for hashing new results. + fn with_stable_hashing_context(&self, f: impl FnOnce(StableHashingContext<'_>) -> R) -> R; + + /// Access the DepGraph. + fn dep_graph(&self) -> &DepGraph; + + /// Access the profiler. + fn profiler(&self) -> &SelfProfilerRef; + + /// Access the compiler session. + fn sess(&self) -> &Session; + + /// Return whether this kind always require evaluation. + fn is_eval_always(&self, kind: Self::DepKind) -> bool; + + fn fingerprint_style(&self, kind: Self::DepKind) -> FingerprintStyle; + + /// Try to force a dep node to execute and see if it's green. + fn try_force_from_dep_node(&self, dep_node: DepNode) -> bool; + + /// Load data from the on-disk cache. + fn try_load_from_on_disk_cache(&self, dep_node: DepNode); +} + +pub trait HasDepContext: Copy { + type DepKind: self::DepKind; + type DepContext: self::DepContext; + + fn dep_context(&self) -> &Self::DepContext; +} + +impl HasDepContext for T { + type DepKind = T::DepKind; + type DepContext = Self; + + fn dep_context(&self) -> &Self::DepContext { + self + } +} + +/// Describes the contents of the fingerprint generated by a given query. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum FingerprintStyle { + /// The fingerprint is actually a DefPathHash. + DefPathHash, + /// Query key was `()` or equivalent, so fingerprint is just zero. + Unit, + /// Some opaque hash. + Opaque, +} + +impl FingerprintStyle { + #[inline] + pub fn reconstructible(self) -> bool { + match self { + FingerprintStyle::DefPathHash | FingerprintStyle::Unit => true, + FingerprintStyle::Opaque => false, + } + } +} + +/// Describe the different families of dependency nodes. +pub trait DepKind: Copy + fmt::Debug + Eq + Hash + Send + Encodable + 'static { + /// DepKind to use when incr. comp. is turned off. + const NULL: Self; + + /// DepKind to use to create the initial forever-red node. + const RED: Self; + + /// Implementation of `std::fmt::Debug` for `DepNode`. + fn debug_node(node: &DepNode, f: &mut fmt::Formatter<'_>) -> fmt::Result; + + /// Execute the operation with provided dependencies. + fn with_deps(deps: TaskDepsRef<'_, Self>, op: OP) -> R + where + OP: FnOnce() -> R; + + /// Access dependencies from current implicit context. + fn read_deps(op: OP) + where + OP: for<'a> FnOnce(TaskDepsRef<'a, Self>); +} diff --git a/compiler/rustc_query_system/src/dep_graph/query.rs b/compiler/rustc_query_system/src/dep_graph/query.rs new file mode 100644 index 000000000..27b3b5e13 --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/query.rs @@ -0,0 +1,68 @@ +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::graph::implementation::{Direction, Graph, NodeIndex, INCOMING}; +use rustc_index::vec::IndexVec; + +use super::{DepKind, DepNode, DepNodeIndex}; + +pub struct DepGraphQuery { + pub graph: Graph, ()>, + pub indices: FxHashMap, NodeIndex>, + pub dep_index_to_index: IndexVec>, +} + +impl DepGraphQuery { + pub fn new(prev_node_count: usize) -> DepGraphQuery { + let node_count = prev_node_count + prev_node_count / 4; + let edge_count = 6 * node_count; + + let graph = Graph::with_capacity(node_count, edge_count); + let indices = FxHashMap::default(); + let dep_index_to_index = IndexVec::new(); + + DepGraphQuery { graph, indices, dep_index_to_index } + } + + pub fn push(&mut self, index: DepNodeIndex, node: DepNode, edges: &[DepNodeIndex]) { + let source = self.graph.add_node(node); + if index.index() >= self.dep_index_to_index.len() { + self.dep_index_to_index.resize(index.index() + 1, None); + } + self.dep_index_to_index[index] = Some(source); + self.indices.insert(node, source); + + for &target in edges.iter() { + let target = self.dep_index_to_index[target]; + // We may miss the edges that are pushed while the `DepGraphQuery` is being accessed. + // Skip them to issues. + if let Some(target) = target { + self.graph.add_edge(source, target, ()); + } + } + } + + pub fn nodes(&self) -> Vec<&DepNode> { + self.graph.all_nodes().iter().map(|n| &n.data).collect() + } + + pub fn edges(&self) -> Vec<(&DepNode, &DepNode)> { + self.graph + .all_edges() + .iter() + .map(|edge| (edge.source(), edge.target())) + .map(|(s, t)| (self.graph.node_data(s), self.graph.node_data(t))) + .collect() + } + + fn reachable_nodes(&self, node: &DepNode, direction: Direction) -> Vec<&DepNode> { + if let Some(&index) = self.indices.get(node) { + self.graph.depth_traverse(index, direction).map(|s| self.graph.node_data(s)).collect() + } else { + vec![] + } + } + + /// All nodes that can reach `node`. + pub fn transitive_predecessors(&self, node: &DepNode) -> Vec<&DepNode> { + self.reachable_nodes(node, INCOMING) + } +} diff --git a/compiler/rustc_query_system/src/dep_graph/serialized.rs b/compiler/rustc_query_system/src/dep_graph/serialized.rs new file mode 100644 index 000000000..3b20ec70d --- /dev/null +++ b/compiler/rustc_query_system/src/dep_graph/serialized.rs @@ -0,0 +1,330 @@ +//! The data that we will serialize and deserialize. +//! +//! The dep-graph is serialized as a sequence of NodeInfo, with the dependencies +//! specified inline. The total number of nodes and edges are stored as the last +//! 16 bytes of the file, so we can find them easily at decoding time. +//! +//! The serialisation is performed on-demand when each node is emitted. Using this +//! scheme, we do not need to keep the current graph in memory. +//! +//! The deserialization is performed manually, in order to convert from the stored +//! sequence of NodeInfos to the different arrays in SerializedDepGraph. Since the +//! node and edge count are stored at the end of the file, all the arrays can be +//! pre-allocated with the right length. + +use super::query::DepGraphQuery; +use super::{DepKind, DepNode, DepNodeIndex}; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::profiling::SelfProfilerRef; +use rustc_data_structures::sync::Lock; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder}; +use rustc_serialize::{Decodable, Decoder, Encodable}; +use smallvec::SmallVec; +use std::convert::TryInto; + +// The maximum value of `SerializedDepNodeIndex` leaves the upper two bits +// unused so that we can store multiple index types in `CompressedHybridIndex`, +// and use those bits to encode which index type it contains. +rustc_index::newtype_index! { + pub struct SerializedDepNodeIndex { + MAX = 0x7FFF_FFFF + } +} + +/// Data for use when recompiling the **current crate**. +#[derive(Debug)] +pub struct SerializedDepGraph { + /// The set of all DepNodes in the graph + nodes: IndexVec>, + /// The set of all Fingerprints in the graph. Each Fingerprint corresponds to + /// the DepNode at the same index in the nodes vector. + fingerprints: IndexVec, + /// For each DepNode, stores the list of edges originating from that + /// DepNode. Encoded as a [start, end) pair indexing into edge_list_data, + /// which holds the actual DepNodeIndices of the target nodes. + edge_list_indices: IndexVec, + /// A flattened list of all edge targets in the graph. Edge sources are + /// implicit in edge_list_indices. + edge_list_data: Vec, + /// Reciprocal map to `nodes`. + index: FxHashMap, SerializedDepNodeIndex>, +} + +impl Default for SerializedDepGraph { + fn default() -> Self { + SerializedDepGraph { + nodes: Default::default(), + fingerprints: Default::default(), + edge_list_indices: Default::default(), + edge_list_data: Default::default(), + index: Default::default(), + } + } +} + +impl SerializedDepGraph { + #[inline] + pub fn edge_targets_from(&self, source: SerializedDepNodeIndex) -> &[SerializedDepNodeIndex] { + let targets = self.edge_list_indices[source]; + &self.edge_list_data[targets.0 as usize..targets.1 as usize] + } + + #[inline] + pub fn index_to_node(&self, dep_node_index: SerializedDepNodeIndex) -> DepNode { + self.nodes[dep_node_index] + } + + #[inline] + pub fn node_to_index_opt(&self, dep_node: &DepNode) -> Option { + self.index.get(dep_node).cloned() + } + + #[inline] + pub fn fingerprint_of(&self, dep_node: &DepNode) -> Option { + self.index.get(dep_node).map(|&node_index| self.fingerprints[node_index]) + } + + #[inline] + pub fn fingerprint_by_index(&self, dep_node_index: SerializedDepNodeIndex) -> Fingerprint { + self.fingerprints[dep_node_index] + } + + pub fn node_count(&self) -> usize { + self.index.len() + } +} + +impl<'a, K: DepKind + Decodable>> Decodable> + for SerializedDepGraph +{ + #[instrument(level = "debug", skip(d))] + fn decode(d: &mut MemDecoder<'a>) -> SerializedDepGraph { + let start_position = d.position(); + + // The last 16 bytes are the node count and edge count. + debug!("position: {:?}", d.position()); + d.set_position(d.data.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE); + debug!("position: {:?}", d.position()); + + let node_count = IntEncodedWithFixedSize::decode(d).0 as usize; + let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize; + debug!(?node_count, ?edge_count); + + debug!("position: {:?}", d.position()); + d.set_position(start_position); + debug!("position: {:?}", d.position()); + + let mut nodes = IndexVec::with_capacity(node_count); + let mut fingerprints = IndexVec::with_capacity(node_count); + let mut edge_list_indices = IndexVec::with_capacity(node_count); + let mut edge_list_data = Vec::with_capacity(edge_count); + + for _index in 0..node_count { + let dep_node: DepNode = Decodable::decode(d); + let _i: SerializedDepNodeIndex = nodes.push(dep_node); + debug_assert_eq!(_i.index(), _index); + + let fingerprint: Fingerprint = Decodable::decode(d); + let _i: SerializedDepNodeIndex = fingerprints.push(fingerprint); + debug_assert_eq!(_i.index(), _index); + + // Deserialize edges -- sequence of DepNodeIndex + let len = d.read_usize(); + let start = edge_list_data.len().try_into().unwrap(); + for _ in 0..len { + let edge = Decodable::decode(d); + edge_list_data.push(edge); + } + let end = edge_list_data.len().try_into().unwrap(); + let _i: SerializedDepNodeIndex = edge_list_indices.push((start, end)); + debug_assert_eq!(_i.index(), _index); + } + + let index: FxHashMap<_, _> = + nodes.iter_enumerated().map(|(idx, &dep_node)| (dep_node, idx)).collect(); + + SerializedDepGraph { nodes, fingerprints, edge_list_indices, edge_list_data, index } + } +} + +#[derive(Debug, Encodable, Decodable)] +pub struct NodeInfo { + node: DepNode, + fingerprint: Fingerprint, + edges: SmallVec<[DepNodeIndex; 8]>, +} + +struct Stat { + kind: K, + node_counter: u64, + edge_counter: u64, +} + +struct EncoderState { + encoder: FileEncoder, + total_node_count: usize, + total_edge_count: usize, + stats: Option>>, +} + +impl EncoderState { + fn new(encoder: FileEncoder, record_stats: bool) -> Self { + Self { + encoder, + total_edge_count: 0, + total_node_count: 0, + stats: record_stats.then(FxHashMap::default), + } + } + + fn encode_node( + &mut self, + node: &NodeInfo, + record_graph: &Option>>, + ) -> DepNodeIndex { + let index = DepNodeIndex::new(self.total_node_count); + self.total_node_count += 1; + + let edge_count = node.edges.len(); + self.total_edge_count += edge_count; + + if let Some(record_graph) = &record_graph { + // Do not ICE when a query is called from within `with_query`. + if let Some(record_graph) = &mut record_graph.try_lock() { + record_graph.push(index, node.node, &node.edges); + } + } + + if let Some(stats) = &mut self.stats { + let kind = node.node.kind; + + let stat = stats.entry(kind).or_insert(Stat { kind, node_counter: 0, edge_counter: 0 }); + stat.node_counter += 1; + stat.edge_counter += edge_count as u64; + } + + let encoder = &mut self.encoder; + node.encode(encoder); + index + } + + fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult { + let Self { mut encoder, total_node_count, total_edge_count, stats: _ } = self; + + let node_count = total_node_count.try_into().unwrap(); + let edge_count = total_edge_count.try_into().unwrap(); + + debug!(?node_count, ?edge_count); + debug!("position: {:?}", encoder.position()); + IntEncodedWithFixedSize(node_count).encode(&mut encoder); + IntEncodedWithFixedSize(edge_count).encode(&mut encoder); + debug!("position: {:?}", encoder.position()); + // Drop the encoder so that nothing is written after the counts. + let result = encoder.finish(); + if let Ok(position) = result { + // FIXME(rylev): we hardcode the dep graph file name so we + // don't need a dependency on rustc_incremental just for that. + profiler.artifact_size("dep_graph", "dep-graph.bin", position as u64); + } + result + } +} + +pub struct GraphEncoder { + status: Lock>, + record_graph: Option>>, +} + +impl> GraphEncoder { + pub fn new( + encoder: FileEncoder, + prev_node_count: usize, + record_graph: bool, + record_stats: bool, + ) -> Self { + let record_graph = + if record_graph { Some(Lock::new(DepGraphQuery::new(prev_node_count))) } else { None }; + let status = Lock::new(EncoderState::new(encoder, record_stats)); + GraphEncoder { status, record_graph } + } + + pub(crate) fn with_query(&self, f: impl Fn(&DepGraphQuery)) { + if let Some(record_graph) = &self.record_graph { + f(&record_graph.lock()) + } + } + + pub(crate) fn print_incremental_info( + &self, + total_read_count: u64, + total_duplicate_read_count: u64, + ) { + let status = self.status.lock(); + if let Some(record_stats) = &status.stats { + let mut stats: Vec<_> = record_stats.values().collect(); + stats.sort_by_key(|s| -(s.node_counter as i64)); + + const SEPARATOR: &str = "[incremental] --------------------------------\ + ----------------------------------------------\ + ------------"; + + eprintln!("[incremental]"); + eprintln!("[incremental] DepGraph Statistics"); + eprintln!("{}", SEPARATOR); + eprintln!("[incremental]"); + eprintln!("[incremental] Total Node Count: {}", status.total_node_count); + eprintln!("[incremental] Total Edge Count: {}", status.total_edge_count); + + if cfg!(debug_assertions) { + eprintln!("[incremental] Total Edge Reads: {}", total_read_count); + eprintln!( + "[incremental] Total Duplicate Edge Reads: {}", + total_duplicate_read_count + ); + } + + eprintln!("[incremental]"); + eprintln!( + "[incremental] {:<36}| {:<17}| {:<12}| {:<17}|", + "Node Kind", "Node Frequency", "Node Count", "Avg. Edge Count" + ); + eprintln!("{}", SEPARATOR); + + for stat in stats { + let node_kind_ratio = + (100.0 * (stat.node_counter as f64)) / (status.total_node_count as f64); + let node_kind_avg_edges = (stat.edge_counter as f64) / (stat.node_counter as f64); + + eprintln!( + "[incremental] {:<36}|{:>16.1}% |{:>12} |{:>17.1} |", + format!("{:?}", stat.kind), + node_kind_ratio, + stat.node_counter, + node_kind_avg_edges, + ); + } + + eprintln!("{}", SEPARATOR); + eprintln!("[incremental]"); + } + } + + pub(crate) fn send( + &self, + profiler: &SelfProfilerRef, + node: DepNode, + fingerprint: Fingerprint, + edges: SmallVec<[DepNodeIndex; 8]>, + ) -> DepNodeIndex { + let _prof_timer = profiler.generic_activity("incr_comp_encode_dep_graph"); + let node = NodeInfo { node, fingerprint, edges }; + self.status.lock().encode_node(&node, &self.record_graph) + } + + pub fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult { + let _prof_timer = profiler.generic_activity("incr_comp_encode_dep_graph"); + self.status.into_inner().finish(profiler) + } +} diff --git a/compiler/rustc_query_system/src/ich/hcx.rs b/compiler/rustc_query_system/src/ich/hcx.rs new file mode 100644 index 000000000..217fac341 --- /dev/null +++ b/compiler/rustc_query_system/src/ich/hcx.rs @@ -0,0 +1,223 @@ +use crate::ich; + +use rustc_ast as ast; +use rustc_data_structures::sorted_map::SortedMap; +use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher}; +use rustc_data_structures::sync::Lrc; +use rustc_hir as hir; +use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_hir::definitions::{DefPathHash, Definitions}; +use rustc_index::vec::IndexVec; +use rustc_session::cstore::CrateStore; +use rustc_session::Session; +use rustc_span::source_map::SourceMap; +use rustc_span::symbol::Symbol; +use rustc_span::{BytePos, CachingSourceMapView, SourceFile, Span, SpanData}; + +/// This is the context state available during incr. comp. hashing. It contains +/// enough information to transform `DefId`s and `HirId`s into stable `DefPath`s (i.e., +/// a reference to the `TyCtxt`) and it holds a few caches for speeding up various +/// things (e.g., each `DefId`/`DefPath` is only hashed once). +#[derive(Clone)] +pub struct StableHashingContext<'a> { + definitions: &'a Definitions, + cstore: &'a dyn CrateStore, + source_span: &'a IndexVec, + // The value of `-Z incremental-ignore-spans`. + // This field should only be used by `unstable_opts_incremental_ignore_span` + incremental_ignore_spans: bool, + pub(super) body_resolver: BodyResolver<'a>, + // Very often, we are hashing something that does not need the + // `CachingSourceMapView`, so we initialize it lazily. + raw_source_map: &'a SourceMap, + caching_source_map: Option>, + pub(super) hashing_controls: HashingControls, +} + +/// The `BodyResolver` allows mapping a `BodyId` to the corresponding `hir::Body`. +/// We could also just store a plain reference to the `hir::Crate` but we want +/// to avoid that the crate is used to get untracked access to all of the HIR. +#[derive(Clone, Copy)] +pub(super) enum BodyResolver<'tcx> { + Forbidden, + Traverse { + hash_bodies: bool, + owner: LocalDefId, + bodies: &'tcx SortedMap>, + }, +} + +impl<'a> StableHashingContext<'a> { + #[inline] + fn new_with_or_without_spans( + sess: &'a Session, + definitions: &'a Definitions, + cstore: &'a dyn CrateStore, + source_span: &'a IndexVec, + always_ignore_spans: bool, + ) -> Self { + let hash_spans_initial = + !always_ignore_spans && !sess.opts.unstable_opts.incremental_ignore_spans; + + StableHashingContext { + body_resolver: BodyResolver::Forbidden, + definitions, + cstore, + source_span, + incremental_ignore_spans: sess.opts.unstable_opts.incremental_ignore_spans, + caching_source_map: None, + raw_source_map: sess.source_map(), + hashing_controls: HashingControls { hash_spans: hash_spans_initial }, + } + } + + #[inline] + pub fn new( + sess: &'a Session, + definitions: &'a Definitions, + cstore: &'a dyn CrateStore, + source_span: &'a IndexVec, + ) -> Self { + Self::new_with_or_without_spans( + sess, + definitions, + cstore, + source_span, + /*always_ignore_spans=*/ false, + ) + } + + #[inline] + pub fn ignore_spans( + sess: &'a Session, + definitions: &'a Definitions, + cstore: &'a dyn CrateStore, + source_span: &'a IndexVec, + ) -> Self { + let always_ignore_spans = true; + Self::new_with_or_without_spans(sess, definitions, cstore, source_span, always_ignore_spans) + } + + /// Allow hashing + #[inline] + pub fn while_hashing_hir_bodies(&mut self, hb: bool, f: impl FnOnce(&mut Self)) { + let prev = match &mut self.body_resolver { + BodyResolver::Forbidden => panic!("Hashing HIR bodies is forbidden."), + BodyResolver::Traverse { ref mut hash_bodies, .. } => { + std::mem::replace(hash_bodies, hb) + } + }; + f(self); + match &mut self.body_resolver { + BodyResolver::Forbidden => unreachable!(), + BodyResolver::Traverse { ref mut hash_bodies, .. } => *hash_bodies = prev, + } + } + + #[inline] + pub fn with_hir_bodies( + &mut self, + hash_bodies: bool, + owner: LocalDefId, + bodies: &SortedMap>, + f: impl FnOnce(&mut StableHashingContext<'_>), + ) { + f(&mut StableHashingContext { + body_resolver: BodyResolver::Traverse { hash_bodies, owner, bodies }, + ..self.clone() + }); + } + + #[inline] + pub fn while_hashing_spans(&mut self, hash_spans: bool, f: F) { + let prev_hash_spans = self.hashing_controls.hash_spans; + self.hashing_controls.hash_spans = hash_spans; + f(self); + self.hashing_controls.hash_spans = prev_hash_spans; + } + + #[inline] + pub fn def_path_hash(&self, def_id: DefId) -> DefPathHash { + if let Some(def_id) = def_id.as_local() { + self.local_def_path_hash(def_id) + } else { + self.cstore.def_path_hash(def_id) + } + } + + #[inline] + pub fn local_def_path_hash(&self, def_id: LocalDefId) -> DefPathHash { + self.definitions.def_path_hash(def_id) + } + + #[inline] + pub fn source_map(&mut self) -> &mut CachingSourceMapView<'a> { + match self.caching_source_map { + Some(ref mut sm) => sm, + ref mut none => { + *none = Some(CachingSourceMapView::new(self.raw_source_map)); + none.as_mut().unwrap() + } + } + } + + #[inline] + pub fn is_ignored_attr(&self, name: Symbol) -> bool { + ich::IGNORED_ATTRIBUTES.contains(&name) + } + + #[inline] + pub fn hashing_controls(&self) -> HashingControls { + self.hashing_controls.clone() + } +} + +impl<'a> HashStable> for ast::NodeId { + #[inline] + fn hash_stable(&self, _: &mut StableHashingContext<'a>, _: &mut StableHasher) { + panic!("Node IDs should not appear in incremental state"); + } +} + +impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> { + #[inline] + fn hash_spans(&self) -> bool { + self.hashing_controls.hash_spans + } + + #[inline] + fn unstable_opts_incremental_ignore_spans(&self) -> bool { + self.incremental_ignore_spans + } + + #[inline] + fn def_path_hash(&self, def_id: DefId) -> DefPathHash { + self.def_path_hash(def_id) + } + + #[inline] + fn def_span(&self, def_id: LocalDefId) -> Span { + self.source_span[def_id] + } + + #[inline] + fn span_data_to_lines_and_cols( + &mut self, + span: &SpanData, + ) -> Option<(Lrc, usize, BytePos, usize, BytePos)> { + self.source_map().span_data_to_lines_and_cols(span) + } + + #[inline] + fn hashing_controls(&self) -> HashingControls { + self.hashing_controls.clone() + } +} + +impl<'a> rustc_data_structures::intern::InternedHashingContext for StableHashingContext<'a> { + fn with_def_path_and_no_spans(&mut self, f: impl FnOnce(&mut Self)) { + self.while_hashing_spans(false, f); + } +} + +impl<'a> rustc_session::HashStableContext for StableHashingContext<'a> {} diff --git a/compiler/rustc_query_system/src/ich/impls_hir.rs b/compiler/rustc_query_system/src/ich/impls_hir.rs new file mode 100644 index 000000000..3390ed9eb --- /dev/null +++ b/compiler/rustc_query_system/src/ich/impls_hir.rs @@ -0,0 +1,42 @@ +//! This module contains `HashStable` implementations for various HIR data +//! types in no particular order. + +use crate::ich::hcx::BodyResolver; +use crate::ich::StableHashingContext; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_hir as hir; + +impl<'ctx> rustc_hir::HashStableContext for StableHashingContext<'ctx> { + #[inline] + fn hash_body_id(&mut self, id: hir::BodyId, hasher: &mut StableHasher) { + let hcx = self; + match hcx.body_resolver { + BodyResolver::Forbidden => panic!("Hashing HIR bodies is forbidden."), + BodyResolver::Traverse { hash_bodies: false, .. } => {} + BodyResolver::Traverse { hash_bodies: true, owner, bodies } => { + assert_eq!(id.hir_id.owner, owner); + bodies[&id.hir_id.local_id].hash_stable(hcx, hasher); + } + } + } + + fn hash_hir_expr(&mut self, expr: &hir::Expr<'_>, hasher: &mut StableHasher) { + self.while_hashing_hir_bodies(true, |hcx| { + let hir::Expr { hir_id, ref span, ref kind } = *expr; + + hir_id.hash_stable(hcx, hasher); + span.hash_stable(hcx, hasher); + kind.hash_stable(hcx, hasher); + }) + } + + fn hash_hir_ty(&mut self, ty: &hir::Ty<'_>, hasher: &mut StableHasher) { + self.while_hashing_hir_bodies(true, |hcx| { + let hir::Ty { hir_id, ref kind, ref span } = *ty; + + hir_id.hash_stable(hcx, hasher); + kind.hash_stable(hcx, hasher); + span.hash_stable(hcx, hasher); + }) + } +} diff --git a/compiler/rustc_query_system/src/ich/impls_syntax.rs b/compiler/rustc_query_system/src/ich/impls_syntax.rs new file mode 100644 index 000000000..1fa085926 --- /dev/null +++ b/compiler/rustc_query_system/src/ich/impls_syntax.rs @@ -0,0 +1,150 @@ +//! This module contains `HashStable` implementations for various data types +//! from `rustc_ast` in no particular order. + +use crate::ich::StableHashingContext; + +use rustc_ast as ast; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_span::{BytePos, NormalizedPos, SourceFile}; +use std::assert_matches::assert_matches; + +use smallvec::SmallVec; + +impl<'ctx> rustc_target::HashStableContext for StableHashingContext<'ctx> {} + +impl<'a> HashStable> for [ast::Attribute] { + fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) { + if self.is_empty() { + self.len().hash_stable(hcx, hasher); + return; + } + + // Some attributes are always ignored during hashing. + let filtered: SmallVec<[&ast::Attribute; 8]> = self + .iter() + .filter(|attr| { + !attr.is_doc_comment() + && !attr.ident().map_or(false, |ident| hcx.is_ignored_attr(ident.name)) + }) + .collect(); + + filtered.len().hash_stable(hcx, hasher); + for attr in filtered { + attr.hash_stable(hcx, hasher); + } + } +} + +impl<'ctx> rustc_ast::HashStableContext for StableHashingContext<'ctx> { + fn hash_attr(&mut self, attr: &ast::Attribute, hasher: &mut StableHasher) { + // Make sure that these have been filtered out. + debug_assert!(!attr.ident().map_or(false, |ident| self.is_ignored_attr(ident.name))); + debug_assert!(!attr.is_doc_comment()); + + let ast::Attribute { kind, id: _, style, span } = attr; + if let ast::AttrKind::Normal(item, tokens) = kind { + item.hash_stable(self, hasher); + style.hash_stable(self, hasher); + span.hash_stable(self, hasher); + assert_matches!( + tokens.as_ref(), + None, + "Tokens should have been removed during lowering!" + ); + } else { + unreachable!(); + } + } +} + +impl<'a> HashStable> for SourceFile { + fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) { + let SourceFile { + name: _, // We hash the smaller name_hash instead of this + name_hash, + cnum, + // Do not hash the source as it is not encoded + src: _, + ref src_hash, + external_src: _, + start_pos, + end_pos: _, + lines: _, + ref multibyte_chars, + ref non_narrow_chars, + ref normalized_pos, + } = *self; + + (name_hash as u64).hash_stable(hcx, hasher); + + src_hash.hash_stable(hcx, hasher); + + // We are always in `Lines` form by the time we reach here. + assert!(self.lines.borrow().is_lines()); + self.lines(|lines| { + // We only hash the relative position within this source_file + lines.len().hash_stable(hcx, hasher); + for &line in lines.iter() { + stable_byte_pos(line, start_pos).hash_stable(hcx, hasher); + } + }); + + // We only hash the relative position within this source_file + multibyte_chars.len().hash_stable(hcx, hasher); + for &char_pos in multibyte_chars.iter() { + stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher); + } + + non_narrow_chars.len().hash_stable(hcx, hasher); + for &char_pos in non_narrow_chars.iter() { + stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher); + } + + normalized_pos.len().hash_stable(hcx, hasher); + for &char_pos in normalized_pos.iter() { + stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher); + } + + cnum.hash_stable(hcx, hasher); + } +} + +fn stable_byte_pos(pos: BytePos, source_file_start: BytePos) -> u32 { + pos.0 - source_file_start.0 +} + +fn stable_multibyte_char(mbc: rustc_span::MultiByteChar, source_file_start: BytePos) -> (u32, u32) { + let rustc_span::MultiByteChar { pos, bytes } = mbc; + + (pos.0 - source_file_start.0, bytes as u32) +} + +fn stable_non_narrow_char( + swc: rustc_span::NonNarrowChar, + source_file_start: BytePos, +) -> (u32, u32) { + let pos = swc.pos(); + let width = swc.width(); + + (pos.0 - source_file_start.0, width as u32) +} + +fn stable_normalized_pos(np: NormalizedPos, source_file_start: BytePos) -> (u32, u32) { + let NormalizedPos { pos, diff } = np; + + (pos.0 - source_file_start.0, diff) +} + +impl<'tcx> HashStable> for rustc_feature::Features { + fn hash_stable(&self, hcx: &mut StableHashingContext<'tcx>, hasher: &mut StableHasher) { + // Unfortunately we cannot exhaustively list fields here, since the + // struct is macro generated. + self.declared_lang_features.hash_stable(hcx, hasher); + self.declared_lib_features.hash_stable(hcx, hasher); + + self.walk_feature_fields(|feature_name, value| { + feature_name.hash_stable(hcx, hasher); + value.hash_stable(hcx, hasher); + }); + } +} diff --git a/compiler/rustc_query_system/src/ich/mod.rs b/compiler/rustc_query_system/src/ich/mod.rs new file mode 100644 index 000000000..0a1c350b2 --- /dev/null +++ b/compiler/rustc_query_system/src/ich/mod.rs @@ -0,0 +1,19 @@ +//! ICH - Incremental Compilation Hash + +pub use self::hcx::StableHashingContext; +use rustc_span::symbol::{sym, Symbol}; + +mod hcx; +mod impls_hir; +mod impls_syntax; + +pub const IGNORED_ATTRIBUTES: &[Symbol] = &[ + sym::cfg, + sym::rustc_if_this_changed, + sym::rustc_then_this_would_need, + sym::rustc_dirty, + sym::rustc_clean, + sym::rustc_partition_reused, + sym::rustc_partition_codegened, + sym::rustc_expected_cgu_reuse, +]; diff --git a/compiler/rustc_query_system/src/lib.rs b/compiler/rustc_query_system/src/lib.rs new file mode 100644 index 000000000..68284dcaa --- /dev/null +++ b/compiler/rustc_query_system/src/lib.rs @@ -0,0 +1,19 @@ +#![feature(assert_matches)] +#![feature(core_intrinsics)] +#![feature(hash_raw_entry)] +#![feature(let_else)] +#![feature(min_specialization)] +#![feature(extern_types)] +#![allow(rustc::potential_query_instability)] + +#[macro_use] +extern crate tracing; +#[macro_use] +extern crate rustc_data_structures; +#[macro_use] +extern crate rustc_macros; + +pub mod cache; +pub mod dep_graph; +pub mod ich; +pub mod query; diff --git a/compiler/rustc_query_system/src/query/README.md b/compiler/rustc_query_system/src/query/README.md new file mode 100644 index 000000000..8ec07b9fd --- /dev/null +++ b/compiler/rustc_query_system/src/query/README.md @@ -0,0 +1,3 @@ +For more information about how the query system works, see the [rustc dev guide]. + +[rustc dev guide]: https://rustc-dev-guide.rust-lang.org/query.html diff --git a/compiler/rustc_query_system/src/query/caches.rs b/compiler/rustc_query_system/src/query/caches.rs new file mode 100644 index 000000000..85c5af72e --- /dev/null +++ b/compiler/rustc_query_system/src/query/caches.rs @@ -0,0 +1,226 @@ +use crate::dep_graph::DepNodeIndex; + +use rustc_arena::TypedArena; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sharded; +#[cfg(parallel_compiler)] +use rustc_data_structures::sharded::Sharded; +#[cfg(not(parallel_compiler))] +use rustc_data_structures::sync::Lock; +use rustc_data_structures::sync::WorkerLocal; +use std::default::Default; +use std::fmt::Debug; +use std::hash::Hash; +use std::marker::PhantomData; + +pub trait CacheSelector { + type Cache; +} + +pub trait QueryStorage { + type Value: Debug; + type Stored: Clone; + + /// Store a value without putting it in the cache. + /// This is meant to be used with cycle errors. + fn store_nocache(&self, value: Self::Value) -> Self::Stored; +} + +pub trait QueryCache: QueryStorage + Sized { + type Key: Hash + Eq + Clone + Debug; + + /// Checks if the query is already computed and in the cache. + /// It returns the shard index and a lock guard to the shard, + /// which will be used if the query is not in the cache and we need + /// to compute it. + fn lookup( + &self, + key: &Self::Key, + // `on_hit` can be called while holding a lock to the query state shard. + on_hit: OnHit, + ) -> Result + where + OnHit: FnOnce(&Self::Stored, DepNodeIndex) -> R; + + fn complete(&self, key: Self::Key, value: Self::Value, index: DepNodeIndex) -> Self::Stored; + + fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)); +} + +pub struct DefaultCacheSelector; + +impl CacheSelector for DefaultCacheSelector { + type Cache = DefaultCache; +} + +pub struct DefaultCache { + #[cfg(parallel_compiler)] + cache: Sharded>, + #[cfg(not(parallel_compiler))] + cache: Lock>, +} + +impl Default for DefaultCache { + fn default() -> Self { + DefaultCache { cache: Default::default() } + } +} + +impl QueryStorage for DefaultCache { + type Value = V; + type Stored = V; + + #[inline] + fn store_nocache(&self, value: Self::Value) -> Self::Stored { + // We have no dedicated storage + value + } +} + +impl QueryCache for DefaultCache +where + K: Eq + Hash + Clone + Debug, + V: Clone + Debug, +{ + type Key = K; + + #[inline(always)] + fn lookup(&self, key: &K, on_hit: OnHit) -> Result + where + OnHit: FnOnce(&V, DepNodeIndex) -> R, + { + let key_hash = sharded::make_hash(key); + #[cfg(parallel_compiler)] + let lock = self.cache.get_shard_by_hash(key_hash).lock(); + #[cfg(not(parallel_compiler))] + let lock = self.cache.lock(); + let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key); + + if let Some((_, value)) = result { + let hit_result = on_hit(&value.0, value.1); + Ok(hit_result) + } else { + Err(()) + } + } + + #[inline] + fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored { + #[cfg(parallel_compiler)] + let mut lock = self.cache.get_shard_by_value(&key).lock(); + #[cfg(not(parallel_compiler))] + let mut lock = self.cache.lock(); + lock.insert(key, (value.clone(), index)); + value + } + + fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) { + #[cfg(parallel_compiler)] + { + let shards = self.cache.lock_shards(); + for shard in shards.iter() { + for (k, v) in shard.iter() { + f(k, &v.0, v.1); + } + } + } + #[cfg(not(parallel_compiler))] + { + let map = self.cache.lock(); + for (k, v) in map.iter() { + f(k, &v.0, v.1); + } + } + } +} + +pub struct ArenaCacheSelector<'tcx>(PhantomData<&'tcx ()>); + +impl<'tcx, K: Eq + Hash, V: 'tcx> CacheSelector for ArenaCacheSelector<'tcx> { + type Cache = ArenaCache<'tcx, K, V>; +} + +pub struct ArenaCache<'tcx, K, V> { + arena: WorkerLocal>, + #[cfg(parallel_compiler)] + cache: Sharded>, + #[cfg(not(parallel_compiler))] + cache: Lock>, +} + +impl<'tcx, K, V> Default for ArenaCache<'tcx, K, V> { + fn default() -> Self { + ArenaCache { arena: WorkerLocal::new(|_| TypedArena::default()), cache: Default::default() } + } +} + +impl<'tcx, K: Eq + Hash, V: Debug + 'tcx> QueryStorage for ArenaCache<'tcx, K, V> { + type Value = V; + type Stored = &'tcx V; + + #[inline] + fn store_nocache(&self, value: Self::Value) -> Self::Stored { + let value = self.arena.alloc((value, DepNodeIndex::INVALID)); + let value = unsafe { &*(&value.0 as *const _) }; + &value + } +} + +impl<'tcx, K, V: 'tcx> QueryCache for ArenaCache<'tcx, K, V> +where + K: Eq + Hash + Clone + Debug, + V: Debug, +{ + type Key = K; + + #[inline(always)] + fn lookup(&self, key: &K, on_hit: OnHit) -> Result + where + OnHit: FnOnce(&&'tcx V, DepNodeIndex) -> R, + { + let key_hash = sharded::make_hash(key); + #[cfg(parallel_compiler)] + let lock = self.cache.get_shard_by_hash(key_hash).lock(); + #[cfg(not(parallel_compiler))] + let lock = self.cache.lock(); + let result = lock.raw_entry().from_key_hashed_nocheck(key_hash, key); + + if let Some((_, value)) = result { + let hit_result = on_hit(&&value.0, value.1); + Ok(hit_result) + } else { + Err(()) + } + } + + #[inline] + fn complete(&self, key: K, value: V, index: DepNodeIndex) -> Self::Stored { + let value = self.arena.alloc((value, index)); + let value = unsafe { &*(value as *const _) }; + #[cfg(parallel_compiler)] + let mut lock = self.cache.get_shard_by_value(&key).lock(); + #[cfg(not(parallel_compiler))] + let mut lock = self.cache.lock(); + lock.insert(key, value); + &value.0 + } + + fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) { + #[cfg(parallel_compiler)] + { + let shards = self.cache.lock_shards(); + for shard in shards.iter() { + for (k, v) in shard.iter() { + f(k, &v.0, v.1); + } + } + } + #[cfg(not(parallel_compiler))] + { + let map = self.cache.lock(); + for (k, v) in map.iter() { + f(k, &v.0, v.1); + } + } + } +} diff --git a/compiler/rustc_query_system/src/query/config.rs b/compiler/rustc_query_system/src/query/config.rs new file mode 100644 index 000000000..964914a13 --- /dev/null +++ b/compiler/rustc_query_system/src/query/config.rs @@ -0,0 +1,75 @@ +//! Query configuration and description traits. + +use crate::dep_graph::DepNode; +use crate::dep_graph::SerializedDepNodeIndex; +use crate::ich::StableHashingContext; +use crate::query::caches::QueryCache; +use crate::query::{QueryContext, QueryState}; + +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed}; +use std::fmt::Debug; +use std::hash::Hash; + +pub trait QueryConfig { + const NAME: &'static str; + + type Key: Eq + Hash + Clone + Debug; + type Value; + type Stored: Clone; +} + +pub struct QueryVTable { + pub anon: bool, + pub dep_kind: CTX::DepKind, + pub eval_always: bool, + pub cache_on_disk: bool, + + pub compute: fn(CTX::DepContext, K) -> V, + pub hash_result: Option, &V) -> Fingerprint>, + pub handle_cycle_error: fn(CTX, DiagnosticBuilder<'_, ErrorGuaranteed>) -> V, + pub try_load_from_disk: Option Option>, +} + +impl QueryVTable { + pub(crate) fn to_dep_node(&self, tcx: CTX::DepContext, key: &K) -> DepNode + where + K: crate::dep_graph::DepNodeParams, + { + DepNode::construct(tcx, self.dep_kind, key) + } + + pub(crate) fn compute(&self, tcx: CTX::DepContext, key: K) -> V { + (self.compute)(tcx, key) + } + + pub(crate) fn try_load_from_disk(&self, tcx: CTX, index: SerializedDepNodeIndex) -> Option { + self.try_load_from_disk + .expect("QueryDescription::load_from_disk() called for an unsupported query.")( + tcx, index, + ) + } +} + +pub trait QueryDescription: QueryConfig { + const TRY_LOAD_FROM_DISK: Option Option>; + + type Cache: QueryCache; + + fn describe(tcx: CTX, key: Self::Key) -> String; + + // Don't use this method to access query results, instead use the methods on TyCtxt + fn query_state<'a>(tcx: CTX) -> &'a QueryState + where + CTX: 'a; + + // Don't use this method to access query results, instead use the methods on TyCtxt + fn query_cache<'a>(tcx: CTX) -> &'a Self::Cache + where + CTX: 'a; + + // Don't use this method to compute query results, instead use the methods on TyCtxt + fn make_vtable(tcx: CTX, key: &Self::Key) -> QueryVTable; + + fn cache_on_disk(tcx: CTX::DepContext, key: &Self::Key) -> bool; +} diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs new file mode 100644 index 000000000..6d2aff381 --- /dev/null +++ b/compiler/rustc_query_system/src/query/job.rs @@ -0,0 +1,612 @@ +use crate::query::plumbing::CycleError; +use crate::query::{QueryContext, QueryStackFrame}; +use rustc_hir::def::DefKind; + +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::{ + struct_span_err, Diagnostic, DiagnosticBuilder, ErrorGuaranteed, Handler, Level, +}; +use rustc_session::Session; +use rustc_span::Span; + +use std::hash::Hash; +use std::num::NonZeroU64; + +#[cfg(parallel_compiler)] +use { + parking_lot::{Condvar, Mutex}, + rustc_data_structures::fx::FxHashSet, + rustc_data_structures::sync::Lock, + rustc_data_structures::sync::Lrc, + rustc_data_structures::{jobserver, OnDrop}, + rustc_rayon_core as rayon_core, + rustc_span::DUMMY_SP, + std::iter::{self, FromIterator}, + std::{mem, process}, +}; + +/// Represents a span and a query key. +#[derive(Clone, Debug)] +pub struct QueryInfo { + /// The span corresponding to the reason for which this query was required. + pub span: Span, + pub query: QueryStackFrame, +} + +pub type QueryMap = FxHashMap; + +/// A value uniquely identifying an active query job. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct QueryJobId(pub NonZeroU64); + +impl QueryJobId { + fn query(self, map: &QueryMap) -> QueryStackFrame { + map.get(&self).unwrap().query.clone() + } + + #[cfg(parallel_compiler)] + fn span(self, map: &QueryMap) -> Span { + map.get(&self).unwrap().job.span + } + + #[cfg(parallel_compiler)] + fn parent(self, map: &QueryMap) -> Option { + map.get(&self).unwrap().job.parent + } + + #[cfg(parallel_compiler)] + fn latch<'a>(self, map: &'a QueryMap) -> Option<&'a QueryLatch> { + map.get(&self).unwrap().job.latch.as_ref() + } +} + +pub struct QueryJobInfo { + pub query: QueryStackFrame, + pub job: QueryJob, +} + +/// Represents an active query job. +#[derive(Clone)] +pub struct QueryJob { + pub id: QueryJobId, + + /// The span corresponding to the reason for which this query was required. + pub span: Span, + + /// The parent query job which created this job and is implicitly waiting on it. + pub parent: Option, + + /// The latch that is used to wait on this job. + #[cfg(parallel_compiler)] + latch: Option, +} + +impl QueryJob { + /// Creates a new query job. + #[inline] + pub fn new(id: QueryJobId, span: Span, parent: Option) -> Self { + QueryJob { + id, + span, + parent, + #[cfg(parallel_compiler)] + latch: None, + } + } + + #[cfg(parallel_compiler)] + pub(super) fn latch(&mut self) -> QueryLatch { + if self.latch.is_none() { + self.latch = Some(QueryLatch::new()); + } + self.latch.as_ref().unwrap().clone() + } + + /// Signals to waiters that the query is complete. + /// + /// This does nothing for single threaded rustc, + /// as there are no concurrent jobs which could be waiting on us + #[inline] + pub fn signal_complete(self) { + #[cfg(parallel_compiler)] + { + if let Some(latch) = self.latch { + latch.set(); + } + } + } +} + +#[cfg(not(parallel_compiler))] +impl QueryJobId { + #[cold] + #[inline(never)] + pub(super) fn find_cycle_in_stack( + &self, + query_map: QueryMap, + current_job: &Option, + span: Span, + ) -> CycleError { + // Find the waitee amongst `current_job` parents + let mut cycle = Vec::new(); + let mut current_job = Option::clone(current_job); + + while let Some(job) = current_job { + let info = query_map.get(&job).unwrap(); + cycle.push(QueryInfo { span: info.job.span, query: info.query.clone() }); + + if job == *self { + cycle.reverse(); + + // This is the end of the cycle + // The span entry we included was for the usage + // of the cycle itself, and not part of the cycle + // Replace it with the span which caused the cycle to form + cycle[0].span = span; + // Find out why the cycle itself was used + let usage = info + .job + .parent + .as_ref() + .map(|parent| (info.job.span, parent.query(&query_map))); + return CycleError { usage, cycle }; + } + + current_job = info.job.parent; + } + + panic!("did not find a cycle") + } +} + +#[cfg(parallel_compiler)] +struct QueryWaiter { + query: Option, + condvar: Condvar, + span: Span, + cycle: Lock>, +} + +#[cfg(parallel_compiler)] +impl QueryWaiter { + fn notify(&self, registry: &rayon_core::Registry) { + rayon_core::mark_unblocked(registry); + self.condvar.notify_one(); + } +} + +#[cfg(parallel_compiler)] +struct QueryLatchInfo { + complete: bool, + waiters: Vec>, +} + +#[cfg(parallel_compiler)] +#[derive(Clone)] +pub(super) struct QueryLatch { + info: Lrc>, +} + +#[cfg(parallel_compiler)] +impl QueryLatch { + fn new() -> Self { + QueryLatch { + info: Lrc::new(Mutex::new(QueryLatchInfo { complete: false, waiters: Vec::new() })), + } + } + + /// Awaits for the query job to complete. + pub(super) fn wait_on(&self, query: Option, span: Span) -> Result<(), CycleError> { + let waiter = + Lrc::new(QueryWaiter { query, span, cycle: Lock::new(None), condvar: Condvar::new() }); + self.wait_on_inner(&waiter); + // FIXME: Get rid of this lock. We have ownership of the QueryWaiter + // although another thread may still have a Lrc reference so we cannot + // use Lrc::get_mut + let mut cycle = waiter.cycle.lock(); + match cycle.take() { + None => Ok(()), + Some(cycle) => Err(cycle), + } + } + + /// Awaits the caller on this latch by blocking the current thread. + fn wait_on_inner(&self, waiter: &Lrc) { + let mut info = self.info.lock(); + if !info.complete { + // We push the waiter on to the `waiters` list. It can be accessed inside + // the `wait` call below, by 1) the `set` method or 2) by deadlock detection. + // Both of these will remove it from the `waiters` list before resuming + // this thread. + info.waiters.push(waiter.clone()); + + // If this detects a deadlock and the deadlock handler wants to resume this thread + // we have to be in the `wait` call. This is ensured by the deadlock handler + // getting the self.info lock. + rayon_core::mark_blocked(); + jobserver::release_thread(); + waiter.condvar.wait(&mut info); + // Release the lock before we potentially block in `acquire_thread` + mem::drop(info); + jobserver::acquire_thread(); + } + } + + /// Sets the latch and resumes all waiters on it + fn set(&self) { + let mut info = self.info.lock(); + debug_assert!(!info.complete); + info.complete = true; + let registry = rayon_core::Registry::current(); + for waiter in info.waiters.drain(..) { + waiter.notify(®istry); + } + } + + /// Removes a single waiter from the list of waiters. + /// This is used to break query cycles. + fn extract_waiter(&self, waiter: usize) -> Lrc { + let mut info = self.info.lock(); + debug_assert!(!info.complete); + // Remove the waiter from the list of waiters + info.waiters.remove(waiter) + } +} + +/// A resumable waiter of a query. The usize is the index into waiters in the query's latch +#[cfg(parallel_compiler)] +type Waiter = (QueryJobId, usize); + +/// Visits all the non-resumable and resumable waiters of a query. +/// Only waiters in a query are visited. +/// `visit` is called for every waiter and is passed a query waiting on `query_ref` +/// and a span indicating the reason the query waited on `query_ref`. +/// If `visit` returns Some, this function returns. +/// For visits of non-resumable waiters it returns the return value of `visit`. +/// For visits of resumable waiters it returns Some(Some(Waiter)) which has the +/// required information to resume the waiter. +/// If all `visit` calls returns None, this function also returns None. +#[cfg(parallel_compiler)] +fn visit_waiters(query_map: &QueryMap, query: QueryJobId, mut visit: F) -> Option> +where + F: FnMut(Span, QueryJobId) -> Option>, +{ + // Visit the parent query which is a non-resumable waiter since it's on the same stack + if let Some(parent) = query.parent(query_map) { + if let Some(cycle) = visit(query.span(query_map), parent) { + return Some(cycle); + } + } + + // Visit the explicit waiters which use condvars and are resumable + if let Some(latch) = query.latch(query_map) { + for (i, waiter) in latch.info.lock().waiters.iter().enumerate() { + if let Some(waiter_query) = waiter.query { + if visit(waiter.span, waiter_query).is_some() { + // Return a value which indicates that this waiter can be resumed + return Some(Some((query, i))); + } + } + } + } + + None +} + +/// Look for query cycles by doing a depth first search starting at `query`. +/// `span` is the reason for the `query` to execute. This is initially DUMMY_SP. +/// If a cycle is detected, this initial value is replaced with the span causing +/// the cycle. +#[cfg(parallel_compiler)] +fn cycle_check( + query_map: &QueryMap, + query: QueryJobId, + span: Span, + stack: &mut Vec<(Span, QueryJobId)>, + visited: &mut FxHashSet, +) -> Option> { + if !visited.insert(query) { + return if let Some(p) = stack.iter().position(|q| q.1 == query) { + // We detected a query cycle, fix up the initial span and return Some + + // Remove previous stack entries + stack.drain(0..p); + // Replace the span for the first query with the cycle cause + stack[0].0 = span; + Some(None) + } else { + None + }; + } + + // Query marked as visited is added it to the stack + stack.push((span, query)); + + // Visit all the waiters + let r = visit_waiters(query_map, query, |span, successor| { + cycle_check(query_map, successor, span, stack, visited) + }); + + // Remove the entry in our stack if we didn't find a cycle + if r.is_none() { + stack.pop(); + } + + r +} + +/// Finds out if there's a path to the compiler root (aka. code which isn't in a query) +/// from `query` without going through any of the queries in `visited`. +/// This is achieved with a depth first search. +#[cfg(parallel_compiler)] +fn connected_to_root( + query_map: &QueryMap, + query: QueryJobId, + visited: &mut FxHashSet, +) -> bool { + // We already visited this or we're deliberately ignoring it + if !visited.insert(query) { + return false; + } + + // This query is connected to the root (it has no query parent), return true + if query.parent(query_map).is_none() { + return true; + } + + visit_waiters(query_map, query, |_, successor| { + connected_to_root(query_map, successor, visited).then_some(None) + }) + .is_some() +} + +// Deterministically pick an query from a list +#[cfg(parallel_compiler)] +fn pick_query<'a, T, F>(query_map: &QueryMap, queries: &'a [T], f: F) -> &'a T +where + F: Fn(&T) -> (Span, QueryJobId), +{ + // Deterministically pick an entry point + // FIXME: Sort this instead + queries + .iter() + .min_by_key(|v| { + let (span, query) = f(v); + let hash = query.query(query_map).hash; + // Prefer entry points which have valid spans for nicer error messages + // We add an integer to the tuple ensuring that entry points + // with valid spans are picked first + let span_cmp = if span == DUMMY_SP { 1 } else { 0 }; + (span_cmp, hash) + }) + .unwrap() +} + +/// Looks for query cycles starting from the last query in `jobs`. +/// If a cycle is found, all queries in the cycle is removed from `jobs` and +/// the function return true. +/// If a cycle was not found, the starting query is removed from `jobs` and +/// the function returns false. +#[cfg(parallel_compiler)] +fn remove_cycle( + query_map: &QueryMap, + jobs: &mut Vec, + wakelist: &mut Vec>, +) -> bool { + let mut visited = FxHashSet::default(); + let mut stack = Vec::new(); + // Look for a cycle starting with the last query in `jobs` + if let Some(waiter) = + cycle_check(query_map, jobs.pop().unwrap(), DUMMY_SP, &mut stack, &mut visited) + { + // The stack is a vector of pairs of spans and queries; reverse it so that + // the earlier entries require later entries + let (mut spans, queries): (Vec<_>, Vec<_>) = stack.into_iter().rev().unzip(); + + // Shift the spans so that queries are matched with the span for their waitee + spans.rotate_right(1); + + // Zip them back together + let mut stack: Vec<_> = iter::zip(spans, queries).collect(); + + // Remove the queries in our cycle from the list of jobs to look at + for r in &stack { + if let Some(pos) = jobs.iter().position(|j| j == &r.1) { + jobs.remove(pos); + } + } + + // Find the queries in the cycle which are + // connected to queries outside the cycle + let entry_points = stack + .iter() + .filter_map(|&(span, query)| { + if query.parent(query_map).is_none() { + // This query is connected to the root (it has no query parent) + Some((span, query, None)) + } else { + let mut waiters = Vec::new(); + // Find all the direct waiters who lead to the root + visit_waiters(query_map, query, |span, waiter| { + // Mark all the other queries in the cycle as already visited + let mut visited = FxHashSet::from_iter(stack.iter().map(|q| q.1)); + + if connected_to_root(query_map, waiter, &mut visited) { + waiters.push((span, waiter)); + } + + None + }); + if waiters.is_empty() { + None + } else { + // Deterministically pick one of the waiters to show to the user + let waiter = *pick_query(query_map, &waiters, |s| *s); + Some((span, query, Some(waiter))) + } + } + }) + .collect::)>>(); + + // Deterministically pick an entry point + let (_, entry_point, usage) = pick_query(query_map, &entry_points, |e| (e.0, e.1)); + + // Shift the stack so that our entry point is first + let entry_point_pos = stack.iter().position(|(_, query)| query == entry_point); + if let Some(pos) = entry_point_pos { + stack.rotate_left(pos); + } + + let usage = usage.as_ref().map(|(span, query)| (*span, query.query(query_map))); + + // Create the cycle error + let error = CycleError { + usage, + cycle: stack + .iter() + .map(|&(s, ref q)| QueryInfo { span: s, query: q.query(query_map) }) + .collect(), + }; + + // We unwrap `waiter` here since there must always be one + // edge which is resumable / waited using a query latch + let (waitee_query, waiter_idx) = waiter.unwrap(); + + // Extract the waiter we want to resume + let waiter = waitee_query.latch(query_map).unwrap().extract_waiter(waiter_idx); + + // Set the cycle error so it will be picked up when resumed + *waiter.cycle.lock() = Some(error); + + // Put the waiter on the list of things to resume + wakelist.push(waiter); + + true + } else { + false + } +} + +/// Detects query cycles by using depth first search over all active query jobs. +/// If a query cycle is found it will break the cycle by finding an edge which +/// uses a query latch and then resuming that waiter. +/// There may be multiple cycles involved in a deadlock, so this searches +/// all active queries for cycles before finally resuming all the waiters at once. +#[cfg(parallel_compiler)] +pub fn deadlock(query_map: QueryMap, registry: &rayon_core::Registry) { + let on_panic = OnDrop(|| { + eprintln!("deadlock handler panicked, aborting process"); + process::abort(); + }); + + let mut wakelist = Vec::new(); + let mut jobs: Vec = query_map.keys().cloned().collect(); + + let mut found_cycle = false; + + while jobs.len() > 0 { + if remove_cycle(&query_map, &mut jobs, &mut wakelist) { + found_cycle = true; + } + } + + // Check that a cycle was found. It is possible for a deadlock to occur without + // a query cycle if a query which can be waited on uses Rayon to do multithreading + // internally. Such a query (X) may be executing on 2 threads (A and B) and A may + // wait using Rayon on B. Rayon may then switch to executing another query (Y) + // which in turn will wait on X causing a deadlock. We have a false dependency from + // X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here + // only considers the true dependency and won't detect a cycle. + assert!(found_cycle); + + // FIXME: Ensure this won't cause a deadlock before we return + for waiter in wakelist.into_iter() { + waiter.notify(registry); + } + + on_panic.disable(); +} + +#[inline(never)] +#[cold] +pub(crate) fn report_cycle<'a>( + sess: &'a Session, + CycleError { usage, cycle: stack }: CycleError, +) -> DiagnosticBuilder<'a, ErrorGuaranteed> { + assert!(!stack.is_empty()); + + let span = stack[0].query.default_span(stack[1 % stack.len()].span); + let mut err = + struct_span_err!(sess, span, E0391, "cycle detected when {}", stack[0].query.description); + + for i in 1..stack.len() { + let query = &stack[i].query; + let span = query.default_span(stack[(i + 1) % stack.len()].span); + err.span_note(span, &format!("...which requires {}...", query.description)); + } + + if stack.len() == 1 { + err.note(&format!("...which immediately requires {} again", stack[0].query.description)); + } else { + err.note(&format!( + "...which again requires {}, completing the cycle", + stack[0].query.description + )); + } + + if stack.iter().all(|entry| { + entry + .query + .def_kind + .map_or(false, |def_kind| matches!(def_kind, DefKind::TyAlias | DefKind::TraitAlias)) + }) { + if stack.iter().all(|entry| { + entry.query.def_kind.map_or(false, |def_kind| matches!(def_kind, DefKind::TyAlias)) + }) { + err.note("type aliases cannot be recursive"); + err.help("consider using a struct, enum, or union instead to break the cycle"); + err.help("see for more information"); + } else { + err.note("trait aliases cannot be recursive"); + } + } + + if let Some((span, query)) = usage { + err.span_note(query.default_span(span), &format!("cycle used when {}", query.description)); + } + + err +} + +pub fn print_query_stack( + tcx: CTX, + mut current_query: Option, + handler: &Handler, + num_frames: Option, +) -> usize { + // Be careful relying on global state here: this code is called from + // a panic hook, which means that the global `Handler` may be in a weird + // state if it was responsible for triggering the panic. + let mut i = 0; + let query_map = tcx.try_collect_active_jobs(); + + while let Some(query) = current_query { + if Some(i) == num_frames { + break; + } + let Some(query_info) = query_map.as_ref().and_then(|map| map.get(&query)) else { + break; + }; + let mut diag = Diagnostic::new( + Level::FailureNote, + &format!("#{} [{}] {}", i, query_info.query.name, query_info.query.description), + ); + diag.span = query_info.job.span.into(); + handler.force_print_diagnostic(diag); + + current_query = query_info.job.parent; + i += 1; + } + + i +} diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs new file mode 100644 index 000000000..fb2258434 --- /dev/null +++ b/compiler/rustc_query_system/src/query/mod.rs @@ -0,0 +1,125 @@ +mod plumbing; +pub use self::plumbing::*; + +mod job; +#[cfg(parallel_compiler)] +pub use self::job::deadlock; +pub use self::job::{print_query_stack, QueryInfo, QueryJob, QueryJobId, QueryJobInfo, QueryMap}; + +mod caches; +pub use self::caches::{ + ArenaCacheSelector, CacheSelector, DefaultCacheSelector, QueryCache, QueryStorage, +}; + +mod config; +pub use self::config::{QueryConfig, QueryDescription, QueryVTable}; + +use crate::dep_graph::{DepNodeIndex, HasDepContext, SerializedDepNodeIndex}; + +use rustc_data_structures::sync::Lock; +use rustc_data_structures::thin_vec::ThinVec; +use rustc_errors::Diagnostic; +use rustc_hir::def::DefKind; +use rustc_span::Span; + +/// Description of a frame in the query stack. +/// +/// This is mostly used in case of cycles for error reporting. +#[derive(Clone, Debug)] +pub struct QueryStackFrame { + pub name: &'static str, + pub description: String, + span: Option, + def_kind: Option, + /// This hash is used to deterministically pick + /// a query to remove cycles in the parallel compiler. + #[cfg(parallel_compiler)] + hash: u64, +} + +impl QueryStackFrame { + #[inline] + pub fn new( + name: &'static str, + description: String, + span: Option, + def_kind: Option, + _hash: impl FnOnce() -> u64, + ) -> Self { + Self { + name, + description, + span, + def_kind, + #[cfg(parallel_compiler)] + hash: _hash(), + } + } + + // FIXME(eddyb) Get more valid `Span`s on queries. + #[inline] + pub fn default_span(&self, span: Span) -> Span { + if !span.is_dummy() { + return span; + } + self.span.unwrap_or(span) + } +} + +/// Tracks 'side effects' for a particular query. +/// This struct is saved to disk along with the query result, +/// and loaded from disk if we mark the query as green. +/// This allows us to 'replay' changes to global state +/// that would otherwise only occur if we actually +/// executed the query method. +#[derive(Debug, Clone, Default, Encodable, Decodable)] +pub struct QuerySideEffects { + /// Stores any diagnostics emitted during query execution. + /// These diagnostics will be re-emitted if we mark + /// the query as green. + pub(super) diagnostics: ThinVec, +} + +impl QuerySideEffects { + #[inline] + pub fn is_empty(&self) -> bool { + let QuerySideEffects { diagnostics } = self; + diagnostics.is_empty() + } + pub fn append(&mut self, other: QuerySideEffects) { + let QuerySideEffects { diagnostics } = self; + diagnostics.extend(other.diagnostics); + } +} + +pub trait QueryContext: HasDepContext { + fn next_job_id(&self) -> QueryJobId; + + /// Get the query information from the TLS context. + fn current_query_job(&self) -> Option; + + fn try_collect_active_jobs(&self) -> Option; + + /// Load side effects associated to the node in the previous session. + fn load_side_effects(&self, prev_dep_node_index: SerializedDepNodeIndex) -> QuerySideEffects; + + /// Register diagnostics for the given node, for use in next session. + fn store_side_effects(&self, dep_node_index: DepNodeIndex, side_effects: QuerySideEffects); + + /// Register diagnostics for the given node, for use in next session. + fn store_side_effects_for_anon_node( + &self, + dep_node_index: DepNodeIndex, + side_effects: QuerySideEffects, + ); + + /// Executes a job by changing the `ImplicitCtxt` to point to the + /// new query job while it executes. It returns the diagnostics + /// captured during execution and the actual result. + fn start_query( + &self, + token: QueryJobId, + diagnostics: Option<&Lock>>, + compute: impl FnOnce() -> R, + ) -> R; +} diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs new file mode 100644 index 000000000..5e8ea07d0 --- /dev/null +++ b/compiler/rustc_query_system/src/query/plumbing.rs @@ -0,0 +1,742 @@ +//! The implementation of the query system itself. This defines the macros that +//! generate the actual methods on tcx which find and execute the provider, +//! manage the caches, and so forth. + +use crate::dep_graph::{DepContext, DepNode, DepNodeIndex, DepNodeParams}; +use crate::query::caches::QueryCache; +use crate::query::config::{QueryDescription, QueryVTable}; +use crate::query::job::{report_cycle, QueryInfo, QueryJob, QueryJobId, QueryJobInfo}; +use crate::query::{QueryContext, QueryMap, QuerySideEffects, QueryStackFrame}; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::fx::FxHashMap; +#[cfg(parallel_compiler)] +use rustc_data_structures::profiling::TimingGuard; +#[cfg(parallel_compiler)] +use rustc_data_structures::sharded::Sharded; +use rustc_data_structures::sync::Lock; +use rustc_data_structures::thin_vec::ThinVec; +use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed, FatalError}; +use rustc_session::Session; +use rustc_span::{Span, DUMMY_SP}; +use std::cell::Cell; +use std::collections::hash_map::Entry; +use std::fmt::Debug; +use std::hash::Hash; +use std::mem; +use std::ptr; + +pub struct QueryState { + #[cfg(parallel_compiler)] + active: Sharded>, + #[cfg(not(parallel_compiler))] + active: Lock>, +} + +/// Indicates the state of a query for a given key in a query map. +enum QueryResult { + /// An already executing query. The query job can be used to await for its completion. + Started(QueryJob), + + /// The query panicked. Queries trying to wait on this will raise a fatal error which will + /// silently panic. + Poisoned, +} + +impl QueryState +where + K: Eq + Hash + Clone + Debug, +{ + pub fn all_inactive(&self) -> bool { + #[cfg(parallel_compiler)] + { + let shards = self.active.lock_shards(); + shards.iter().all(|shard| shard.is_empty()) + } + #[cfg(not(parallel_compiler))] + { + self.active.lock().is_empty() + } + } + + pub fn try_collect_active_jobs( + &self, + tcx: CTX, + make_query: fn(CTX, K) -> QueryStackFrame, + jobs: &mut QueryMap, + ) -> Option<()> { + #[cfg(parallel_compiler)] + { + // We use try_lock_shards here since we are called from the + // deadlock handler, and this shouldn't be locked. + let shards = self.active.try_lock_shards()?; + for shard in shards.iter() { + for (k, v) in shard.iter() { + if let QueryResult::Started(ref job) = *v { + let query = make_query(tcx, k.clone()); + jobs.insert(job.id, QueryJobInfo { query, job: job.clone() }); + } + } + } + } + #[cfg(not(parallel_compiler))] + { + // We use try_lock here since we are called from the + // deadlock handler, and this shouldn't be locked. + // (FIXME: Is this relevant for non-parallel compilers? It doesn't + // really hurt much.) + for (k, v) in self.active.try_lock()?.iter() { + if let QueryResult::Started(ref job) = *v { + let query = make_query(tcx, k.clone()); + jobs.insert(job.id, QueryJobInfo { query, job: job.clone() }); + } + } + } + + Some(()) + } +} + +impl Default for QueryState { + fn default() -> QueryState { + QueryState { active: Default::default() } + } +} + +/// A type representing the responsibility to execute the job in the `job` field. +/// This will poison the relevant query if dropped. +struct JobOwner<'tcx, K> +where + K: Eq + Hash + Clone, +{ + state: &'tcx QueryState, + key: K, + id: QueryJobId, +} + +#[cold] +#[inline(never)] +fn mk_cycle( + tcx: CTX, + error: CycleError, + handle_cycle_error: fn(CTX, DiagnosticBuilder<'_, ErrorGuaranteed>) -> V, + cache: &dyn crate::query::QueryStorage, +) -> R +where + CTX: QueryContext, + V: std::fmt::Debug, + R: Clone, +{ + let error = report_cycle(tcx.dep_context().sess(), error); + let value = handle_cycle_error(tcx, error); + cache.store_nocache(value) +} + +impl<'tcx, K> JobOwner<'tcx, K> +where + K: Eq + Hash + Clone, +{ + /// Either gets a `JobOwner` corresponding the query, allowing us to + /// start executing the query, or returns with the result of the query. + /// This function assumes that `try_get_cached` is already called and returned `lookup`. + /// If the query is executing elsewhere, this will wait for it and return the result. + /// If the query panicked, this will silently panic. + /// + /// This function is inlined because that results in a noticeable speed-up + /// for some compile-time benchmarks. + #[inline(always)] + fn try_start<'b, CTX>( + tcx: &'b CTX, + state: &'b QueryState, + span: Span, + key: K, + ) -> TryGetJob<'b, K> + where + CTX: QueryContext, + { + #[cfg(parallel_compiler)] + let mut state_lock = state.active.get_shard_by_value(&key).lock(); + #[cfg(not(parallel_compiler))] + let mut state_lock = state.active.lock(); + let lock = &mut *state_lock; + + match lock.entry(key) { + Entry::Vacant(entry) => { + let id = tcx.next_job_id(); + let job = tcx.current_query_job(); + let job = QueryJob::new(id, span, job); + + let key = entry.key().clone(); + entry.insert(QueryResult::Started(job)); + + let owner = JobOwner { state, id, key }; + return TryGetJob::NotYetStarted(owner); + } + Entry::Occupied(mut entry) => { + match entry.get_mut() { + #[cfg(not(parallel_compiler))] + QueryResult::Started(job) => { + let id = job.id; + drop(state_lock); + + // If we are single-threaded we know that we have cycle error, + // so we just return the error. + return TryGetJob::Cycle(id.find_cycle_in_stack( + tcx.try_collect_active_jobs().unwrap(), + &tcx.current_query_job(), + span, + )); + } + #[cfg(parallel_compiler)] + QueryResult::Started(job) => { + // For parallel queries, we'll block and wait until the query running + // in another thread has completed. Record how long we wait in the + // self-profiler. + let query_blocked_prof_timer = tcx.dep_context().profiler().query_blocked(); + + // Get the latch out + let latch = job.latch(); + + drop(state_lock); + + // With parallel queries we might just have to wait on some other + // thread. + let result = latch.wait_on(tcx.current_query_job(), span); + + match result { + Ok(()) => TryGetJob::JobCompleted(query_blocked_prof_timer), + Err(cycle) => TryGetJob::Cycle(cycle), + } + } + QueryResult::Poisoned => FatalError.raise(), + } + } + } + } + + /// Completes the query by updating the query cache with the `result`, + /// signals the waiter and forgets the JobOwner, so it won't poison the query + fn complete(self, cache: &C, result: C::Value, dep_node_index: DepNodeIndex) -> C::Stored + where + C: QueryCache, + { + // We can move out of `self` here because we `mem::forget` it below + let key = unsafe { ptr::read(&self.key) }; + let state = self.state; + + // Forget ourself so our destructor won't poison the query + mem::forget(self); + + let (job, result) = { + let job = { + #[cfg(parallel_compiler)] + let mut lock = state.active.get_shard_by_value(&key).lock(); + #[cfg(not(parallel_compiler))] + let mut lock = state.active.lock(); + match lock.remove(&key).unwrap() { + QueryResult::Started(job) => job, + QueryResult::Poisoned => panic!(), + } + }; + let result = cache.complete(key, result, dep_node_index); + (job, result) + }; + + job.signal_complete(); + result + } +} + +impl<'tcx, K> Drop for JobOwner<'tcx, K> +where + K: Eq + Hash + Clone, +{ + #[inline(never)] + #[cold] + fn drop(&mut self) { + // Poison the query so jobs waiting on it panic. + let state = self.state; + let job = { + #[cfg(parallel_compiler)] + let mut shard = state.active.get_shard_by_value(&self.key).lock(); + #[cfg(not(parallel_compiler))] + let mut shard = state.active.lock(); + let job = match shard.remove(&self.key).unwrap() { + QueryResult::Started(job) => job, + QueryResult::Poisoned => panic!(), + }; + shard.insert(self.key.clone(), QueryResult::Poisoned); + job + }; + // Also signal the completion of the job, so waiters + // will continue execution. + job.signal_complete(); + } +} + +#[derive(Clone)] +pub(crate) struct CycleError { + /// The query and related span that uses the cycle. + pub usage: Option<(Span, QueryStackFrame)>, + pub cycle: Vec, +} + +/// The result of `try_start`. +enum TryGetJob<'tcx, K> +where + K: Eq + Hash + Clone, +{ + /// The query is not yet started. Contains a guard to the cache eventually used to start it. + NotYetStarted(JobOwner<'tcx, K>), + + /// The query was already completed. + /// Returns the result of the query and its dep-node index + /// if it succeeded or a cycle error if it failed. + #[cfg(parallel_compiler)] + JobCompleted(TimingGuard<'tcx>), + + /// Trying to execute the query resulted in a cycle. + Cycle(CycleError), +} + +/// Checks if the query is already computed and in the cache. +/// It returns the shard index and a lock guard to the shard, +/// which will be used if the query is not in the cache and we need +/// to compute it. +#[inline] +pub fn try_get_cached<'a, CTX, C, R, OnHit>( + tcx: CTX, + cache: &'a C, + key: &C::Key, + // `on_hit` can be called while holding a lock to the query cache + on_hit: OnHit, +) -> Result +where + C: QueryCache, + CTX: DepContext, + OnHit: FnOnce(&C::Stored) -> R, +{ + cache.lookup(&key, |value, index| { + if std::intrinsics::unlikely(tcx.profiler().enabled()) { + tcx.profiler().query_cache_hit(index.into()); + } + tcx.dep_graph().read_index(index); + on_hit(value) + }) +} + +fn try_execute_query( + tcx: CTX, + state: &QueryState, + cache: &C, + span: Span, + key: C::Key, + dep_node: Option>, + query: &QueryVTable, +) -> (C::Stored, Option) +where + C: QueryCache, + C::Key: Clone + DepNodeParams, + CTX: QueryContext, +{ + match JobOwner::<'_, C::Key>::try_start(&tcx, state, span, key.clone()) { + TryGetJob::NotYetStarted(job) => { + let (result, dep_node_index) = execute_job(tcx, key, dep_node, query, job.id); + let result = job.complete(cache, result, dep_node_index); + (result, Some(dep_node_index)) + } + TryGetJob::Cycle(error) => { + let result = mk_cycle(tcx, error, query.handle_cycle_error, cache); + (result, None) + } + #[cfg(parallel_compiler)] + TryGetJob::JobCompleted(query_blocked_prof_timer) => { + let (v, index) = cache + .lookup(&key, |value, index| (value.clone(), index)) + .unwrap_or_else(|_| panic!("value must be in cache after waiting")); + + if std::intrinsics::unlikely(tcx.dep_context().profiler().enabled()) { + tcx.dep_context().profiler().query_cache_hit(index.into()); + } + query_blocked_prof_timer.finish_with_query_invocation_id(index.into()); + + (v, Some(index)) + } + } +} + +fn execute_job( + tcx: CTX, + key: K, + mut dep_node_opt: Option>, + query: &QueryVTable, + job_id: QueryJobId, +) -> (V, DepNodeIndex) +where + K: Clone + DepNodeParams, + V: Debug, + CTX: QueryContext, +{ + let dep_graph = tcx.dep_context().dep_graph(); + + // Fast path for when incr. comp. is off. + if !dep_graph.is_fully_enabled() { + let prof_timer = tcx.dep_context().profiler().query_provider(); + let result = tcx.start_query(job_id, None, || query.compute(*tcx.dep_context(), key)); + let dep_node_index = dep_graph.next_virtual_depnode_index(); + prof_timer.finish_with_query_invocation_id(dep_node_index.into()); + return (result, dep_node_index); + } + + if !query.anon && !query.eval_always { + // `to_dep_node` is expensive for some `DepKind`s. + let dep_node = + dep_node_opt.get_or_insert_with(|| query.to_dep_node(*tcx.dep_context(), &key)); + + // The diagnostics for this query will be promoted to the current session during + // `try_mark_green()`, so we can ignore them here. + if let Some(ret) = tcx.start_query(job_id, None, || { + try_load_from_disk_and_cache_in_memory(tcx, &key, &dep_node, query) + }) { + return ret; + } + } + + let prof_timer = tcx.dep_context().profiler().query_provider(); + let diagnostics = Lock::new(ThinVec::new()); + + let (result, dep_node_index) = tcx.start_query(job_id, Some(&diagnostics), || { + if query.anon { + return dep_graph.with_anon_task(*tcx.dep_context(), query.dep_kind, || { + query.compute(*tcx.dep_context(), key) + }); + } + + // `to_dep_node` is expensive for some `DepKind`s. + let dep_node = dep_node_opt.unwrap_or_else(|| query.to_dep_node(*tcx.dep_context(), &key)); + + dep_graph.with_task(dep_node, *tcx.dep_context(), key, query.compute, query.hash_result) + }); + + prof_timer.finish_with_query_invocation_id(dep_node_index.into()); + + let diagnostics = diagnostics.into_inner(); + let side_effects = QuerySideEffects { diagnostics }; + + if std::intrinsics::unlikely(!side_effects.is_empty()) { + if query.anon { + tcx.store_side_effects_for_anon_node(dep_node_index, side_effects); + } else { + tcx.store_side_effects(dep_node_index, side_effects); + } + } + + (result, dep_node_index) +} + +fn try_load_from_disk_and_cache_in_memory( + tcx: CTX, + key: &K, + dep_node: &DepNode, + query: &QueryVTable, +) -> Option<(V, DepNodeIndex)> +where + K: Clone, + CTX: QueryContext, + V: Debug, +{ + // Note this function can be called concurrently from the same query + // We must ensure that this is handled correctly. + + let dep_graph = tcx.dep_context().dep_graph(); + let (prev_dep_node_index, dep_node_index) = dep_graph.try_mark_green(tcx, &dep_node)?; + + debug_assert!(dep_graph.is_green(dep_node)); + + // First we try to load the result from the on-disk cache. + // Some things are never cached on disk. + if query.cache_on_disk { + let prof_timer = tcx.dep_context().profiler().incr_cache_loading(); + + // The call to `with_query_deserialization` enforces that no new `DepNodes` + // are created during deserialization. See the docs of that method for more + // details. + let result = dep_graph + .with_query_deserialization(|| query.try_load_from_disk(tcx, prev_dep_node_index)); + + prof_timer.finish_with_query_invocation_id(dep_node_index.into()); + + if let Some(result) = result { + if std::intrinsics::unlikely( + tcx.dep_context().sess().opts.unstable_opts.query_dep_graph, + ) { + dep_graph.mark_debug_loaded_from_disk(*dep_node) + } + + let prev_fingerprint = tcx + .dep_context() + .dep_graph() + .prev_fingerprint_of(dep_node) + .unwrap_or(Fingerprint::ZERO); + // If `-Zincremental-verify-ich` is specified, re-hash results from + // the cache and make sure that they have the expected fingerprint. + // + // If not, we still seek to verify a subset of fingerprints loaded + // from disk. Re-hashing results is fairly expensive, so we can't + // currently afford to verify every hash. This subset should still + // give us some coverage of potential bugs though. + let try_verify = prev_fingerprint.as_value().1 % 32 == 0; + if std::intrinsics::unlikely( + try_verify || tcx.dep_context().sess().opts.unstable_opts.incremental_verify_ich, + ) { + incremental_verify_ich(*tcx.dep_context(), &result, dep_node, query); + } + + return Some((result, dep_node_index)); + } + + // We always expect to find a cached result for things that + // can be forced from `DepNode`. + debug_assert!( + !tcx.dep_context().fingerprint_style(dep_node.kind).reconstructible(), + "missing on-disk cache entry for {:?}", + dep_node + ); + } + + // We could not load a result from the on-disk cache, so + // recompute. + let prof_timer = tcx.dep_context().profiler().query_provider(); + + // The dep-graph for this computation is already in-place. + let result = dep_graph.with_ignore(|| query.compute(*tcx.dep_context(), key.clone())); + + prof_timer.finish_with_query_invocation_id(dep_node_index.into()); + + // Verify that re-running the query produced a result with the expected hash + // This catches bugs in query implementations, turning them into ICEs. + // For example, a query might sort its result by `DefId` - since `DefId`s are + // not stable across compilation sessions, the result could get up getting sorted + // in a different order when the query is re-run, even though all of the inputs + // (e.g. `DefPathHash` values) were green. + // + // See issue #82920 for an example of a miscompilation that would get turned into + // an ICE by this check + incremental_verify_ich(*tcx.dep_context(), &result, dep_node, query); + + Some((result, dep_node_index)) +} + +fn incremental_verify_ich( + tcx: CTX::DepContext, + result: &V, + dep_node: &DepNode, + query: &QueryVTable, +) where + CTX: QueryContext, +{ + assert!( + tcx.dep_graph().is_green(dep_node), + "fingerprint for green query instance not loaded from cache: {:?}", + dep_node, + ); + + debug!("BEGIN verify_ich({:?})", dep_node); + let new_hash = query.hash_result.map_or(Fingerprint::ZERO, |f| { + tcx.with_stable_hashing_context(|mut hcx| f(&mut hcx, result)) + }); + let old_hash = tcx.dep_graph().prev_fingerprint_of(dep_node); + debug!("END verify_ich({:?})", dep_node); + + if Some(new_hash) != old_hash { + incremental_verify_ich_cold(tcx.sess(), DebugArg::from(&dep_node), DebugArg::from(&result)); + } +} + +// This DebugArg business is largely a mirror of std::fmt::ArgumentV1, which is +// currently not exposed publicly. +// +// The PR which added this attempted to use `&dyn Debug` instead, but that +// showed statistically significant worse compiler performance. It's not +// actually clear what the cause there was -- the code should be cold. If this +// can be replaced with `&dyn Debug` with on perf impact, then it probably +// should be. +extern "C" { + type Opaque; +} + +struct DebugArg<'a> { + value: &'a Opaque, + fmt: fn(&Opaque, &mut std::fmt::Formatter<'_>) -> std::fmt::Result, +} + +impl<'a, T> From<&'a T> for DebugArg<'a> +where + T: std::fmt::Debug, +{ + fn from(value: &'a T) -> DebugArg<'a> { + DebugArg { + value: unsafe { std::mem::transmute(value) }, + fmt: unsafe { + std::mem::transmute(::fmt as fn(_, _) -> std::fmt::Result) + }, + } + } +} + +impl std::fmt::Debug for DebugArg<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + (self.fmt)(self.value, f) + } +} + +// Note that this is marked #[cold] and intentionally takes the equivalent of +// `dyn Debug` for its arguments, as we want to avoid generating a bunch of +// different implementations for LLVM to chew on (and filling up the final +// binary, too). +#[cold] +fn incremental_verify_ich_cold(sess: &Session, dep_node: DebugArg<'_>, result: DebugArg<'_>) { + let run_cmd = if let Some(crate_name) = &sess.opts.crate_name { + format!("`cargo clean -p {}` or `cargo clean`", crate_name) + } else { + "`cargo clean`".to_string() + }; + + // When we emit an error message and panic, we try to debug-print the `DepNode` + // and query result. Unfortunately, this can cause us to run additional queries, + // which may result in another fingerprint mismatch while we're in the middle + // of processing this one. To avoid a double-panic (which kills the process + // before we can print out the query static), we print out a terse + // but 'safe' message if we detect a re-entrant call to this method. + thread_local! { + static INSIDE_VERIFY_PANIC: Cell = const { Cell::new(false) }; + }; + + let old_in_panic = INSIDE_VERIFY_PANIC.with(|in_panic| in_panic.replace(true)); + + if old_in_panic { + sess.struct_err( + "internal compiler error: re-entrant incremental verify failure, suppressing message", + ) + .emit(); + } else { + sess.struct_err(&format!("internal compiler error: encountered incremental compilation error with {:?}", dep_node)) + .help(&format!("This is a known issue with the compiler. Run {} to allow your project to compile", run_cmd)) + .note("Please follow the instructions below to create a bug report with the provided information") + .note("See for more information") + .emit(); + panic!("Found unstable fingerprints for {:?}: {:?}", dep_node, result); + } + + INSIDE_VERIFY_PANIC.with(|in_panic| in_panic.set(old_in_panic)); +} + +/// Ensure that either this query has all green inputs or been executed. +/// Executing `query::ensure(D)` is considered a read of the dep-node `D`. +/// Returns true if the query should still run. +/// +/// This function is particularly useful when executing passes for their +/// side-effects -- e.g., in order to report errors for erroneous programs. +/// +/// Note: The optimization is only available during incr. comp. +#[inline(never)] +fn ensure_must_run( + tcx: CTX, + key: &K, + query: &QueryVTable, +) -> (bool, Option>) +where + K: crate::dep_graph::DepNodeParams, + CTX: QueryContext, +{ + if query.eval_always { + return (true, None); + } + + // Ensuring an anonymous query makes no sense + assert!(!query.anon); + + let dep_node = query.to_dep_node(*tcx.dep_context(), key); + + let dep_graph = tcx.dep_context().dep_graph(); + match dep_graph.try_mark_green(tcx, &dep_node) { + None => { + // A None return from `try_mark_green` means that this is either + // a new dep node or that the dep node has already been marked red. + // Either way, we can't call `dep_graph.read()` as we don't have the + // DepNodeIndex. We must invoke the query itself. The performance cost + // this introduces should be negligible as we'll immediately hit the + // in-memory cache, or another query down the line will. + (true, Some(dep_node)) + } + Some((_, dep_node_index)) => { + dep_graph.read_index(dep_node_index); + tcx.dep_context().profiler().query_cache_hit(dep_node_index.into()); + (false, None) + } + } +} + +#[derive(Debug)] +pub enum QueryMode { + Get, + Ensure, +} + +pub fn get_query(tcx: CTX, span: Span, key: Q::Key, mode: QueryMode) -> Option +where + Q: QueryDescription, + Q::Key: DepNodeParams, + CTX: QueryContext, +{ + let query = Q::make_vtable(tcx, &key); + let dep_node = if let QueryMode::Ensure = mode { + let (must_run, dep_node) = ensure_must_run(tcx, &key, &query); + if !must_run { + return None; + } + dep_node + } else { + None + }; + + let (result, dep_node_index) = try_execute_query( + tcx, + Q::query_state(tcx), + Q::query_cache(tcx), + span, + key, + dep_node, + &query, + ); + if let Some(dep_node_index) = dep_node_index { + tcx.dep_context().dep_graph().read_index(dep_node_index) + } + Some(result) +} + +pub fn force_query(tcx: CTX, key: Q::Key, dep_node: DepNode) +where + Q: QueryDescription, + Q::Key: DepNodeParams, + CTX: QueryContext, +{ + // We may be concurrently trying both execute and force a query. + // Ensure that only one of them runs the query. + let cache = Q::query_cache(tcx); + let cached = cache.lookup(&key, |_, index| { + if std::intrinsics::unlikely(tcx.dep_context().profiler().enabled()) { + tcx.dep_context().profiler().query_cache_hit(index.into()); + } + }); + + match cached { + Ok(()) => return, + Err(()) => {} + } + + let query = Q::make_vtable(tcx, &key); + let state = Q::query_state(tcx); + debug_assert!(!query.anon); + + try_execute_query(tcx, state, cache, DUMMY_SP, key, Some(dep_node), &query); +} -- cgit v1.2.3