author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000
commit    | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree      | 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_incremental
parent    | Initial commit. (diff)
download  | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
          | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1. (upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_incremental')
-rw-r--r-- | compiler/rustc_incremental/Cargo.toml                   |  22
-rw-r--r-- | compiler/rustc_incremental/src/assert_dep_graph.rs      | 446
-rw-r--r-- | compiler/rustc_incremental/src/assert_module_sources.rs | 178
-rw-r--r-- | compiler/rustc_incremental/src/lib.rs                   |  30
-rw-r--r-- | compiler/rustc_incremental/src/persist/README.md        |   3
-rw-r--r-- | compiler/rustc_incremental/src/persist/data.rs          |  13
-rw-r--r-- | compiler/rustc_incremental/src/persist/dirty_clean.rs   | 480
-rw-r--r-- | compiler/rustc_incremental/src/persist/file_format.rs   | 195
-rw-r--r-- | compiler/rustc_incremental/src/persist/fs.rs            | 984
-rw-r--r-- | compiler/rustc_incremental/src/persist/fs/tests.rs      |  84
-rw-r--r-- | compiler/rustc_incremental/src/persist/load.rs          | 235
-rw-r--r-- | compiler/rustc_incremental/src/persist/mod.rs           |  25
-rw-r--r-- | compiler/rustc_incremental/src/persist/save.rs          | 188
-rw-r--r-- | compiler/rustc_incremental/src/persist/work_product.rs  |  59
14 files changed, 2942 insertions, 0 deletions
diff --git a/compiler/rustc_incremental/Cargo.toml b/compiler/rustc_incremental/Cargo.toml
new file mode 100644
index 000000000..d3c425a07
--- /dev/null
+++ b/compiler/rustc_incremental/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "rustc_incremental"
+version = "0.0.0"
+edition = "2021"
+
+[lib]
+doctest = false
+
+[dependencies]
+rustc_graphviz = { path = "../rustc_graphviz" }
+tracing = "0.1"
+rand = "0.8.4"
+rustc_middle = { path = "../rustc_middle" }
+rustc_data_structures = { path = "../rustc_data_structures" }
+rustc_hir = { path = "../rustc_hir" }
+rustc_serialize = { path = "../rustc_serialize" }
+rustc_ast = { path = "../rustc_ast" }
+rustc_macros = { path = "../rustc_macros" }
+rustc_span = { path = "../rustc_span" }
+rustc_fs_util = { path = "../rustc_fs_util" }
+rustc_session = { path = "../rustc_session" }
+rustc_errors = { path = "../rustc_errors" }
diff --git a/compiler/rustc_incremental/src/assert_dep_graph.rs b/compiler/rustc_incremental/src/assert_dep_graph.rs
new file mode 100644
index 000000000..69e482ce8
--- /dev/null
+++ b/compiler/rustc_incremental/src/assert_dep_graph.rs
@@ -0,0 +1,446 @@
+//! This pass is only used for the UNIT TESTS and DEBUGGING NEEDS
+//! around dependency graph construction. It serves two purposes: it
+//! will dump graphs in graphviz form to disk, and it searches for
+//! `#[rustc_if_this_changed]` and `#[rustc_then_this_would_need]`
+//! annotations. These annotations can be used to test whether paths
+//! exist in the graph. These checks run after codegen, so they view
+//! the final state of the dependency graph. Note that there are
+//! similar assertions found in `persist::dirty_clean` which check the
+//! **initial** state of the dependency graph, just after it has been
+//! loaded from disk.
+//!
+//! In this code, we report errors on each `rustc_if_this_changed`
+//! annotation. If a path exists in all cases, then we would report
+//! "all path(s) exist". Otherwise, we report: "no path to `foo`" for
+//! each case where no path exists. `ui` tests can then be
+//! used to check when paths exist or do not.
+//!
+//! The full form of the `rustc_if_this_changed` annotation is
+//! `#[rustc_if_this_changed("foo")]`, which will report a
+//! source node of `foo(def_id)`. The `"foo"` is optional and
+//! defaults to `"Hir"` if omitted.
+//!
+//! Example:
+//!
+//! ```ignore (needs flags)
+//! #[rustc_if_this_changed(Hir)]
+//! fn foo() { }
+//!
+//! #[rustc_then_this_would_need(codegen)] //~ ERROR no path from `foo`
+//! fn bar() { }
+//!
+//! #[rustc_then_this_would_need(codegen)] //~ ERROR OK
+//! fn baz() { foo(); }
+//! ```
+
+use rustc_ast as ast;
+use rustc_data_structures::fx::FxHashSet;
+use rustc_data_structures::graph::implementation::{Direction, NodeIndex, INCOMING, OUTGOING};
+use rustc_graphviz as dot;
+use rustc_hir as hir;
+use rustc_hir::def_id::DefId;
+use rustc_hir::intravisit::{self, Visitor};
+use rustc_middle::dep_graph::{
+    DepGraphQuery, DepKind, DepNode, DepNodeExt, DepNodeFilter, EdgeFilter,
+};
+use rustc_middle::hir::nested_filter;
+use rustc_middle::ty::TyCtxt;
+use rustc_span::symbol::{sym, Symbol};
+use rustc_span::Span;
+
+use std::env;
+use std::fs::{self, File};
+use std::io::{BufWriter, Write};
+
+#[allow(missing_docs)]
+pub fn assert_dep_graph(tcx: TyCtxt<'_>) {
+    tcx.dep_graph.with_ignore(|| {
+        if tcx.sess.opts.unstable_opts.dump_dep_graph {
+            tcx.dep_graph.with_query(dump_graph);
+        }
+
+        if !tcx.sess.opts.unstable_opts.query_dep_graph {
+            return;
+        }
+
+        // if the `rustc_attrs` feature is not enabled, then the
+        // attributes we are interested in cannot be present anyway, so
+        // skip the walk.
+        if !tcx.features().rustc_attrs {
+            return;
+        }
+
+        // Find annotations supplied by user (if any).
+        let (if_this_changed, then_this_would_need) = {
+            let mut visitor =
+                IfThisChanged { tcx, if_this_changed: vec![], then_this_would_need: vec![] };
+            visitor.process_attrs(hir::CRATE_HIR_ID);
+            tcx.hir().visit_all_item_likes_in_crate(&mut visitor);
+            (visitor.if_this_changed, visitor.then_this_would_need)
+        };
+
+        if !if_this_changed.is_empty() || !then_this_would_need.is_empty() {
+            assert!(
+                tcx.sess.opts.unstable_opts.query_dep_graph,
+                "cannot use the `#[{}]` or `#[{}]` annotations \
+                 without supplying `-Z query-dep-graph`",
+                sym::rustc_if_this_changed,
+                sym::rustc_then_this_would_need
+            );
+        }
+
+        // Check paths.
+        check_paths(tcx, &if_this_changed, &then_this_would_need);
+    })
+}
+
+type Sources = Vec<(Span, DefId, DepNode)>;
+type Targets = Vec<(Span, Symbol, hir::HirId, DepNode)>;
+
+struct IfThisChanged<'tcx> {
+    tcx: TyCtxt<'tcx>,
+    if_this_changed: Sources,
+    then_this_would_need: Targets,
+}
+
+impl<'tcx> IfThisChanged<'tcx> {
+    fn argument(&self, attr: &ast::Attribute) -> Option<Symbol> {
+        let mut value = None;
+        for list_item in attr.meta_item_list().unwrap_or_default() {
+            match list_item.ident() {
+                Some(ident) if list_item.is_word() && value.is_none() => value = Some(ident.name),
+                _ =>
+                // FIXME better-encapsulate meta_item (don't directly access `node`)
+                {
+                    span_bug!(list_item.span(), "unexpected meta-item {:?}", list_item)
+                }
+            }
+        }
+        value
+    }
+
+    fn process_attrs(&mut self, hir_id: hir::HirId) {
+        let def_id = self.tcx.hir().local_def_id(hir_id);
+        let def_path_hash = self.tcx.def_path_hash(def_id.to_def_id());
+        let attrs = self.tcx.hir().attrs(hir_id);
+        for attr in attrs {
+            if attr.has_name(sym::rustc_if_this_changed) {
+                let dep_node_interned = self.argument(attr);
+                let dep_node = match dep_node_interned {
+                    None => {
+                        DepNode::from_def_path_hash(self.tcx, def_path_hash, DepKind::hir_owner)
+                    }
+                    Some(n) => {
+                        match DepNode::from_label_string(self.tcx, n.as_str(), def_path_hash) {
+                            Ok(n) => n,
+                            Err(()) => {
+                                self.tcx.sess.span_fatal(
+                                    attr.span,
+                                    &format!("unrecognized DepNode variant {:?}", n),
+                                );
+                            }
+                        }
+                    }
+                };
+                self.if_this_changed.push((attr.span, def_id.to_def_id(), dep_node));
+            } else if attr.has_name(sym::rustc_then_this_would_need) {
+                let dep_node_interned = self.argument(attr);
+                let dep_node = match dep_node_interned {
+                    Some(n) => {
+                        match DepNode::from_label_string(self.tcx, n.as_str(), def_path_hash) {
+                            Ok(n) => n,
+                            Err(()) => {
+                                self.tcx.sess.span_fatal(
+                                    attr.span,
+                                    &format!("unrecognized DepNode variant {:?}", n),
+                                );
+                            }
+                        }
+                    }
+                    None => {
+                        self.tcx.sess.span_fatal(attr.span, "missing DepNode variant");
+                    }
+                };
+                self.then_this_would_need.push((
+                    attr.span,
+                    dep_node_interned.unwrap(),
+                    hir_id,
+                    dep_node,
+                ));
+            }
+        }
+    }
+}
+
+impl<'tcx> Visitor<'tcx> for IfThisChanged<'tcx> {
+    type NestedFilter = nested_filter::OnlyBodies;
+
+    fn nested_visit_map(&mut self) -> Self::Map {
+        self.tcx.hir()
+    }
+
+    fn visit_item(&mut self, item: &'tcx hir::Item<'tcx>) {
+        self.process_attrs(item.hir_id());
+        intravisit::walk_item(self, item);
+    }
+
+    fn visit_trait_item(&mut self, trait_item: &'tcx hir::TraitItem<'tcx>) {
+        self.process_attrs(trait_item.hir_id());
+        intravisit::walk_trait_item(self, trait_item);
+    }
+
+    fn visit_impl_item(&mut self, impl_item: &'tcx hir::ImplItem<'tcx>) {
+        self.process_attrs(impl_item.hir_id());
+        intravisit::walk_impl_item(self, impl_item);
+    }
+
+    fn visit_field_def(&mut self, s: &'tcx hir::FieldDef<'tcx>) {
+        self.process_attrs(s.hir_id);
+        intravisit::walk_field_def(self, s);
+    }
+}
+
+fn check_paths<'tcx>(tcx: TyCtxt<'tcx>, if_this_changed: &Sources, then_this_would_need: &Targets) {
+    // Return early here so as not to construct the query, which is not cheap.
+    if if_this_changed.is_empty() {
+        for &(target_span, _, _, _) in then_this_would_need {
+            tcx.sess.span_err(target_span, "no `#[rustc_if_this_changed]` annotation detected");
+        }
+        return;
+    }
+    tcx.dep_graph.with_query(|query| {
+        for &(_, source_def_id, ref source_dep_node) in if_this_changed {
+            let dependents = query.transitive_predecessors(source_dep_node);
+            for &(target_span, ref target_pass, _, ref target_dep_node) in then_this_would_need {
+                if !dependents.contains(&target_dep_node) {
+                    tcx.sess.span_err(
+                        target_span,
+                        &format!(
+                            "no path from `{}` to `{}`",
+                            tcx.def_path_str(source_def_id),
+                            target_pass
+                        ),
+                    );
+                } else {
+                    tcx.sess.span_err(target_span, "OK");
+                }
+            }
+        }
+    });
+}
+
+fn dump_graph(query: &DepGraphQuery) {
+    let path: String = env::var("RUST_DEP_GRAPH").unwrap_or_else(|_| "dep_graph".to_string());
+
+    let nodes = match env::var("RUST_DEP_GRAPH_FILTER") {
+        Ok(string) => {
+            // Expect one of: "-> target", "source -> target", or "source ->".
+            let edge_filter =
+                EdgeFilter::new(&string).unwrap_or_else(|e| bug!("invalid filter: {}", e));
+            let sources = node_set(&query, &edge_filter.source);
+            let targets = node_set(&query, &edge_filter.target);
+            filter_nodes(&query, &sources, &targets)
+        }
+        Err(_) => query.nodes().into_iter().map(|n| n.kind).collect(),
+    };
+    let edges = filter_edges(&query, &nodes);
+
+    {
+        // dump a .txt file with just the edges:
+        let txt_path = format!("{}.txt", path);
+        let mut file = BufWriter::new(File::create(&txt_path).unwrap());
+        for &(ref source, ref target) in &edges {
+            write!(file, "{:?} -> {:?}\n", source, target).unwrap();
+        }
+    }
+
+    {
+        // dump a .dot file in graphviz format:
+        let dot_path = format!("{}.dot", path);
+        let mut v = Vec::new();
+        dot::render(&GraphvizDepGraph(nodes, edges), &mut v).unwrap();
+        fs::write(dot_path, v).unwrap();
+    }
+}
+
+#[allow(missing_docs)]
+pub struct GraphvizDepGraph(FxHashSet<DepKind>, Vec<(DepKind, DepKind)>);
+
+impl<'a> dot::GraphWalk<'a> for GraphvizDepGraph {
+    type Node = DepKind;
+    type Edge = (DepKind, DepKind);
+    fn nodes(&self) -> dot::Nodes<'_, DepKind> {
+        let nodes: Vec<_> = self.0.iter().cloned().collect();
+        nodes.into()
+    }
+    fn edges(&self) -> dot::Edges<'_, (DepKind, DepKind)> {
+        self.1[..].into()
+    }
+    fn source(&self, edge: &(DepKind, DepKind)) -> DepKind {
+        edge.0
+    }
+    fn target(&self, edge: &(DepKind, DepKind)) -> DepKind {
+        edge.1
+    }
+}
+
+impl<'a> dot::Labeller<'a> for GraphvizDepGraph {
+    type Node = DepKind;
+    type Edge = (DepKind, DepKind);
+    fn graph_id(&self) -> dot::Id<'_> {
+        dot::Id::new("DependencyGraph").unwrap()
+    }
+    fn node_id(&self, n: &DepKind) -> dot::Id<'_> {
+        let s: String = format!("{:?}", n)
+            .chars()
+            .map(|c| if c == '_' || c.is_alphanumeric() { c } else { '_' })
+            .collect();
+        debug!("n={:?} s={:?}", n, s);
+        dot::Id::new(s).unwrap()
+    }
+    fn node_label(&self, n: &DepKind) -> dot::LabelText<'_> {
+        dot::LabelText::label(format!("{:?}", n))
+    }
+}
+
+// Given an optional filter like `"x,y,z"`, returns either `None` (no
+// filter) or the set of nodes whose labels contain all of those
+// substrings.
+fn node_set<'q>(
+    query: &'q DepGraphQuery,
+    filter: &DepNodeFilter,
+) -> Option<FxHashSet<&'q DepNode>> {
+    debug!("node_set(filter={:?})", filter);
+
+    if filter.accepts_all() {
+        return None;
+    }
+
+    Some(query.nodes().into_iter().filter(|n| filter.test(n)).collect())
+}
+
+fn filter_nodes<'q>(
+    query: &'q DepGraphQuery,
+    sources: &Option<FxHashSet<&'q DepNode>>,
+    targets: &Option<FxHashSet<&'q DepNode>>,
+) -> FxHashSet<DepKind> {
+    if let Some(sources) = sources {
+        if let Some(targets) = targets {
+            walk_between(query, sources, targets)
+        } else {
+            walk_nodes(query, sources, OUTGOING)
+        }
+    } else if let Some(targets) = targets {
+        walk_nodes(query, targets, INCOMING)
+    } else {
+        query.nodes().into_iter().map(|n| n.kind).collect()
+    }
+}
+
+fn walk_nodes<'q>(
+    query: &'q DepGraphQuery,
+    starts: &FxHashSet<&'q DepNode>,
+    direction: Direction,
+) -> FxHashSet<DepKind> {
+    let mut set = FxHashSet::default();
+    for &start in starts {
+        debug!("walk_nodes: start={:?} outgoing?={:?}", start, direction == OUTGOING);
+        if set.insert(start.kind) {
+            let mut stack = vec![query.indices[start]];
+            while let Some(index) = stack.pop() {
+                for (_, edge) in query.graph.adjacent_edges(index, direction) {
+                    let neighbor_index = edge.source_or_target(direction);
+                    let neighbor = query.graph.node_data(neighbor_index);
+                    if set.insert(neighbor.kind) {
+                        stack.push(neighbor_index);
+                    }
+                }
+            }
+        }
+    }
+    set
+}
+
+fn walk_between<'q>(
+    query: &'q DepGraphQuery,
+    sources: &FxHashSet<&'q DepNode>,
+    targets: &FxHashSet<&'q DepNode>,
+) -> FxHashSet<DepKind> {
+    // This is a bit tricky. We want to include a node only if it is:
+    // (a) reachable from a source and (b) will reach a target. And we
+    // have to be careful about cycles etc. Luckily efficiency is not
+    // a big concern!
+ + #[derive(Copy, Clone, PartialEq)] + enum State { + Undecided, + Deciding, + Included, + Excluded, + } + + let mut node_states = vec![State::Undecided; query.graph.len_nodes()]; + + for &target in targets { + node_states[query.indices[target].0] = State::Included; + } + + for source in sources.iter().map(|&n| query.indices[n]) { + recurse(query, &mut node_states, source); + } + + return query + .nodes() + .into_iter() + .filter(|&n| { + let index = query.indices[n]; + node_states[index.0] == State::Included + }) + .map(|n| n.kind) + .collect(); + + fn recurse(query: &DepGraphQuery, node_states: &mut [State], node: NodeIndex) -> bool { + match node_states[node.0] { + // known to reach a target + State::Included => return true, + + // known not to reach a target + State::Excluded => return false, + + // backedge, not yet known, say false + State::Deciding => return false, + + State::Undecided => {} + } + + node_states[node.0] = State::Deciding; + + for neighbor_index in query.graph.successor_nodes(node) { + if recurse(query, node_states, neighbor_index) { + node_states[node.0] = State::Included; + } + } + + // if we didn't find a path to target, then set to excluded + if node_states[node.0] == State::Deciding { + node_states[node.0] = State::Excluded; + false + } else { + assert!(node_states[node.0] == State::Included); + true + } + } +} + +fn filter_edges<'q>( + query: &'q DepGraphQuery, + nodes: &FxHashSet<DepKind>, +) -> Vec<(DepKind, DepKind)> { + let uniq: FxHashSet<_> = query + .edges() + .into_iter() + .map(|(s, t)| (s.kind, t.kind)) + .filter(|(source, target)| nodes.contains(source) && nodes.contains(target)) + .collect(); + uniq.into_iter().collect() +} diff --git a/compiler/rustc_incremental/src/assert_module_sources.rs b/compiler/rustc_incremental/src/assert_module_sources.rs new file mode 100644 index 000000000..89d419bc8 --- /dev/null +++ b/compiler/rustc_incremental/src/assert_module_sources.rs @@ -0,0 +1,178 @@ +//! This pass is only used for UNIT TESTS related to incremental +//! compilation. It tests whether a particular `.o` file will be re-used +//! from a previous compilation or whether it must be regenerated. +//! +//! The user adds annotations to the crate of the following form: +//! +//! ``` +//! # #![feature(rustc_attrs)] +//! #![rustc_partition_reused(module="spike", cfg="rpass2")] +//! #![rustc_partition_codegened(module="spike-x", cfg="rpass2")] +//! ``` +//! +//! The first indicates (in the cfg `rpass2`) that `spike.o` will be +//! reused, the second that `spike-x.o` will be recreated. If these +//! annotations are inaccurate, errors are reported. +//! +//! The reason that we use `cfg=...` and not `#[cfg_attr]` is so that +//! the HIR doesn't change as a result of the annotations, which might +//! perturb the reuse results. +//! +//! `#![rustc_expected_cgu_reuse(module="spike", cfg="rpass2", kind="post-lto")] +//! allows for doing a more fine-grained check to see if pre- or post-lto data +//! was re-used. 
+ +use rustc_ast as ast; +use rustc_data_structures::fx::FxHashSet; +use rustc_hir::def_id::LOCAL_CRATE; +use rustc_middle::mir::mono::CodegenUnitNameBuilder; +use rustc_middle::ty::TyCtxt; +use rustc_session::cgu_reuse_tracker::*; +use rustc_span::symbol::{sym, Symbol}; + +#[allow(missing_docs)] +pub fn assert_module_sources(tcx: TyCtxt<'_>) { + tcx.dep_graph.with_ignore(|| { + if tcx.sess.opts.incremental.is_none() { + return; + } + + let available_cgus = + tcx.collect_and_partition_mono_items(()).1.iter().map(|cgu| cgu.name()).collect(); + + let ams = AssertModuleSource { tcx, available_cgus }; + + for attr in tcx.hir().attrs(rustc_hir::CRATE_HIR_ID) { + ams.check_attr(attr); + } + }) +} + +struct AssertModuleSource<'tcx> { + tcx: TyCtxt<'tcx>, + available_cgus: FxHashSet<Symbol>, +} + +impl<'tcx> AssertModuleSource<'tcx> { + fn check_attr(&self, attr: &ast::Attribute) { + let (expected_reuse, comp_kind) = if attr.has_name(sym::rustc_partition_reused) { + (CguReuse::PreLto, ComparisonKind::AtLeast) + } else if attr.has_name(sym::rustc_partition_codegened) { + (CguReuse::No, ComparisonKind::Exact) + } else if attr.has_name(sym::rustc_expected_cgu_reuse) { + match self.field(attr, sym::kind) { + sym::no => (CguReuse::No, ComparisonKind::Exact), + sym::pre_dash_lto => (CguReuse::PreLto, ComparisonKind::Exact), + sym::post_dash_lto => (CguReuse::PostLto, ComparisonKind::Exact), + sym::any => (CguReuse::PreLto, ComparisonKind::AtLeast), + other => { + self.tcx.sess.span_fatal( + attr.span, + &format!("unknown cgu-reuse-kind `{}` specified", other), + ); + } + } + } else { + return; + }; + + if !self.tcx.sess.opts.unstable_opts.query_dep_graph { + self.tcx.sess.span_fatal( + attr.span, + "found CGU-reuse attribute but `-Zquery-dep-graph` was not specified", + ); + } + + if !self.check_config(attr) { + debug!("check_attr: config does not match, ignoring attr"); + return; + } + + let user_path = self.field(attr, sym::module).to_string(); + let crate_name = self.tcx.crate_name(LOCAL_CRATE).to_string(); + + if !user_path.starts_with(&crate_name) { + let msg = format!( + "Found malformed codegen unit name `{}`. \ + Codegen units names must always start with the name of the \ + crate (`{}` in this case).", + user_path, crate_name + ); + self.tcx.sess.span_fatal(attr.span, &msg); + } + + // Split of the "special suffix" if there is one. + let (user_path, cgu_special_suffix) = if let Some(index) = user_path.rfind('.') { + (&user_path[..index], Some(&user_path[index + 1..])) + } else { + (&user_path[..], None) + }; + + let mut iter = user_path.split('-'); + + // Remove the crate name + assert_eq!(iter.next().unwrap(), crate_name); + + let cgu_path_components = iter.collect::<Vec<_>>(); + + let cgu_name_builder = &mut CodegenUnitNameBuilder::new(self.tcx); + let cgu_name = + cgu_name_builder.build_cgu_name(LOCAL_CRATE, cgu_path_components, cgu_special_suffix); + + debug!("mapping '{}' to cgu name '{}'", self.field(attr, sym::module), cgu_name); + + if !self.available_cgus.contains(&cgu_name) { + let mut cgu_names: Vec<&str> = + self.available_cgus.iter().map(|cgu| cgu.as_str()).collect(); + cgu_names.sort(); + self.tcx.sess.span_err( + attr.span, + &format!( + "no module named `{}` (mangled: {}). 
Available modules: {}", + user_path, + cgu_name, + cgu_names.join(", ") + ), + ); + } + + self.tcx.sess.cgu_reuse_tracker.set_expectation( + cgu_name, + &user_path, + attr.span, + expected_reuse, + comp_kind, + ); + } + + fn field(&self, attr: &ast::Attribute, name: Symbol) -> Symbol { + for item in attr.meta_item_list().unwrap_or_else(Vec::new) { + if item.has_name(name) { + if let Some(value) = item.value_str() { + return value; + } else { + self.tcx.sess.span_fatal( + item.span(), + &format!("associated value expected for `{}`", name), + ); + } + } + } + + self.tcx.sess.span_fatal(attr.span, &format!("no field `{}`", name)); + } + + /// Scan for a `cfg="foo"` attribute and check whether we have a + /// cfg flag called `foo`. + fn check_config(&self, attr: &ast::Attribute) -> bool { + let config = &self.tcx.sess.parse_sess.config; + let value = self.field(attr, sym::cfg); + debug!("check_config(config={:?}, value={:?})", config, value); + if config.iter().any(|&(name, _)| name == value) { + debug!("check_config: matched"); + return true; + } + debug!("check_config: no match found"); + false + } +} diff --git a/compiler/rustc_incremental/src/lib.rs b/compiler/rustc_incremental/src/lib.rs new file mode 100644 index 000000000..1e88e8091 --- /dev/null +++ b/compiler/rustc_incremental/src/lib.rs @@ -0,0 +1,30 @@ +//! Support for serializing the dep-graph and reloading it. + +#![deny(missing_docs)] +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(let_else)] +#![recursion_limit = "256"] +#![allow(rustc::potential_query_instability)] + +#[macro_use] +extern crate rustc_middle; +#[macro_use] +extern crate tracing; + +mod assert_dep_graph; +pub mod assert_module_sources; +mod persist; + +use assert_dep_graph::assert_dep_graph; +pub use persist::copy_cgu_workproduct_to_incr_comp_cache_dir; +pub use persist::delete_workproduct_files; +pub use persist::finalize_session_directory; +pub use persist::garbage_collect_session_directories; +pub use persist::in_incr_comp_dir; +pub use persist::in_incr_comp_dir_sess; +pub use persist::load_query_result_cache; +pub use persist::prepare_session_directory; +pub use persist::save_dep_graph; +pub use persist::save_work_product_index; +pub use persist::LoadResult; +pub use persist::{build_dep_graph, load_dep_graph, DepGraphFuture}; diff --git a/compiler/rustc_incremental/src/persist/README.md b/compiler/rustc_incremental/src/persist/README.md new file mode 100644 index 000000000..b01fe219e --- /dev/null +++ b/compiler/rustc_incremental/src/persist/README.md @@ -0,0 +1,3 @@ +For info on how the incremental compilation works, see the [rustc dev guide]. + +[rustc dev guide]: https://rustc-dev-guide.rust-lang.org/query.html diff --git a/compiler/rustc_incremental/src/persist/data.rs b/compiler/rustc_incremental/src/persist/data.rs new file mode 100644 index 000000000..81e541097 --- /dev/null +++ b/compiler/rustc_incremental/src/persist/data.rs @@ -0,0 +1,13 @@ +//! The data that we will serialize and deserialize. 
+
+use rustc_macros::{Decodable, Encodable};
+use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
+
+#[derive(Debug, Encodable, Decodable)]
+pub struct SerializedWorkProduct {
+    /// node that produced the work-product
+    pub id: WorkProductId,
+
+    /// work-product data itself
+    pub work_product: WorkProduct,
+}
diff --git a/compiler/rustc_incremental/src/persist/dirty_clean.rs b/compiler/rustc_incremental/src/persist/dirty_clean.rs
new file mode 100644
index 000000000..710c4a01b
--- /dev/null
+++ b/compiler/rustc_incremental/src/persist/dirty_clean.rs
@@ -0,0 +1,480 @@
+//! Debugging code to test fingerprints computed for query results. For each node marked with
+//! `#[rustc_clean]` we will compare the fingerprint from the current and from the previous
+//! compilation session as appropriate:
+//!
+//! - `#[rustc_clean(cfg="rev2", except="typeck")]` if we are
+//!   in `#[cfg(rev2)]`, then the fingerprints associated with
+//!   `DepNode::typeck(X)` must be DIFFERENT (`X` is the `DefId` of the
+//!   current node).
+//! - `#[rustc_clean(cfg="rev2")]` same as above, except that the
+//!   fingerprints must be the SAME (along with all other fingerprints).
+//! - `#[rustc_clean(cfg="rev2", loaded_from_disk="typeck")]` asserts that
+//!   the query result for `DepNode::typeck(X)` was actually
+//!   loaded from disk (not just marked green). This can be useful
+//!   to ensure that a test is actually exercising the deserialization
+//!   logic for a particular query result. This can be combined with
+//!   `except`.
+//!
+//! Errors are reported if we are in the suitable configuration but
+//! the required condition is not met.
+
+use rustc_ast::{self as ast, Attribute, NestedMetaItem};
+use rustc_data_structures::fx::FxHashSet;
+use rustc_hir::def_id::LocalDefId;
+use rustc_hir::intravisit;
+use rustc_hir::Node as HirNode;
+use rustc_hir::{ImplItemKind, ItemKind as HirItem, TraitItemKind};
+use rustc_middle::dep_graph::{label_strs, DepNode, DepNodeExt};
+use rustc_middle::hir::nested_filter;
+use rustc_middle::ty::TyCtxt;
+use rustc_span::symbol::{sym, Symbol};
+use rustc_span::Span;
+use std::iter::FromIterator;
+use std::vec::Vec;
+
+const LOADED_FROM_DISK: Symbol = sym::loaded_from_disk;
+const EXCEPT: Symbol = sym::except;
+const CFG: Symbol = sym::cfg;
+
+// Base and Extra labels to build up the labels
+
+/// For typedef, constants, and statics
+const BASE_CONST: &[&str] = &[label_strs::type_of];
+
+/// DepNodes for functions + methods
+const BASE_FN: &[&str] = &[
+    // Callers will depend on the signature of these items, so we better test
+    label_strs::fn_sig,
+    label_strs::generics_of,
+    label_strs::predicates_of,
+    label_strs::type_of,
+    // And a big part of compilation (that we eventually want to cache) is type inference
+    // information:
+    label_strs::typeck,
+];
+
+/// DepNodes for Hir, which is pretty much everything
+const BASE_HIR: &[&str] = &[
+    // hir_owner and hir_owner_nodes should be computed for all nodes
+    label_strs::hir_owner,
+    label_strs::hir_owner_nodes,
+];
+
+/// `impl` implementation of struct/trait
+const BASE_IMPL: &[&str] =
+    &[label_strs::associated_item_def_ids, label_strs::generics_of, label_strs::impl_trait_ref];
+
+/// DepNodes for mir_built/Optimized, which is relevant in "executable"
+/// code, i.e., functions+methods
+const BASE_MIR: &[&str] = &[label_strs::optimized_mir, label_strs::promoted_mir];
+
+/// Struct, Enum and Union DepNodes
+///
+/// Note that changing the type of a field does not change the type of the struct or enum, but
+/// adding/removing fields or changing a field's name or visibility does.
+const BASE_STRUCT: &[&str] =
+    &[label_strs::generics_of, label_strs::predicates_of, label_strs::type_of];
+
+/// Trait definition `DepNode`s.
+/// Extra `DepNode`s for functions and methods.
+const EXTRA_ASSOCIATED: &[&str] = &[label_strs::associated_item];
+
+const EXTRA_TRAIT: &[&str] = &[];
+
+// Fully Built Labels
+
+const LABELS_CONST: &[&[&str]] = &[BASE_HIR, BASE_CONST];
+
+/// Constant/Typedef in an impl
+const LABELS_CONST_IN_IMPL: &[&[&str]] = &[BASE_HIR, BASE_CONST, EXTRA_ASSOCIATED];
+
+/// Trait-Const/Typedef DepNodes
+const LABELS_CONST_IN_TRAIT: &[&[&str]] = &[BASE_HIR, BASE_CONST, EXTRA_ASSOCIATED, EXTRA_TRAIT];
+
+/// Function `DepNode`s.
+const LABELS_FN: &[&[&str]] = &[BASE_HIR, BASE_MIR, BASE_FN];
+
+/// Method `DepNode`s.
+const LABELS_FN_IN_IMPL: &[&[&str]] = &[BASE_HIR, BASE_MIR, BASE_FN, EXTRA_ASSOCIATED];
+
+/// Trait method `DepNode`s.
+const LABELS_FN_IN_TRAIT: &[&[&str]] =
+    &[BASE_HIR, BASE_MIR, BASE_FN, EXTRA_ASSOCIATED, EXTRA_TRAIT];
+
+/// For generic cases like inline-assembly, modules, etc.
+const LABELS_HIR_ONLY: &[&[&str]] = &[BASE_HIR];
+
+/// Trait `DepNode`s.
+const LABELS_TRAIT: &[&[&str]] = &[
+    BASE_HIR,
+    &[label_strs::associated_item_def_ids, label_strs::predicates_of, label_strs::generics_of],
+];
+
+/// Impl `DepNode`s.
+const LABELS_IMPL: &[&[&str]] = &[BASE_HIR, BASE_IMPL];
+
+/// Abstract data type (struct, enum, union) `DepNode`s.
+const LABELS_ADT: &[&[&str]] = &[BASE_HIR, BASE_STRUCT];
+
+// FIXME: Struct/Enum/Unions Fields (there is currently no way to attach these)
+//
+// Fields are kind of separate from their containers, as they can change independently from
+// them. We should at least check
+//
+//     type_of for these.
+
+type Labels = FxHashSet<String>;
+
+/// Represents the requested configuration by rustc_clean/dirty
+struct Assertion {
+    clean: Labels,
+    dirty: Labels,
+    loaded_from_disk: Labels,
+}
+
+pub fn check_dirty_clean_annotations(tcx: TyCtxt<'_>) {
+    if !tcx.sess.opts.unstable_opts.query_dep_graph {
+        return;
+    }
+
+    // can't add `#[rustc_clean]` etc without opting in to this feature
+    if !tcx.features().rustc_attrs {
+        return;
+    }
+
+    tcx.dep_graph.with_ignore(|| {
+        let mut dirty_clean_visitor = DirtyCleanVisitor { tcx, checked_attrs: Default::default() };
+
+        let crate_items = tcx.hir_crate_items(());
+
+        for id in crate_items.items() {
+            dirty_clean_visitor.check_item(id.def_id);
+        }
+
+        for id in crate_items.trait_items() {
+            dirty_clean_visitor.check_item(id.def_id);
+        }
+
+        for id in crate_items.impl_items() {
+            dirty_clean_visitor.check_item(id.def_id);
+        }
+
+        for id in crate_items.foreign_items() {
+            dirty_clean_visitor.check_item(id.def_id);
+        }
+
+        let mut all_attrs = FindAllAttrs { tcx, found_attrs: vec![] };
+        tcx.hir().walk_attributes(&mut all_attrs);
+
+        // Note that we cannot use the existing "unused attribute"-infrastructure
+        // here, since that is running before codegen. This is also the reason why
+        // all codegen-specific attributes are `AssumedUsed` in rustc_ast::feature_gate.
+        all_attrs.report_unchecked_attrs(dirty_clean_visitor.checked_attrs);
+    })
+}
+
+pub struct DirtyCleanVisitor<'tcx> {
+    tcx: TyCtxt<'tcx>,
+    checked_attrs: FxHashSet<ast::AttrId>,
+}
+
+impl<'tcx> DirtyCleanVisitor<'tcx> {
+    /// Possibly "deserialize" the attribute into a clean/dirty assertion
+    fn assertion_maybe(&mut self, item_id: LocalDefId, attr: &Attribute) -> Option<Assertion> {
+        assert!(attr.has_name(sym::rustc_clean));
+        if !check_config(self.tcx, attr) {
+            // skip: not the correct `cfg=`
+            return None;
+        }
+        let assertion = self.assertion_auto(item_id, attr);
+        Some(assertion)
+    }
+
+    /// Gets the "auto" assertion on pre-validated attr, along with the `except` labels.
+    fn assertion_auto(&mut self, item_id: LocalDefId, attr: &Attribute) -> Assertion {
+        let (name, mut auto) = self.auto_labels(item_id, attr);
+        let except = self.except(attr);
+        let loaded_from_disk = self.loaded_from_disk(attr);
+        for e in except.iter() {
+            if !auto.remove(e) {
+                let msg = format!(
+                    "`except` specified DepNodes that can not be affected for \"{}\": \"{}\"",
+                    name, e
+                );
+                self.tcx.sess.span_fatal(attr.span, &msg);
+            }
+        }
+        Assertion { clean: auto, dirty: except, loaded_from_disk }
+    }
+
+    /// `loaded_from_disk=` attribute value
+    fn loaded_from_disk(&self, attr: &Attribute) -> Labels {
+        for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
+            if item.has_name(LOADED_FROM_DISK) {
+                let value = expect_associated_value(self.tcx, &item);
+                return self.resolve_labels(&item, value);
+            }
+        }
+        // If `loaded_from_disk=` is not specified, don't assert anything
+        Labels::default()
+    }
+
+    /// `except=` attribute value
+    fn except(&self, attr: &Attribute) -> Labels {
+        for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
+            if item.has_name(EXCEPT) {
+                let value = expect_associated_value(self.tcx, &item);
+                return self.resolve_labels(&item, value);
+            }
+        }
+        // if no `label` or `except` is given, only the node's group are asserted
+        Labels::default()
+    }
+
+    /// Return all DepNode labels that should be asserted for this item.
+    /// index=0 is the "name" used for error messages
+    fn auto_labels(&mut self, item_id: LocalDefId, attr: &Attribute) -> (&'static str, Labels) {
+        let node = self.tcx.hir().get_by_def_id(item_id);
+        let (name, labels) = match node {
+            HirNode::Item(item) => {
+                match item.kind {
+                    // note: these are in the same order as hir::Item_;
+                    // FIXME(michaelwoerister): do commented out ones
+
+                    // // An `extern crate` item, with optional original crate name,
+                    // HirItem::ExternCrate(..), // intentionally no assertions
+
+                    // // `use foo::bar::*;` or `use foo::bar::baz as quux;`
+                    // HirItem::Use(..), // intentionally no assertions
+
+                    // A `static` item
+                    HirItem::Static(..) => ("ItemStatic", LABELS_CONST),
+
+                    // A `const` item
+                    HirItem::Const(..) => ("ItemConst", LABELS_CONST),
+
+                    // A function declaration
+                    HirItem::Fn(..) => ("ItemFn", LABELS_FN),
+
+                    // // A module
+                    HirItem::Mod(..) => ("ItemMod", LABELS_HIR_ONLY),
+
+                    // // An external module
+                    HirItem::ForeignMod { .. } => ("ItemForeignMod", LABELS_HIR_ONLY),
+
+                    // Module-level inline assembly (from global_asm!)
+                    HirItem::GlobalAsm(..) => ("ItemGlobalAsm", LABELS_HIR_ONLY),
+
+                    // A type alias, e.g., `type Foo = Bar<u8>`
+                    HirItem::TyAlias(..) => ("ItemTy", LABELS_HIR_ONLY),
+
+                    // An enum definition, e.g., `enum Foo<A, B> {C<A>, D<B>}`
+                    HirItem::Enum(..) => ("ItemEnum", LABELS_ADT),
+
+                    // A struct definition, e.g., `struct Foo<A> {x: A}`
+                    HirItem::Struct(..) => ("ItemStruct", LABELS_ADT),
+
+                    // A union definition, e.g., `union Foo<A, B> {x: A, y: B}`
+                    HirItem::Union(..) => ("ItemUnion", LABELS_ADT),
+
+                    // Represents a Trait Declaration
+                    HirItem::Trait(..) => ("ItemTrait", LABELS_TRAIT),
+
+                    // An implementation, e.g., `impl<A> Trait for Foo { .. }`
+                    HirItem::Impl { .. } => ("ItemKind::Impl", LABELS_IMPL),
+
+                    _ => self.tcx.sess.span_fatal(
+                        attr.span,
+                        &format!(
+                            "clean/dirty auto-assertions not yet defined \
+                             for Node::Item.node={:?}",
+                            item.kind
+                        ),
+                    ),
+                }
+            }
+            HirNode::TraitItem(item) => match item.kind {
+                TraitItemKind::Fn(..) => ("Node::TraitItem", LABELS_FN_IN_TRAIT),
+                TraitItemKind::Const(..) => ("NodeTraitConst", LABELS_CONST_IN_TRAIT),
+                TraitItemKind::Type(..) => ("NodeTraitType", LABELS_CONST_IN_TRAIT),
+            },
+            HirNode::ImplItem(item) => match item.kind {
+                ImplItemKind::Fn(..) => ("Node::ImplItem", LABELS_FN_IN_IMPL),
+                ImplItemKind::Const(..) => ("NodeImplConst", LABELS_CONST_IN_IMPL),
+                ImplItemKind::TyAlias(..) => ("NodeImplType", LABELS_CONST_IN_IMPL),
+            },
+            _ => self.tcx.sess.span_fatal(
+                attr.span,
+                &format!("clean/dirty auto-assertions not yet defined for {:?}", node),
+            ),
+        };
+        let labels =
+            Labels::from_iter(labels.iter().flat_map(|s| s.iter().map(|l| (*l).to_string())));
+        (name, labels)
+    }
+
+    fn resolve_labels(&self, item: &NestedMetaItem, value: Symbol) -> Labels {
+        let mut out = Labels::default();
+        for label in value.as_str().split(',') {
+            let label = label.trim();
+            if DepNode::has_label_string(label) {
+                if out.contains(label) {
+                    self.tcx.sess.span_fatal(
+                        item.span(),
+                        &format!("dep-node label `{}` is repeated", label),
+                    );
+                }
+                out.insert(label.to_string());
+            } else {
+                self.tcx
+                    .sess
+                    .span_fatal(item.span(), &format!("dep-node label `{}` not recognized", label));
+            }
+        }
+        out
+    }
+
+    fn dep_node_str(&self, dep_node: &DepNode) -> String {
+        if let Some(def_id) = dep_node.extract_def_id(self.tcx) {
+            format!("{:?}({})", dep_node.kind, self.tcx.def_path_str(def_id))
+        } else {
+            format!("{:?}({:?})", dep_node.kind, dep_node.hash)
+        }
+    }
+
+    fn assert_dirty(&self, item_span: Span, dep_node: DepNode) {
+        debug!("assert_dirty({:?})", dep_node);
+
+        if self.tcx.dep_graph.is_green(&dep_node) {
+            let dep_node_str = self.dep_node_str(&dep_node);
+            self.tcx
+                .sess
+                .span_err(item_span, &format!("`{}` should be dirty but is not", dep_node_str));
+        }
+    }
+
+    fn assert_clean(&self, item_span: Span, dep_node: DepNode) {
+        debug!("assert_clean({:?})", dep_node);
+
+        if self.tcx.dep_graph.is_red(&dep_node) {
+            let dep_node_str = self.dep_node_str(&dep_node);
+            self.tcx
+                .sess
+                .span_err(item_span, &format!("`{}` should be clean but is not", dep_node_str));
+        }
+    }
+
+    fn assert_loaded_from_disk(&self, item_span: Span, dep_node: DepNode) {
+        debug!("assert_loaded_from_disk({:?})", dep_node);
+
+        if !self.tcx.dep_graph.debug_was_loaded_from_disk(dep_node) {
+            let dep_node_str = self.dep_node_str(&dep_node);
+            self.tcx.sess.span_err(
+                item_span,
+                &format!("`{}` should have been loaded from disk but it was not", dep_node_str),
+            );
+        }
+    }
+
+    fn check_item(&mut self, item_id: LocalDefId) {
+        let item_span = self.tcx.def_span(item_id.to_def_id());
+        let def_path_hash = self.tcx.def_path_hash(item_id.to_def_id());
+        for attr in self.tcx.get_attrs(item_id.to_def_id(), sym::rustc_clean) {
+            let Some(assertion) = self.assertion_maybe(item_id, attr) else {
+                continue;
+            };
+            self.checked_attrs.insert(attr.id);
+            for label in assertion.clean {
+                let dep_node = DepNode::from_label_string(self.tcx, &label, def_path_hash).unwrap();
+                self.assert_clean(item_span, dep_node);
+            }
+            for label in assertion.dirty {
+                let dep_node = DepNode::from_label_string(self.tcx, &label, def_path_hash).unwrap();
+                self.assert_dirty(item_span, dep_node);
+            }
+            for label in assertion.loaded_from_disk {
+                let dep_node = DepNode::from_label_string(self.tcx, &label, def_path_hash).unwrap();
+                self.assert_loaded_from_disk(item_span, dep_node);
+            }
+        }
+    }
+}
+
+/// Given a `#[rustc_clean]` attribute, scan for a `cfg="foo"` attribute and check whether we have
+/// a cfg flag called `foo`.
+fn check_config(tcx: TyCtxt<'_>, attr: &Attribute) -> bool {
+    debug!("check_config(attr={:?})", attr);
+    let config = &tcx.sess.parse_sess.config;
+    debug!("check_config: config={:?}", config);
+    let mut cfg = None;
+    for item in attr.meta_item_list().unwrap_or_else(Vec::new) {
+        if item.has_name(CFG) {
+            let value = expect_associated_value(tcx, &item);
+            debug!("check_config: searching for cfg {:?}", value);
+            cfg = Some(config.contains(&(value, None)));
+        } else if !(item.has_name(EXCEPT) || item.has_name(LOADED_FROM_DISK)) {
+            tcx.sess.span_err(attr.span, &format!("unknown item `{}`", item.name_or_empty()));
+        }
+    }
+
+    match cfg {
+        None => tcx.sess.span_fatal(attr.span, "no cfg attribute"),
+        Some(c) => c,
+    }
+}
+
+fn expect_associated_value(tcx: TyCtxt<'_>, item: &NestedMetaItem) -> Symbol {
+    if let Some(value) = item.value_str() {
+        value
+    } else {
+        let msg = if let Some(ident) = item.ident() {
+            format!("associated value expected for `{}`", ident)
+        } else {
+            "expected an associated value".to_string()
+        };
+
+        tcx.sess.span_fatal(item.span(), &msg);
+    }
+}
+
+// A visitor that collects all #[rustc_clean] attributes from
+// the HIR. It is used to verify that we really ran checks for all annotated
+// nodes.
+pub struct FindAllAttrs<'tcx> {
+    tcx: TyCtxt<'tcx>,
+    found_attrs: Vec<&'tcx Attribute>,
+}
+
+impl<'tcx> FindAllAttrs<'tcx> {
+    fn is_active_attr(&mut self, attr: &Attribute) -> bool {
+        if attr.has_name(sym::rustc_clean) && check_config(self.tcx, attr) {
+            return true;
+        }
+
+        false
+    }
+
+    fn report_unchecked_attrs(&self, mut checked_attrs: FxHashSet<ast::AttrId>) {
+        for attr in &self.found_attrs {
+            if !checked_attrs.contains(&attr.id) {
+                self.tcx.sess.span_err(attr.span, "found unchecked `#[rustc_clean]` attribute");
+                checked_attrs.insert(attr.id);
+            }
+        }
+    }
+}
+
+impl<'tcx> intravisit::Visitor<'tcx> for FindAllAttrs<'tcx> {
+    type NestedFilter = nested_filter::All;
+
+    fn nested_visit_map(&mut self) -> Self::Map {
+        self.tcx.hir()
+    }
+
+    fn visit_attribute(&mut self, attr: &'tcx Attribute) {
+        if self.is_active_attr(attr) {
+            self.found_attrs.push(attr);
+        }
+    }
+}
diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs
new file mode 100644
index 000000000..2dbd4b6bc
--- /dev/null
+++ b/compiler/rustc_incremental/src/persist/file_format.rs
@@ -0,0 +1,195 @@
+//! This module defines a generic file format that allows checking whether a given
+//! file generated by incremental compilation was generated by a compatible
+//! compiler version. This file format is used for the on-disk version of the
+//! dependency graph and the exported metadata hashes.
+//!
+//! In practice "compatible compiler version" means "exactly the same compiler
+//! version", since the header encodes the git commit hash of the compiler.
+//! Since we can always just ignore the incremental compilation cache and
+//! compiler versions don't change frequently for the typical user, being
+//! conservative here practically has no downside.
+
+use std::env;
+use std::fs;
+use std::io::{self, Read};
+use std::path::{Path, PathBuf};
+
+use rustc_data_structures::memmap::Mmap;
+use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
+use rustc_serialize::Encoder;
+use rustc_session::Session;
+
+/// The first few bytes of files generated by incremental compilation.
+const FILE_MAGIC: &[u8] = b"RSIC";
+
+/// Change this if the header format changes.
+const HEADER_FORMAT_VERSION: u16 = 0;
+
+/// A version string that hopefully is always different for compiler versions
+/// with different encodings of incremental compilation artifacts. Contains
+/// the Git commit hash.
+const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");
+
+pub(crate) fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) {
+    stream.emit_raw_bytes(FILE_MAGIC);
+    stream
+        .emit_raw_bytes(&[(HEADER_FORMAT_VERSION >> 0) as u8, (HEADER_FORMAT_VERSION >> 8) as u8]);
+
+    let rustc_version = rustc_version(nightly_build);
+    assert_eq!(rustc_version.len(), (rustc_version.len() as u8) as usize);
+    stream.emit_raw_bytes(&[rustc_version.len() as u8]);
+    stream.emit_raw_bytes(rustc_version.as_bytes());
+}
+
+pub(crate) fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
+where
+    F: FnOnce(FileEncoder) -> FileEncodeResult,
+{
+    debug!("save: storing data in {}", path_buf.display());
+
+    // Delete the old file, if any.
+    // Note: It's important that we actually delete the old file and not just
+    // truncate and overwrite it, since it might be a shared hard-link, the
+    // underlying data of which we don't want to modify.
+    //
+    // We have to ensure we have dropped the memory maps to this file
+    // before performing this removal.
+    match fs::remove_file(&path_buf) {
+        Ok(()) => {
+            debug!("save: remove old file");
+        }
+        Err(err) if err.kind() == io::ErrorKind::NotFound => (),
+        Err(err) => {
+            sess.err(&format!(
+                "unable to delete old {} at `{}`: {}",
+                name,
+                path_buf.display(),
+                err
+            ));
+            return;
+        }
+    }
+
+    let mut encoder = match FileEncoder::new(&path_buf) {
+        Ok(encoder) => encoder,
+        Err(err) => {
+            sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
+            return;
+        }
+    };
+
+    write_file_header(&mut encoder, sess.is_nightly_build());
+
+    match encode(encoder) {
+        Ok(position) => {
+            sess.prof.artifact_size(
+                &name.replace(' ', "_"),
+                path_buf.file_name().unwrap().to_string_lossy(),
+                position as u64,
+            );
+            debug!("save: data written to disk successfully");
+        }
+        Err(err) => {
+            sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
+        }
+    }
+}
+
+/// Reads the contents of a file with a file header as defined in this module.
+///
+/// - Returns `Ok(Some(data, pos))` if the file existed and was generated by a
+///   compatible compiler version. `data` is the entire contents of the file
+///   and `pos` points to the first byte after the header.
+/// - Returns `Ok(None)` if the file did not exist or was generated by an
+///   incompatible version of the compiler.
+/// - Returns `Err(..)` if some kind of IO error occurred while reading the
+///   file.
+pub fn read_file(
+    report_incremental_info: bool,
+    path: &Path,
+    nightly_build: bool,
+) -> io::Result<Option<(Mmap, usize)>> {
+    let file = match fs::File::open(path) {
+        Ok(file) => file,
+        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
+        Err(err) => return Err(err),
+    };
+    // SAFETY: This process must not modify nor remove the backing file while the memory map lives.
+    // For the dep-graph and the work product index, it is as soon as the decoding is done.
+    // For the query result cache, the memory map is dropped in save_dep_graph before calling
+    // save_in and trying to remove the backing file.
+    //
+    // There is no way to prevent another process from modifying this file.
+    let mmap = unsafe { Mmap::map(file) }?;
+
+    let mut file = io::Cursor::new(&*mmap);
+
+    // Check FILE_MAGIC
+    {
+        debug_assert!(FILE_MAGIC.len() == 4);
+        let mut file_magic = [0u8; 4];
+        file.read_exact(&mut file_magic)?;
+        if file_magic != FILE_MAGIC {
+            report_format_mismatch(report_incremental_info, path, "Wrong FILE_MAGIC");
+            return Ok(None);
+        }
+    }
+
+    // Check HEADER_FORMAT_VERSION
+    {
+        debug_assert!(::std::mem::size_of_val(&HEADER_FORMAT_VERSION) == 2);
+        let mut header_format_version = [0u8; 2];
+        file.read_exact(&mut header_format_version)?;
+        let header_format_version =
+            (header_format_version[0] as u16) | ((header_format_version[1] as u16) << 8);
+
+        if header_format_version != HEADER_FORMAT_VERSION {
+            report_format_mismatch(report_incremental_info, path, "Wrong HEADER_FORMAT_VERSION");
+            return Ok(None);
+        }
+    }
+
+    // Check RUSTC_VERSION
+    {
+        let mut rustc_version_str_len = [0u8; 1];
+        file.read_exact(&mut rustc_version_str_len)?;
+        let rustc_version_str_len = rustc_version_str_len[0] as usize;
+        let mut buffer = vec![0; rustc_version_str_len];
+        file.read_exact(&mut buffer)?;
+
+        if buffer != rustc_version(nightly_build).as_bytes() {
+            report_format_mismatch(report_incremental_info, path, "Different compiler version");
+            return Ok(None);
+        }
+    }
+
+    let post_header_start_pos = file.position() as usize;
+    Ok(Some((mmap, post_header_start_pos)))
+}
+
+fn report_format_mismatch(report_incremental_info: bool, file: &Path, message: &str) {
+    debug!("read_file: {}", message);
+
+    if report_incremental_info {
+        eprintln!(
+            "[incremental] ignoring cache artifact `{}`: {}",
+            file.file_name().unwrap().to_string_lossy(),
+            message
+        );
+    }
+}
+
+fn rustc_version(nightly_build: bool) -> String {
+    if nightly_build {
+        if let Some(val) = env::var_os("RUSTC_FORCE_RUSTC_VERSION") {
+            return val.to_string_lossy().into_owned();
+        }
+    }
+
+    RUSTC_VERSION
+        .expect(
+            "Cannot use rustc without explicit version for \
+             incremental compilation",
+        )
+        .to_string()
+}
diff --git a/compiler/rustc_incremental/src/persist/fs.rs b/compiler/rustc_incremental/src/persist/fs.rs
new file mode 100644
index 000000000..25c1b2e1c
--- /dev/null
+++ b/compiler/rustc_incremental/src/persist/fs.rs
@@ -0,0 +1,984 @@
+//! This module manages how the incremental compilation cache is represented in
+//! the file system.
+//!
+//! Incremental compilation caches are managed according to a copy-on-write
+//! strategy: Once a complete, consistent cache version is finalized, it is
+//! never modified. Instead, when a subsequent compilation session is started,
+//! the compiler will allocate a new version of the cache that starts out as
+//! a copy of the previous version. Then only this new copy is modified and it
+//! will not be visible to other processes until it is finalized. This ensures
that multiple compiler processes can be executed concurrently for the same +//! crate without interfering with each other or blocking each other. +//! +//! More concretely this is implemented via the following protocol: +//! +//! 1. For a newly started compilation session, the compiler allocates a +//! new `session` directory within the incremental compilation directory. +//! This session directory will have a unique name that ends with the suffix +//! "-working" and that contains a creation timestamp. +//! 2. Next, the compiler looks for the newest finalized session directory, +//! that is, a session directory from a previous compilation session that +//! has been marked as valid and consistent. A session directory is +//! considered finalized if the "-working" suffix in the directory name has +//! been replaced by the SVH of the crate. +//! 3. Once the compiler has found a valid, finalized session directory, it will +//! hard-link/copy its contents into the new "-working" directory. If all +//! goes well, it will have its own, private copy of the source directory and +//! subsequently not have to worry about synchronizing with other compiler +//! processes. +//! 4. Now the compiler can do its normal compilation process, which involves +//! reading and updating its private session directory. +//! 5. When compilation finishes without errors, the private session directory +//! will be in a state where it can be used as input for other compilation +//! sessions. That is, it will contain a dependency graph and cache artifacts +//! that are consistent with the state of the source code it was compiled +//! from, with no need to change them ever again. At this point, the compiler +//! finalizes and "publishes" its private session directory by renaming it +//! from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}". +//! 6. At this point the "old" session directory that we copied our data from +//! at the beginning of the session has become obsolete because we have just +//! published a more current version. Thus the compiler will delete it. +//! +//! ## Garbage Collection +//! +//! Naively following the above protocol might lead to old session directories +//! piling up if a compiler instance crashes for some reason before its able to +//! remove its private session directory. In order to avoid wasting disk space, +//! the compiler also does some garbage collection each time it is started in +//! incremental compilation mode. Specifically, it will scan the incremental +//! compilation directory for private session directories that are not in use +//! any more and will delete those. It will also delete any finalized session +//! directories for a given crate except for the most recent one. +//! +//! ## Synchronization +//! +//! There is some synchronization needed in order for the compiler to be able to +//! determine whether a given private session directory is not in used any more. +//! This is done by creating a lock file for each session directory and +//! locking it while the directory is still being used. Since file locks have +//! operating system support, we can rely on the lock being released if the +//! compiler process dies for some unexpected reason. Thus, when garbage +//! collecting private session directories, the collecting process can determine +//! whether the directory is still in use by trying to acquire a lock on the +//! file. If locking the file fails, the original process must still be alive. +//! 
If locking the file succeeds, we know that the owning process is not alive +//! any more and we can safely delete the directory. +//! There is still a small time window between the original process creating the +//! lock file and actually locking it. In order to minimize the chance that +//! another process tries to acquire the lock in just that instance, only +//! session directories that are older than a few seconds are considered for +//! garbage collection. +//! +//! Another case that has to be considered is what happens if one process +//! deletes a finalized session directory that another process is currently +//! trying to copy from. This case is also handled via the lock file. Before +//! a process starts copying a finalized session directory, it will acquire a +//! shared lock on the directory's lock file. Any garbage collecting process, +//! on the other hand, will acquire an exclusive lock on the lock file. +//! Thus, if a directory is being collected, any reader process will fail +//! acquiring the shared lock and will leave the directory alone. Conversely, +//! if a collecting process can't acquire the exclusive lock because the +//! directory is currently being read from, it will leave collecting that +//! directory to another process at a later point in time. +//! The exact same scheme is also used when reading the metadata hashes file +//! from an extern crate. When a crate is compiled, the hash values of its +//! metadata are stored in a file in its session directory. When the +//! compilation session of another crate imports the first crate's metadata, +//! it also has to read in the accompanying metadata hashes. It thus will access +//! the finalized session directory of all crates it links to and while doing +//! so, it will also place a read lock on that the respective session directory +//! so that it won't be deleted while the metadata hashes are loaded. +//! +//! ## Preconditions +//! +//! This system relies on two features being available in the file system in +//! order to work really well: file locking and hard linking. +//! If hard linking is not available (like on FAT) the data in the cache +//! actually has to be copied at the beginning of each session. +//! If file locking does not work reliably (like on NFS), some of the +//! synchronization will go haywire. +//! In both cases we recommend to locate the incremental compilation directory +//! on a file system that supports these things. +//! It might be a good idea though to try and detect whether we are on an +//! unsupported file system and emit a warning in that case. This is not yet +//! implemented. + +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::svh::Svh; +use rustc_data_structures::{base_n, flock}; +use rustc_errors::ErrorGuaranteed; +use rustc_fs_util::{link_or_copy, LinkOrCopy}; +use rustc_session::{Session, StableCrateId}; + +use std::fs as std_fs; +use std::io::{self, ErrorKind}; +use std::mem; +use std::path::{Path, PathBuf}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use rand::{thread_rng, RngCore}; + +#[cfg(test)] +mod tests; + +const LOCK_FILE_EXT: &str = ".lock"; +const DEP_GRAPH_FILENAME: &str = "dep-graph.bin"; +const STAGING_DEP_GRAPH_FILENAME: &str = "dep-graph.part.bin"; +const WORK_PRODUCTS_FILENAME: &str = "work-products.bin"; +const QUERY_CACHE_FILENAME: &str = "query-cache.bin"; + +// We encode integers using the following base, so they are shorter than decimal +// or hexadecimal numbers (we want short file and directory names). 
Since these +// numbers will be used in file names, we choose an encoding that is not +// case-sensitive (as opposed to base64, for example). +const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE; + +/// Returns the path to a session's dependency graph. +pub fn dep_graph_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME) +} +/// Returns the path to a session's staging dependency graph. +/// +/// On the difference between dep-graph and staging dep-graph, +/// see `build_dep_graph`. +pub fn staging_dep_graph_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, STAGING_DEP_GRAPH_FILENAME) +} +pub fn work_products_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME) +} +/// Returns the path to a session's query cache. +pub fn query_cache_path(sess: &Session) -> PathBuf { + in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME) +} + +/// Locks a given session directory. +pub fn lock_file_path(session_dir: &Path) -> PathBuf { + let crate_dir = session_dir.parent().unwrap(); + + let directory_name = session_dir.file_name().unwrap().to_string_lossy(); + assert_no_characters_lost(&directory_name); + + let dash_indices: Vec<_> = directory_name.match_indices('-').map(|(idx, _)| idx).collect(); + if dash_indices.len() != 3 { + bug!( + "Encountered incremental compilation session directory with \ + malformed name: {}", + session_dir.display() + ) + } + + crate_dir.join(&directory_name[0..dash_indices[2]]).with_extension(&LOCK_FILE_EXT[1..]) +} + +/// Returns the path for a given filename within the incremental compilation directory +/// in the current session. +pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf { + in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name) +} + +/// Returns the path for a given filename within the incremental compilation directory, +/// not necessarily from the current session. +/// +/// To ensure the file is part of the current session, use [`in_incr_comp_dir_sess`]. +pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf { + incr_comp_session_dir.join(file_name) +} + +/// Allocates the private session directory. +/// +/// If the result of this function is `Ok`, we have a valid incremental +/// compilation session directory. A valid session +/// directory is one that contains a locked lock file. It may or may not contain +/// a dep-graph and work products from a previous session. +/// +/// This always attempts to load a dep-graph from the directory. +/// If loading fails for some reason, we fallback to a disabled `DepGraph`. +/// See [`rustc_interface::queries::dep_graph`]. +/// +/// If this function returns an error, it may leave behind an invalid session directory. +/// The garbage collection will take care of it. 
+///
+/// [`rustc_interface::queries::dep_graph`]: ../../rustc_interface/struct.Queries.html#structfield.dep_graph
+pub fn prepare_session_directory(
+    sess: &Session,
+    crate_name: &str,
+    stable_crate_id: StableCrateId,
+) -> Result<(), ErrorGuaranteed> {
+    if sess.opts.incremental.is_none() {
+        return Ok(());
+    }
+
+    let _timer = sess.timer("incr_comp_prepare_session_directory");
+
+    debug!("prepare_session_directory");
+
+    // {incr-comp-dir}/{crate-name-and-disambiguator}
+    let crate_dir = crate_path(sess, crate_name, stable_crate_id);
+    debug!("crate-dir: {}", crate_dir.display());
+    create_dir(sess, &crate_dir, "crate")?;
+
+    // Hack: canonicalize the path *after creating the directory*
+    // because, on Windows, long paths can cause problems;
+    // canonicalization inserts the `\\?\` prefix, which makes Windows
+    // tolerate long paths.
+    let crate_dir = match crate_dir.canonicalize() {
+        Ok(v) => v,
+        Err(err) => {
+            let reported = sess.err(&format!(
+                "incremental compilation: error canonicalizing path `{}`: {}",
+                crate_dir.display(),
+                err
+            ));
+            return Err(reported);
+        }
+    };
+
+    let mut source_directories_already_tried = FxHashSet::default();
+
+    loop {
+        // Generate a session directory of the form:
+        //
+        // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working
+        let session_dir = generate_session_dir_path(&crate_dir);
+        debug!("session-dir: {}", session_dir.display());
+
+        // Lock the new session directory. If this fails, return an
+        // error without retrying.
+        let (directory_lock, lock_file_path) = lock_directory(sess, &session_dir)?;
+
+        // Now that we have the lock, we can actually create the session
+        // directory.
+        create_dir(sess, &session_dir, "session")?;
+
+        // Find a suitable source directory to copy from. Ignore those that we
+        // have already tried before.
+        let source_directory = find_source_directory(&crate_dir, &source_directories_already_tried);
+
+        let Some(source_directory) = source_directory else {
+            // There's nowhere to copy from; we're done.
+            debug!(
+                "no source directory found. Continuing with empty session \
+                 directory."
+            );
+
+            sess.init_incr_comp_session(session_dir, directory_lock, false);
+            return Ok(());
+        };
+
+        debug!("attempting to copy data from source: {}", source_directory.display());
+
+        // Try copying over all files from the source directory
+        if let Ok(allows_links) = copy_files(sess, &session_dir, &source_directory) {
+            debug!("successfully copied data from: {}", source_directory.display());
+
+            if !allows_links {
+                sess.warn(&format!(
+                    "Hard linking files in the incremental \
+                     compilation cache failed. Copying files \
+                     instead. Consider moving the cache \
+                     directory to a file system which supports \
+                     hard linking (session dir: `{}`)",
+                    session_dir.display()
+                ));
+            }
+
+            sess.init_incr_comp_session(session_dir, directory_lock, true);
+            return Ok(());
+        } else {
+            debug!("copying failed - trying next directory");
+
+            // Something went wrong while trying to copy/link files from the
+            // source directory. Try again with a different one.
+            source_directories_already_tried.insert(source_directory);
+
+            // Try to remove the session directory we just allocated. We don't
+            // know if there's any garbage in it from the failed copy action.
+            if let Err(err) = safe_remove_dir_all(&session_dir) {
+                sess.warn(&format!(
+                    "Failed to delete partly initialized \
+                     session dir `{}`: {}",
+                    session_dir.display(),
+                    err
+                ));
+            }
+
+            delete_session_dir_lock_file(sess, &lock_file_path);
+            mem::drop(directory_lock);
+        }
+    }
+}
+
+/// This function finalizes and thus 'publishes' the session directory by
+/// renaming it to `s-{timestamp}-{random-number}-{svh}` and releasing the
+/// file lock.
+/// If there have been compilation errors, however, this function will just
+/// delete the presumably invalid session directory.
+pub fn finalize_session_directory(sess: &Session, svh: Svh) {
+    if sess.opts.incremental.is_none() {
+        return;
+    }
+
+    let _timer = sess.timer("incr_comp_finalize_session_directory");
+
+    let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone();
+
+    if sess.has_errors_or_delayed_span_bugs() {
+        // If there have been any errors during compilation, we don't want to
+        // publish this session directory. Rather, we'll just delete it.
+
+        debug!(
+            "finalize_session_directory() - invalidating session directory: {}",
+            incr_comp_session_dir.display()
+        );
+
+        if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) {
+            sess.warn(&format!(
+                "Error deleting incremental compilation \
+                 session directory `{}`: {}",
+                incr_comp_session_dir.display(),
+                err
+            ));
+        }
+
+        let lock_file_path = lock_file_path(&*incr_comp_session_dir);
+        delete_session_dir_lock_file(sess, &lock_file_path);
+        sess.mark_incr_comp_session_as_invalid();
+
+        // The session directory has been deleted; don't fall through to the
+        // renaming logic below.
+        return;
+    }
+
+    debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
+
+    let old_sub_dir_name = incr_comp_session_dir.file_name().unwrap().to_string_lossy();
+    assert_no_characters_lost(&old_sub_dir_name);
+
+    // Keep the 's-{timestamp}-{random-number}' prefix, but replace the
+    // '-working' part with the SVH of the crate
+    let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
+    if dash_indices.len() != 3 {
+        bug!(
+            "Encountered incremental compilation session directory with \
+             malformed name: {}",
+            incr_comp_session_dir.display()
+        )
+    }
+
+    // State: "s-{timestamp}-{random-number}-"
+    let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
+
+    // Append the svh
+    base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
+
+    // Create the full path
+    let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
+    debug!("finalize_session_directory() - new path: {}", new_path.display());
+
+    match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
+        Ok(_) => {
+            debug!("finalize_session_directory() - directory renamed successfully");
+
+            // This unlocks the directory
+            sess.finalize_incr_comp_session(new_path);
+        }
+        Err(e) => {
+            // Warn about the error. However, no need to abort compilation now.
+            sess.warn(&format!(
+                "Error finalizing incremental compilation \
+                 session directory `{}`: {}",
+                incr_comp_session_dir.display(),
+                e
+            ));
+
+            debug!("finalize_session_directory() - error, marking as invalid");
+            // Drop the file lock, so we can garbage collect
+            sess.mark_incr_comp_session_as_invalid();
+        }
+    }
+
+    let _ = garbage_collect_session_directories(sess);
+}
+
+/// Deletes all files in the current session directory.
+pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> {
+    let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?;
+    for entry in sess_dir_iterator {
+        let entry = entry?;
+        safe_remove_file(&entry.path())?
+    }
+    Ok(())
+}
+
+fn copy_files(sess: &Session, target_dir: &Path, source_dir: &Path) -> Result<bool, ()> {
+    // We acquire a shared lock on the lock file of the directory, so that
+    // nobody deletes it out from under us while we are reading from it.
+    let lock_file_path = lock_file_path(source_dir);
+
+    let Ok(_lock) = flock::Lock::new(
+        &lock_file_path,
+        false, // don't wait
+        false, // don't create
+        false, // not exclusive
+    ) else {
+        // Could not acquire the lock, don't try to copy from here
+        return Err(());
+    };
+
+    let Ok(source_dir_iterator) = source_dir.read_dir() else {
+        return Err(());
+    };
+
+    let mut files_linked = 0;
+    let mut files_copied = 0;
+
+    for entry in source_dir_iterator {
+        match entry {
+            Ok(entry) => {
+                let file_name = entry.file_name();
+
+                let target_file_path = target_dir.join(file_name);
+                let source_path = entry.path();
+
+                debug!("copying into session dir: {}", source_path.display());
+                match link_or_copy(source_path, target_file_path) {
+                    Ok(LinkOrCopy::Link) => files_linked += 1,
+                    Ok(LinkOrCopy::Copy) => files_copied += 1,
+                    Err(_) => return Err(()),
+                }
+            }
+            Err(_) => return Err(()),
+        }
+    }
+
+    if sess.opts.unstable_opts.incremental_info {
+        eprintln!(
+            "[incremental] session directory: \
+             {} files hard-linked",
+            files_linked
+        );
+        eprintln!(
+            "[incremental] session directory: \
+             {} files copied",
+            files_copied
+        );
+    }
+
+    // Report whether hard-linking worked: true if at least one file was
+    // hard-linked, or if there was nothing that needed copying at all.
+    Ok(files_linked > 0 || files_copied == 0)
+}
+
+/// Generates a unique session directory path of the form:
+/// `{crate_dir}/s-{timestamp}-{random-number}-working`
+fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
+    let timestamp = timestamp_to_string(SystemTime::now());
+    debug!("generate_session_dir_path: timestamp = {}", timestamp);
+    let random_number = thread_rng().next_u32();
+    debug!("generate_session_dir_path: random_number = {}", random_number);
+
+    let directory_name = format!(
+        "s-{}-{}-working",
+        timestamp,
+        base_n::encode(random_number as u128, INT_ENCODE_BASE)
+    );
+    debug!("generate_session_dir_path: directory_name = {}", directory_name);
+    let directory_path = crate_dir.join(directory_name);
+    debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
+    directory_path
+}
+
+fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(), ErrorGuaranteed> {
+    match std_fs::create_dir_all(path) {
+        Ok(()) => {
+            debug!("{} directory created successfully", dir_tag);
+            Ok(())
+        }
+        Err(err) => {
+            let reported = sess.err(&format!(
+                "Could not create incremental compilation {} \
+                 directory `{}`: {}",
+                dir_tag,
+                path.display(),
+                err
+            ));
+            Err(reported)
+        }
+    }
+}
+
+/// Allocates the lock-file and locks it.
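+///
+/// The lock is taken exclusively and without waiting, and the lock file is
+/// created if it does not exist yet. Failure to acquire it is reported as a
+/// compilation error, since without the lock the new session directory cannot
+/// be protected from concurrent garbage collection.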
+fn lock_directory(
+    sess: &Session,
+    session_dir: &Path,
+) -> Result<(flock::Lock, PathBuf), ErrorGuaranteed> {
+    let lock_file_path = lock_file_path(session_dir);
+    debug!("lock_directory() - lock_file: {}", lock_file_path.display());
+
+    match flock::Lock::new(
+        &lock_file_path,
+        false, // don't wait
+        true,  // create the lock file
+        true,  // the lock should be exclusive
+    ) {
+        Ok(lock) => Ok((lock, lock_file_path)),
+        Err(lock_err) => {
+            let mut err = sess.struct_err(&format!(
+                "incremental compilation: could not create \
+                 session directory lock file: {}",
+                lock_err
+            ));
+            if flock::Lock::error_unsupported(&lock_err) {
+                err.note(&format!(
+                    "the filesystem for the incremental path at {} \
+                     does not appear to support locking; consider changing the \
+                     incremental path to a filesystem that supports locking \
+                     or disabling incremental compilation",
+                    session_dir.display()
+                ));
+                if std::env::var_os("CARGO").is_some() {
+                    err.help(
+                        "incremental compilation can be disabled by setting the \
+                         environment variable CARGO_INCREMENTAL=0 (see \
+                         https://doc.rust-lang.org/cargo/reference/profiles.html#incremental)",
+                    );
+                    err.help(
+                        "the entire build directory can be changed to a different \
+                         filesystem by setting the environment variable CARGO_TARGET_DIR \
+                         to a different path (see \
+                         https://doc.rust-lang.org/cargo/reference/config.html#buildtarget-dir)",
+                    );
+                }
+            }
+            Err(err.emit())
+        }
+    }
+}
+
+fn delete_session_dir_lock_file(sess: &Session, lock_file_path: &Path) {
+    if let Err(err) = safe_remove_file(&lock_file_path) {
+        sess.warn(&format!(
+            "Error deleting lock file for incremental \
+             compilation session directory `{}`: {}",
+            lock_file_path.display(),
+            err
+        ));
+    }
+}
+
+/// Finds the most recent published session directory that is not in the
+/// ignore-list.
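+///
+/// "Published" means finalized: directory names still ending in `-working`
+/// are skipped, and among the remaining candidates the one with the newest
+/// timestamp (as encoded in the directory name) wins.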
+fn find_source_directory( + crate_dir: &Path, + source_directories_already_tried: &FxHashSet<PathBuf>, +) -> Option<PathBuf> { + let iter = crate_dir + .read_dir() + .unwrap() // FIXME + .filter_map(|e| e.ok().map(|e| e.path())); + + find_source_directory_in_iter(iter, source_directories_already_tried) +} + +fn find_source_directory_in_iter<I>( + iter: I, + source_directories_already_tried: &FxHashSet<PathBuf>, +) -> Option<PathBuf> +where + I: Iterator<Item = PathBuf>, +{ + let mut best_candidate = (UNIX_EPOCH, None); + + for session_dir in iter { + debug!("find_source_directory_in_iter - inspecting `{}`", session_dir.display()); + + let directory_name = session_dir.file_name().unwrap().to_string_lossy(); + assert_no_characters_lost(&directory_name); + + if source_directories_already_tried.contains(&session_dir) + || !is_session_directory(&directory_name) + || !is_finalized(&directory_name) + { + debug!("find_source_directory_in_iter - ignoring"); + continue; + } + + let timestamp = extract_timestamp_from_session_dir(&directory_name).unwrap_or_else(|_| { + bug!("unexpected incr-comp session dir: {}", session_dir.display()) + }); + + if timestamp > best_candidate.0 { + best_candidate = (timestamp, Some(session_dir.clone())); + } + } + + best_candidate.1 +} + +fn is_finalized(directory_name: &str) -> bool { + !directory_name.ends_with("-working") +} + +fn is_session_directory(directory_name: &str) -> bool { + directory_name.starts_with("s-") && !directory_name.ends_with(LOCK_FILE_EXT) +} + +fn is_session_directory_lock_file(file_name: &str) -> bool { + file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT) +} + +fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime, ()> { + if !is_session_directory(directory_name) { + return Err(()); + } + + let dash_indices: Vec<_> = directory_name.match_indices('-').map(|(idx, _)| idx).collect(); + if dash_indices.len() != 3 { + return Err(()); + } + + string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]]) +} + +fn timestamp_to_string(timestamp: SystemTime) -> String { + let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); + let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000; + base_n::encode(micros as u128, INT_ENCODE_BASE) +} + +fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> { + let micros_since_unix_epoch = u64::from_str_radix(s, INT_ENCODE_BASE as u32); + + if micros_since_unix_epoch.is_err() { + return Err(()); + } + + let micros_since_unix_epoch = micros_since_unix_epoch.unwrap(); + + let duration = Duration::new( + micros_since_unix_epoch / 1_000_000, + 1000 * (micros_since_unix_epoch % 1_000_000) as u32, + ); + Ok(UNIX_EPOCH + duration) +} + +fn crate_path(sess: &Session, crate_name: &str, stable_crate_id: StableCrateId) -> PathBuf { + let incr_dir = sess.opts.incremental.as_ref().unwrap().clone(); + + let stable_crate_id = base_n::encode(stable_crate_id.to_u64() as u128, INT_ENCODE_BASE); + + let crate_name = format!("{}-{}", crate_name, stable_crate_id); + incr_dir.join(crate_name) +} + +fn assert_no_characters_lost(s: &str) { + if s.contains('\u{FFFD}') { + bug!("Could not losslessly convert '{}'.", s) + } +} + +fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool { + timestamp < SystemTime::now() - Duration::from_secs(10) +} + +/// Runs garbage collection for the current session. 
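+///
+/// In rough outline (see the body below for details):
+///
+/// 1. Scan the crate directory for session directories and lock files.
+/// 2. Delete old lock files that have no associated session directory.
+/// 3. Delete session directories that have no associated lock file.
+/// 4. Try to acquire the exclusive lock of the remaining directories:
+///    stale `-working` directories whose lock can be stolen are deleted
+///    immediately, while finalized ones become deletion candidates.
+/// 5. Delete all candidates except the most recent one.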
+pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> {
+    debug!("garbage_collect_session_directories() - begin");
+
+    let session_directory = sess.incr_comp_session_dir();
+    debug!(
+        "garbage_collect_session_directories() - session directory: {}",
+        session_directory.display()
+    );
+
+    let crate_directory = session_directory.parent().unwrap();
+    debug!(
+        "garbage_collect_session_directories() - crate directory: {}",
+        crate_directory.display()
+    );
+
+    // First do a pass over the crate directory, collecting lock files and
+    // session directories
+    let mut session_directories = FxHashSet::default();
+    let mut lock_files = FxHashSet::default();
+
+    for dir_entry in crate_directory.read_dir()? {
+        let Ok(dir_entry) = dir_entry else {
+            // Ignore any errors
+            continue;
+        };
+
+        let entry_name = dir_entry.file_name();
+        let entry_name = entry_name.to_string_lossy();
+
+        if is_session_directory_lock_file(&entry_name) {
+            assert_no_characters_lost(&entry_name);
+            lock_files.insert(entry_name.into_owned());
+        } else if is_session_directory(&entry_name) {
+            assert_no_characters_lost(&entry_name);
+            session_directories.insert(entry_name.into_owned());
+        } else {
+            // This is something we don't know; leave it alone
+        }
+    }
+
+    // Now map from lock files to session directories
+    let lock_file_to_session_dir: FxHashMap<String, Option<String>> = lock_files
+        .into_iter()
+        .map(|lock_file_name| {
+            assert!(lock_file_name.ends_with(LOCK_FILE_EXT));
+            let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len();
+            let session_dir = {
+                let dir_prefix = &lock_file_name[0..dir_prefix_end];
+                session_directories.iter().find(|dir_name| dir_name.starts_with(dir_prefix))
+            };
+            (lock_file_name, session_dir.map(String::clone))
+        })
+        .collect();
+
+    // Delete all lock files that don't have an associated directory. They must
+    // be some kind of leftover.
+    for (lock_file_name, directory_name) in &lock_file_to_session_dir {
+        if directory_name.is_none() {
+            let Ok(timestamp) = extract_timestamp_from_session_dir(lock_file_name) else {
+                debug!(
+                    "found lock-file with malformed timestamp: {}",
+                    crate_directory.join(&lock_file_name).display()
+                );
+                // Ignore it
+                continue;
+            };
+
+            let lock_file_path = crate_directory.join(&**lock_file_name);
+
+            if is_old_enough_to_be_collected(timestamp) {
+                debug!(
+                    "garbage_collect_session_directories() - deleting \
+                     garbage lock file: {}",
+                    lock_file_path.display()
+                );
+                delete_session_dir_lock_file(sess, &lock_file_path);
+            } else {
+                debug!(
+                    "garbage_collect_session_directories() - lock file with \
+                     no session dir not old enough to be collected: {}",
+                    lock_file_path.display()
+                );
+            }
+        }
+    }
+
+    // Filter out `None` directories
+    let lock_file_to_session_dir: FxHashMap<String, String> = lock_file_to_session_dir
+        .into_iter()
+        .filter_map(|(lock_file_name, directory_name)| directory_name.map(|n| (lock_file_name, n)))
+        .collect();
+
+    // Delete all session directories that don't have a lock file.
+    for directory_name in session_directories {
+        if !lock_file_to_session_dir.values().any(|dir| *dir == directory_name) {
+            let path = crate_directory.join(directory_name);
+            if let Err(err) = safe_remove_dir_all(&path) {
+                sess.warn(&format!(
+                    "Failed to garbage collect invalid incremental \
+                     compilation session directory `{}`: {}",
+                    path.display(),
+                    err
+                ));
+            }
+        }
+    }
+
+    // Now garbage collect the valid session directories.
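+    // Finalized directories are only gathered as deletion candidates here
+    // (with their locks held), so that the most recent one can be spared;
+    // stale `-working` directories, by contrast, are deleted right away.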
+    let mut deletion_candidates = vec![];
+
+    for (lock_file_name, directory_name) in &lock_file_to_session_dir {
+        debug!("garbage_collect_session_directories() - inspecting: {}", directory_name);
+
+        let Ok(timestamp) = extract_timestamp_from_session_dir(directory_name) else {
+            debug!(
+                "found session-dir with malformed timestamp: {}",
+                crate_directory.join(directory_name).display()
+            );
+            // Ignore it
+            continue;
+        };
+
+        if is_finalized(directory_name) {
+            let lock_file_path = crate_directory.join(lock_file_name);
+            match flock::Lock::new(
+                &lock_file_path,
+                false, // don't wait
+                false, // don't create the lock-file
+                true,  // get an exclusive lock
+            ) {
+                Ok(lock) => {
+                    debug!(
+                        "garbage_collect_session_directories() - \
+                         successfully acquired lock"
+                    );
+                    debug!(
+                        "garbage_collect_session_directories() - adding \
+                         deletion candidate: {}",
+                        directory_name
+                    );
+
+                    // Note that we are holding on to the lock
+                    deletion_candidates.push((
+                        timestamp,
+                        crate_directory.join(directory_name),
+                        Some(lock),
+                    ));
+                }
+                Err(_) => {
+                    debug!(
+                        "garbage_collect_session_directories() - \
+                         not collecting, still in use"
+                    );
+                }
+            }
+        } else if is_old_enough_to_be_collected(timestamp) {
+            // When cleaning out "-working" session directories, i.e.
+            // session directories that might still be in use by another
+            // compiler instance, we only look at directories that are
+            // at least ten seconds old. This is supposed to reduce the
+            // chance of deleting a directory in the time window where
+            // the process has allocated the directory but has not yet
+            // acquired the file-lock on it.
+
+            // Try to acquire the directory lock. If we can't, it
+            // means that the owning process is still alive and we
+            // leave this directory alone.
+            let lock_file_path = crate_directory.join(lock_file_name);
+            match flock::Lock::new(
+                &lock_file_path,
+                false, // don't wait
+                false, // don't create the lock-file
+                true,  // get an exclusive lock
+            ) {
+                Ok(lock) => {
+                    debug!(
+                        "garbage_collect_session_directories() - \
+                         successfully acquired lock"
+                    );
+
+                    delete_old(sess, &crate_directory.join(directory_name));
+
+                    // Let's make it explicit that the file lock is released at this point,
+                    // or rather, that we held on to it until here
+                    mem::drop(lock);
+                }
+                Err(_) => {
+                    debug!(
+                        "garbage_collect_session_directories() - \
+                         not collecting, still in use"
+                    );
+                }
+            }
+        } else {
+            debug!(
+                "garbage_collect_session_directories() - not finalized, not \
+                 old enough"
+            );
+        }
+    }
+
+    // Delete all but the most recent of the candidates
+    for (path, lock) in all_except_most_recent(deletion_candidates) {
+        debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
+
+        if let Err(err) = safe_remove_dir_all(&path) {
+            sess.warn(&format!(
+                "Failed to garbage collect finalized incremental \
+                 compilation session directory `{}`: {}",
+                path.display(),
+                err
+            ));
+        } else {
+            delete_session_dir_lock_file(sess, &lock_file_path(&path));
+        }
+
+        // Let's make it explicit that the file lock is released at this point,
+        // or rather, that we held on to it until here
+        mem::drop(lock);
+    }
+
+    Ok(())
+}
+
+fn delete_old(sess: &Session, path: &Path) {
+    debug!("garbage_collect_session_directories() - deleting `{}`", path.display());
+
+    if let Err(err) = safe_remove_dir_all(&path) {
+        sess.warn(&format!(
+            "Failed to garbage collect incremental compilation session directory `{}`: {}",
+            path.display(),
+            err
+        ));
+    } else {
+        delete_session_dir_lock_file(sess, &lock_file_path(&path));
+    }
+}
+
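+/// Returns all deletion candidates except the most recent one, which is
+/// spared. The file locks acquired during candidate collection travel along
+/// in the returned map, so they stay held until the corresponding directory
+/// has actually been deleted.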
+fn all_except_most_recent( + deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>, +) -> FxHashMap<PathBuf, Option<flock::Lock>> { + let most_recent = deletion_candidates.iter().map(|&(timestamp, ..)| timestamp).max(); + + if let Some(most_recent) = most_recent { + deletion_candidates + .into_iter() + .filter(|&(timestamp, ..)| timestamp != most_recent) + .map(|(_, path, lock)| (path, lock)) + .collect() + } else { + FxHashMap::default() + } +} + +/// Since paths of artifacts within session directories can get quite long, we +/// need to support deleting files with very long paths. The regular +/// WinApi functions only support paths up to 260 characters, however. In order +/// to circumvent this limitation, we canonicalize the path of the directory +/// before passing it to std::fs::remove_dir_all(). This will convert the path +/// into the '\\?\' format, which supports much longer paths. +fn safe_remove_dir_all(p: &Path) -> io::Result<()> { + let canonicalized = match std_fs::canonicalize(p) { + Ok(canonicalized) => canonicalized, + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()), + Err(err) => return Err(err), + }; + + std_fs::remove_dir_all(canonicalized) +} + +fn safe_remove_file(p: &Path) -> io::Result<()> { + let canonicalized = match std_fs::canonicalize(p) { + Ok(canonicalized) => canonicalized, + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()), + Err(err) => return Err(err), + }; + + match std_fs::remove_file(canonicalized) { + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(()), + result => result, + } +} + +// On Windows the compiler would sometimes fail to rename the session directory because +// the OS thought something was still being accessed in it. So we retry a few times to give +// the OS time to catch up. +// See https://github.com/rust-lang/rust/issues/86929. +fn rename_path_with_retry(from: &Path, to: &Path, mut retries_left: usize) -> std::io::Result<()> { + loop { + match std_fs::rename(from, to) { + Ok(()) => return Ok(()), + Err(e) => { + if retries_left > 0 && e.kind() == ErrorKind::PermissionDenied { + // Try again after a short waiting period. 
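+                    // (Fixed 50 ms backoff per attempt; `finalize_session_directory`
+                    // calls this with `retries_left = 3`.)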
+ std::thread::sleep(Duration::from_millis(50)); + retries_left -= 1; + } else { + return Err(e); + } + } + } + } +} diff --git a/compiler/rustc_incremental/src/persist/fs/tests.rs b/compiler/rustc_incremental/src/persist/fs/tests.rs new file mode 100644 index 000000000..184796948 --- /dev/null +++ b/compiler/rustc_incremental/src/persist/fs/tests.rs @@ -0,0 +1,84 @@ +use super::*; + +#[test] +fn test_all_except_most_recent() { + assert_eq!( + all_except_most_recent(vec![ + (UNIX_EPOCH + Duration::new(4, 0), PathBuf::from("4"), None), + (UNIX_EPOCH + Duration::new(1, 0), PathBuf::from("1"), None), + (UNIX_EPOCH + Duration::new(5, 0), PathBuf::from("5"), None), + (UNIX_EPOCH + Duration::new(3, 0), PathBuf::from("3"), None), + (UNIX_EPOCH + Duration::new(2, 0), PathBuf::from("2"), None), + ]) + .keys() + .cloned() + .collect::<FxHashSet<PathBuf>>(), + [PathBuf::from("1"), PathBuf::from("2"), PathBuf::from("3"), PathBuf::from("4"),] + .into_iter() + .collect::<FxHashSet<PathBuf>>() + ); + + assert_eq!( + all_except_most_recent(vec![]).keys().cloned().collect::<FxHashSet<PathBuf>>(), + FxHashSet::default() + ); +} + +#[test] +fn test_timestamp_serialization() { + for i in 0..1_000u64 { + let time = UNIX_EPOCH + Duration::new(i * 1_434_578, (i as u32) * 239_000); + let s = timestamp_to_string(time); + assert_eq!(Ok(time), string_to_timestamp(&s)); + } +} + +#[test] +fn test_find_source_directory_in_iter() { + let already_visited = FxHashSet::default(); + + // Find newest + assert_eq!( + find_source_directory_in_iter( + [ + PathBuf::from("crate-dir/s-3234-0000-svh"), + PathBuf::from("crate-dir/s-2234-0000-svh"), + PathBuf::from("crate-dir/s-1234-0000-svh") + ] + .into_iter(), + &already_visited + ), + Some(PathBuf::from("crate-dir/s-3234-0000-svh")) + ); + + // Filter out "-working" + assert_eq!( + find_source_directory_in_iter( + [ + PathBuf::from("crate-dir/s-3234-0000-working"), + PathBuf::from("crate-dir/s-2234-0000-svh"), + PathBuf::from("crate-dir/s-1234-0000-svh") + ] + .into_iter(), + &already_visited + ), + Some(PathBuf::from("crate-dir/s-2234-0000-svh")) + ); + + // Handle empty + assert_eq!(find_source_directory_in_iter([].into_iter(), &already_visited), None); + + // Handle only working + assert_eq!( + find_source_directory_in_iter( + [ + PathBuf::from("crate-dir/s-3234-0000-working"), + PathBuf::from("crate-dir/s-2234-0000-working"), + PathBuf::from("crate-dir/s-1234-0000-working") + ] + .into_iter(), + &already_visited + ), + None + ); +} diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs new file mode 100644 index 000000000..1c5fd9169 --- /dev/null +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -0,0 +1,235 @@ +//! Code to save/load the dep-graph from files. + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::memmap::Mmap; +use rustc_middle::dep_graph::{SerializedDepGraph, WorkProduct, WorkProductId}; +use rustc_middle::ty::OnDiskCache; +use rustc_serialize::opaque::MemDecoder; +use rustc_serialize::Decodable; +use rustc_session::config::IncrementalStateAssertion; +use rustc_session::Session; +use std::path::Path; + +use super::data::*; +use super::file_format; +use super::fs::*; +use super::work_product; + +type WorkProductMap = FxHashMap<WorkProductId, WorkProduct>; + +#[derive(Debug)] +/// Represents the result of an attempt to load incremental compilation data. +pub enum LoadResult<T> { + /// Loading was successful. 
+    Ok {
+        #[allow(missing_docs)]
+        data: T,
+    },
+    /// The file either didn't exist or was produced by an incompatible compiler version.
+    DataOutOfDate,
+    /// An error occurred.
+    Error {
+        #[allow(missing_docs)]
+        message: String,
+    },
+}
+
+impl<T: Default> LoadResult<T> {
+    /// Accesses the data returned in [`LoadResult::Ok`].
+    pub fn open(self, sess: &Session) -> T {
+        // Check for errors when using `-Zassert-incremental-state`
+        match (sess.opts.assert_incr_state, &self) {
+            (Some(IncrementalStateAssertion::NotLoaded), LoadResult::Ok { .. }) => {
+                sess.fatal(
+                    "We asserted that the incremental cache should not be loaded, \
+                     but it was loaded.",
+                );
+            }
+            (
+                Some(IncrementalStateAssertion::Loaded),
+                LoadResult::Error { .. } | LoadResult::DataOutOfDate,
+            ) => {
+                sess.fatal(
+                    "We asserted that an existing incremental cache directory should \
+                     be successfully loaded, but it was not.",
+                );
+            }
+            _ => {}
+        };
+
+        match self {
+            LoadResult::Error { message } => {
+                sess.warn(&message);
+                Default::default()
+            }
+            LoadResult::DataOutOfDate => {
+                if let Err(err) = delete_all_session_dir_contents(sess) {
+                    sess.err(&format!(
+                        "Failed to delete invalidated or incompatible \
+                         incremental compilation session directory contents `{}`: {}.",
+                        dep_graph_path(sess).display(),
+                        err
+                    ));
+                }
+                Default::default()
+            }
+            LoadResult::Ok { data } => data,
+        }
+    }
+}
+
+fn load_data(
+    report_incremental_info: bool,
+    path: &Path,
+    nightly_build: bool,
+) -> LoadResult<(Mmap, usize)> {
+    match file_format::read_file(report_incremental_info, path, nightly_build) {
+        Ok(Some(data_and_pos)) => LoadResult::Ok { data: data_and_pos },
+        Ok(None) => {
+            // The file either didn't exist or was produced by an incompatible
+            // compiler version. Neither is an error.
+            LoadResult::DataOutOfDate
+        }
+        Err(err) => LoadResult::Error {
+            message: format!("could not load dep-graph from `{}`: {}", path.display(), err),
+        },
+    }
+}
+
+fn delete_dirty_work_product(sess: &Session, swp: SerializedWorkProduct) {
+    debug!("delete_dirty_work_product({:?})", swp);
+    work_product::delete_workproduct_files(sess, &swp.work_product);
+}
+
+/// Either a result that has already been computed or a
+/// handle that will let us wait until it is computed
+/// by a background thread.
+pub enum MaybeAsync<T> {
+    Sync(T),
+    Async(std::thread::JoinHandle<T>),
+}
+
+impl<T> MaybeAsync<LoadResult<T>> {
+    /// Unwraps the inner [`LoadResult`], blocking on the background thread
+    /// to finish if necessary.
+    pub fn open(self) -> LoadResult<T> {
+        match self {
+            MaybeAsync::Sync(result) => result,
+            MaybeAsync::Async(handle) => handle.join().unwrap_or_else(|e| LoadResult::Error {
+                message: format!("could not decode incremental cache: {:?}", e),
+            }),
+        }
+    }
+}
+
+/// An asynchronous type for computing the dependency graph.
+pub type DepGraphFuture = MaybeAsync<LoadResult<(SerializedDepGraph, WorkProductMap)>>;
+
+/// Launch a thread and load the dependency graph in the background.
+pub fn load_dep_graph(sess: &Session) -> DepGraphFuture {
+    // Since `sess` isn't `Sync`, we perform all accesses to `sess`
+    // before we fire the background thread.
+
+    let prof = sess.prof.clone();
+
+    if sess.opts.incremental.is_none() {
+        // No incremental compilation.
+        return MaybeAsync::Sync(LoadResult::Ok { data: Default::default() });
+    }
+
+    let _timer = sess.prof.generic_activity("incr_comp_prepare_load_dep_graph");
+
+    // Calling `sess.incr_comp_session_dir()` will panic if `sess.opts.incremental.is_none()`.
+ // Fortunately, we just checked that this isn't the case. + let path = dep_graph_path(&sess); + let report_incremental_info = sess.opts.unstable_opts.incremental_info; + let expected_hash = sess.opts.dep_tracking_hash(false); + + let mut prev_work_products = FxHashMap::default(); + let nightly_build = sess.is_nightly_build(); + + // If we are only building with -Zquery-dep-graph but without an actual + // incr. comp. session directory, we skip this. Otherwise we'd fail + // when trying to load work products. + if sess.incr_comp_session_dir_opt().is_some() { + let work_products_path = work_products_path(sess); + let load_result = load_data(report_incremental_info, &work_products_path, nightly_build); + + if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result { + // Decode the list of work_products + let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos); + let work_products: Vec<SerializedWorkProduct> = + Decodable::decode(&mut work_product_decoder); + + for swp in work_products { + let all_files_exist = swp.work_product.saved_files.iter().all(|(_, path)| { + let exists = in_incr_comp_dir_sess(sess, path).exists(); + if !exists && sess.opts.unstable_opts.incremental_info { + eprintln!("incremental: could not find file for work product: {path}",); + } + exists + }); + + if all_files_exist { + debug!("reconcile_work_products: all files for {:?} exist", swp); + prev_work_products.insert(swp.id, swp.work_product); + } else { + debug!("reconcile_work_products: some file for {:?} does not exist", swp); + delete_dirty_work_product(sess, swp); + } + } + } + } + + MaybeAsync::Async(std::thread::spawn(move || { + let _prof_timer = prof.generic_activity("incr_comp_load_dep_graph"); + + match load_data(report_incremental_info, &path, nightly_build) { + LoadResult::DataOutOfDate => LoadResult::DataOutOfDate, + LoadResult::Error { message } => LoadResult::Error { message }, + LoadResult::Ok { data: (bytes, start_pos) } => { + let mut decoder = MemDecoder::new(&bytes, start_pos); + let prev_commandline_args_hash = u64::decode(&mut decoder); + + if prev_commandline_args_hash != expected_hash { + if report_incremental_info { + eprintln!( + "[incremental] completely ignoring cache because of \ + differing commandline arguments" + ); + } + // We can't reuse the cache, purge it. + debug!("load_dep_graph_new: differing commandline arg hashes"); + + // No need to do any further work + return LoadResult::DataOutOfDate; + } + + let dep_graph = SerializedDepGraph::decode(&mut decoder); + + LoadResult::Ok { data: (dep_graph, prev_work_products) } + } + } + })) +} + +/// Attempts to load the query result cache from disk +/// +/// If we are not in incremental compilation mode, returns `None`. +/// Otherwise, tries to load the query result cache from disk, +/// creating an empty cache if it could not be loaded. 
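+///
+/// Note that a missing, out-of-date, or unreadable cache file is not an
+/// error here: every case other than a successful load falls back to an
+/// empty cache (`C::new_empty`).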
+pub fn load_query_result_cache<'a, C: OnDiskCache<'a>>(sess: &'a Session) -> Option<C> { + if sess.opts.incremental.is_none() { + return None; + } + + let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache"); + + match load_data( + sess.opts.unstable_opts.incremental_info, + &query_cache_path(sess), + sess.is_nightly_build(), + ) { + LoadResult::Ok { data: (bytes, start_pos) } => Some(C::new(sess, bytes, start_pos)), + _ => Some(C::new_empty(sess.source_map())), + } +} diff --git a/compiler/rustc_incremental/src/persist/mod.rs b/compiler/rustc_incremental/src/persist/mod.rs new file mode 100644 index 000000000..1336189bc --- /dev/null +++ b/compiler/rustc_incremental/src/persist/mod.rs @@ -0,0 +1,25 @@ +//! When in incremental mode, this pass dumps out the dependency graph +//! into the given directory. At the same time, it also hashes the +//! various HIR nodes. + +mod data; +mod dirty_clean; +mod file_format; +mod fs; +mod load; +mod save; +mod work_product; + +pub use fs::finalize_session_directory; +pub use fs::garbage_collect_session_directories; +pub use fs::in_incr_comp_dir; +pub use fs::in_incr_comp_dir_sess; +pub use fs::prepare_session_directory; +pub use load::load_query_result_cache; +pub use load::LoadResult; +pub use load::{load_dep_graph, DepGraphFuture}; +pub use save::build_dep_graph; +pub use save::save_dep_graph; +pub use save::save_work_product_index; +pub use work_product::copy_cgu_workproduct_to_incr_comp_cache_dir; +pub use work_product::delete_workproduct_files; diff --git a/compiler/rustc_incremental/src/persist/save.rs b/compiler/rustc_incremental/src/persist/save.rs new file mode 100644 index 000000000..710350314 --- /dev/null +++ b/compiler/rustc_incremental/src/persist/save.rs @@ -0,0 +1,188 @@ +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::join; +use rustc_middle::dep_graph::{DepGraph, SerializedDepGraph, WorkProduct, WorkProductId}; +use rustc_middle::ty::TyCtxt; +use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; +use rustc_serialize::Encodable as RustcEncodable; +use rustc_session::Session; +use std::fs; + +use super::data::*; +use super::dirty_clean; +use super::file_format; +use super::fs::*; +use super::work_product; + +/// Saves and writes the [`DepGraph`] to the file system. +/// +/// This function saves both the dep-graph and the query result cache, +/// and drops the result cache. +/// +/// This function should only run after all queries have completed. +/// Trying to execute a query afterwards would attempt to read the result cache we just dropped. +pub fn save_dep_graph(tcx: TyCtxt<'_>) { + debug!("save_dep_graph()"); + tcx.dep_graph.with_ignore(|| { + let sess = tcx.sess; + if sess.opts.incremental.is_none() { + return; + } + // This is going to be deleted in finalize_session_directory, so let's not create it + if sess.has_errors_or_delayed_span_bugs() { + return; + } + + let query_cache_path = query_cache_path(sess); + let dep_graph_path = dep_graph_path(sess); + let staging_dep_graph_path = staging_dep_graph_path(sess); + + sess.time("assert_dep_graph", || crate::assert_dep_graph(tcx)); + sess.time("check_dirty_clean", || dirty_clean::check_dirty_clean_annotations(tcx)); + + if sess.opts.unstable_opts.incremental_info { + tcx.dep_graph.print_incremental_info() + } + + join( + move || { + sess.time("incr_comp_persist_result_cache", || { + // Drop the memory map so that we can remove the file and write to it. 
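+                    // (On some platforms, notably Windows, a file that is
+                    // still memory-mapped cannot be removed or replaced, so
+                    // the mapping has to be dropped first.)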
+ if let Some(odc) = &tcx.on_disk_cache { + odc.drop_serialized_data(tcx); + } + + file_format::save_in(sess, query_cache_path, "query cache", |e| { + encode_query_cache(tcx, e) + }); + }); + }, + move || { + sess.time("incr_comp_persist_dep_graph", || { + if let Err(err) = tcx.dep_graph.encode(&tcx.sess.prof) { + sess.err(&format!( + "failed to write dependency graph to `{}`: {}", + staging_dep_graph_path.display(), + err + )); + } + if let Err(err) = fs::rename(&staging_dep_graph_path, &dep_graph_path) { + sess.err(&format!( + "failed to move dependency graph from `{}` to `{}`: {}", + staging_dep_graph_path.display(), + dep_graph_path.display(), + err + )); + } + }); + }, + ); + }) +} + +/// Saves the work product index. +pub fn save_work_product_index( + sess: &Session, + dep_graph: &DepGraph, + new_work_products: FxHashMap<WorkProductId, WorkProduct>, +) { + if sess.opts.incremental.is_none() { + return; + } + // This is going to be deleted in finalize_session_directory, so let's not create it + if sess.has_errors_or_delayed_span_bugs() { + return; + } + + debug!("save_work_product_index()"); + dep_graph.assert_ignored(); + let path = work_products_path(sess); + file_format::save_in(sess, path, "work product index", |mut e| { + encode_work_product_index(&new_work_products, &mut e); + e.finish() + }); + + // We also need to clean out old work-products, as not all of them are + // deleted during invalidation. Some object files don't change their + // content, they are just not needed anymore. + let previous_work_products = dep_graph.previous_work_products(); + for (id, wp) in previous_work_products.iter() { + if !new_work_products.contains_key(id) { + work_product::delete_workproduct_files(sess, wp); + debug_assert!( + !wp.saved_files.iter().all(|(_, path)| in_incr_comp_dir_sess(sess, path).exists()) + ); + } + } + + // Check that we did not delete one of the current work-products: + debug_assert!({ + new_work_products.iter().all(|(_, wp)| { + wp.saved_files.iter().all(|(_, path)| in_incr_comp_dir_sess(sess, path).exists()) + }) + }); +} + +fn encode_work_product_index( + work_products: &FxHashMap<WorkProductId, WorkProduct>, + encoder: &mut FileEncoder, +) { + let serialized_products: Vec<_> = work_products + .iter() + .map(|(id, work_product)| SerializedWorkProduct { + id: *id, + work_product: work_product.clone(), + }) + .collect(); + + serialized_products.encode(encoder) +} + +fn encode_query_cache(tcx: TyCtxt<'_>, encoder: FileEncoder) -> FileEncodeResult { + tcx.sess.time("incr_comp_serialize_result_cache", || tcx.serialize_query_result_cache(encoder)) +} + +/// Builds the dependency graph. +/// +/// This function creates the *staging dep-graph*. When the dep-graph is modified by a query +/// execution, the new dependency information is not kept in memory but directly +/// output to this file. `save_dep_graph` then finalizes the staging dep-graph +/// and moves it to the permanent dep-graph path +pub fn build_dep_graph( + sess: &Session, + prev_graph: SerializedDepGraph, + prev_work_products: FxHashMap<WorkProductId, WorkProduct>, +) -> Option<DepGraph> { + if sess.opts.incremental.is_none() { + // No incremental compilation. + return None; + } + + // Stream the dep-graph to an alternate file, to avoid overwriting anything in case of errors. 
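+    // Once encoding has finished successfully, `save_dep_graph` renames this
+    // staging file over the final dep-graph path.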
+    let path_buf = staging_dep_graph_path(sess);
+
+    let mut encoder = match FileEncoder::new(&path_buf) {
+        Ok(encoder) => encoder,
+        Err(err) => {
+            sess.err(&format!(
+                "failed to create dependency graph at `{}`: {}",
+                path_buf.display(),
+                err
+            ));
+            return None;
+        }
+    };
+
+    file_format::write_file_header(&mut encoder, sess.is_nightly_build());
+
+    // First encode the commandline arguments hash
+    sess.opts.dep_tracking_hash(false).encode(&mut encoder);
+
+    Some(DepGraph::new(
+        &sess.prof,
+        prev_graph,
+        prev_work_products,
+        encoder,
+        sess.opts.unstable_opts.query_dep_graph,
+        sess.opts.unstable_opts.incremental_info,
+    ))
+}
diff --git a/compiler/rustc_incremental/src/persist/work_product.rs b/compiler/rustc_incremental/src/persist/work_product.rs
new file mode 100644
index 000000000..2f1853c44
--- /dev/null
+++ b/compiler/rustc_incremental/src/persist/work_product.rs
@@ -0,0 +1,59 @@
+//! Functions for saving and removing intermediate [work products].
+//!
+//! [work products]: WorkProduct
+
+use crate::persist::fs::*;
+use rustc_data_structures::fx::FxHashMap;
+use rustc_fs_util::link_or_copy;
+use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
+use rustc_session::Session;
+use std::fs as std_fs;
+use std::path::Path;
+
+/// Copies a CGU work product to the incremental compilation directory, so the
+/// next compilation can find and reuse it.
+pub fn copy_cgu_workproduct_to_incr_comp_cache_dir(
+    sess: &Session,
+    cgu_name: &str,
+    files: &[(&'static str, &Path)],
+) -> Option<(WorkProductId, WorkProduct)> {
+    debug!(?cgu_name, ?files);
+    sess.opts.incremental.as_ref()?;
+
+    let mut saved_files = FxHashMap::default();
+    for (ext, path) in files {
+        let file_name = format!("{cgu_name}.{ext}");
+        let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
+        match link_or_copy(path, &path_in_incr_dir) {
+            Ok(_) => {
+                let _ = saved_files.insert(ext.to_string(), file_name);
+            }
+            Err(err) => {
+                sess.warn(&format!(
+                    "error copying object file `{}` to incremental directory as `{}`: {}",
+                    path.display(),
+                    path_in_incr_dir.display(),
+                    err
+                ));
+            }
+        }
+    }
+
+    let work_product = WorkProduct { cgu_name: cgu_name.to_string(), saved_files };
+    debug!(?work_product);
+    let work_product_id = WorkProductId::from_cgu_name(cgu_name);
+    Some((work_product_id, work_product))
+}
+
+/// Removes files for a given work product.
+pub fn delete_workproduct_files(sess: &Session, work_product: &WorkProduct) {
+    for (_, path) in &work_product.saved_files {
+        let path = in_incr_comp_dir_sess(sess, path);
+        if let Err(err) = std_fs::remove_file(&path) {
+            sess.warn(&format!(
+                "file-system error deleting outdated file `{}`: {}",
+                path.display(),
+                err
+            ));
+        }
+    }
+}
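As a closing illustration of the work-product API above, here is a minimal,
hypothetical sketch of the save-side call a codegen backend would make after
emitting an object file for a codegen unit. The bindings `sess`, `cgu_name`,
`obj_path`, and `work_products` are assumed to exist in the caller; they are
not part of this diff.

```rust
// Hypothetical caller-side sketch (not part of the diff): `sess`, `cgu_name`,
// `obj_path`, and `work_products` are assumed bindings in a codegen backend.
if let Some((id, product)) = copy_cgu_workproduct_to_incr_comp_cache_dir(
    sess,                         // &Session
    cgu_name,                     // &str, name of the codegen unit
    &[("o", obj_path.as_path())], // (extension, path) pairs to save
) {
    // `save_work_product_index` persists this map at the end of the session;
    // on the next compilation, `load_dep_graph` re-offers the work product
    // only if all of its saved files still exist.
    work_products.insert(id, product);
}
```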