diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:11:38 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:13:23 +0000 |
commit | 20431706a863f92cb37dc512fef6e48d192aaf2c (patch) | |
tree | 2867f13f5fd5437ba628c67d7f87309ccadcd286 /compiler/rustc_hir_typeck/src/generator_interior/drop_ranges | |
parent | Releasing progress-linux version 1.65.0+dfsg1-2~progress7.99u1. (diff) | |
download | rustc-20431706a863f92cb37dc512fef6e48d192aaf2c.tar.xz rustc-20431706a863f92cb37dc512fef6e48d192aaf2c.zip |
Merging upstream version 1.66.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_hir_typeck/src/generator_interior/drop_ranges')
5 files changed, 1296 insertions, 0 deletions
diff --git a/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_build.rs b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_build.rs new file mode 100644 index 000000000..122ad7009 --- /dev/null +++ b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_build.rs @@ -0,0 +1,563 @@ +use super::{ + for_each_consumable, record_consumed_borrow::ConsumedAndBorrowedPlaces, DropRangesBuilder, + NodeInfo, PostOrderId, TrackedValue, TrackedValueIndex, +}; +use hir::{ + intravisit::{self, Visitor}, + Body, Expr, ExprKind, Guard, HirId, LoopIdError, +}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_hir as hir; +use rustc_index::vec::IndexVec; +use rustc_middle::{ + hir::map::Map, + ty::{TyCtxt, TypeckResults}, +}; +use std::mem::swap; + +/// Traverses the body to find the control flow graph and locations for the +/// relevant places are dropped or reinitialized. +/// +/// The resulting structure still needs to be iterated to a fixed point, which +/// can be done with propagate_to_fixpoint in cfg_propagate. +pub(super) fn build_control_flow_graph<'tcx>( + hir: Map<'tcx>, + tcx: TyCtxt<'tcx>, + typeck_results: &TypeckResults<'tcx>, + consumed_borrowed_places: ConsumedAndBorrowedPlaces, + body: &'tcx Body<'tcx>, + num_exprs: usize, +) -> (DropRangesBuilder, FxHashSet<HirId>) { + let mut drop_range_visitor = + DropRangeVisitor::new(hir, tcx, typeck_results, consumed_borrowed_places, num_exprs); + intravisit::walk_body(&mut drop_range_visitor, body); + + drop_range_visitor.drop_ranges.process_deferred_edges(); + if let Some(filename) = &tcx.sess.opts.unstable_opts.dump_drop_tracking_cfg { + super::cfg_visualize::write_graph_to_file(&drop_range_visitor.drop_ranges, filename, tcx); + } + + (drop_range_visitor.drop_ranges, drop_range_visitor.places.borrowed_temporaries) +} + +/// This struct is used to gather the information for `DropRanges` to determine the regions of the +/// HIR tree for which a value is dropped. +/// +/// We are interested in points where a variables is dropped or initialized, and the control flow +/// of the code. We identify locations in code by their post-order traversal index, so it is +/// important for this traversal to match that in `RegionResolutionVisitor` and `InteriorVisitor`. +/// +/// We make several simplifying assumptions, with the goal of being more conservative than +/// necessary rather than less conservative (since being less conservative is unsound, but more +/// conservative is still safe). These assumptions are: +/// +/// 1. Moving a variable `a` counts as a move of the whole variable. +/// 2. Moving a partial path like `a.b.c` is ignored. +/// 3. Reinitializing through a field (e.g. `a.b.c = 5`) counts as a reinitialization of all of +/// `a`. +/// +/// Some examples: +/// +/// Rule 1: +/// ```rust +/// let mut a = (vec![0], vec![0]); +/// drop(a); +/// // `a` is not considered initialized. +/// ``` +/// +/// Rule 2: +/// ```rust +/// let mut a = (vec![0], vec![0]); +/// drop(a.0); +/// drop(a.1); +/// // `a` is still considered initialized. +/// ``` +/// +/// Rule 3: +/// ```compile_fail,E0382 +/// let mut a = (vec![0], vec![0]); +/// drop(a); +/// a.1 = vec![1]; +/// // all of `a` is considered initialized +/// ``` + +struct DropRangeVisitor<'a, 'tcx> { + hir: Map<'tcx>, + places: ConsumedAndBorrowedPlaces, + drop_ranges: DropRangesBuilder, + expr_index: PostOrderId, + tcx: TyCtxt<'tcx>, + typeck_results: &'a TypeckResults<'tcx>, + label_stack: Vec<(Option<rustc_ast::Label>, PostOrderId)>, +} + +impl<'a, 'tcx> DropRangeVisitor<'a, 'tcx> { + fn new( + hir: Map<'tcx>, + tcx: TyCtxt<'tcx>, + typeck_results: &'a TypeckResults<'tcx>, + places: ConsumedAndBorrowedPlaces, + num_exprs: usize, + ) -> Self { + debug!("consumed_places: {:?}", places.consumed); + let drop_ranges = DropRangesBuilder::new( + places.consumed.iter().flat_map(|(_, places)| places.iter().cloned()), + hir, + num_exprs, + ); + Self { + hir, + places, + drop_ranges, + expr_index: PostOrderId::from_u32(0), + typeck_results, + tcx, + label_stack: vec![], + } + } + + fn record_drop(&mut self, value: TrackedValue) { + if self.places.borrowed.contains(&value) { + debug!("not marking {:?} as dropped because it is borrowed at some point", value); + } else { + debug!("marking {:?} as dropped at {:?}", value, self.expr_index); + let count = self.expr_index; + self.drop_ranges.drop_at(value, count); + } + } + + /// ExprUseVisitor's consume callback doesn't go deep enough for our purposes in all + /// expressions. This method consumes a little deeper into the expression when needed. + fn consume_expr(&mut self, expr: &hir::Expr<'_>) { + debug!("consuming expr {:?}, count={:?}", expr.kind, self.expr_index); + let places = self + .places + .consumed + .get(&expr.hir_id) + .map_or(vec![], |places| places.iter().cloned().collect()); + for place in places { + trace!(?place, "consuming place"); + for_each_consumable(self.hir, place, |value| self.record_drop(value)); + } + } + + /// Marks an expression as being reinitialized. + /// + /// Note that we always approximated on the side of things being more + /// initialized than they actually are, as opposed to less. In cases such + /// as `x.y = ...`, we would consider all of `x` as being initialized + /// instead of just the `y` field. + /// + /// This is because it is always safe to consider something initialized + /// even when it is not, but the other way around will cause problems. + /// + /// In the future, we will hopefully tighten up these rules to be more + /// precise. + fn reinit_expr(&mut self, expr: &hir::Expr<'_>) { + // Walk the expression to find the base. For example, in an expression + // like `*a[i].x`, we want to find the `a` and mark that as + // reinitialized. + match expr.kind { + ExprKind::Path(hir::QPath::Resolved( + _, + hir::Path { res: hir::def::Res::Local(hir_id), .. }, + )) => { + // This is the base case, where we have found an actual named variable. + + let location = self.expr_index; + debug!("reinitializing {:?} at {:?}", hir_id, location); + self.drop_ranges.reinit_at(TrackedValue::Variable(*hir_id), location); + } + + ExprKind::Field(base, _) => self.reinit_expr(base), + + // Most expressions do not refer to something where we need to track + // reinitializations. + // + // Some of these may be interesting in the future + ExprKind::Path(..) + | ExprKind::Box(..) + | ExprKind::ConstBlock(..) + | ExprKind::Array(..) + | ExprKind::Call(..) + | ExprKind::MethodCall(..) + | ExprKind::Tup(..) + | ExprKind::Binary(..) + | ExprKind::Unary(..) + | ExprKind::Lit(..) + | ExprKind::Cast(..) + | ExprKind::Type(..) + | ExprKind::DropTemps(..) + | ExprKind::Let(..) + | ExprKind::If(..) + | ExprKind::Loop(..) + | ExprKind::Match(..) + | ExprKind::Closure { .. } + | ExprKind::Block(..) + | ExprKind::Assign(..) + | ExprKind::AssignOp(..) + | ExprKind::Index(..) + | ExprKind::AddrOf(..) + | ExprKind::Break(..) + | ExprKind::Continue(..) + | ExprKind::Ret(..) + | ExprKind::InlineAsm(..) + | ExprKind::Struct(..) + | ExprKind::Repeat(..) + | ExprKind::Yield(..) + | ExprKind::Err => (), + } + } + + /// For an expression with an uninhabited return type (e.g. a function that returns !), + /// this adds a self edge to the CFG to model the fact that the function does not + /// return. + fn handle_uninhabited_return(&mut self, expr: &Expr<'tcx>) { + let ty = self.typeck_results.expr_ty(expr); + let ty = self.tcx.erase_regions(ty); + let m = self.tcx.parent_module(expr.hir_id).to_def_id(); + let param_env = self.tcx.param_env(m.expect_local()); + if self.tcx.is_ty_uninhabited_from(m, ty, param_env) { + // This function will not return. We model this fact as an infinite loop. + self.drop_ranges.add_control_edge(self.expr_index + 1, self.expr_index + 1); + } + } + + /// Map a Destination to an equivalent expression node + /// + /// The destination field of a Break or Continue expression can target either an + /// expression or a block. The drop range analysis, however, only deals in + /// expression nodes, so blocks that might be the destination of a Break or Continue + /// will not have a PostOrderId. + /// + /// If the destination is an expression, this function will simply return that expression's + /// hir_id. If the destination is a block, this function will return the hir_id of last + /// expression in the block. + fn find_target_expression_from_destination( + &self, + destination: hir::Destination, + ) -> Result<HirId, LoopIdError> { + destination.target_id.map(|target| { + let node = self.hir.get(target); + match node { + hir::Node::Expr(_) => target, + hir::Node::Block(b) => find_last_block_expression(b), + hir::Node::Param(..) + | hir::Node::Item(..) + | hir::Node::ForeignItem(..) + | hir::Node::TraitItem(..) + | hir::Node::ImplItem(..) + | hir::Node::Variant(..) + | hir::Node::Field(..) + | hir::Node::AnonConst(..) + | hir::Node::Stmt(..) + | hir::Node::PathSegment(..) + | hir::Node::Ty(..) + | hir::Node::TypeBinding(..) + | hir::Node::TraitRef(..) + | hir::Node::Pat(..) + | hir::Node::PatField(..) + | hir::Node::ExprField(..) + | hir::Node::Arm(..) + | hir::Node::Local(..) + | hir::Node::Ctor(..) + | hir::Node::Lifetime(..) + | hir::Node::GenericParam(..) + | hir::Node::Crate(..) + | hir::Node::Infer(..) => bug!("Unsupported branch target: {:?}", node), + } + }) + } +} + +fn find_last_block_expression(block: &hir::Block<'_>) -> HirId { + block.expr.map_or_else( + // If there is no tail expression, there will be at least one statement in the + // block because the block contains a break or continue statement. + || block.stmts.last().unwrap().hir_id, + |expr| expr.hir_id, + ) +} + +impl<'a, 'tcx> Visitor<'tcx> for DropRangeVisitor<'a, 'tcx> { + fn visit_expr(&mut self, expr: &'tcx Expr<'tcx>) { + let mut reinit = None; + match expr.kind { + ExprKind::Assign(lhs, rhs, _) => { + self.visit_expr(lhs); + self.visit_expr(rhs); + + reinit = Some(lhs); + } + + ExprKind::If(test, if_true, if_false) => { + self.visit_expr(test); + + let fork = self.expr_index; + + self.drop_ranges.add_control_edge(fork, self.expr_index + 1); + self.visit_expr(if_true); + let true_end = self.expr_index; + + self.drop_ranges.add_control_edge(fork, self.expr_index + 1); + if let Some(if_false) = if_false { + self.visit_expr(if_false); + } + + self.drop_ranges.add_control_edge(true_end, self.expr_index + 1); + } + ExprKind::Match(scrutinee, arms, ..) => { + // We walk through the match expression almost like a chain of if expressions. + // Here's a diagram to follow along with: + // + // ┌─┐ + // match │A│ { + // ┌───┴─┘ + // │ + // ┌▼┌───►┌─┐ ┌─┐ + // │B│ if │C│ =>│D│, + // └─┘ ├─┴──►└─┴──────┐ + // ┌──┘ │ + // ┌──┘ │ + // │ │ + // ┌▼┌───►┌─┐ ┌─┐ │ + // │E│ if │F│ =>│G│, │ + // └─┘ ├─┴──►└─┴┐ │ + // │ │ │ + // } ▼ ▼ │ + // ┌─┐◄───────────────────┘ + // │H│ + // └─┘ + // + // The order we want is that the scrutinee (A) flows into the first pattern (B), + // which flows into the guard (C). Then the guard either flows into the arm body + // (D) or into the start of the next arm (E). Finally, the body flows to the end + // of the match block (H). + // + // The subsequent arms follow the same ordering. First we go to the pattern, then + // the guard (if present, otherwise it flows straight into the body), then into + // the body and then to the end of the match expression. + // + // The comments below show which edge is being added. + self.visit_expr(scrutinee); + + let (guard_exit, arm_end_ids) = arms.iter().fold( + (self.expr_index, vec![]), + |(incoming_edge, mut arm_end_ids), hir::Arm { pat, body, guard, .. }| { + // A -> B, or C -> E + self.drop_ranges.add_control_edge(incoming_edge, self.expr_index + 1); + self.visit_pat(pat); + // B -> C and E -> F are added implicitly due to the traversal order. + match guard { + Some(Guard::If(expr)) => self.visit_expr(expr), + Some(Guard::IfLet(let_expr)) => { + self.visit_let_expr(let_expr); + } + None => (), + } + // Likewise, C -> D and F -> G are added implicitly. + + // Save C, F, so we can add the other outgoing edge. + let to_next_arm = self.expr_index; + + // The default edge does not get added since we also have an explicit edge, + // so we also need to add an edge to the next node as well. + // + // This adds C -> D, F -> G + self.drop_ranges.add_control_edge(self.expr_index, self.expr_index + 1); + self.visit_expr(body); + + // Save the end of the body so we can add the exit edge once we know where + // the exit is. + arm_end_ids.push(self.expr_index); + + // Pass C to the next iteration, as well as vec![D] + // + // On the last round through, we pass F and vec![D, G] so that we can + // add all the exit edges. + (to_next_arm, arm_end_ids) + }, + ); + // F -> H + self.drop_ranges.add_control_edge(guard_exit, self.expr_index + 1); + + arm_end_ids.into_iter().for_each(|arm_end| { + // D -> H, G -> H + self.drop_ranges.add_control_edge(arm_end, self.expr_index + 1) + }); + } + + ExprKind::Loop(body, label, ..) => { + let loop_begin = self.expr_index + 1; + self.label_stack.push((label, loop_begin)); + if body.stmts.is_empty() && body.expr.is_none() { + // For empty loops we won't have updated self.expr_index after visiting the + // body, meaning we'd get an edge from expr_index to expr_index + 1, but + // instead we want an edge from expr_index + 1 to expr_index + 1. + self.drop_ranges.add_control_edge(loop_begin, loop_begin); + } else { + self.visit_block(body); + self.drop_ranges.add_control_edge(self.expr_index, loop_begin); + } + self.label_stack.pop(); + } + // Find the loop entry by searching through the label stack for either the last entry + // (if label is none), or the first entry where the label matches this one. The Loop + // case maintains this stack mapping labels to the PostOrderId for the loop entry. + ExprKind::Continue(hir::Destination { label, .. }, ..) => self + .label_stack + .iter() + .rev() + .find(|(loop_label, _)| label.is_none() || *loop_label == label) + .map_or((), |(_, target)| { + self.drop_ranges.add_control_edge(self.expr_index, *target) + }), + + ExprKind::Break(destination, ..) => { + // destination either points to an expression or to a block. We use + // find_target_expression_from_destination to use the last expression of the block + // if destination points to a block. + // + // We add an edge to the hir_id of the expression/block we are breaking out of, and + // then in process_deferred_edges we will map this hir_id to its PostOrderId, which + // will refer to the end of the block due to the post order traversal. + self.find_target_expression_from_destination(destination).map_or((), |target| { + self.drop_ranges.add_control_edge_hir_id(self.expr_index, target) + }) + } + + ExprKind::Call(f, args) => { + self.visit_expr(f); + for arg in args { + self.visit_expr(arg); + } + + self.handle_uninhabited_return(expr); + } + ExprKind::MethodCall(_, receiver, exprs, _) => { + self.visit_expr(receiver); + for expr in exprs { + self.visit_expr(expr); + } + + self.handle_uninhabited_return(expr); + } + + ExprKind::AddrOf(..) + | ExprKind::Array(..) + | ExprKind::AssignOp(..) + | ExprKind::Binary(..) + | ExprKind::Block(..) + | ExprKind::Box(..) + | ExprKind::Cast(..) + | ExprKind::Closure { .. } + | ExprKind::ConstBlock(..) + | ExprKind::DropTemps(..) + | ExprKind::Err + | ExprKind::Field(..) + | ExprKind::Index(..) + | ExprKind::InlineAsm(..) + | ExprKind::Let(..) + | ExprKind::Lit(..) + | ExprKind::Path(..) + | ExprKind::Repeat(..) + | ExprKind::Ret(..) + | ExprKind::Struct(..) + | ExprKind::Tup(..) + | ExprKind::Type(..) + | ExprKind::Unary(..) + | ExprKind::Yield(..) => intravisit::walk_expr(self, expr), + } + + self.expr_index = self.expr_index + 1; + self.drop_ranges.add_node_mapping(expr.hir_id, self.expr_index); + self.consume_expr(expr); + if let Some(expr) = reinit { + self.reinit_expr(expr); + } + } + + fn visit_pat(&mut self, pat: &'tcx hir::Pat<'tcx>) { + intravisit::walk_pat(self, pat); + + // Increment expr_count here to match what InteriorVisitor expects. + self.expr_index = self.expr_index + 1; + } +} + +impl DropRangesBuilder { + fn new( + tracked_values: impl Iterator<Item = TrackedValue>, + hir: Map<'_>, + num_exprs: usize, + ) -> Self { + let mut tracked_value_map = FxHashMap::<_, TrackedValueIndex>::default(); + let mut next = <_>::from(0u32); + for value in tracked_values { + for_each_consumable(hir, value, |value| { + if !tracked_value_map.contains_key(&value) { + tracked_value_map.insert(value, next); + next = next + 1; + } + }); + } + debug!("hir_id_map: {:?}", tracked_value_map); + let num_values = tracked_value_map.len(); + Self { + tracked_value_map, + nodes: IndexVec::from_fn_n(|_| NodeInfo::new(num_values), num_exprs + 1), + deferred_edges: <_>::default(), + post_order_map: <_>::default(), + } + } + + fn tracked_value_index(&self, tracked_value: TrackedValue) -> TrackedValueIndex { + *self.tracked_value_map.get(&tracked_value).unwrap() + } + + /// Adds an entry in the mapping from HirIds to PostOrderIds + /// + /// Needed so that `add_control_edge_hir_id` can work. + fn add_node_mapping(&mut self, node_hir_id: HirId, post_order_id: PostOrderId) { + self.post_order_map.insert(node_hir_id, post_order_id); + } + + /// Like add_control_edge, but uses a hir_id as the target. + /// + /// This can be used for branches where we do not know the PostOrderId of the target yet, + /// such as when handling `break` or `continue`. + fn add_control_edge_hir_id(&mut self, from: PostOrderId, to: HirId) { + self.deferred_edges.push((from, to)); + } + + fn drop_at(&mut self, value: TrackedValue, location: PostOrderId) { + let value = self.tracked_value_index(value); + self.node_mut(location).drops.push(value); + } + + fn reinit_at(&mut self, value: TrackedValue, location: PostOrderId) { + let value = match self.tracked_value_map.get(&value) { + Some(value) => *value, + // If there's no value, this is never consumed and therefore is never dropped. We can + // ignore this. + None => return, + }; + self.node_mut(location).reinits.push(value); + } + + /// Looks up PostOrderId for any control edges added by HirId and adds a proper edge for them. + /// + /// Should be called after visiting the HIR but before solving the control flow, otherwise some + /// edges will be missed. + fn process_deferred_edges(&mut self) { + trace!("processing deferred edges. post_order_map={:#?}", self.post_order_map); + let mut edges = vec![]; + swap(&mut edges, &mut self.deferred_edges); + edges.into_iter().for_each(|(from, to)| { + trace!("Adding deferred edge from {:?} to {:?}", from, to); + let to = *self.post_order_map.get(&to).expect("Expression ID not found"); + trace!("target edge PostOrderId={:?}", to); + self.add_control_edge(from, to) + }); + } +} diff --git a/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_propagate.rs b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_propagate.rs new file mode 100644 index 000000000..139d17d2e --- /dev/null +++ b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_propagate.rs @@ -0,0 +1,92 @@ +use super::{DropRangesBuilder, PostOrderId}; +use rustc_index::{bit_set::BitSet, vec::IndexVec}; +use std::collections::BTreeMap; + +impl DropRangesBuilder { + pub fn propagate_to_fixpoint(&mut self) { + trace!("before fixpoint: {:#?}", self); + let preds = self.compute_predecessors(); + + trace!("predecessors: {:#?}", preds.iter_enumerated().collect::<BTreeMap<_, _>>()); + + let mut new_state = BitSet::new_empty(self.num_values()); + let mut changed_nodes = BitSet::new_empty(self.nodes.len()); + let mut unchanged_mask = BitSet::new_filled(self.nodes.len()); + changed_nodes.insert(0u32.into()); + + let mut propagate = || { + let mut changed = false; + unchanged_mask.insert_all(); + for id in self.nodes.indices() { + trace!("processing {:?}, changed_nodes: {:?}", id, changed_nodes); + // Check if any predecessor has changed, and if not then short-circuit. + // + // We handle the start node specially, since it doesn't have any predecessors, + // but we need to start somewhere. + if match id.index() { + 0 => !changed_nodes.contains(id), + _ => !preds[id].iter().any(|pred| changed_nodes.contains(*pred)), + } { + trace!("short-circuiting because none of {:?} have changed", preds[id]); + unchanged_mask.remove(id); + continue; + } + + if id.index() == 0 { + new_state.clear(); + } else { + // If we are not the start node and we have no predecessors, treat + // everything as dropped because there's no way to get here anyway. + new_state.insert_all(); + }; + + for pred in &preds[id] { + new_state.intersect(&self.nodes[*pred].drop_state); + } + + for drop in &self.nodes[id].drops { + new_state.insert(*drop); + } + + for reinit in &self.nodes[id].reinits { + new_state.remove(*reinit); + } + + if self.nodes[id].drop_state.intersect(&new_state) { + changed_nodes.insert(id); + changed = true; + } else { + unchanged_mask.remove(id); + } + } + + changed_nodes.intersect(&unchanged_mask); + changed + }; + + while propagate() { + trace!("drop_state changed, re-running propagation"); + } + + trace!("after fixpoint: {:#?}", self); + } + + fn compute_predecessors(&self) -> IndexVec<PostOrderId, Vec<PostOrderId>> { + let mut preds = IndexVec::from_fn_n(|_| vec![], self.nodes.len()); + for (id, node) in self.nodes.iter_enumerated() { + // If the node has no explicit successors, we assume that control + // will from this node into the next one. + // + // If there are successors listed, then we assume that all + // possible successors are given and we do not include the default. + if node.successors.len() == 0 && id.index() != self.nodes.len() - 1 { + preds[id + 1].push(id); + } else { + for succ in &node.successors { + preds[*succ].push(id); + } + } + } + preds + } +} diff --git a/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_visualize.rs b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_visualize.rs new file mode 100644 index 000000000..c0a0bfe8e --- /dev/null +++ b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/cfg_visualize.rs @@ -0,0 +1,91 @@ +//! Implementation of GraphWalk for DropRanges so we can visualize the control +//! flow graph when needed for debugging. + +use rustc_graphviz as dot; +use rustc_middle::ty::TyCtxt; + +use super::{DropRangesBuilder, PostOrderId}; + +/// Writes the CFG for DropRangesBuilder to a .dot file for visualization. +/// +/// It is not normally called, but is kept around to easily add debugging +/// code when needed. +pub(super) fn write_graph_to_file( + drop_ranges: &DropRangesBuilder, + filename: &str, + tcx: TyCtxt<'_>, +) { + dot::render( + &DropRangesGraph { drop_ranges, tcx }, + &mut std::fs::File::create(filename).unwrap(), + ) + .unwrap(); +} + +struct DropRangesGraph<'a, 'tcx> { + drop_ranges: &'a DropRangesBuilder, + tcx: TyCtxt<'tcx>, +} + +impl<'a> dot::GraphWalk<'a> for DropRangesGraph<'_, '_> { + type Node = PostOrderId; + + type Edge = (PostOrderId, PostOrderId); + + fn nodes(&'a self) -> dot::Nodes<'a, Self::Node> { + self.drop_ranges.nodes.iter_enumerated().map(|(i, _)| i).collect() + } + + fn edges(&'a self) -> dot::Edges<'a, Self::Edge> { + self.drop_ranges + .nodes + .iter_enumerated() + .flat_map(|(i, node)| { + if node.successors.len() == 0 { + vec![(i, i + 1)] + } else { + node.successors.iter().map(move |&s| (i, s)).collect() + } + }) + .collect() + } + + fn source(&'a self, edge: &Self::Edge) -> Self::Node { + edge.0 + } + + fn target(&'a self, edge: &Self::Edge) -> Self::Node { + edge.1 + } +} + +impl<'a> dot::Labeller<'a> for DropRangesGraph<'_, '_> { + type Node = PostOrderId; + + type Edge = (PostOrderId, PostOrderId); + + fn graph_id(&'a self) -> dot::Id<'a> { + dot::Id::new("drop_ranges").unwrap() + } + + fn node_id(&'a self, n: &Self::Node) -> dot::Id<'a> { + dot::Id::new(format!("id{}", n.index())).unwrap() + } + + fn node_label(&'a self, n: &Self::Node) -> dot::LabelText<'a> { + dot::LabelText::LabelStr( + format!( + "{n:?}: {}", + self.drop_ranges + .post_order_map + .iter() + .find(|(_hir_id, &post_order_id)| post_order_id == *n) + .map_or("<unknown>".into(), |(hir_id, _)| self + .tcx + .hir() + .node_to_string(*hir_id)) + ) + .into(), + ) + } +} diff --git a/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/mod.rs b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/mod.rs new file mode 100644 index 000000000..4f3bdfbe7 --- /dev/null +++ b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/mod.rs @@ -0,0 +1,309 @@ +//! Drop range analysis finds the portions of the tree where a value is guaranteed to be dropped +//! (i.e. moved, uninitialized, etc.). This is used to exclude the types of those values from the +//! generator type. See `InteriorVisitor::record` for where the results of this analysis are used. +//! +//! There are three phases to this analysis: +//! 1. Use `ExprUseVisitor` to identify the interesting values that are consumed and borrowed. +//! 2. Use `DropRangeVisitor` to find where the interesting values are dropped or reinitialized, +//! and also build a control flow graph. +//! 3. Use `DropRanges::propagate_to_fixpoint` to flow the dropped/reinitialized information through +//! the CFG and find the exact points where we know a value is definitely dropped. +//! +//! The end result is a data structure that maps the post-order index of each node in the HIR tree +//! to a set of values that are known to be dropped at that location. + +use self::cfg_build::build_control_flow_graph; +use self::record_consumed_borrow::find_consumed_and_borrowed; +use crate::FnCtxt; +use hir::def_id::DefId; +use hir::{Body, HirId, HirIdMap, Node}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_hir as hir; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::hir::map::Map; +use rustc_middle::hir::place::{PlaceBase, PlaceWithHirId}; +use rustc_middle::ty; +use std::collections::BTreeMap; +use std::fmt::Debug; + +mod cfg_build; +mod cfg_propagate; +mod cfg_visualize; +mod record_consumed_borrow; + +pub fn compute_drop_ranges<'a, 'tcx>( + fcx: &'a FnCtxt<'a, 'tcx>, + def_id: DefId, + body: &'tcx Body<'tcx>, +) -> DropRanges { + if fcx.sess().opts.unstable_opts.drop_tracking { + let consumed_borrowed_places = find_consumed_and_borrowed(fcx, def_id, body); + + let typeck_results = &fcx.typeck_results.borrow(); + let num_exprs = fcx.tcx.region_scope_tree(def_id).body_expr_count(body.id()).unwrap_or(0); + let (mut drop_ranges, borrowed_temporaries) = build_control_flow_graph( + fcx.tcx.hir(), + fcx.tcx, + typeck_results, + consumed_borrowed_places, + body, + num_exprs, + ); + + drop_ranges.propagate_to_fixpoint(); + + debug!("borrowed_temporaries = {borrowed_temporaries:?}"); + DropRanges { + tracked_value_map: drop_ranges.tracked_value_map, + nodes: drop_ranges.nodes, + borrowed_temporaries: Some(borrowed_temporaries), + } + } else { + // If drop range tracking is not enabled, skip all the analysis and produce an + // empty set of DropRanges. + DropRanges { + tracked_value_map: FxHashMap::default(), + nodes: IndexVec::new(), + borrowed_temporaries: None, + } + } +} + +/// Applies `f` to consumable node in the HIR subtree pointed to by `place`. +/// +/// This includes the place itself, and if the place is a reference to a local +/// variable then `f` is also called on the HIR node for that variable as well. +/// +/// For example, if `place` points to `foo()`, then `f` is called once for the +/// result of `foo`. On the other hand, if `place` points to `x` then `f` will +/// be called both on the `ExprKind::Path` node that represents the expression +/// as well as the HirId of the local `x` itself. +fn for_each_consumable<'tcx>(hir: Map<'tcx>, place: TrackedValue, mut f: impl FnMut(TrackedValue)) { + f(place); + let node = hir.find(place.hir_id()); + if let Some(Node::Expr(expr)) = node { + match expr.kind { + hir::ExprKind::Path(hir::QPath::Resolved( + _, + hir::Path { res: hir::def::Res::Local(hir_id), .. }, + )) => { + f(TrackedValue::Variable(*hir_id)); + } + _ => (), + } + } +} + +rustc_index::newtype_index! { + pub struct PostOrderId { + DEBUG_FORMAT = "id({})", + } +} + +rustc_index::newtype_index! { + pub struct TrackedValueIndex { + DEBUG_FORMAT = "hidx({})", + } +} + +/// Identifies a value whose drop state we need to track. +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +enum TrackedValue { + /// Represents a named variable, such as a let binding, parameter, or upvar. + /// + /// The HirId points to the variable's definition site. + Variable(HirId), + /// A value produced as a result of an expression. + /// + /// The HirId points to the expression that returns this value. + Temporary(HirId), +} + +impl Debug for TrackedValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ty::tls::with_opt(|opt_tcx| { + if let Some(tcx) = opt_tcx { + write!(f, "{}", tcx.hir().node_to_string(self.hir_id())) + } else { + match self { + Self::Variable(hir_id) => write!(f, "Variable({:?})", hir_id), + Self::Temporary(hir_id) => write!(f, "Temporary({:?})", hir_id), + } + } + }) + } +} + +impl TrackedValue { + fn hir_id(&self) -> HirId { + match self { + TrackedValue::Variable(hir_id) | TrackedValue::Temporary(hir_id) => *hir_id, + } + } + + fn from_place_with_projections_allowed(place_with_id: &PlaceWithHirId<'_>) -> Self { + match place_with_id.place.base { + PlaceBase::Rvalue | PlaceBase::StaticItem => { + TrackedValue::Temporary(place_with_id.hir_id) + } + PlaceBase::Local(hir_id) + | PlaceBase::Upvar(ty::UpvarId { var_path: ty::UpvarPath { hir_id }, .. }) => { + TrackedValue::Variable(hir_id) + } + } + } +} + +/// Represents a reason why we might not be able to convert a HirId or Place +/// into a tracked value. +#[derive(Debug)] +enum TrackedValueConversionError { + /// Place projects are not currently supported. + /// + /// The reasoning around these is kind of subtle, so we choose to be more + /// conservative around these for now. There is no reason in theory we + /// cannot support these, we just have not implemented it yet. + PlaceProjectionsNotSupported, +} + +impl TryFrom<&PlaceWithHirId<'_>> for TrackedValue { + type Error = TrackedValueConversionError; + + fn try_from(place_with_id: &PlaceWithHirId<'_>) -> Result<Self, Self::Error> { + if !place_with_id.place.projections.is_empty() { + debug!( + "TrackedValue from PlaceWithHirId: {:?} has projections, which are not supported.", + place_with_id + ); + return Err(TrackedValueConversionError::PlaceProjectionsNotSupported); + } + + Ok(TrackedValue::from_place_with_projections_allowed(place_with_id)) + } +} + +pub struct DropRanges { + tracked_value_map: FxHashMap<TrackedValue, TrackedValueIndex>, + nodes: IndexVec<PostOrderId, NodeInfo>, + borrowed_temporaries: Option<FxHashSet<HirId>>, +} + +impl DropRanges { + pub fn is_dropped_at(&self, hir_id: HirId, location: usize) -> bool { + self.tracked_value_map + .get(&TrackedValue::Temporary(hir_id)) + .or(self.tracked_value_map.get(&TrackedValue::Variable(hir_id))) + .cloned() + .map_or(false, |tracked_value_id| { + self.expect_node(location.into()).drop_state.contains(tracked_value_id) + }) + } + + pub fn is_borrowed_temporary(&self, expr: &hir::Expr<'_>) -> bool { + if let Some(b) = &self.borrowed_temporaries { b.contains(&expr.hir_id) } else { true } + } + + /// Returns a reference to the NodeInfo for a node, panicking if it does not exist + fn expect_node(&self, id: PostOrderId) -> &NodeInfo { + &self.nodes[id] + } +} + +/// Tracks information needed to compute drop ranges. +struct DropRangesBuilder { + /// The core of DropRangesBuilder is a set of nodes, which each represent + /// one expression. We primarily refer to them by their index in a + /// post-order traversal of the HIR tree, since this is what + /// generator_interior uses to talk about yield positions. + /// + /// This IndexVec keeps the relevant details for each node. See the + /// NodeInfo struct for more details, but this information includes things + /// such as the set of control-flow successors, which variables are dropped + /// or reinitialized, and whether each variable has been inferred to be + /// known-dropped or potentially reinitialized at each point. + nodes: IndexVec<PostOrderId, NodeInfo>, + /// We refer to values whose drop state we are tracking by the HirId of + /// where they are defined. Within a NodeInfo, however, we store the + /// drop-state in a bit vector indexed by a HirIdIndex + /// (see NodeInfo::drop_state). The hir_id_map field stores the mapping + /// from HirIds to the HirIdIndex that is used to represent that value in + /// bitvector. + tracked_value_map: FxHashMap<TrackedValue, TrackedValueIndex>, + + /// When building the control flow graph, we don't always know the + /// post-order index of the target node at the point we encounter it. + /// For example, this happens with break and continue. In those cases, + /// we store a pair of the PostOrderId of the source and the HirId + /// of the target. Once we have gathered all of these edges, we make a + /// pass over the set of deferred edges (see process_deferred_edges in + /// cfg_build.rs), look up the PostOrderId for the target (since now the + /// post-order index for all nodes is known), and add missing control flow + /// edges. + deferred_edges: Vec<(PostOrderId, HirId)>, + /// This maps HirIds of expressions to their post-order index. It is + /// used in process_deferred_edges to correctly add back-edges. + post_order_map: HirIdMap<PostOrderId>, +} + +impl Debug for DropRangesBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DropRanges") + .field("hir_id_map", &self.tracked_value_map) + .field("post_order_maps", &self.post_order_map) + .field("nodes", &self.nodes.iter_enumerated().collect::<BTreeMap<_, _>>()) + .finish() + } +} + +/// DropRanges keeps track of what values are definitely dropped at each point in the code. +/// +/// Values of interest are defined by the hir_id of their place. Locations in code are identified +/// by their index in the post-order traversal. At its core, DropRanges maps +/// (hir_id, post_order_id) -> bool, where a true value indicates that the value is definitely +/// dropped at the point of the node identified by post_order_id. +impl DropRangesBuilder { + /// Returns the number of values (hir_ids) that are tracked + fn num_values(&self) -> usize { + self.tracked_value_map.len() + } + + fn node_mut(&mut self, id: PostOrderId) -> &mut NodeInfo { + let size = self.num_values(); + self.nodes.ensure_contains_elem(id, || NodeInfo::new(size)); + &mut self.nodes[id] + } + + fn add_control_edge(&mut self, from: PostOrderId, to: PostOrderId) { + trace!("adding control edge from {:?} to {:?}", from, to); + self.node_mut(from).successors.push(to); + } +} + +#[derive(Debug)] +struct NodeInfo { + /// IDs of nodes that can follow this one in the control flow + /// + /// If the vec is empty, then control proceeds to the next node. + successors: Vec<PostOrderId>, + + /// List of hir_ids that are dropped by this node. + drops: Vec<TrackedValueIndex>, + + /// List of hir_ids that are reinitialized by this node. + reinits: Vec<TrackedValueIndex>, + + /// Set of values that are definitely dropped at this point. + drop_state: BitSet<TrackedValueIndex>, +} + +impl NodeInfo { + fn new(num_values: usize) -> Self { + Self { + successors: vec![], + drops: vec![], + reinits: vec![], + drop_state: BitSet::new_filled(num_values), + } + } +} diff --git a/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/record_consumed_borrow.rs b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/record_consumed_borrow.rs new file mode 100644 index 000000000..bfe95852a --- /dev/null +++ b/compiler/rustc_hir_typeck/src/generator_interior/drop_ranges/record_consumed_borrow.rs @@ -0,0 +1,241 @@ +use super::TrackedValue; +use crate::{ + expr_use_visitor::{self, ExprUseVisitor}, + FnCtxt, +}; +use hir::{def_id::DefId, Body, HirId, HirIdMap}; +use rustc_data_structures::fx::FxHashSet; +use rustc_hir as hir; +use rustc_middle::ty::{ParamEnv, TyCtxt}; +use rustc_middle::{ + hir::place::{PlaceBase, Projection, ProjectionKind}, + ty::TypeVisitable, +}; + +pub(super) fn find_consumed_and_borrowed<'a, 'tcx>( + fcx: &'a FnCtxt<'a, 'tcx>, + def_id: DefId, + body: &'tcx Body<'tcx>, +) -> ConsumedAndBorrowedPlaces { + let mut expr_use_visitor = ExprUseDelegate::new(fcx.tcx, fcx.param_env); + expr_use_visitor.consume_body(fcx, def_id, body); + expr_use_visitor.places +} + +pub(super) struct ConsumedAndBorrowedPlaces { + /// Records the variables/expressions that are dropped by a given expression. + /// + /// The key is the hir-id of the expression, and the value is a set or hir-ids for variables + /// or values that are consumed by that expression. + /// + /// Note that this set excludes "partial drops" -- for example, a statement like `drop(x.y)` is + /// not considered a drop of `x`, although it would be a drop of `x.y`. + pub(super) consumed: HirIdMap<FxHashSet<TrackedValue>>, + + /// A set of hir-ids of values or variables that are borrowed at some point within the body. + pub(super) borrowed: FxHashSet<TrackedValue>, + + /// A set of hir-ids of values or variables that are borrowed at some point within the body. + pub(super) borrowed_temporaries: FxHashSet<HirId>, +} + +/// Works with ExprUseVisitor to find interesting values for the drop range analysis. +/// +/// Interesting values are those that are either dropped or borrowed. For dropped values, we also +/// record the parent expression, which is the point where the drop actually takes place. +struct ExprUseDelegate<'tcx> { + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, + places: ConsumedAndBorrowedPlaces, +} + +impl<'tcx> ExprUseDelegate<'tcx> { + fn new(tcx: TyCtxt<'tcx>, param_env: ParamEnv<'tcx>) -> Self { + Self { + tcx, + param_env, + places: ConsumedAndBorrowedPlaces { + consumed: <_>::default(), + borrowed: <_>::default(), + borrowed_temporaries: <_>::default(), + }, + } + } + + fn consume_body(&mut self, fcx: &'_ FnCtxt<'_, 'tcx>, def_id: DefId, body: &'tcx Body<'tcx>) { + // Run ExprUseVisitor to find where values are consumed. + ExprUseVisitor::new( + self, + &fcx.infcx, + def_id.expect_local(), + fcx.param_env, + &fcx.typeck_results.borrow(), + ) + .consume_body(body); + } + + fn mark_consumed(&mut self, consumer: HirId, target: TrackedValue) { + self.places.consumed.entry(consumer).or_insert_with(|| <_>::default()); + + debug!(?consumer, ?target, "mark_consumed"); + self.places.consumed.get_mut(&consumer).map(|places| places.insert(target)); + } + + fn borrow_place(&mut self, place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>) { + self.places + .borrowed + .insert(TrackedValue::from_place_with_projections_allowed(place_with_id)); + + // Ordinarily a value is consumed by it's parent, but in the special case of a + // borrowed RValue, we create a reference that lives as long as the temporary scope + // for that expression (typically, the innermost statement, but sometimes the enclosing + // block). We record this fact here so that later in generator_interior + // we can use the correct scope. + // + // We special case borrows through a dereference (`&*x`, `&mut *x` where `x` is + // some rvalue expression), since these are essentially a copy of a pointer. + // In other words, this borrow does not refer to the + // temporary (`*x`), but to the referent (whatever `x` is a borrow of). + // + // We were considering that we might encounter problems down the line if somehow, + // some part of the compiler were to look at this result and try to use it to + // drive a borrowck-like analysis (this does not currently happen, as of this writing). + // But even this should be fine, because the lifetime of the dereferenced reference + // found in the rvalue is only significant as an intermediate 'link' to the value we + // are producing, and we separately track whether that value is live over a yield. + // Example: + // + // ```notrust + // fn identity<T>(x: &mut T) -> &mut T { x } + // let a: A = ...; + // let y: &'y mut A = &mut *identity(&'a mut a); + // ^^^^^^^^^^^^^^^^^^^^^^^^^ the borrow we are talking about + // ``` + // + // The expression `*identity(...)` is a deref of an rvalue, + // where the `identity(...)` (the rvalue) produces a return type + // of `&'rv mut A`, where `'a: 'rv`. We then assign this result to + // `'y`, resulting in (transitively) `'a: 'y` (i.e., while `y` is in use, + // `a` will be considered borrowed). Other parts of the code will ensure + // that if `y` is live over a yield, `&'y mut A` appears in the generator + // state. If `'y` is live, then any sound region analysis must conclude + // that `'a` is also live. So if this causes a bug, blame some other + // part of the code! + let is_deref = place_with_id + .place + .projections + .iter() + .any(|Projection { kind, .. }| *kind == ProjectionKind::Deref); + + if let (false, PlaceBase::Rvalue) = (is_deref, place_with_id.place.base) { + self.places.borrowed_temporaries.insert(place_with_id.hir_id); + } + } +} + +impl<'tcx> expr_use_visitor::Delegate<'tcx> for ExprUseDelegate<'tcx> { + fn consume( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + ) { + let hir = self.tcx.hir(); + let parent = match hir.find_parent_node(place_with_id.hir_id) { + Some(parent) => parent, + None => place_with_id.hir_id, + }; + debug!( + "consume {:?}; diag_expr_id={}, using parent {}", + place_with_id, + hir.node_to_string(diag_expr_id), + hir.node_to_string(parent) + ); + place_with_id + .try_into() + .map_or((), |tracked_value| self.mark_consumed(parent, tracked_value)); + } + + fn borrow( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + bk: rustc_middle::ty::BorrowKind, + ) { + debug!( + "borrow: place_with_id = {place_with_id:#?}, diag_expr_id={diag_expr_id:#?}, \ + borrow_kind={bk:#?}" + ); + + self.borrow_place(place_with_id); + } + + fn copy( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + _diag_expr_id: HirId, + ) { + debug!("copy: place_with_id = {place_with_id:?}"); + + self.places + .borrowed + .insert(TrackedValue::from_place_with_projections_allowed(place_with_id)); + + // For copied we treat this mostly like a borrow except that we don't add the place + // to borrowed_temporaries because the copy is consumed. + } + + fn mutate( + &mut self, + assignee_place: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + ) { + debug!("mutate {assignee_place:?}; diag_expr_id={diag_expr_id:?}"); + + if assignee_place.place.base == PlaceBase::Rvalue + && assignee_place.place.projections.is_empty() + { + // Assigning to an Rvalue is illegal unless done through a dereference. We would have + // already gotten a type error, so we will just return here. + return; + } + + // If the type being assigned needs dropped, then the mutation counts as a borrow + // since it is essentially doing `Drop::drop(&mut x); x = new_value;`. + let ty = self.tcx.erase_regions(assignee_place.place.base_ty); + if ty.needs_infer() { + self.tcx.sess.delay_span_bug( + self.tcx.hir().span(assignee_place.hir_id), + &format!("inference variables in {ty}"), + ); + } else if ty.needs_drop(self.tcx, self.param_env) { + self.places + .borrowed + .insert(TrackedValue::from_place_with_projections_allowed(assignee_place)); + } + } + + fn bind( + &mut self, + binding_place: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + ) { + debug!("bind {binding_place:?}; diag_expr_id={diag_expr_id:?}"); + } + + fn fake_read( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + cause: rustc_middle::mir::FakeReadCause, + diag_expr_id: HirId, + ) { + debug!( + "fake_read place_with_id={place_with_id:?}; cause={cause:?}; diag_expr_id={diag_expr_id:?}" + ); + + // fake reads happen in places like the scrutinee of a match expression. + // we treat those as a borrow, much like a copy: the idea is that we are + // transiently creating a `&T` ref that we can read from to observe the current + // value (this `&T` is immediately dropped afterwards). + self.borrow_place(place_with_id); + } +} |