diff options
Diffstat (limited to '')
6 files changed, 1916 insertions, 0 deletions
diff --git a/compiler/rustc_typeck/src/check/generator_interior.rs b/compiler/rustc_typeck/src/check/generator_interior.rs new file mode 100644 index 000000000..d4f800149 --- /dev/null +++ b/compiler/rustc_typeck/src/check/generator_interior.rs @@ -0,0 +1,632 @@ +//! This calculates the types which has storage which lives across a suspension point in a +//! generator from the perspective of typeck. The actual types used at runtime +//! is calculated in `rustc_mir_transform::generator` and may be a subset of the +//! types computed here. + +use self::drop_ranges::DropRanges; +use super::FnCtxt; +use rustc_data_structures::fx::{FxHashSet, FxIndexSet}; +use rustc_errors::pluralize; +use rustc_hir as hir; +use rustc_hir::def::{CtorKind, DefKind, Res}; +use rustc_hir::def_id::DefId; +use rustc_hir::hir_id::HirIdSet; +use rustc_hir::intravisit::{self, Visitor}; +use rustc_hir::{Arm, Expr, ExprKind, Guard, HirId, Pat, PatKind}; +use rustc_middle::middle::region::{self, Scope, ScopeData, YieldData}; +use rustc_middle::ty::{self, RvalueScopes, Ty, TyCtxt, TypeVisitable}; +use rustc_span::symbol::sym; +use rustc_span::Span; +use tracing::debug; + +mod drop_ranges; + +struct InteriorVisitor<'a, 'tcx> { + fcx: &'a FnCtxt<'a, 'tcx>, + region_scope_tree: &'a region::ScopeTree, + types: FxIndexSet<ty::GeneratorInteriorTypeCause<'tcx>>, + rvalue_scopes: &'a RvalueScopes, + expr_count: usize, + kind: hir::GeneratorKind, + prev_unresolved_span: Option<Span>, + linted_values: HirIdSet, + drop_ranges: DropRanges, +} + +impl<'a, 'tcx> InteriorVisitor<'a, 'tcx> { + fn record( + &mut self, + ty: Ty<'tcx>, + hir_id: HirId, + scope: Option<region::Scope>, + expr: Option<&'tcx Expr<'tcx>>, + source_span: Span, + ) { + use rustc_span::DUMMY_SP; + + let ty = self.fcx.resolve_vars_if_possible(ty); + + debug!( + "attempting to record type ty={:?}; hir_id={:?}; scope={:?}; expr={:?}; source_span={:?}; expr_count={:?}", + ty, hir_id, scope, expr, source_span, self.expr_count, + ); + + let live_across_yield = scope + .map(|s| { + self.region_scope_tree.yield_in_scope(s).and_then(|yield_data| { + // If we are recording an expression that is the last yield + // in the scope, or that has a postorder CFG index larger + // than the one of all of the yields, then its value can't + // be storage-live (and therefore live) at any of the yields. + // + // See the mega-comment at `yield_in_scope` for a proof. + + yield_data + .iter() + .find(|yield_data| { + debug!( + "comparing counts yield: {} self: {}, source_span = {:?}", + yield_data.expr_and_pat_count, self.expr_count, source_span + ); + + if self.fcx.sess().opts.unstable_opts.drop_tracking + && self + .drop_ranges + .is_dropped_at(hir_id, yield_data.expr_and_pat_count) + { + debug!("value is dropped at yield point; not recording"); + return false; + } + + // If it is a borrowing happening in the guard, + // it needs to be recorded regardless because they + // do live across this yield point. + yield_data.expr_and_pat_count >= self.expr_count + }) + .cloned() + }) + }) + .unwrap_or_else(|| { + Some(YieldData { span: DUMMY_SP, expr_and_pat_count: 0, source: self.kind.into() }) + }); + + if let Some(yield_data) = live_across_yield { + debug!( + "type in expr = {:?}, scope = {:?}, type = {:?}, count = {}, yield_span = {:?}", + expr, scope, ty, self.expr_count, yield_data.span + ); + + if let Some((unresolved_type, unresolved_type_span)) = + self.fcx.unresolved_type_vars(&ty) + { + // If unresolved type isn't a ty_var then unresolved_type_span is None + let span = self + .prev_unresolved_span + .unwrap_or_else(|| unresolved_type_span.unwrap_or(source_span)); + + // If we encounter an int/float variable, then inference fallback didn't + // finish due to some other error. Don't emit spurious additional errors. + if let ty::Infer(ty::InferTy::IntVar(_) | ty::InferTy::FloatVar(_)) = + unresolved_type.kind() + { + self.fcx + .tcx + .sess + .delay_span_bug(span, &format!("Encountered var {:?}", unresolved_type)); + } else { + let note = format!( + "the type is part of the {} because of this {}", + self.kind, yield_data.source + ); + + self.fcx + .need_type_info_err_in_generator(self.kind, span, unresolved_type) + .span_note(yield_data.span, &*note) + .emit(); + } + } else { + // Insert the type into the ordered set. + let scope_span = scope.map(|s| s.span(self.fcx.tcx, self.region_scope_tree)); + + if !self.linted_values.contains(&hir_id) { + check_must_not_suspend_ty( + self.fcx, + ty, + hir_id, + SuspendCheckData { + expr, + source_span, + yield_span: yield_data.span, + plural_len: 1, + ..Default::default() + }, + ); + self.linted_values.insert(hir_id); + } + + self.types.insert(ty::GeneratorInteriorTypeCause { + span: source_span, + ty, + scope_span, + yield_span: yield_data.span, + expr: expr.map(|e| e.hir_id), + }); + } + } else { + debug!( + "no type in expr = {:?}, count = {:?}, span = {:?}", + expr, + self.expr_count, + expr.map(|e| e.span) + ); + if let Some((unresolved_type, unresolved_type_span)) = + self.fcx.unresolved_type_vars(&ty) + { + debug!( + "remained unresolved_type = {:?}, unresolved_type_span: {:?}", + unresolved_type, unresolved_type_span + ); + self.prev_unresolved_span = unresolved_type_span; + } + } + } +} + +pub fn resolve_interior<'a, 'tcx>( + fcx: &'a FnCtxt<'a, 'tcx>, + def_id: DefId, + body_id: hir::BodyId, + interior: Ty<'tcx>, + kind: hir::GeneratorKind, +) { + let body = fcx.tcx.hir().body(body_id); + let typeck_results = fcx.inh.typeck_results.borrow(); + let mut visitor = InteriorVisitor { + fcx, + types: FxIndexSet::default(), + region_scope_tree: fcx.tcx.region_scope_tree(def_id), + rvalue_scopes: &typeck_results.rvalue_scopes, + expr_count: 0, + kind, + prev_unresolved_span: None, + linted_values: <_>::default(), + drop_ranges: drop_ranges::compute_drop_ranges(fcx, def_id, body), + }; + intravisit::walk_body(&mut visitor, body); + + // Check that we visited the same amount of expressions as the RegionResolutionVisitor + let region_expr_count = fcx.tcx.region_scope_tree(def_id).body_expr_count(body_id).unwrap(); + assert_eq!(region_expr_count, visitor.expr_count); + + // The types are already kept in insertion order. + let types = visitor.types; + + // The types in the generator interior contain lifetimes local to the generator itself, + // which should not be exposed outside of the generator. Therefore, we replace these + // lifetimes with existentially-bound lifetimes, which reflect the exact value of the + // lifetimes not being known by users. + // + // These lifetimes are used in auto trait impl checking (for example, + // if a Sync generator contains an &'α T, we need to check whether &'α T: Sync), + // so knowledge of the exact relationships between them isn't particularly important. + + debug!("types in generator {:?}, span = {:?}", types, body.value.span); + + let mut counter = 0; + let mut captured_tys = FxHashSet::default(); + let type_causes: Vec<_> = types + .into_iter() + .filter_map(|mut cause| { + // Erase regions and canonicalize late-bound regions to deduplicate as many types as we + // can. + let erased = fcx.tcx.erase_regions(cause.ty); + if captured_tys.insert(erased) { + // Replace all regions inside the generator interior with late bound regions. + // Note that each region slot in the types gets a new fresh late bound region, + // which means that none of the regions inside relate to any other, even if + // typeck had previously found constraints that would cause them to be related. + let folded = fcx.tcx.fold_regions(erased, |_, current_depth| { + let br = ty::BoundRegion { + var: ty::BoundVar::from_u32(counter), + kind: ty::BrAnon(counter), + }; + let r = fcx.tcx.mk_region(ty::ReLateBound(current_depth, br)); + counter += 1; + r + }); + + cause.ty = folded; + Some(cause) + } else { + None + } + }) + .collect(); + + // Extract type components to build the witness type. + let type_list = fcx.tcx.mk_type_list(type_causes.iter().map(|cause| cause.ty)); + let bound_vars = fcx.tcx.mk_bound_variable_kinds( + (0..counter).map(|i| ty::BoundVariableKind::Region(ty::BrAnon(i))), + ); + let witness = + fcx.tcx.mk_generator_witness(ty::Binder::bind_with_vars(type_list, bound_vars.clone())); + + drop(typeck_results); + // Store the generator types and spans into the typeck results for this generator. + fcx.inh.typeck_results.borrow_mut().generator_interior_types = + ty::Binder::bind_with_vars(type_causes, bound_vars); + + debug!( + "types in generator after region replacement {:?}, span = {:?}", + witness, body.value.span + ); + + // Unify the type variable inside the generator with the new witness + match fcx.at(&fcx.misc(body.value.span), fcx.param_env).eq(interior, witness) { + Ok(ok) => fcx.register_infer_ok_obligations(ok), + _ => bug!(), + } +} + +// This visitor has to have the same visit_expr calls as RegionResolutionVisitor in +// librustc_middle/middle/region.rs since `expr_count` is compared against the results +// there. +impl<'a, 'tcx> Visitor<'tcx> for InteriorVisitor<'a, 'tcx> { + fn visit_arm(&mut self, arm: &'tcx Arm<'tcx>) { + let Arm { guard, pat, body, .. } = arm; + self.visit_pat(pat); + if let Some(ref g) = guard { + { + // If there is a guard, we need to count all variables bound in the pattern as + // borrowed for the entire guard body, regardless of whether they are accessed. + // We do this by walking the pattern bindings and recording `&T` for any `x: T` + // that is bound. + + struct ArmPatCollector<'a, 'b, 'tcx> { + interior_visitor: &'a mut InteriorVisitor<'b, 'tcx>, + scope: Scope, + } + + impl<'a, 'b, 'tcx> Visitor<'tcx> for ArmPatCollector<'a, 'b, 'tcx> { + fn visit_pat(&mut self, pat: &'tcx Pat<'tcx>) { + intravisit::walk_pat(self, pat); + if let PatKind::Binding(_, id, ident, ..) = pat.kind { + let ty = + self.interior_visitor.fcx.typeck_results.borrow().node_type(id); + let tcx = self.interior_visitor.fcx.tcx; + let ty = tcx.mk_ref( + // Use `ReErased` as `resolve_interior` is going to replace all the + // regions anyway. + tcx.mk_region(ty::ReErased), + ty::TypeAndMut { ty, mutbl: hir::Mutability::Not }, + ); + self.interior_visitor.record( + ty, + id, + Some(self.scope), + None, + ident.span, + ); + } + } + } + + ArmPatCollector { + interior_visitor: self, + scope: Scope { id: g.body().hir_id.local_id, data: ScopeData::Node }, + } + .visit_pat(pat); + } + + match g { + Guard::If(ref e) => { + self.visit_expr(e); + } + Guard::IfLet(ref l) => { + self.visit_let_expr(l); + } + } + } + self.visit_expr(body); + } + + fn visit_pat(&mut self, pat: &'tcx Pat<'tcx>) { + intravisit::walk_pat(self, pat); + + self.expr_count += 1; + + if let PatKind::Binding(..) = pat.kind { + let scope = self.region_scope_tree.var_scope(pat.hir_id.local_id).unwrap(); + let ty = self.fcx.typeck_results.borrow().pat_ty(pat); + self.record(ty, pat.hir_id, Some(scope), None, pat.span); + } + } + + fn visit_expr(&mut self, expr: &'tcx Expr<'tcx>) { + match &expr.kind { + ExprKind::Call(callee, args) => match &callee.kind { + ExprKind::Path(qpath) => { + let res = self.fcx.typeck_results.borrow().qpath_res(qpath, callee.hir_id); + match res { + // Direct calls never need to keep the callee `ty::FnDef` + // ZST in a temporary, so skip its type, just in case it + // can significantly complicate the generator type. + Res::Def( + DefKind::Fn | DefKind::AssocFn | DefKind::Ctor(_, CtorKind::Fn), + _, + ) => { + // NOTE(eddyb) this assumes a path expression has + // no nested expressions to keep track of. + self.expr_count += 1; + + // Record the rest of the call expression normally. + for arg in *args { + self.visit_expr(arg); + } + } + _ => intravisit::walk_expr(self, expr), + } + } + _ => intravisit::walk_expr(self, expr), + }, + _ => intravisit::walk_expr(self, expr), + } + + self.expr_count += 1; + + debug!("is_borrowed_temporary: {:?}", self.drop_ranges.is_borrowed_temporary(expr)); + + let ty = self.fcx.typeck_results.borrow().expr_ty_adjusted_opt(expr); + let may_need_drop = |ty: Ty<'tcx>| { + // Avoid ICEs in needs_drop. + let ty = self.fcx.resolve_vars_if_possible(ty); + let ty = self.fcx.tcx.erase_regions(ty); + if ty.needs_infer() { + return true; + } + ty.needs_drop(self.fcx.tcx, self.fcx.param_env) + }; + + // Typically, the value produced by an expression is consumed by its parent in some way, + // so we only have to check if the parent contains a yield (note that the parent may, for + // example, store the value into a local variable, but then we already consider local + // variables to be live across their scope). + // + // However, in the case of temporary values, we are going to store the value into a + // temporary on the stack that is live for the current temporary scope and then return a + // reference to it. That value may be live across the entire temporary scope. + // + // There's another subtlety: if the type has an observable drop, it must be dropped after + // the yield, even if it's not borrowed or referenced after the yield. Ideally this would + // *only* happen for types with observable drop, not all types which wrap them, but that + // doesn't match the behavior of MIR borrowck and causes ICEs. See the FIXME comment in + // src/test/ui/generator/drop-tracking-parent-expression.rs. + let scope = if self.drop_ranges.is_borrowed_temporary(expr) + || ty.map_or(true, |ty| { + let needs_drop = may_need_drop(ty); + debug!(?needs_drop, ?ty); + needs_drop + }) { + self.rvalue_scopes.temporary_scope(self.region_scope_tree, expr.hir_id.local_id) + } else { + debug!("parent_node: {:?}", self.fcx.tcx.hir().find_parent_node(expr.hir_id)); + match self.fcx.tcx.hir().find_parent_node(expr.hir_id) { + Some(parent) => Some(Scope { id: parent.local_id, data: ScopeData::Node }), + None => { + self.rvalue_scopes.temporary_scope(self.region_scope_tree, expr.hir_id.local_id) + } + } + }; + + // If there are adjustments, then record the final type -- + // this is the actual value that is being produced. + if let Some(adjusted_ty) = ty { + self.record(adjusted_ty, expr.hir_id, scope, Some(expr), expr.span); + } + + // Also record the unadjusted type (which is the only type if + // there are no adjustments). The reason for this is that the + // unadjusted value is sometimes a "temporary" that would wind + // up in a MIR temporary. + // + // As an example, consider an expression like `vec![].push(x)`. + // Here, the `vec![]` would wind up MIR stored into a + // temporary variable `t` which we can borrow to invoke + // `<Vec<_>>::push(&mut t, x)`. + // + // Note that an expression can have many adjustments, and we + // are just ignoring those intermediate types. This is because + // those intermediate values are always linearly "consumed" by + // the other adjustments, and hence would never be directly + // captured in the MIR. + // + // (Note that this partly relies on the fact that the `Deref` + // traits always return references, which means their content + // can be reborrowed without needing to spill to a temporary. + // If this were not the case, then we could conceivably have + // to create intermediate temporaries.) + // + // The type table might not have information for this expression + // if it is in a malformed scope. (#66387) + if let Some(ty) = self.fcx.typeck_results.borrow().expr_ty_opt(expr) { + self.record(ty, expr.hir_id, scope, Some(expr), expr.span); + } else { + self.fcx.tcx.sess.delay_span_bug(expr.span, "no type for node"); + } + } +} + +#[derive(Default)] +pub struct SuspendCheckData<'a, 'tcx> { + expr: Option<&'tcx Expr<'tcx>>, + source_span: Span, + yield_span: Span, + descr_pre: &'a str, + descr_post: &'a str, + plural_len: usize, +} + +// Returns whether it emitted a diagnostic or not +// Note that this fn and the proceeding one are based on the code +// for creating must_use diagnostics +// +// Note that this technique was chosen over things like a `Suspend` marker trait +// as it is simpler and has precedent in the compiler +pub fn check_must_not_suspend_ty<'tcx>( + fcx: &FnCtxt<'_, 'tcx>, + ty: Ty<'tcx>, + hir_id: HirId, + data: SuspendCheckData<'_, 'tcx>, +) -> bool { + if ty.is_unit() + // FIXME: should this check `is_ty_uninhabited_from`. This query is not available in this stage + // of typeck (before ReVar and RePlaceholder are removed), but may remove noise, like in + // `must_use` + // || fcx.tcx.is_ty_uninhabited_from(fcx.tcx.parent_module(hir_id).to_def_id(), ty, fcx.param_env) + { + return false; + } + + let plural_suffix = pluralize!(data.plural_len); + + match *ty.kind() { + ty::Adt(..) if ty.is_box() => { + let boxed_ty = ty.boxed_ty(); + let descr_pre = &format!("{}boxed ", data.descr_pre); + check_must_not_suspend_ty(fcx, boxed_ty, hir_id, SuspendCheckData { descr_pre, ..data }) + } + ty::Adt(def, _) => check_must_not_suspend_def(fcx.tcx, def.did(), hir_id, data), + // FIXME: support adding the attribute to TAITs + ty::Opaque(def, _) => { + let mut has_emitted = false; + for &(predicate, _) in fcx.tcx.explicit_item_bounds(def) { + // We only look at the `DefId`, so it is safe to skip the binder here. + if let ty::PredicateKind::Trait(ref poly_trait_predicate) = + predicate.kind().skip_binder() + { + let def_id = poly_trait_predicate.trait_ref.def_id; + let descr_pre = &format!("{}implementer{} of ", data.descr_pre, plural_suffix); + if check_must_not_suspend_def( + fcx.tcx, + def_id, + hir_id, + SuspendCheckData { descr_pre, ..data }, + ) { + has_emitted = true; + break; + } + } + } + has_emitted + } + ty::Dynamic(binder, _) => { + let mut has_emitted = false; + for predicate in binder.iter() { + if let ty::ExistentialPredicate::Trait(ref trait_ref) = predicate.skip_binder() { + let def_id = trait_ref.def_id; + let descr_post = &format!(" trait object{}{}", plural_suffix, data.descr_post); + if check_must_not_suspend_def( + fcx.tcx, + def_id, + hir_id, + SuspendCheckData { descr_post, ..data }, + ) { + has_emitted = true; + break; + } + } + } + has_emitted + } + ty::Tuple(fields) => { + let mut has_emitted = false; + let comps = match data.expr.map(|e| &e.kind) { + Some(hir::ExprKind::Tup(comps)) => { + debug_assert_eq!(comps.len(), fields.len()); + Some(comps) + } + _ => None, + }; + for (i, ty) in fields.iter().enumerate() { + let descr_post = &format!(" in tuple element {i}"); + let span = comps.and_then(|c| c.get(i)).map(|e| e.span).unwrap_or(data.source_span); + if check_must_not_suspend_ty( + fcx, + ty, + hir_id, + SuspendCheckData { + descr_post, + expr: comps.and_then(|comps| comps.get(i)), + source_span: span, + ..data + }, + ) { + has_emitted = true; + } + } + has_emitted + } + ty::Array(ty, len) => { + let descr_pre = &format!("{}array{} of ", data.descr_pre, plural_suffix); + check_must_not_suspend_ty( + fcx, + ty, + hir_id, + SuspendCheckData { + descr_pre, + plural_len: len.try_eval_usize(fcx.tcx, fcx.param_env).unwrap_or(0) as usize + + 1, + ..data + }, + ) + } + _ => false, + } +} + +fn check_must_not_suspend_def( + tcx: TyCtxt<'_>, + def_id: DefId, + hir_id: HirId, + data: SuspendCheckData<'_, '_>, +) -> bool { + if let Some(attr) = tcx.get_attr(def_id, sym::must_not_suspend) { + tcx.struct_span_lint_hir( + rustc_session::lint::builtin::MUST_NOT_SUSPEND, + hir_id, + data.source_span, + |lint| { + let msg = format!( + "{}`{}`{} held across a suspend point, but should not be", + data.descr_pre, + tcx.def_path_str(def_id), + data.descr_post, + ); + let mut err = lint.build(&msg); + + // add span pointing to the offending yield/await + err.span_label(data.yield_span, "the value is held across this suspend point"); + + // Add optional reason note + if let Some(note) = attr.value_str() { + // FIXME(guswynn): consider formatting this better + err.span_note(data.source_span, note.as_str()); + } + + // Add some quick suggestions on what to do + // FIXME: can `drop` work as a suggestion here as well? + err.span_help( + data.source_span, + "consider using a block (`{ ... }`) \ + to shrink the value's scope, ending before the suspend point", + ); + + err.emit(); + }, + ); + + true + } else { + false + } +} diff --git a/compiler/rustc_typeck/src/check/generator_interior/drop_ranges.rs b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges.rs new file mode 100644 index 000000000..518cd7342 --- /dev/null +++ b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges.rs @@ -0,0 +1,309 @@ +//! Drop range analysis finds the portions of the tree where a value is guaranteed to be dropped +//! (i.e. moved, uninitialized, etc.). This is used to exclude the types of those values from the +//! generator type. See `InteriorVisitor::record` for where the results of this analysis are used. +//! +//! There are three phases to this analysis: +//! 1. Use `ExprUseVisitor` to identify the interesting values that are consumed and borrowed. +//! 2. Use `DropRangeVisitor` to find where the interesting values are dropped or reinitialized, +//! and also build a control flow graph. +//! 3. Use `DropRanges::propagate_to_fixpoint` to flow the dropped/reinitialized information through +//! the CFG and find the exact points where we know a value is definitely dropped. +//! +//! The end result is a data structure that maps the post-order index of each node in the HIR tree +//! to a set of values that are known to be dropped at that location. + +use self::cfg_build::build_control_flow_graph; +use self::record_consumed_borrow::find_consumed_and_borrowed; +use crate::check::FnCtxt; +use hir::def_id::DefId; +use hir::{Body, HirId, HirIdMap, Node}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_hir as hir; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::hir::map::Map; +use rustc_middle::hir::place::{PlaceBase, PlaceWithHirId}; +use rustc_middle::ty; +use std::collections::BTreeMap; +use std::fmt::Debug; + +mod cfg_build; +mod cfg_propagate; +mod cfg_visualize; +mod record_consumed_borrow; + +pub fn compute_drop_ranges<'a, 'tcx>( + fcx: &'a FnCtxt<'a, 'tcx>, + def_id: DefId, + body: &'tcx Body<'tcx>, +) -> DropRanges { + if fcx.sess().opts.unstable_opts.drop_tracking { + let consumed_borrowed_places = find_consumed_and_borrowed(fcx, def_id, body); + + let typeck_results = &fcx.typeck_results.borrow(); + let num_exprs = fcx.tcx.region_scope_tree(def_id).body_expr_count(body.id()).unwrap_or(0); + let (mut drop_ranges, borrowed_temporaries) = build_control_flow_graph( + fcx.tcx.hir(), + fcx.tcx, + typeck_results, + consumed_borrowed_places, + body, + num_exprs, + ); + + drop_ranges.propagate_to_fixpoint(); + + debug!("borrowed_temporaries = {borrowed_temporaries:?}"); + DropRanges { + tracked_value_map: drop_ranges.tracked_value_map, + nodes: drop_ranges.nodes, + borrowed_temporaries: Some(borrowed_temporaries), + } + } else { + // If drop range tracking is not enabled, skip all the analysis and produce an + // empty set of DropRanges. + DropRanges { + tracked_value_map: FxHashMap::default(), + nodes: IndexVec::new(), + borrowed_temporaries: None, + } + } +} + +/// Applies `f` to consumable node in the HIR subtree pointed to by `place`. +/// +/// This includes the place itself, and if the place is a reference to a local +/// variable then `f` is also called on the HIR node for that variable as well. +/// +/// For example, if `place` points to `foo()`, then `f` is called once for the +/// result of `foo`. On the other hand, if `place` points to `x` then `f` will +/// be called both on the `ExprKind::Path` node that represents the expression +/// as well as the HirId of the local `x` itself. +fn for_each_consumable<'tcx>(hir: Map<'tcx>, place: TrackedValue, mut f: impl FnMut(TrackedValue)) { + f(place); + let node = hir.find(place.hir_id()); + if let Some(Node::Expr(expr)) = node { + match expr.kind { + hir::ExprKind::Path(hir::QPath::Resolved( + _, + hir::Path { res: hir::def::Res::Local(hir_id), .. }, + )) => { + f(TrackedValue::Variable(*hir_id)); + } + _ => (), + } + } +} + +rustc_index::newtype_index! { + pub struct PostOrderId { + DEBUG_FORMAT = "id({})", + } +} + +rustc_index::newtype_index! { + pub struct TrackedValueIndex { + DEBUG_FORMAT = "hidx({})", + } +} + +/// Identifies a value whose drop state we need to track. +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +enum TrackedValue { + /// Represents a named variable, such as a let binding, parameter, or upvar. + /// + /// The HirId points to the variable's definition site. + Variable(HirId), + /// A value produced as a result of an expression. + /// + /// The HirId points to the expression that returns this value. + Temporary(HirId), +} + +impl Debug for TrackedValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + ty::tls::with_opt(|opt_tcx| { + if let Some(tcx) = opt_tcx { + write!(f, "{}", tcx.hir().node_to_string(self.hir_id())) + } else { + match self { + Self::Variable(hir_id) => write!(f, "Variable({:?})", hir_id), + Self::Temporary(hir_id) => write!(f, "Temporary({:?})", hir_id), + } + } + }) + } +} + +impl TrackedValue { + fn hir_id(&self) -> HirId { + match self { + TrackedValue::Variable(hir_id) | TrackedValue::Temporary(hir_id) => *hir_id, + } + } + + fn from_place_with_projections_allowed(place_with_id: &PlaceWithHirId<'_>) -> Self { + match place_with_id.place.base { + PlaceBase::Rvalue | PlaceBase::StaticItem => { + TrackedValue::Temporary(place_with_id.hir_id) + } + PlaceBase::Local(hir_id) + | PlaceBase::Upvar(ty::UpvarId { var_path: ty::UpvarPath { hir_id }, .. }) => { + TrackedValue::Variable(hir_id) + } + } + } +} + +/// Represents a reason why we might not be able to convert a HirId or Place +/// into a tracked value. +#[derive(Debug)] +enum TrackedValueConversionError { + /// Place projects are not currently supported. + /// + /// The reasoning around these is kind of subtle, so we choose to be more + /// conservative around these for now. There is no reason in theory we + /// cannot support these, we just have not implemented it yet. + PlaceProjectionsNotSupported, +} + +impl TryFrom<&PlaceWithHirId<'_>> for TrackedValue { + type Error = TrackedValueConversionError; + + fn try_from(place_with_id: &PlaceWithHirId<'_>) -> Result<Self, Self::Error> { + if !place_with_id.place.projections.is_empty() { + debug!( + "TrackedValue from PlaceWithHirId: {:?} has projections, which are not supported.", + place_with_id + ); + return Err(TrackedValueConversionError::PlaceProjectionsNotSupported); + } + + Ok(TrackedValue::from_place_with_projections_allowed(place_with_id)) + } +} + +pub struct DropRanges { + tracked_value_map: FxHashMap<TrackedValue, TrackedValueIndex>, + nodes: IndexVec<PostOrderId, NodeInfo>, + borrowed_temporaries: Option<FxHashSet<HirId>>, +} + +impl DropRanges { + pub fn is_dropped_at(&self, hir_id: HirId, location: usize) -> bool { + self.tracked_value_map + .get(&TrackedValue::Temporary(hir_id)) + .or(self.tracked_value_map.get(&TrackedValue::Variable(hir_id))) + .cloned() + .map_or(false, |tracked_value_id| { + self.expect_node(location.into()).drop_state.contains(tracked_value_id) + }) + } + + pub fn is_borrowed_temporary(&self, expr: &hir::Expr<'_>) -> bool { + if let Some(b) = &self.borrowed_temporaries { b.contains(&expr.hir_id) } else { true } + } + + /// Returns a reference to the NodeInfo for a node, panicking if it does not exist + fn expect_node(&self, id: PostOrderId) -> &NodeInfo { + &self.nodes[id] + } +} + +/// Tracks information needed to compute drop ranges. +struct DropRangesBuilder { + /// The core of DropRangesBuilder is a set of nodes, which each represent + /// one expression. We primarily refer to them by their index in a + /// post-order traversal of the HIR tree, since this is what + /// generator_interior uses to talk about yield positions. + /// + /// This IndexVec keeps the relevant details for each node. See the + /// NodeInfo struct for more details, but this information includes things + /// such as the set of control-flow successors, which variables are dropped + /// or reinitialized, and whether each variable has been inferred to be + /// known-dropped or potentially reinitialized at each point. + nodes: IndexVec<PostOrderId, NodeInfo>, + /// We refer to values whose drop state we are tracking by the HirId of + /// where they are defined. Within a NodeInfo, however, we store the + /// drop-state in a bit vector indexed by a HirIdIndex + /// (see NodeInfo::drop_state). The hir_id_map field stores the mapping + /// from HirIds to the HirIdIndex that is used to represent that value in + /// bitvector. + tracked_value_map: FxHashMap<TrackedValue, TrackedValueIndex>, + + /// When building the control flow graph, we don't always know the + /// post-order index of the target node at the point we encounter it. + /// For example, this happens with break and continue. In those cases, + /// we store a pair of the PostOrderId of the source and the HirId + /// of the target. Once we have gathered all of these edges, we make a + /// pass over the set of deferred edges (see process_deferred_edges in + /// cfg_build.rs), look up the PostOrderId for the target (since now the + /// post-order index for all nodes is known), and add missing control flow + /// edges. + deferred_edges: Vec<(PostOrderId, HirId)>, + /// This maps HirIds of expressions to their post-order index. It is + /// used in process_deferred_edges to correctly add back-edges. + post_order_map: HirIdMap<PostOrderId>, +} + +impl Debug for DropRangesBuilder { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DropRanges") + .field("hir_id_map", &self.tracked_value_map) + .field("post_order_maps", &self.post_order_map) + .field("nodes", &self.nodes.iter_enumerated().collect::<BTreeMap<_, _>>()) + .finish() + } +} + +/// DropRanges keeps track of what values are definitely dropped at each point in the code. +/// +/// Values of interest are defined by the hir_id of their place. Locations in code are identified +/// by their index in the post-order traversal. At its core, DropRanges maps +/// (hir_id, post_order_id) -> bool, where a true value indicates that the value is definitely +/// dropped at the point of the node identified by post_order_id. +impl DropRangesBuilder { + /// Returns the number of values (hir_ids) that are tracked + fn num_values(&self) -> usize { + self.tracked_value_map.len() + } + + fn node_mut(&mut self, id: PostOrderId) -> &mut NodeInfo { + let size = self.num_values(); + self.nodes.ensure_contains_elem(id, || NodeInfo::new(size)); + &mut self.nodes[id] + } + + fn add_control_edge(&mut self, from: PostOrderId, to: PostOrderId) { + trace!("adding control edge from {:?} to {:?}", from, to); + self.node_mut(from).successors.push(to); + } +} + +#[derive(Debug)] +struct NodeInfo { + /// IDs of nodes that can follow this one in the control flow + /// + /// If the vec is empty, then control proceeds to the next node. + successors: Vec<PostOrderId>, + + /// List of hir_ids that are dropped by this node. + drops: Vec<TrackedValueIndex>, + + /// List of hir_ids that are reinitialized by this node. + reinits: Vec<TrackedValueIndex>, + + /// Set of values that are definitely dropped at this point. + drop_state: BitSet<TrackedValueIndex>, +} + +impl NodeInfo { + fn new(num_values: usize) -> Self { + Self { + successors: vec![], + drops: vec![], + reinits: vec![], + drop_state: BitSet::new_filled(num_values), + } + } +} diff --git a/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_build.rs b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_build.rs new file mode 100644 index 000000000..a2c23db16 --- /dev/null +++ b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_build.rs @@ -0,0 +1,560 @@ +use super::{ + for_each_consumable, record_consumed_borrow::ConsumedAndBorrowedPlaces, DropRangesBuilder, + NodeInfo, PostOrderId, TrackedValue, TrackedValueIndex, +}; +use hir::{ + intravisit::{self, Visitor}, + Body, Expr, ExprKind, Guard, HirId, LoopIdError, +}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_hir as hir; +use rustc_index::vec::IndexVec; +use rustc_middle::{ + hir::map::Map, + ty::{TyCtxt, TypeckResults}, +}; +use std::mem::swap; + +/// Traverses the body to find the control flow graph and locations for the +/// relevant places are dropped or reinitialized. +/// +/// The resulting structure still needs to be iterated to a fixed point, which +/// can be done with propagate_to_fixpoint in cfg_propagate. +pub(super) fn build_control_flow_graph<'tcx>( + hir: Map<'tcx>, + tcx: TyCtxt<'tcx>, + typeck_results: &TypeckResults<'tcx>, + consumed_borrowed_places: ConsumedAndBorrowedPlaces, + body: &'tcx Body<'tcx>, + num_exprs: usize, +) -> (DropRangesBuilder, FxHashSet<HirId>) { + let mut drop_range_visitor = + DropRangeVisitor::new(hir, tcx, typeck_results, consumed_borrowed_places, num_exprs); + intravisit::walk_body(&mut drop_range_visitor, body); + + drop_range_visitor.drop_ranges.process_deferred_edges(); + if let Some(filename) = &tcx.sess.opts.unstable_opts.dump_drop_tracking_cfg { + super::cfg_visualize::write_graph_to_file(&drop_range_visitor.drop_ranges, filename, tcx); + } + + (drop_range_visitor.drop_ranges, drop_range_visitor.places.borrowed_temporaries) +} + +/// This struct is used to gather the information for `DropRanges` to determine the regions of the +/// HIR tree for which a value is dropped. +/// +/// We are interested in points where a variables is dropped or initialized, and the control flow +/// of the code. We identify locations in code by their post-order traversal index, so it is +/// important for this traversal to match that in `RegionResolutionVisitor` and `InteriorVisitor`. +/// +/// We make several simplifying assumptions, with the goal of being more conservative than +/// necessary rather than less conservative (since being less conservative is unsound, but more +/// conservative is still safe). These assumptions are: +/// +/// 1. Moving a variable `a` counts as a move of the whole variable. +/// 2. Moving a partial path like `a.b.c` is ignored. +/// 3. Reinitializing through a field (e.g. `a.b.c = 5`) counts as a reinitialization of all of +/// `a`. +/// +/// Some examples: +/// +/// Rule 1: +/// ```rust +/// let mut a = (vec![0], vec![0]); +/// drop(a); +/// // `a` is not considered initialized. +/// ``` +/// +/// Rule 2: +/// ```rust +/// let mut a = (vec![0], vec![0]); +/// drop(a.0); +/// drop(a.1); +/// // `a` is still considered initialized. +/// ``` +/// +/// Rule 3: +/// ```compile_fail,E0382 +/// let mut a = (vec![0], vec![0]); +/// drop(a); +/// a.1 = vec![1]; +/// // all of `a` is considered initialized +/// ``` + +struct DropRangeVisitor<'a, 'tcx> { + hir: Map<'tcx>, + places: ConsumedAndBorrowedPlaces, + drop_ranges: DropRangesBuilder, + expr_index: PostOrderId, + tcx: TyCtxt<'tcx>, + typeck_results: &'a TypeckResults<'tcx>, + label_stack: Vec<(Option<rustc_ast::Label>, PostOrderId)>, +} + +impl<'a, 'tcx> DropRangeVisitor<'a, 'tcx> { + fn new( + hir: Map<'tcx>, + tcx: TyCtxt<'tcx>, + typeck_results: &'a TypeckResults<'tcx>, + places: ConsumedAndBorrowedPlaces, + num_exprs: usize, + ) -> Self { + debug!("consumed_places: {:?}", places.consumed); + let drop_ranges = DropRangesBuilder::new( + places.consumed.iter().flat_map(|(_, places)| places.iter().cloned()), + hir, + num_exprs, + ); + Self { + hir, + places, + drop_ranges, + expr_index: PostOrderId::from_u32(0), + typeck_results, + tcx, + label_stack: vec![], + } + } + + fn record_drop(&mut self, value: TrackedValue) { + if self.places.borrowed.contains(&value) { + debug!("not marking {:?} as dropped because it is borrowed at some point", value); + } else { + debug!("marking {:?} as dropped at {:?}", value, self.expr_index); + let count = self.expr_index; + self.drop_ranges.drop_at(value, count); + } + } + + /// ExprUseVisitor's consume callback doesn't go deep enough for our purposes in all + /// expressions. This method consumes a little deeper into the expression when needed. + fn consume_expr(&mut self, expr: &hir::Expr<'_>) { + debug!("consuming expr {:?}, count={:?}", expr.kind, self.expr_index); + let places = self + .places + .consumed + .get(&expr.hir_id) + .map_or(vec![], |places| places.iter().cloned().collect()); + for place in places { + trace!(?place, "consuming place"); + for_each_consumable(self.hir, place, |value| self.record_drop(value)); + } + } + + /// Marks an expression as being reinitialized. + /// + /// Note that we always approximated on the side of things being more + /// initialized than they actually are, as opposed to less. In cases such + /// as `x.y = ...`, we would consider all of `x` as being initialized + /// instead of just the `y` field. + /// + /// This is because it is always safe to consider something initialized + /// even when it is not, but the other way around will cause problems. + /// + /// In the future, we will hopefully tighten up these rules to be more + /// precise. + fn reinit_expr(&mut self, expr: &hir::Expr<'_>) { + // Walk the expression to find the base. For example, in an expression + // like `*a[i].x`, we want to find the `a` and mark that as + // reinitialized. + match expr.kind { + ExprKind::Path(hir::QPath::Resolved( + _, + hir::Path { res: hir::def::Res::Local(hir_id), .. }, + )) => { + // This is the base case, where we have found an actual named variable. + + let location = self.expr_index; + debug!("reinitializing {:?} at {:?}", hir_id, location); + self.drop_ranges.reinit_at(TrackedValue::Variable(*hir_id), location); + } + + ExprKind::Field(base, _) => self.reinit_expr(base), + + // Most expressions do not refer to something where we need to track + // reinitializations. + // + // Some of these may be interesting in the future + ExprKind::Path(..) + | ExprKind::Box(..) + | ExprKind::ConstBlock(..) + | ExprKind::Array(..) + | ExprKind::Call(..) + | ExprKind::MethodCall(..) + | ExprKind::Tup(..) + | ExprKind::Binary(..) + | ExprKind::Unary(..) + | ExprKind::Lit(..) + | ExprKind::Cast(..) + | ExprKind::Type(..) + | ExprKind::DropTemps(..) + | ExprKind::Let(..) + | ExprKind::If(..) + | ExprKind::Loop(..) + | ExprKind::Match(..) + | ExprKind::Closure { .. } + | ExprKind::Block(..) + | ExprKind::Assign(..) + | ExprKind::AssignOp(..) + | ExprKind::Index(..) + | ExprKind::AddrOf(..) + | ExprKind::Break(..) + | ExprKind::Continue(..) + | ExprKind::Ret(..) + | ExprKind::InlineAsm(..) + | ExprKind::Struct(..) + | ExprKind::Repeat(..) + | ExprKind::Yield(..) + | ExprKind::Err => (), + } + } + + /// For an expression with an uninhabited return type (e.g. a function that returns !), + /// this adds a self edge to to the CFG to model the fact that the function does not + /// return. + fn handle_uninhabited_return(&mut self, expr: &Expr<'tcx>) { + let ty = self.typeck_results.expr_ty(expr); + let ty = self.tcx.erase_regions(ty); + let m = self.tcx.parent_module(expr.hir_id).to_def_id(); + let param_env = self.tcx.param_env(m.expect_local()); + if self.tcx.is_ty_uninhabited_from(m, ty, param_env) { + // This function will not return. We model this fact as an infinite loop. + self.drop_ranges.add_control_edge(self.expr_index + 1, self.expr_index + 1); + } + } + + /// Map a Destination to an equivalent expression node + /// + /// The destination field of a Break or Continue expression can target either an + /// expression or a block. The drop range analysis, however, only deals in + /// expression nodes, so blocks that might be the destination of a Break or Continue + /// will not have a PostOrderId. + /// + /// If the destination is an expression, this function will simply return that expression's + /// hir_id. If the destination is a block, this function will return the hir_id of last + /// expression in the block. + fn find_target_expression_from_destination( + &self, + destination: hir::Destination, + ) -> Result<HirId, LoopIdError> { + destination.target_id.map(|target| { + let node = self.hir.get(target); + match node { + hir::Node::Expr(_) => target, + hir::Node::Block(b) => find_last_block_expression(b), + hir::Node::Param(..) + | hir::Node::Item(..) + | hir::Node::ForeignItem(..) + | hir::Node::TraitItem(..) + | hir::Node::ImplItem(..) + | hir::Node::Variant(..) + | hir::Node::Field(..) + | hir::Node::AnonConst(..) + | hir::Node::Stmt(..) + | hir::Node::PathSegment(..) + | hir::Node::Ty(..) + | hir::Node::TypeBinding(..) + | hir::Node::TraitRef(..) + | hir::Node::Pat(..) + | hir::Node::Arm(..) + | hir::Node::Local(..) + | hir::Node::Ctor(..) + | hir::Node::Lifetime(..) + | hir::Node::GenericParam(..) + | hir::Node::Crate(..) + | hir::Node::Infer(..) => bug!("Unsupported branch target: {:?}", node), + } + }) + } +} + +fn find_last_block_expression(block: &hir::Block<'_>) -> HirId { + block.expr.map_or_else( + // If there is no tail expression, there will be at least one statement in the + // block because the block contains a break or continue statement. + || block.stmts.last().unwrap().hir_id, + |expr| expr.hir_id, + ) +} + +impl<'a, 'tcx> Visitor<'tcx> for DropRangeVisitor<'a, 'tcx> { + fn visit_expr(&mut self, expr: &'tcx Expr<'tcx>) { + let mut reinit = None; + match expr.kind { + ExprKind::Assign(lhs, rhs, _) => { + self.visit_expr(lhs); + self.visit_expr(rhs); + + reinit = Some(lhs); + } + + ExprKind::If(test, if_true, if_false) => { + self.visit_expr(test); + + let fork = self.expr_index; + + self.drop_ranges.add_control_edge(fork, self.expr_index + 1); + self.visit_expr(if_true); + let true_end = self.expr_index; + + self.drop_ranges.add_control_edge(fork, self.expr_index + 1); + if let Some(if_false) = if_false { + self.visit_expr(if_false); + } + + self.drop_ranges.add_control_edge(true_end, self.expr_index + 1); + } + ExprKind::Match(scrutinee, arms, ..) => { + // We walk through the match expression almost like a chain of if expressions. + // Here's a diagram to follow along with: + // + // ┌─┐ + // match │A│ { + // ┌───┴─┘ + // │ + // ┌▼┌───►┌─┐ ┌─┐ + // │B│ if │C│ =>│D│, + // └─┘ ├─┴──►└─┴──────┐ + // ┌──┘ │ + // ┌──┘ │ + // │ │ + // ┌▼┌───►┌─┐ ┌─┐ │ + // │E│ if │F│ =>│G│, │ + // └─┘ ├─┴──►└─┴┐ │ + // │ │ │ + // } ▼ ▼ │ + // ┌─┐◄───────────────────┘ + // │H│ + // └─┘ + // + // The order we want is that the scrutinee (A) flows into the first pattern (B), + // which flows into the guard (C). Then the guard either flows into the arm body + // (D) or into the start of the next arm (E). Finally, the body flows to the end + // of the match block (H). + // + // The subsequent arms follow the same ordering. First we go to the pattern, then + // the guard (if present, otherwise it flows straight into the body), then into + // the body and then to the end of the match expression. + // + // The comments below show which edge is being added. + self.visit_expr(scrutinee); + + let (guard_exit, arm_end_ids) = arms.iter().fold( + (self.expr_index, vec![]), + |(incoming_edge, mut arm_end_ids), hir::Arm { pat, body, guard, .. }| { + // A -> B, or C -> E + self.drop_ranges.add_control_edge(incoming_edge, self.expr_index + 1); + self.visit_pat(pat); + // B -> C and E -> F are added implicitly due to the traversal order. + match guard { + Some(Guard::If(expr)) => self.visit_expr(expr), + Some(Guard::IfLet(let_expr)) => { + self.visit_let_expr(let_expr); + } + None => (), + } + // Likewise, C -> D and F -> G are added implicitly. + + // Save C, F, so we can add the other outgoing edge. + let to_next_arm = self.expr_index; + + // The default edge does not get added since we also have an explicit edge, + // so we also need to add an edge to the next node as well. + // + // This adds C -> D, F -> G + self.drop_ranges.add_control_edge(self.expr_index, self.expr_index + 1); + self.visit_expr(body); + + // Save the end of the body so we can add the exit edge once we know where + // the exit is. + arm_end_ids.push(self.expr_index); + + // Pass C to the next iteration, as well as vec![D] + // + // On the last round through, we pass F and vec![D, G] so that we can + // add all the exit edges. + (to_next_arm, arm_end_ids) + }, + ); + // F -> H + self.drop_ranges.add_control_edge(guard_exit, self.expr_index + 1); + + arm_end_ids.into_iter().for_each(|arm_end| { + // D -> H, G -> H + self.drop_ranges.add_control_edge(arm_end, self.expr_index + 1) + }); + } + + ExprKind::Loop(body, label, ..) => { + let loop_begin = self.expr_index + 1; + self.label_stack.push((label, loop_begin)); + if body.stmts.is_empty() && body.expr.is_none() { + // For empty loops we won't have updated self.expr_index after visiting the + // body, meaning we'd get an edge from expr_index to expr_index + 1, but + // instead we want an edge from expr_index + 1 to expr_index + 1. + self.drop_ranges.add_control_edge(loop_begin, loop_begin); + } else { + self.visit_block(body); + self.drop_ranges.add_control_edge(self.expr_index, loop_begin); + } + self.label_stack.pop(); + } + // Find the loop entry by searching through the label stack for either the last entry + // (if label is none), or the first entry where the label matches this one. The Loop + // case maintains this stack mapping labels to the PostOrderId for the loop entry. + ExprKind::Continue(hir::Destination { label, .. }, ..) => self + .label_stack + .iter() + .rev() + .find(|(loop_label, _)| label.is_none() || *loop_label == label) + .map_or((), |(_, target)| { + self.drop_ranges.add_control_edge(self.expr_index, *target) + }), + + ExprKind::Break(destination, ..) => { + // destination either points to an expression or to a block. We use + // find_target_expression_from_destination to use the last expression of the block + // if destination points to a block. + // + // We add an edge to the hir_id of the expression/block we are breaking out of, and + // then in process_deferred_edges we will map this hir_id to its PostOrderId, which + // will refer to the end of the block due to the post order traversal. + self.find_target_expression_from_destination(destination).map_or((), |target| { + self.drop_ranges.add_control_edge_hir_id(self.expr_index, target) + }) + } + + ExprKind::Call(f, args) => { + self.visit_expr(f); + for arg in args { + self.visit_expr(arg); + } + + self.handle_uninhabited_return(expr); + } + ExprKind::MethodCall(_, exprs, _) => { + for expr in exprs { + self.visit_expr(expr); + } + + self.handle_uninhabited_return(expr); + } + + ExprKind::AddrOf(..) + | ExprKind::Array(..) + | ExprKind::AssignOp(..) + | ExprKind::Binary(..) + | ExprKind::Block(..) + | ExprKind::Box(..) + | ExprKind::Cast(..) + | ExprKind::Closure { .. } + | ExprKind::ConstBlock(..) + | ExprKind::DropTemps(..) + | ExprKind::Err + | ExprKind::Field(..) + | ExprKind::Index(..) + | ExprKind::InlineAsm(..) + | ExprKind::Let(..) + | ExprKind::Lit(..) + | ExprKind::Path(..) + | ExprKind::Repeat(..) + | ExprKind::Ret(..) + | ExprKind::Struct(..) + | ExprKind::Tup(..) + | ExprKind::Type(..) + | ExprKind::Unary(..) + | ExprKind::Yield(..) => intravisit::walk_expr(self, expr), + } + + self.expr_index = self.expr_index + 1; + self.drop_ranges.add_node_mapping(expr.hir_id, self.expr_index); + self.consume_expr(expr); + if let Some(expr) = reinit { + self.reinit_expr(expr); + } + } + + fn visit_pat(&mut self, pat: &'tcx hir::Pat<'tcx>) { + intravisit::walk_pat(self, pat); + + // Increment expr_count here to match what InteriorVisitor expects. + self.expr_index = self.expr_index + 1; + } +} + +impl DropRangesBuilder { + fn new( + tracked_values: impl Iterator<Item = TrackedValue>, + hir: Map<'_>, + num_exprs: usize, + ) -> Self { + let mut tracked_value_map = FxHashMap::<_, TrackedValueIndex>::default(); + let mut next = <_>::from(0u32); + for value in tracked_values { + for_each_consumable(hir, value, |value| { + if !tracked_value_map.contains_key(&value) { + tracked_value_map.insert(value, next); + next = next + 1; + } + }); + } + debug!("hir_id_map: {:?}", tracked_value_map); + let num_values = tracked_value_map.len(); + Self { + tracked_value_map, + nodes: IndexVec::from_fn_n(|_| NodeInfo::new(num_values), num_exprs + 1), + deferred_edges: <_>::default(), + post_order_map: <_>::default(), + } + } + + fn tracked_value_index(&self, tracked_value: TrackedValue) -> TrackedValueIndex { + *self.tracked_value_map.get(&tracked_value).unwrap() + } + + /// Adds an entry in the mapping from HirIds to PostOrderIds + /// + /// Needed so that `add_control_edge_hir_id` can work. + fn add_node_mapping(&mut self, node_hir_id: HirId, post_order_id: PostOrderId) { + self.post_order_map.insert(node_hir_id, post_order_id); + } + + /// Like add_control_edge, but uses a hir_id as the target. + /// + /// This can be used for branches where we do not know the PostOrderId of the target yet, + /// such as when handling `break` or `continue`. + fn add_control_edge_hir_id(&mut self, from: PostOrderId, to: HirId) { + self.deferred_edges.push((from, to)); + } + + fn drop_at(&mut self, value: TrackedValue, location: PostOrderId) { + let value = self.tracked_value_index(value); + self.node_mut(location).drops.push(value); + } + + fn reinit_at(&mut self, value: TrackedValue, location: PostOrderId) { + let value = match self.tracked_value_map.get(&value) { + Some(value) => *value, + // If there's no value, this is never consumed and therefore is never dropped. We can + // ignore this. + None => return, + }; + self.node_mut(location).reinits.push(value); + } + + /// Looks up PostOrderId for any control edges added by HirId and adds a proper edge for them. + /// + /// Should be called after visiting the HIR but before solving the control flow, otherwise some + /// edges will be missed. + fn process_deferred_edges(&mut self) { + trace!("processing deferred edges. post_order_map={:#?}", self.post_order_map); + let mut edges = vec![]; + swap(&mut edges, &mut self.deferred_edges); + edges.into_iter().for_each(|(from, to)| { + trace!("Adding deferred edge from {:?} to {:?}", from, to); + let to = *self.post_order_map.get(&to).expect("Expression ID not found"); + trace!("target edge PostOrderId={:?}", to); + self.add_control_edge(from, to) + }); + } +} diff --git a/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_propagate.rs b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_propagate.rs new file mode 100644 index 000000000..139d17d2e --- /dev/null +++ b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_propagate.rs @@ -0,0 +1,92 @@ +use super::{DropRangesBuilder, PostOrderId}; +use rustc_index::{bit_set::BitSet, vec::IndexVec}; +use std::collections::BTreeMap; + +impl DropRangesBuilder { + pub fn propagate_to_fixpoint(&mut self) { + trace!("before fixpoint: {:#?}", self); + let preds = self.compute_predecessors(); + + trace!("predecessors: {:#?}", preds.iter_enumerated().collect::<BTreeMap<_, _>>()); + + let mut new_state = BitSet::new_empty(self.num_values()); + let mut changed_nodes = BitSet::new_empty(self.nodes.len()); + let mut unchanged_mask = BitSet::new_filled(self.nodes.len()); + changed_nodes.insert(0u32.into()); + + let mut propagate = || { + let mut changed = false; + unchanged_mask.insert_all(); + for id in self.nodes.indices() { + trace!("processing {:?}, changed_nodes: {:?}", id, changed_nodes); + // Check if any predecessor has changed, and if not then short-circuit. + // + // We handle the start node specially, since it doesn't have any predecessors, + // but we need to start somewhere. + if match id.index() { + 0 => !changed_nodes.contains(id), + _ => !preds[id].iter().any(|pred| changed_nodes.contains(*pred)), + } { + trace!("short-circuiting because none of {:?} have changed", preds[id]); + unchanged_mask.remove(id); + continue; + } + + if id.index() == 0 { + new_state.clear(); + } else { + // If we are not the start node and we have no predecessors, treat + // everything as dropped because there's no way to get here anyway. + new_state.insert_all(); + }; + + for pred in &preds[id] { + new_state.intersect(&self.nodes[*pred].drop_state); + } + + for drop in &self.nodes[id].drops { + new_state.insert(*drop); + } + + for reinit in &self.nodes[id].reinits { + new_state.remove(*reinit); + } + + if self.nodes[id].drop_state.intersect(&new_state) { + changed_nodes.insert(id); + changed = true; + } else { + unchanged_mask.remove(id); + } + } + + changed_nodes.intersect(&unchanged_mask); + changed + }; + + while propagate() { + trace!("drop_state changed, re-running propagation"); + } + + trace!("after fixpoint: {:#?}", self); + } + + fn compute_predecessors(&self) -> IndexVec<PostOrderId, Vec<PostOrderId>> { + let mut preds = IndexVec::from_fn_n(|_| vec![], self.nodes.len()); + for (id, node) in self.nodes.iter_enumerated() { + // If the node has no explicit successors, we assume that control + // will from this node into the next one. + // + // If there are successors listed, then we assume that all + // possible successors are given and we do not include the default. + if node.successors.len() == 0 && id.index() != self.nodes.len() - 1 { + preds[id + 1].push(id); + } else { + for succ in &node.successors { + preds[*succ].push(id); + } + } + } + preds + } +} diff --git a/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_visualize.rs b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_visualize.rs new file mode 100644 index 000000000..c0a0bfe8e --- /dev/null +++ b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/cfg_visualize.rs @@ -0,0 +1,91 @@ +//! Implementation of GraphWalk for DropRanges so we can visualize the control +//! flow graph when needed for debugging. + +use rustc_graphviz as dot; +use rustc_middle::ty::TyCtxt; + +use super::{DropRangesBuilder, PostOrderId}; + +/// Writes the CFG for DropRangesBuilder to a .dot file for visualization. +/// +/// It is not normally called, but is kept around to easily add debugging +/// code when needed. +pub(super) fn write_graph_to_file( + drop_ranges: &DropRangesBuilder, + filename: &str, + tcx: TyCtxt<'_>, +) { + dot::render( + &DropRangesGraph { drop_ranges, tcx }, + &mut std::fs::File::create(filename).unwrap(), + ) + .unwrap(); +} + +struct DropRangesGraph<'a, 'tcx> { + drop_ranges: &'a DropRangesBuilder, + tcx: TyCtxt<'tcx>, +} + +impl<'a> dot::GraphWalk<'a> for DropRangesGraph<'_, '_> { + type Node = PostOrderId; + + type Edge = (PostOrderId, PostOrderId); + + fn nodes(&'a self) -> dot::Nodes<'a, Self::Node> { + self.drop_ranges.nodes.iter_enumerated().map(|(i, _)| i).collect() + } + + fn edges(&'a self) -> dot::Edges<'a, Self::Edge> { + self.drop_ranges + .nodes + .iter_enumerated() + .flat_map(|(i, node)| { + if node.successors.len() == 0 { + vec![(i, i + 1)] + } else { + node.successors.iter().map(move |&s| (i, s)).collect() + } + }) + .collect() + } + + fn source(&'a self, edge: &Self::Edge) -> Self::Node { + edge.0 + } + + fn target(&'a self, edge: &Self::Edge) -> Self::Node { + edge.1 + } +} + +impl<'a> dot::Labeller<'a> for DropRangesGraph<'_, '_> { + type Node = PostOrderId; + + type Edge = (PostOrderId, PostOrderId); + + fn graph_id(&'a self) -> dot::Id<'a> { + dot::Id::new("drop_ranges").unwrap() + } + + fn node_id(&'a self, n: &Self::Node) -> dot::Id<'a> { + dot::Id::new(format!("id{}", n.index())).unwrap() + } + + fn node_label(&'a self, n: &Self::Node) -> dot::LabelText<'a> { + dot::LabelText::LabelStr( + format!( + "{n:?}: {}", + self.drop_ranges + .post_order_map + .iter() + .find(|(_hir_id, &post_order_id)| post_order_id == *n) + .map_or("<unknown>".into(), |(hir_id, _)| self + .tcx + .hir() + .node_to_string(*hir_id)) + ) + .into(), + ) + } +} diff --git a/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/record_consumed_borrow.rs b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/record_consumed_borrow.rs new file mode 100644 index 000000000..ded0888c3 --- /dev/null +++ b/compiler/rustc_typeck/src/check/generator_interior/drop_ranges/record_consumed_borrow.rs @@ -0,0 +1,232 @@ +use super::TrackedValue; +use crate::{ + check::FnCtxt, + expr_use_visitor::{self, ExprUseVisitor}, +}; +use hir::{def_id::DefId, Body, HirId, HirIdMap}; +use rustc_data_structures::fx::FxHashSet; +use rustc_hir as hir; +use rustc_middle::hir::place::{PlaceBase, Projection, ProjectionKind}; +use rustc_middle::ty::{ParamEnv, TyCtxt}; + +pub(super) fn find_consumed_and_borrowed<'a, 'tcx>( + fcx: &'a FnCtxt<'a, 'tcx>, + def_id: DefId, + body: &'tcx Body<'tcx>, +) -> ConsumedAndBorrowedPlaces { + let mut expr_use_visitor = ExprUseDelegate::new(fcx.tcx, fcx.param_env); + expr_use_visitor.consume_body(fcx, def_id, body); + expr_use_visitor.places +} + +pub(super) struct ConsumedAndBorrowedPlaces { + /// Records the variables/expressions that are dropped by a given expression. + /// + /// The key is the hir-id of the expression, and the value is a set or hir-ids for variables + /// or values that are consumed by that expression. + /// + /// Note that this set excludes "partial drops" -- for example, a statement like `drop(x.y)` is + /// not considered a drop of `x`, although it would be a drop of `x.y`. + pub(super) consumed: HirIdMap<FxHashSet<TrackedValue>>, + + /// A set of hir-ids of values or variables that are borrowed at some point within the body. + pub(super) borrowed: FxHashSet<TrackedValue>, + + /// A set of hir-ids of values or variables that are borrowed at some point within the body. + pub(super) borrowed_temporaries: FxHashSet<HirId>, +} + +/// Works with ExprUseVisitor to find interesting values for the drop range analysis. +/// +/// Interesting values are those that are either dropped or borrowed. For dropped values, we also +/// record the parent expression, which is the point where the drop actually takes place. +struct ExprUseDelegate<'tcx> { + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, + places: ConsumedAndBorrowedPlaces, +} + +impl<'tcx> ExprUseDelegate<'tcx> { + fn new(tcx: TyCtxt<'tcx>, param_env: ParamEnv<'tcx>) -> Self { + Self { + tcx, + param_env, + places: ConsumedAndBorrowedPlaces { + consumed: <_>::default(), + borrowed: <_>::default(), + borrowed_temporaries: <_>::default(), + }, + } + } + + fn consume_body(&mut self, fcx: &'_ FnCtxt<'_, 'tcx>, def_id: DefId, body: &'tcx Body<'tcx>) { + // Run ExprUseVisitor to find where values are consumed. + ExprUseVisitor::new( + self, + &fcx.infcx, + def_id.expect_local(), + fcx.param_env, + &fcx.typeck_results.borrow(), + ) + .consume_body(body); + } + + fn mark_consumed(&mut self, consumer: HirId, target: TrackedValue) { + self.places.consumed.entry(consumer).or_insert_with(|| <_>::default()); + + debug!(?consumer, ?target, "mark_consumed"); + self.places.consumed.get_mut(&consumer).map(|places| places.insert(target)); + } + + fn borrow_place(&mut self, place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>) { + self.places + .borrowed + .insert(TrackedValue::from_place_with_projections_allowed(place_with_id)); + + // Ordinarily a value is consumed by it's parent, but in the special case of a + // borrowed RValue, we create a reference that lives as long as the temporary scope + // for that expression (typically, the innermost statement, but sometimes the enclosing + // block). We record this fact here so that later in generator_interior + // we can use the correct scope. + // + // We special case borrows through a dereference (`&*x`, `&mut *x` where `x` is + // some rvalue expression), since these are essentially a copy of a pointer. + // In other words, this borrow does not refer to the + // temporary (`*x`), but to the referent (whatever `x` is a borrow of). + // + // We were considering that we might encounter problems down the line if somehow, + // some part of the compiler were to look at this result and try to use it to + // drive a borrowck-like analysis (this does not currently happen, as of this writing). + // But even this should be fine, because the lifetime of the dereferenced reference + // found in the rvalue is only significant as an intermediate 'link' to the value we + // are producing, and we separately track whether that value is live over a yield. + // Example: + // + // ```notrust + // fn identity<T>(x: &mut T) -> &mut T { x } + // let a: A = ...; + // let y: &'y mut A = &mut *identity(&'a mut a); + // ^^^^^^^^^^^^^^^^^^^^^^^^^ the borrow we are talking about + // ``` + // + // The expression `*identity(...)` is a deref of an rvalue, + // where the `identity(...)` (the rvalue) produces a return type + // of `&'rv mut A`, where `'a: 'rv`. We then assign this result to + // `'y`, resulting in (transitively) `'a: 'y` (i.e., while `y` is in use, + // `a` will be considered borrowed). Other parts of the code will ensure + // that if `y` is live over a yield, `&'y mut A` appears in the generator + // state. If `'y` is live, then any sound region analysis must conclude + // that `'a` is also live. So if this causes a bug, blame some other + // part of the code! + let is_deref = place_with_id + .place + .projections + .iter() + .any(|Projection { kind, .. }| *kind == ProjectionKind::Deref); + + if let (false, PlaceBase::Rvalue) = (is_deref, place_with_id.place.base) { + self.places.borrowed_temporaries.insert(place_with_id.hir_id); + } + } +} + +impl<'tcx> expr_use_visitor::Delegate<'tcx> for ExprUseDelegate<'tcx> { + fn consume( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + ) { + let hir = self.tcx.hir(); + let parent = match hir.find_parent_node(place_with_id.hir_id) { + Some(parent) => parent, + None => place_with_id.hir_id, + }; + debug!( + "consume {:?}; diag_expr_id={}, using parent {}", + place_with_id, + hir.node_to_string(diag_expr_id), + hir.node_to_string(parent) + ); + place_with_id + .try_into() + .map_or((), |tracked_value| self.mark_consumed(parent, tracked_value)); + } + + fn borrow( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + bk: rustc_middle::ty::BorrowKind, + ) { + debug!( + "borrow: place_with_id = {place_with_id:?}, diag_expr_id={diag_expr_id:?}, \ + borrow_kind={bk:?}" + ); + + self.borrow_place(place_with_id); + } + + fn copy( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + _diag_expr_id: HirId, + ) { + debug!("copy: place_with_id = {place_with_id:?}"); + + self.places + .borrowed + .insert(TrackedValue::from_place_with_projections_allowed(place_with_id)); + + // For copied we treat this mostly like a borrow except that we don't add the place + // to borrowed_temporaries because the copy is consumed. + } + + fn mutate( + &mut self, + assignee_place: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + ) { + debug!("mutate {assignee_place:?}; diag_expr_id={diag_expr_id:?}"); + + if assignee_place.place.base == PlaceBase::Rvalue + && assignee_place.place.projections.is_empty() + { + // Assigning to an Rvalue is illegal unless done through a dereference. We would have + // already gotten a type error, so we will just return here. + return; + } + + // If the type being assigned needs dropped, then the mutation counts as a borrow + // since it is essentially doing `Drop::drop(&mut x); x = new_value;`. + if assignee_place.place.base_ty.needs_drop(self.tcx, self.param_env) { + self.places + .borrowed + .insert(TrackedValue::from_place_with_projections_allowed(assignee_place)); + } + } + + fn bind( + &mut self, + binding_place: &expr_use_visitor::PlaceWithHirId<'tcx>, + diag_expr_id: HirId, + ) { + debug!("bind {binding_place:?}; diag_expr_id={diag_expr_id:?}"); + } + + fn fake_read( + &mut self, + place_with_id: &expr_use_visitor::PlaceWithHirId<'tcx>, + cause: rustc_middle::mir::FakeReadCause, + diag_expr_id: HirId, + ) { + debug!( + "fake_read place_with_id={place_with_id:?}; cause={cause:?}; diag_expr_id={diag_expr_id:?}" + ); + + // fake reads happen in places like the scrutinee of a match expression. + // we treat those as a borrow, much like a copy: the idea is that we are + // transiently creating a `&T` ref that we can read from to observe the current + // value (this `&T` is immediately dropped afterwards). + self.borrow_place(place_with_id); + } +} |