author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
commit    698f8c2f01ea549d77d7dc3338a12e04c11057b9
tree      173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_mir_dataflow/src
parent    Initial commit.
Adding upstream version 1.64.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_mir_dataflow/src')
23 files changed, 7972 insertions, 0 deletions
diff --git a/compiler/rustc_mir_dataflow/src/drop_flag_effects.rs b/compiler/rustc_mir_dataflow/src/drop_flag_effects.rs new file mode 100644 index 000000000..f102872cd --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/drop_flag_effects.rs @@ -0,0 +1,268 @@ +use crate::elaborate_drops::DropFlagState; +use rustc_middle::mir::{self, Body, Location}; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_target::abi::VariantIdx; + +use super::indexes::MovePathIndex; +use super::move_paths::{InitKind, LookupResult, MoveData}; +use super::MoveDataParamEnv; + +pub fn move_path_children_matching<'tcx, F>( + move_data: &MoveData<'tcx>, + path: MovePathIndex, + mut cond: F, +) -> Option<MovePathIndex> +where + F: FnMut(mir::PlaceElem<'tcx>) -> bool, +{ + let mut next_child = move_data.move_paths[path].first_child; + while let Some(child_index) = next_child { + let move_path_children = &move_data.move_paths[child_index]; + if let Some(&elem) = move_path_children.place.projection.last() { + if cond(elem) { + return Some(child_index); + } + } + next_child = move_path_children.next_sibling; + } + + None +} + +/// When enumerating the child fragments of a path, don't recurse into +/// paths (1.) past arrays, slices, and pointers, nor (2.) into a type +/// that implements `Drop`. +/// +/// Places behind references or arrays are not tracked by elaboration +/// and are always assumed to be initialized when accessible. As +/// references and indexes can be reseated, trying to track them can +/// only lead to trouble. +/// +/// Places behind ADT's with a Drop impl are not tracked by +/// elaboration since they can never have a drop-flag state that +/// differs from that of the parent with the Drop impl. +/// +/// In both cases, the contents can only be accessed if and only if +/// their parents are initialized. This implies for example that there +/// is no need to maintain separate drop flags to track such state. +// +// FIXME: we have to do something for moving slice patterns. +fn place_contents_drop_state_cannot_differ<'tcx>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + place: mir::Place<'tcx>, +) -> bool { + let ty = place.ty(body, tcx).ty; + match ty.kind() { + ty::Array(..) => { + debug!( + "place_contents_drop_state_cannot_differ place: {:?} ty: {:?} => false", + place, ty + ); + false + } + ty::Slice(..) | ty::Ref(..) | ty::RawPtr(..) => { + debug!( + "place_contents_drop_state_cannot_differ place: {:?} ty: {:?} refd => true", + place, ty + ); + true + } + ty::Adt(def, _) if (def.has_dtor(tcx) && !def.is_box()) || def.is_union() => { + debug!( + "place_contents_drop_state_cannot_differ place: {:?} ty: {:?} Drop => true", + place, ty + ); + true + } + _ => false, + } +} + +pub fn on_lookup_result_bits<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + move_data: &MoveData<'tcx>, + lookup_result: LookupResult, + each_child: F, +) where + F: FnMut(MovePathIndex), +{ + match lookup_result { + LookupResult::Parent(..) 
=> { + // access to untracked value - do not touch children + } + LookupResult::Exact(e) => on_all_children_bits(tcx, body, move_data, e, each_child), + } +} + +pub fn on_all_children_bits<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + move_data: &MoveData<'tcx>, + move_path_index: MovePathIndex, + mut each_child: F, +) where + F: FnMut(MovePathIndex), +{ + fn is_terminal_path<'tcx>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + move_data: &MoveData<'tcx>, + path: MovePathIndex, + ) -> bool { + place_contents_drop_state_cannot_differ(tcx, body, move_data.move_paths[path].place) + } + + fn on_all_children_bits<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + move_data: &MoveData<'tcx>, + move_path_index: MovePathIndex, + each_child: &mut F, + ) where + F: FnMut(MovePathIndex), + { + each_child(move_path_index); + + if is_terminal_path(tcx, body, move_data, move_path_index) { + return; + } + + let mut next_child_index = move_data.move_paths[move_path_index].first_child; + while let Some(child_index) = next_child_index { + on_all_children_bits(tcx, body, move_data, child_index, each_child); + next_child_index = move_data.move_paths[child_index].next_sibling; + } + } + on_all_children_bits(tcx, body, move_data, move_path_index, &mut each_child); +} + +pub fn on_all_drop_children_bits<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + ctxt: &MoveDataParamEnv<'tcx>, + path: MovePathIndex, + mut each_child: F, +) where + F: FnMut(MovePathIndex), +{ + on_all_children_bits(tcx, body, &ctxt.move_data, path, |child| { + let place = &ctxt.move_data.move_paths[path].place; + let ty = place.ty(body, tcx).ty; + debug!("on_all_drop_children_bits({:?}, {:?} : {:?})", path, place, ty); + + let erased_ty = tcx.erase_regions(ty); + if erased_ty.needs_drop(tcx, ctxt.param_env) { + each_child(child); + } else { + debug!("on_all_drop_children_bits - skipping") + } + }) +} + +pub fn drop_flag_effects_for_function_entry<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + ctxt: &MoveDataParamEnv<'tcx>, + mut callback: F, +) where + F: FnMut(MovePathIndex, DropFlagState), +{ + let move_data = &ctxt.move_data; + for arg in body.args_iter() { + let place = mir::Place::from(arg); + let lookup_result = move_data.rev_lookup.find(place.as_ref()); + on_lookup_result_bits(tcx, body, move_data, lookup_result, |mpi| { + callback(mpi, DropFlagState::Present) + }); + } +} + +pub fn drop_flag_effects_for_location<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + ctxt: &MoveDataParamEnv<'tcx>, + loc: Location, + mut callback: F, +) where + F: FnMut(MovePathIndex, DropFlagState), +{ + let move_data = &ctxt.move_data; + debug!("drop_flag_effects_for_location({:?})", loc); + + // first, move out of the RHS + for mi in &move_data.loc_map[loc] { + let path = mi.move_path_index(move_data); + debug!("moving out of path {:?}", move_data.move_paths[path]); + + on_all_children_bits(tcx, body, move_data, path, |mpi| callback(mpi, DropFlagState::Absent)) + } + + debug!("drop_flag_effects: assignment for location({:?})", loc); + + for_location_inits(tcx, body, move_data, loc, |mpi| callback(mpi, DropFlagState::Present)); +} + +pub fn for_location_inits<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + move_data: &MoveData<'tcx>, + loc: Location, + mut callback: F, +) where + F: FnMut(MovePathIndex), +{ + for ii in &move_data.init_loc_map[loc] { + let init = move_data.inits[*ii]; + match init.kind { + InitKind::Deep => { + let path = init.path; + + on_all_children_bits(tcx, body, move_data, path, &mut callback) + } + 
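The two helpers above are the heart of the flag tracking: a move out of a path flips its drop flag to `Absent`, an initialization flips it back to `Present`. A minimal source-level illustration of where those transitions come from (ordinary Rust, not rustc internals; the comments map statements to flag effects):

```rust
fn flag_transitions(replace: bool) {
    let s = String::from("x"); // init: flag(s) -> Present
    let t = s;                 // move out of `s`: flag(s) -> Absent, flag(t) -> Present
    let mut u = t;             // flag(t) -> Absent, flag(u) -> Present
    if replace {
        u = String::from("y"); // old `u` dropped here; flag(u) stays Present
    }
    println!("{u}");
}                              // scope end: only `u` still needs a drop
```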
InitKind::Shallow => { + let mpi = init.path; + callback(mpi); + } + InitKind::NonPanicPathOnly => (), + } + } +} + +/// Calls `handle_inactive_variant` for each descendant move path of `enum_place` that contains a +/// `Downcast` to a variant besides the `active_variant`. +/// +/// NOTE: If there are no move paths corresponding to an inactive variant, +/// `handle_inactive_variant` will not be called for that variant. +pub(crate) fn on_all_inactive_variants<'tcx>( + tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + move_data: &MoveData<'tcx>, + enum_place: mir::Place<'tcx>, + active_variant: VariantIdx, + mut handle_inactive_variant: impl FnMut(MovePathIndex), +) { + let LookupResult::Exact(enum_mpi) = move_data.rev_lookup.find(enum_place.as_ref()) else { + return; + }; + + let enum_path = &move_data.move_paths[enum_mpi]; + for (variant_mpi, variant_path) in enum_path.children(&move_data.move_paths) { + // Because of the way we build the `MoveData` tree, each child should have exactly one more + // projection than `enum_place`. This additional projection must be a downcast since the + // base is an enum. + let (downcast, base_proj) = variant_path.place.projection.split_last().unwrap(); + assert_eq!(enum_place.projection.len(), base_proj.len()); + + let mir::ProjectionElem::Downcast(_, variant_idx) = *downcast else { + unreachable!(); + }; + + if variant_idx != active_variant { + on_all_children_bits(tcx, body, move_data, variant_mpi, |mpi| { + handle_inactive_variant(mpi) + }); + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/elaborate_drops.rs b/compiler/rustc_mir_dataflow/src/elaborate_drops.rs new file mode 100644 index 000000000..c0b0cc3c5 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/elaborate_drops.rs @@ -0,0 +1,1056 @@ +use rustc_hir as hir; +use rustc_hir::lang_items::LangItem; +use rustc_index::vec::Idx; +use rustc_middle::mir::patch::MirPatch; +use rustc_middle::mir::*; +use rustc_middle::traits::Reveal; +use rustc_middle::ty::subst::SubstsRef; +use rustc_middle::ty::util::IntTypeExt; +use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_target::abi::VariantIdx; +use std::{fmt, iter}; + +/// The value of an inserted drop flag. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum DropFlagState { + /// The tracked value is initialized and needs to be dropped when leaving its scope. + Present, + + /// The tracked value is uninitialized or was moved out of and does not need to be dropped when + /// leaving its scope. + Absent, +} + +impl DropFlagState { + pub fn value(self) -> bool { + match self { + DropFlagState::Present => true, + DropFlagState::Absent => false, + } + } +} + +/// Describes how/if a value should be dropped. +#[derive(Debug)] +pub enum DropStyle { + /// The value is already dead at the drop location, no drop will be executed. + Dead, + + /// The value is known to always be initialized at the drop location, drop will always be + /// executed. + Static, + + /// Whether the value needs to be dropped depends on its drop flag. + Conditional, + + /// An "open" drop is one where only the fields of a value are dropped. + /// + /// For example, this happens when moving out of a struct field: The rest of the struct will be + /// dropped in such an "open" drop. It is also used to generate drop glue for the individual + /// components of a value, for example for dropping array elements. + Open, +} + +/// Which drop flags to affect/check with an operation. 
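Each `DropStyle` corresponds to a recognizable source pattern. A small sketch in plain Rust (the style annotations in the comments follow the enum docs above and are illustrative, not emitted by the compiler):

```rust
fn styles(cond: bool) {
    let a = String::new();
    drop(a);               // `a` is known uninitialized afterwards: its scope-end drop is Dead
    let b = String::new(); // always initialized at scope end: its drop is Static
    let c = String::new();
    if cond {
        drop(c);           // `c` now needs a drop flag: its scope-end drop is Conditional
    }
    let d = (String::new(), String::new());
    let _x = d.0;          // partial move: the drop of `d` becomes Open (per-field)
}
```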
+#[derive(Debug)] +pub enum DropFlagMode { + /// Only affect the top-level drop flag, not that of any contained fields. + Shallow, + /// Affect all nested drop flags in addition to the top-level one. + Deep, +} + +/// Describes if unwinding is necessary and where to unwind to if a panic occurs. +#[derive(Copy, Clone, Debug)] +pub enum Unwind { + /// Unwind to this block. + To(BasicBlock), + /// Already in an unwind path, any panic will cause an abort. + InCleanup, +} + +impl Unwind { + fn is_cleanup(self) -> bool { + match self { + Unwind::To(..) => false, + Unwind::InCleanup => true, + } + } + + fn into_option(self) -> Option<BasicBlock> { + match self { + Unwind::To(bb) => Some(bb), + Unwind::InCleanup => None, + } + } + + fn map<F>(self, f: F) -> Self + where + F: FnOnce(BasicBlock) -> BasicBlock, + { + match self { + Unwind::To(bb) => Unwind::To(f(bb)), + Unwind::InCleanup => Unwind::InCleanup, + } + } +} + +pub trait DropElaborator<'a, 'tcx>: fmt::Debug { + /// The type representing paths that can be moved out of. + /// + /// Users can move out of individual fields of a struct, such as `a.b.c`. This type is used to + /// represent such move paths. Sometimes tracking individual move paths is not necessary, in + /// which case this may be set to (for example) `()`. + type Path: Copy + fmt::Debug; + + // Accessors + + fn patch(&mut self) -> &mut MirPatch<'tcx>; + fn body(&self) -> &'a Body<'tcx>; + fn tcx(&self) -> TyCtxt<'tcx>; + fn param_env(&self) -> ty::ParamEnv<'tcx>; + + // Drop logic + + /// Returns how `path` should be dropped, given `mode`. + fn drop_style(&self, path: Self::Path, mode: DropFlagMode) -> DropStyle; + + /// Returns the drop flag of `path` as a MIR `Operand` (or `None` if `path` has no drop flag). + fn get_drop_flag(&mut self, path: Self::Path) -> Option<Operand<'tcx>>; + + /// Modifies the MIR patch so that the drop flag of `path` (if any) is cleared at `location`. + /// + /// If `mode` is deep, drop flags of all child paths should also be cleared by inserting + /// additional statements. + fn clear_drop_flag(&mut self, location: Location, path: Self::Path, mode: DropFlagMode); + + // Subpaths + + /// Returns the subpath of a field of `path` (or `None` if there is no dedicated subpath). + /// + /// If this returns `None`, `field` will not get a dedicated drop flag. + fn field_subpath(&self, path: Self::Path, field: Field) -> Option<Self::Path>; + + /// Returns the subpath of a dereference of `path` (or `None` if there is no dedicated subpath). + /// + /// If this returns `None`, `*path` will not get a dedicated drop flag. + /// + /// This is only relevant for `Box<T>`, where the contained `T` can be moved out of the box. + fn deref_subpath(&self, path: Self::Path) -> Option<Self::Path>; + + /// Returns the subpath of downcasting `path` to one of its variants. + /// + /// If this returns `None`, the downcast of `path` will not get a dedicated drop flag. + fn downcast_subpath(&self, path: Self::Path, variant: VariantIdx) -> Option<Self::Path>; + + /// Returns the subpath of indexing a fixed-size array `path`. + /// + /// If this returns `None`, elements of `path` will not get a dedicated drop flag. + /// + /// This is only relevant for array patterns, which can move out of individual array elements. 
+ fn array_subpath(&self, path: Self::Path, index: u64, size: u64) -> Option<Self::Path>; +} + +#[derive(Debug)] +struct DropCtxt<'l, 'b, 'tcx, D> +where + D: DropElaborator<'b, 'tcx>, +{ + elaborator: &'l mut D, + + source_info: SourceInfo, + + place: Place<'tcx>, + path: D::Path, + succ: BasicBlock, + unwind: Unwind, +} + +/// "Elaborates" a drop of `place`/`path` and patches `bb`'s terminator to execute it. +/// +/// The passed `elaborator` is used to determine what should happen at the drop terminator. It +/// decides whether the drop can be statically determined or whether it needs a dynamic drop flag, +/// and whether the drop is "open", ie. should be expanded to drop all subfields of the dropped +/// value. +/// +/// When this returns, the MIR patch in the `elaborator` contains the necessary changes. +pub fn elaborate_drop<'b, 'tcx, D>( + elaborator: &mut D, + source_info: SourceInfo, + place: Place<'tcx>, + path: D::Path, + succ: BasicBlock, + unwind: Unwind, + bb: BasicBlock, +) where + D: DropElaborator<'b, 'tcx>, + 'tcx: 'b, +{ + DropCtxt { elaborator, source_info, place, path, succ, unwind }.elaborate_drop(bb) +} + +impl<'l, 'b, 'tcx, D> DropCtxt<'l, 'b, 'tcx, D> +where + D: DropElaborator<'b, 'tcx>, + 'tcx: 'b, +{ + fn place_ty(&self, place: Place<'tcx>) -> Ty<'tcx> { + place.ty(self.elaborator.body(), self.tcx()).ty + } + + fn tcx(&self) -> TyCtxt<'tcx> { + self.elaborator.tcx() + } + + /// This elaborates a single drop instruction, located at `bb`, and + /// patches over it. + /// + /// The elaborated drop checks the drop flags to only drop what + /// is initialized. + /// + /// In addition, the relevant drop flags also need to be cleared + /// to avoid double-drops. However, in the middle of a complex + /// drop, one must avoid clearing some of the flags before they + /// are read, as that would cause a memory leak. + /// + /// In particular, when dropping an ADT, multiple fields may be + /// joined together under the `rest` subpath. They are all controlled + /// by the primary drop flag, but only the last rest-field dropped + /// should clear it (and it must also not clear anything else). + // + // FIXME: I think we should just control the flags externally, + // and then we do not need this machinery. + pub fn elaborate_drop(&mut self, bb: BasicBlock) { + debug!("elaborate_drop({:?}, {:?})", bb, self); + let style = self.elaborator.drop_style(self.path, DropFlagMode::Deep); + debug!("elaborate_drop({:?}, {:?}): live - {:?}", bb, self, style); + match style { + DropStyle::Dead => { + self.elaborator + .patch() + .patch_terminator(bb, TerminatorKind::Goto { target: self.succ }); + } + DropStyle::Static => { + self.elaborator.patch().patch_terminator( + bb, + TerminatorKind::Drop { + place: self.place, + target: self.succ, + unwind: self.unwind.into_option(), + }, + ); + } + DropStyle::Conditional => { + let drop_bb = self.complete_drop(self.succ, self.unwind); + self.elaborator + .patch() + .patch_terminator(bb, TerminatorKind::Goto { target: drop_bb }); + } + DropStyle::Open => { + let drop_bb = self.open_drop(); + self.elaborator + .patch() + .patch_terminator(bb, TerminatorKind::Goto { target: drop_bb }); + } + } + } + + /// Returns the place and move path for each field of `variant`, + /// (the move path is `None` if the field is a rest field). 
+ fn move_paths_for_fields( + &self, + base_place: Place<'tcx>, + variant_path: D::Path, + variant: &'tcx ty::VariantDef, + substs: SubstsRef<'tcx>, + ) -> Vec<(Place<'tcx>, Option<D::Path>)> { + variant + .fields + .iter() + .enumerate() + .map(|(i, f)| { + let field = Field::new(i); + let subpath = self.elaborator.field_subpath(variant_path, field); + let tcx = self.tcx(); + + assert_eq!(self.elaborator.param_env().reveal(), Reveal::All); + let field_ty = + tcx.normalize_erasing_regions(self.elaborator.param_env(), f.ty(tcx, substs)); + (tcx.mk_place_field(base_place, field, field_ty), subpath) + }) + .collect() + } + + fn drop_subpath( + &mut self, + place: Place<'tcx>, + path: Option<D::Path>, + succ: BasicBlock, + unwind: Unwind, + ) -> BasicBlock { + if let Some(path) = path { + debug!("drop_subpath: for std field {:?}", place); + + DropCtxt { + elaborator: self.elaborator, + source_info: self.source_info, + path, + place, + succ, + unwind, + } + .elaborated_drop_block() + } else { + debug!("drop_subpath: for rest field {:?}", place); + + DropCtxt { + elaborator: self.elaborator, + source_info: self.source_info, + place, + succ, + unwind, + // Using `self.path` here to condition the drop on + // our own drop flag. + path: self.path, + } + .complete_drop(succ, unwind) + } + } + + /// Creates one-half of the drop ladder for a list of fields, and return + /// the list of steps in it in reverse order, with the first step + /// dropping 0 fields and so on. + /// + /// `unwind_ladder` is such a list of steps in reverse order, + /// which is called if the matching step of the drop glue panics. + fn drop_halfladder( + &mut self, + unwind_ladder: &[Unwind], + mut succ: BasicBlock, + fields: &[(Place<'tcx>, Option<D::Path>)], + ) -> Vec<BasicBlock> { + iter::once(succ) + .chain(fields.iter().rev().zip(unwind_ladder).map(|(&(place, path), &unwind_succ)| { + succ = self.drop_subpath(place, path, succ, unwind_succ); + succ + })) + .collect() + } + + fn drop_ladder_bottom(&mut self) -> (BasicBlock, Unwind) { + // Clear the "master" drop flag at the end. This is needed + // because the "master" drop protects the ADT's discriminant, + // which is invalidated after the ADT is dropped. + (self.drop_flag_reset_block(DropFlagMode::Shallow, self.succ, self.unwind), self.unwind) + } + + /// Creates a full drop ladder, consisting of 2 connected half-drop-ladders + /// + /// For example, with 3 fields, the drop ladder is + /// + /// .d0: + /// ELAB(drop location.0 [target=.d1, unwind=.c1]) + /// .d1: + /// ELAB(drop location.1 [target=.d2, unwind=.c2]) + /// .d2: + /// ELAB(drop location.2 [target=`self.succ`, unwind=`self.unwind`]) + /// .c1: + /// ELAB(drop location.1 [target=.c2]) + /// .c2: + /// ELAB(drop location.2 [target=`self.unwind`]) + /// + /// NOTE: this does not clear the master drop flag, so you need + /// to point succ/unwind on a `drop_ladder_bottom`. 
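The ladder in the diagram can be observed from ordinary code: moving one field out of an aggregate forces exactly this per-field expansion, with the unwind half taken if a destructor panics. A runnable illustration (not rustc API):

```rust
struct Loud(&'static str);

impl Drop for Loud {
    fn drop(&mut self) {
        println!("dropping {}", self.0);
    }
}

fn main() {
    let tuple = (Loud("0"), Loud("1"), Loud("2"));
    let _moved = tuple.1; // partial move: `tuple`'s drop is elaborated into a per-field ladder
}
// prints "dropping 1" (for `_moved`), then "dropping 0" and "dropping 2"
// for the fields of `tuple` that were never moved out of
```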
+ fn drop_ladder( + &mut self, + fields: Vec<(Place<'tcx>, Option<D::Path>)>, + succ: BasicBlock, + unwind: Unwind, + ) -> (BasicBlock, Unwind) { + debug!("drop_ladder({:?}, {:?})", self, fields); + + let mut fields = fields; + fields.retain(|&(place, _)| { + self.place_ty(place).needs_drop(self.tcx(), self.elaborator.param_env()) + }); + + debug!("drop_ladder - fields needing drop: {:?}", fields); + + let unwind_ladder = vec![Unwind::InCleanup; fields.len() + 1]; + let unwind_ladder: Vec<_> = if let Unwind::To(target) = unwind { + let halfladder = self.drop_halfladder(&unwind_ladder, target, &fields); + halfladder.into_iter().map(Unwind::To).collect() + } else { + unwind_ladder + }; + + let normal_ladder = self.drop_halfladder(&unwind_ladder, succ, &fields); + + (*normal_ladder.last().unwrap(), *unwind_ladder.last().unwrap()) + } + + fn open_drop_for_tuple(&mut self, tys: &[Ty<'tcx>]) -> BasicBlock { + debug!("open_drop_for_tuple({:?}, {:?})", self, tys); + + let fields = tys + .iter() + .enumerate() + .map(|(i, &ty)| { + ( + self.tcx().mk_place_field(self.place, Field::new(i), ty), + self.elaborator.field_subpath(self.path, Field::new(i)), + ) + }) + .collect(); + + let (succ, unwind) = self.drop_ladder_bottom(); + self.drop_ladder(fields, succ, unwind).0 + } + + fn open_drop_for_box(&mut self, adt: ty::AdtDef<'tcx>, substs: SubstsRef<'tcx>) -> BasicBlock { + debug!("open_drop_for_box({:?}, {:?}, {:?})", self, adt, substs); + + // drop glue is sent straight to codegen + // box cannot be directly dereferenced + let unique_ty = adt.non_enum_variant().fields[0].ty(self.tcx(), substs); + let nonnull_ty = + unique_ty.ty_adt_def().unwrap().non_enum_variant().fields[0].ty(self.tcx(), substs); + let ptr_ty = self.tcx().mk_imm_ptr(substs[0].expect_ty()); + + let unique_place = self.tcx().mk_place_field(self.place, Field::new(0), unique_ty); + let nonnull_place = self.tcx().mk_place_field(unique_place, Field::new(0), nonnull_ty); + let ptr_place = self.tcx().mk_place_field(nonnull_place, Field::new(0), ptr_ty); + let interior = self.tcx().mk_place_deref(ptr_place); + + let interior_path = self.elaborator.deref_subpath(self.path); + + let succ = self.box_free_block(adt, substs, self.succ, self.unwind); + let unwind_succ = + self.unwind.map(|unwind| self.box_free_block(adt, substs, unwind, Unwind::InCleanup)); + + self.drop_subpath(interior, interior_path, succ, unwind_succ) + } + + fn open_drop_for_adt(&mut self, adt: ty::AdtDef<'tcx>, substs: SubstsRef<'tcx>) -> BasicBlock { + debug!("open_drop_for_adt({:?}, {:?}, {:?})", self, adt, substs); + if adt.variants().is_empty() { + return self.elaborator.patch().new_block(BasicBlockData { + statements: vec![], + terminator: Some(Terminator { + source_info: self.source_info, + kind: TerminatorKind::Unreachable, + }), + is_cleanup: self.unwind.is_cleanup(), + }); + } + + let skip_contents = + adt.is_union() || Some(adt.did()) == self.tcx().lang_items().manually_drop(); + let contents_drop = if skip_contents { + (self.succ, self.unwind) + } else { + self.open_drop_for_adt_contents(adt, substs) + }; + + if adt.has_dtor(self.tcx()) { + self.destructor_call_block(contents_drop) + } else { + contents_drop.0 + } + } + + fn open_drop_for_adt_contents( + &mut self, + adt: ty::AdtDef<'tcx>, + substs: SubstsRef<'tcx>, + ) -> (BasicBlock, Unwind) { + let (succ, unwind) = self.drop_ladder_bottom(); + if !adt.is_enum() { + let fields = self.move_paths_for_fields( + self.place, + self.path, + &adt.variant(VariantIdx::new(0)), + substs, + ); + self.drop_ladder(fields, 
succ, unwind) + } else { + self.open_drop_for_multivariant(adt, substs, succ, unwind) + } + } + + fn open_drop_for_multivariant( + &mut self, + adt: ty::AdtDef<'tcx>, + substs: SubstsRef<'tcx>, + succ: BasicBlock, + unwind: Unwind, + ) -> (BasicBlock, Unwind) { + let mut values = Vec::with_capacity(adt.variants().len()); + let mut normal_blocks = Vec::with_capacity(adt.variants().len()); + let mut unwind_blocks = + if unwind.is_cleanup() { None } else { Some(Vec::with_capacity(adt.variants().len())) }; + + let mut have_otherwise_with_drop_glue = false; + let mut have_otherwise = false; + let tcx = self.tcx(); + + for (variant_index, discr) in adt.discriminants(tcx) { + let variant = &adt.variant(variant_index); + let subpath = self.elaborator.downcast_subpath(self.path, variant_index); + + if let Some(variant_path) = subpath { + let base_place = tcx.mk_place_elem( + self.place, + ProjectionElem::Downcast(Some(variant.name), variant_index), + ); + let fields = self.move_paths_for_fields(base_place, variant_path, &variant, substs); + values.push(discr.val); + if let Unwind::To(unwind) = unwind { + // We can't use the half-ladder from the original + // drop ladder, because this breaks the + // "funclet can't have 2 successor funclets" + // requirement from MSVC: + // + // switch unwind-switch + // / \ / \ + // v1.0 v2.0 v2.0-unwind v1.0-unwind + // | | / | + // v1.1-unwind v2.1-unwind | + // ^ | + // \-------------------------------/ + // + // Create a duplicate half-ladder to avoid that. We + // could technically only do this on MSVC, but I + // I want to minimize the divergence between MSVC + // and non-MSVC. + + let unwind_blocks = unwind_blocks.as_mut().unwrap(); + let unwind_ladder = vec![Unwind::InCleanup; fields.len() + 1]; + let halfladder = self.drop_halfladder(&unwind_ladder, unwind, &fields); + unwind_blocks.push(halfladder.last().cloned().unwrap()); + } + let (normal, _) = self.drop_ladder(fields, succ, unwind); + normal_blocks.push(normal); + } else { + have_otherwise = true; + + let param_env = self.elaborator.param_env(); + let have_field_with_drop_glue = variant + .fields + .iter() + .any(|field| field.ty(tcx, substs).needs_drop(tcx, param_env)); + if have_field_with_drop_glue { + have_otherwise_with_drop_glue = true; + } + } + } + + if !have_otherwise { + values.pop(); + } else if !have_otherwise_with_drop_glue { + normal_blocks.push(self.goto_block(succ, unwind)); + if let Unwind::To(unwind) = unwind { + unwind_blocks.as_mut().unwrap().push(self.goto_block(unwind, Unwind::InCleanup)); + } + } else { + normal_blocks.push(self.drop_block(succ, unwind)); + if let Unwind::To(unwind) = unwind { + unwind_blocks.as_mut().unwrap().push(self.drop_block(unwind, Unwind::InCleanup)); + } + } + + ( + self.adt_switch_block(adt, normal_blocks, &values, succ, unwind), + unwind.map(|unwind| { + self.adt_switch_block( + adt, + unwind_blocks.unwrap(), + &values, + unwind, + Unwind::InCleanup, + ) + }), + ) + } + + fn adt_switch_block( + &mut self, + adt: ty::AdtDef<'tcx>, + blocks: Vec<BasicBlock>, + values: &[u128], + succ: BasicBlock, + unwind: Unwind, + ) -> BasicBlock { + // If there are multiple variants, then if something + // is present within the enum the discriminant, tracked + // by the rest path, must be initialized. + // + // Additionally, we do not want to switch on the + // discriminant after it is free-ed, because that + // way lies only trouble. 
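At the source level, what `adt_switch_block` builds corresponds to reading the discriminant and branching into a per-variant drop. A conceptual mirror in plain Rust (illustrative names, not compiler output):

```rust
enum Payload {
    Text(String),
    Bytes(Vec<u8>),
    Empty,
}

// Conceptually what the emitted SwitchInt plus per-variant drop ladders do:
fn drop_payload(p: Payload) {
    match p {                          // SwitchInt on Rvalue::Discriminant(p)
        Payload::Text(s) => drop(s),   // ladder for variant 0
        Payload::Bytes(v) => drop(v),  // ladder for variant 1
        Payload::Empty => {}           // no drop glue: the "otherwise" goto
    }
}
```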
+ let discr_ty = adt.repr().discr_type().to_ty(self.tcx()); + let discr = Place::from(self.new_temp(discr_ty)); + let discr_rv = Rvalue::Discriminant(self.place); + let switch_block = BasicBlockData { + statements: vec![self.assign(discr, discr_rv)], + terminator: Some(Terminator { + source_info: self.source_info, + kind: TerminatorKind::SwitchInt { + discr: Operand::Move(discr), + switch_ty: discr_ty, + targets: SwitchTargets::new( + values.iter().copied().zip(blocks.iter().copied()), + *blocks.last().unwrap(), + ), + }, + }), + is_cleanup: unwind.is_cleanup(), + }; + let switch_block = self.elaborator.patch().new_block(switch_block); + self.drop_flag_test_block(switch_block, succ, unwind) + } + + fn destructor_call_block(&mut self, (succ, unwind): (BasicBlock, Unwind)) -> BasicBlock { + debug!("destructor_call_block({:?}, {:?})", self, succ); + let tcx = self.tcx(); + let drop_trait = tcx.require_lang_item(LangItem::Drop, None); + let drop_fn = tcx.associated_item_def_ids(drop_trait)[0]; + let ty = self.place_ty(self.place); + let substs = tcx.mk_substs_trait(ty, &[]); + + let ref_ty = + tcx.mk_ref(tcx.lifetimes.re_erased, ty::TypeAndMut { ty, mutbl: hir::Mutability::Mut }); + let ref_place = self.new_temp(ref_ty); + let unit_temp = Place::from(self.new_temp(tcx.mk_unit())); + + let result = BasicBlockData { + statements: vec![self.assign( + Place::from(ref_place), + Rvalue::Ref( + tcx.lifetimes.re_erased, + BorrowKind::Mut { allow_two_phase_borrow: false }, + self.place, + ), + )], + terminator: Some(Terminator { + kind: TerminatorKind::Call { + func: Operand::function_handle(tcx, drop_fn, substs, self.source_info.span), + args: vec![Operand::Move(Place::from(ref_place))], + destination: unit_temp, + target: Some(succ), + cleanup: unwind.into_option(), + from_hir_call: true, + fn_span: self.source_info.span, + }, + source_info: self.source_info, + }), + is_cleanup: unwind.is_cleanup(), + }; + self.elaborator.patch().new_block(result) + } + + /// Create a loop that drops an array: + /// + /// ```text + /// loop-block: + /// can_go = cur == length_or_end + /// if can_go then succ else drop-block + /// drop-block: + /// if ptr_based { + /// ptr = cur + /// cur = cur.offset(1) + /// } else { + /// ptr = &raw mut P[cur] + /// cur = cur + 1 + /// } + /// drop(ptr) + /// ``` + fn drop_loop( + &mut self, + succ: BasicBlock, + cur: Local, + length_or_end: Place<'tcx>, + ety: Ty<'tcx>, + unwind: Unwind, + ptr_based: bool, + ) -> BasicBlock { + let copy = |place: Place<'tcx>| Operand::Copy(place); + let move_ = |place: Place<'tcx>| Operand::Move(place); + let tcx = self.tcx(); + + let ptr_ty = tcx.mk_ptr(ty::TypeAndMut { ty: ety, mutbl: hir::Mutability::Mut }); + let ptr = Place::from(self.new_temp(ptr_ty)); + let can_go = Place::from(self.new_temp(tcx.types.bool)); + + let one = self.constant_usize(1); + let (ptr_next, cur_next) = if ptr_based { + ( + Rvalue::Use(copy(cur.into())), + Rvalue::BinaryOp(BinOp::Offset, Box::new((move_(cur.into()), one))), + ) + } else { + ( + Rvalue::AddressOf(Mutability::Mut, tcx.mk_place_index(self.place, cur)), + Rvalue::BinaryOp(BinOp::Add, Box::new((move_(cur.into()), one))), + ) + }; + + let drop_block = BasicBlockData { + statements: vec![self.assign(ptr, ptr_next), self.assign(Place::from(cur), cur_next)], + is_cleanup: unwind.is_cleanup(), + terminator: Some(Terminator { + source_info: self.source_info, + // this gets overwritten by drop elaboration. 
+ kind: TerminatorKind::Unreachable, + }), + }; + let drop_block = self.elaborator.patch().new_block(drop_block); + + let loop_block = BasicBlockData { + statements: vec![self.assign( + can_go, + Rvalue::BinaryOp( + BinOp::Eq, + Box::new((copy(Place::from(cur)), copy(length_or_end))), + ), + )], + is_cleanup: unwind.is_cleanup(), + terminator: Some(Terminator { + source_info: self.source_info, + kind: TerminatorKind::if_(tcx, move_(can_go), succ, drop_block), + }), + }; + let loop_block = self.elaborator.patch().new_block(loop_block); + + self.elaborator.patch().patch_terminator( + drop_block, + TerminatorKind::Drop { + place: tcx.mk_place_deref(ptr), + target: loop_block, + unwind: unwind.into_option(), + }, + ); + + loop_block + } + + fn open_drop_for_array(&mut self, ety: Ty<'tcx>, opt_size: Option<u64>) -> BasicBlock { + debug!("open_drop_for_array({:?}, {:?})", ety, opt_size); + + // if size_of::<ety>() == 0 { + // index_based_loop + // } else { + // ptr_based_loop + // } + + let tcx = self.tcx(); + + if let Some(size) = opt_size { + let fields: Vec<(Place<'tcx>, Option<D::Path>)> = (0..size) + .map(|i| { + ( + tcx.mk_place_elem( + self.place, + ProjectionElem::ConstantIndex { + offset: i, + min_length: size, + from_end: false, + }, + ), + self.elaborator.array_subpath(self.path, i, size), + ) + }) + .collect(); + + if fields.iter().any(|(_, path)| path.is_some()) { + let (succ, unwind) = self.drop_ladder_bottom(); + return self.drop_ladder(fields, succ, unwind).0; + } + } + + let move_ = |place: Place<'tcx>| Operand::Move(place); + let elem_size = Place::from(self.new_temp(tcx.types.usize)); + let len = Place::from(self.new_temp(tcx.types.usize)); + + let base_block = BasicBlockData { + statements: vec![ + self.assign(elem_size, Rvalue::NullaryOp(NullOp::SizeOf, ety)), + self.assign(len, Rvalue::Len(self.place)), + ], + is_cleanup: self.unwind.is_cleanup(), + terminator: Some(Terminator { + source_info: self.source_info, + kind: TerminatorKind::SwitchInt { + discr: move_(elem_size), + switch_ty: tcx.types.usize, + targets: SwitchTargets::static_if( + 0, + self.drop_loop_pair(ety, false, len), + self.drop_loop_pair(ety, true, len), + ), + }, + }), + }; + self.elaborator.patch().new_block(base_block) + } + + /// Creates a pair of drop-loops of `place`, which drops its contents, even + /// in the case of 1 panic. If `ptr_based`, creates a pointer loop, + /// otherwise create an index loop. 
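The pointer-based loop in `drop_loop`'s doc comment has a direct unsafe-Rust counterpart. A minimal sketch of the same shape (illustrative; real drop glue additionally handles unwinding and drop flags):

```rust
/// Drop `len` initialized elements starting at `ptr`, mirroring the
/// `ptr_based` loop shape shown above: save `cur`, advance it, drop.
unsafe fn drop_elements<T>(ptr: *mut T, len: usize) {
    let end = ptr.add(len);
    let mut cur = ptr;
    while cur != end {                 // loop-block: can_go = cur == length_or_end
        let elem = cur;                // ptr = cur
        cur = cur.offset(1);           // cur = cur.offset(1)
        std::ptr::drop_in_place(elem); // drop(ptr)
    }
}
```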
+ fn drop_loop_pair( + &mut self, + ety: Ty<'tcx>, + ptr_based: bool, + length: Place<'tcx>, + ) -> BasicBlock { + debug!("drop_loop_pair({:?}, {:?})", ety, ptr_based); + let tcx = self.tcx(); + let iter_ty = if ptr_based { tcx.mk_mut_ptr(ety) } else { tcx.types.usize }; + + let cur = self.new_temp(iter_ty); + let length_or_end = if ptr_based { Place::from(self.new_temp(iter_ty)) } else { length }; + + let unwind = self.unwind.map(|unwind| { + self.drop_loop(unwind, cur, length_or_end, ety, Unwind::InCleanup, ptr_based) + }); + + let loop_block = self.drop_loop(self.succ, cur, length_or_end, ety, unwind, ptr_based); + + let cur = Place::from(cur); + let drop_block_stmts = if ptr_based { + let tmp_ty = tcx.mk_mut_ptr(self.place_ty(self.place)); + let tmp = Place::from(self.new_temp(tmp_ty)); + // tmp = &raw mut P; + // cur = tmp as *mut T; + // end = Offset(cur, len); + vec![ + self.assign(tmp, Rvalue::AddressOf(Mutability::Mut, self.place)), + self.assign(cur, Rvalue::Cast(CastKind::Misc, Operand::Move(tmp), iter_ty)), + self.assign( + length_or_end, + Rvalue::BinaryOp( + BinOp::Offset, + Box::new((Operand::Copy(cur), Operand::Move(length))), + ), + ), + ] + } else { + // cur = 0 (length already pushed) + let zero = self.constant_usize(0); + vec![self.assign(cur, Rvalue::Use(zero))] + }; + let drop_block = self.elaborator.patch().new_block(BasicBlockData { + statements: drop_block_stmts, + is_cleanup: unwind.is_cleanup(), + terminator: Some(Terminator { + source_info: self.source_info, + kind: TerminatorKind::Goto { target: loop_block }, + }), + }); + + // FIXME(#34708): handle partially-dropped array/slice elements. + let reset_block = self.drop_flag_reset_block(DropFlagMode::Deep, drop_block, unwind); + self.drop_flag_test_block(reset_block, self.succ, unwind) + } + + /// The slow-path - create an "open", elaborated drop for a type + /// which is moved-out-of only partially, and patch `bb` to a jump + /// to it. This must not be called on ADTs with a destructor, + /// as these can't be moved-out-of, except for `Box<T>`, which is + /// special-cased. + /// + /// This creates a "drop ladder" that drops the needed fields of the + /// ADT, both in the success case or if one of the destructors fail. + fn open_drop(&mut self) -> BasicBlock { + let ty = self.place_ty(self.place); + match ty.kind() { + ty::Closure(_, substs) => { + let tys: Vec<_> = substs.as_closure().upvar_tys().collect(); + self.open_drop_for_tuple(&tys) + } + // Note that `elaborate_drops` only drops the upvars of a generator, + // and this is ok because `open_drop` here can only be reached + // within that own generator's resume function. + // This should only happen for the self argument on the resume function. + // It effectively only contains upvars until the generator transformation runs. + // See librustc_body/transform/generator.rs for more details. + ty::Generator(_, substs, _) => { + let tys: Vec<_> = substs.as_generator().upvar_tys().collect(); + self.open_drop_for_tuple(&tys) + } + ty::Tuple(fields) => self.open_drop_for_tuple(fields), + ty::Adt(def, substs) => { + if def.is_box() { + self.open_drop_for_box(*def, substs) + } else { + self.open_drop_for_adt(*def, substs) + } + } + ty::Dynamic(..) 
=> self.complete_drop(self.succ, self.unwind), + ty::Array(ety, size) => { + let size = size.try_eval_usize(self.tcx(), self.elaborator.param_env()); + self.open_drop_for_array(*ety, size) + } + ty::Slice(ety) => self.open_drop_for_array(*ety, None), + + _ => bug!("open drop from non-ADT `{:?}`", ty), + } + } + + fn complete_drop(&mut self, succ: BasicBlock, unwind: Unwind) -> BasicBlock { + debug!("complete_drop(succ={:?}, unwind={:?})", succ, unwind); + + let drop_block = self.drop_block(succ, unwind); + + self.drop_flag_test_block(drop_block, succ, unwind) + } + + /// Creates a block that resets the drop flag. If `mode` is deep, all children drop flags will + /// also be cleared. + fn drop_flag_reset_block( + &mut self, + mode: DropFlagMode, + succ: BasicBlock, + unwind: Unwind, + ) -> BasicBlock { + debug!("drop_flag_reset_block({:?},{:?})", self, mode); + + if unwind.is_cleanup() { + // The drop flag isn't read again on the unwind path, so don't + // bother setting it. + return succ; + } + let block = self.new_block(unwind, TerminatorKind::Goto { target: succ }); + let block_start = Location { block, statement_index: 0 }; + self.elaborator.clear_drop_flag(block_start, self.path, mode); + block + } + + fn elaborated_drop_block(&mut self) -> BasicBlock { + debug!("elaborated_drop_block({:?})", self); + let blk = self.drop_block(self.succ, self.unwind); + self.elaborate_drop(blk); + blk + } + + /// Creates a block that frees the backing memory of a `Box` if its drop is required (either + /// statically or by checking its drop flag). + /// + /// The contained value will not be dropped. + fn box_free_block( + &mut self, + adt: ty::AdtDef<'tcx>, + substs: SubstsRef<'tcx>, + target: BasicBlock, + unwind: Unwind, + ) -> BasicBlock { + let block = self.unelaborated_free_block(adt, substs, target, unwind); + self.drop_flag_test_block(block, target, unwind) + } + + /// Creates a block that frees the backing memory of a `Box` (without dropping the contained + /// value). + fn unelaborated_free_block( + &mut self, + adt: ty::AdtDef<'tcx>, + substs: SubstsRef<'tcx>, + target: BasicBlock, + unwind: Unwind, + ) -> BasicBlock { + let tcx = self.tcx(); + let unit_temp = Place::from(self.new_temp(tcx.mk_unit())); + let free_func = tcx.require_lang_item(LangItem::BoxFree, Some(self.source_info.span)); + let args = adt + .variant(VariantIdx::new(0)) + .fields + .iter() + .enumerate() + .map(|(i, f)| { + let field = Field::new(i); + let field_ty = f.ty(tcx, substs); + Operand::Move(tcx.mk_place_field(self.place, field, field_ty)) + }) + .collect(); + + let call = TerminatorKind::Call { + func: Operand::function_handle(tcx, free_func, substs, self.source_info.span), + args, + destination: unit_temp, + target: Some(target), + cleanup: None, + from_hir_call: false, + fn_span: self.source_info.span, + }; // FIXME(#43234) + let free_block = self.new_block(unwind, call); + + let block_start = Location { block: free_block, statement_index: 0 }; + self.elaborator.clear_drop_flag(block_start, self.path, DropFlagMode::Shallow); + free_block + } + + fn drop_block(&mut self, target: BasicBlock, unwind: Unwind) -> BasicBlock { + let block = + TerminatorKind::Drop { place: self.place, target, unwind: unwind.into_option() }; + self.new_block(unwind, block) + } + + fn goto_block(&mut self, target: BasicBlock, unwind: Unwind) -> BasicBlock { + let block = TerminatorKind::Goto { target }; + self.new_block(unwind, block) + } + + /// Returns the block to jump to in order to test the drop flag and execute the drop. 
+ /// + /// Depending on the required `DropStyle`, this might be a generated block with an `if` + /// terminator (for dynamic/open drops), or it might be `on_set` or `on_unset` itself, in case + /// the drop can be statically determined. + fn drop_flag_test_block( + &mut self, + on_set: BasicBlock, + on_unset: BasicBlock, + unwind: Unwind, + ) -> BasicBlock { + let style = self.elaborator.drop_style(self.path, DropFlagMode::Shallow); + debug!( + "drop_flag_test_block({:?},{:?},{:?},{:?}) - {:?}", + self, on_set, on_unset, unwind, style + ); + + match style { + DropStyle::Dead => on_unset, + DropStyle::Static => on_set, + DropStyle::Conditional | DropStyle::Open => { + let flag = self.elaborator.get_drop_flag(self.path).unwrap(); + let term = TerminatorKind::if_(self.tcx(), flag, on_set, on_unset); + self.new_block(unwind, term) + } + } + } + + fn new_block(&mut self, unwind: Unwind, k: TerminatorKind<'tcx>) -> BasicBlock { + self.elaborator.patch().new_block(BasicBlockData { + statements: vec![], + terminator: Some(Terminator { source_info: self.source_info, kind: k }), + is_cleanup: unwind.is_cleanup(), + }) + } + + fn new_temp(&mut self, ty: Ty<'tcx>) -> Local { + self.elaborator.patch().new_temp(ty, self.source_info.span) + } + + fn constant_usize(&self, val: u16) -> Operand<'tcx> { + Operand::Constant(Box::new(Constant { + span: self.source_info.span, + user_ty: None, + literal: ConstantKind::from_usize(self.tcx(), val.into()), + })) + } + + fn assign(&self, lhs: Place<'tcx>, rhs: Rvalue<'tcx>) -> Statement<'tcx> { + Statement { + source_info: self.source_info, + kind: StatementKind::Assign(Box::new((lhs, rhs))), + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/cursor.rs b/compiler/rustc_mir_dataflow/src/framework/cursor.rs new file mode 100644 index 000000000..f3b5544aa --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/cursor.rs @@ -0,0 +1,235 @@ +//! Random access inspection of the results of a dataflow analysis. + +use crate::framework::BitSetExt; + +use std::borrow::Borrow; +use std::cmp::Ordering; + +#[cfg(debug_assertions)] +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::{self, BasicBlock, Location}; + +use super::{Analysis, Direction, Effect, EffectIndex, Results}; + +/// A `ResultsCursor` that borrows the underlying `Results`. +pub type ResultsRefCursor<'a, 'mir, 'tcx, A> = ResultsCursor<'mir, 'tcx, A, &'a Results<'tcx, A>>; + +/// Allows random access inspection of the results of a dataflow analysis. +/// +/// This cursor only has linear performance within a basic block when its statements are visited in +/// the same order as the `DIRECTION` of the analysis. In the worst case—when statements are +/// visited in *reverse* order—performance will be quadratic in the number of statements in the +/// block. The order in which basic blocks are inspected has no impact on performance. +/// +/// A `ResultsCursor` can either own (the default) or borrow the dataflow results it inspects. The +/// type of ownership is determined by `R` (see `ResultsRefCursor` above). +pub struct ResultsCursor<'mir, 'tcx, A, R = Results<'tcx, A>> +where + A: Analysis<'tcx>, +{ + body: &'mir mir::Body<'tcx>, + results: R, + state: A::Domain, + + pos: CursorPosition, + + /// Indicates that `state` has been modified with a custom effect. + /// + /// When this flag is set, we need to reset to an entry set before doing a seek. 
+ state_needs_reset: bool, + + #[cfg(debug_assertions)] + reachable_blocks: BitSet<BasicBlock>, +} + +impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R> +where + A: Analysis<'tcx>, + R: Borrow<Results<'tcx, A>>, +{ + /// Returns a new cursor that can inspect `results`. + pub fn new(body: &'mir mir::Body<'tcx>, results: R) -> Self { + let bottom_value = results.borrow().analysis.bottom_value(body); + ResultsCursor { + body, + results, + + // Initialize to the `bottom_value` and set `state_needs_reset` to tell the cursor that + // it needs to reset to block entry before the first seek. The cursor position is + // immaterial. + state_needs_reset: true, + state: bottom_value, + pos: CursorPosition::block_entry(mir::START_BLOCK), + + #[cfg(debug_assertions)] + reachable_blocks: mir::traversal::reachable_as_bitset(body), + } + } + + /// Allows inspection of unreachable basic blocks even with `debug_assertions` enabled. + #[cfg(test)] + pub(crate) fn allow_unreachable(&mut self) { + #[cfg(debug_assertions)] + self.reachable_blocks.insert_all() + } + + /// Returns the underlying `Results`. + pub fn results(&self) -> &Results<'tcx, A> { + &self.results.borrow() + } + + /// Returns the `Analysis` used to generate the underlying `Results`. + pub fn analysis(&self) -> &A { + &self.results.borrow().analysis + } + + /// Returns the dataflow state at the current location. + pub fn get(&self) -> &A::Domain { + &self.state + } + + /// Resets the cursor to hold the entry set for the given basic block. + /// + /// For forward dataflow analyses, this is the dataflow state prior to the first statement. + /// + /// For backward dataflow analyses, this is the dataflow state after the terminator. + pub(super) fn seek_to_block_entry(&mut self, block: BasicBlock) { + #[cfg(debug_assertions)] + assert!(self.reachable_blocks.contains(block)); + + self.state.clone_from(&self.results.borrow().entry_set_for_block(block)); + self.pos = CursorPosition::block_entry(block); + self.state_needs_reset = false; + } + + /// Resets the cursor to hold the state prior to the first statement in a basic block. + /// + /// For forward analyses, this is the entry set for the given block. + /// + /// For backward analyses, this is the state that will be propagated to its + /// predecessors (ignoring edge-specific effects). + pub fn seek_to_block_start(&mut self, block: BasicBlock) { + if A::Direction::IS_FORWARD { + self.seek_to_block_entry(block) + } else { + self.seek_after(Location { block, statement_index: 0 }, Effect::Primary) + } + } + + /// Resets the cursor to hold the state after the terminator in a basic block. + /// + /// For backward analyses, this is the entry set for the given block. + /// + /// For forward analyses, this is the state that will be propagated to its + /// successors (ignoring edge-specific effects). + pub fn seek_to_block_end(&mut self, block: BasicBlock) { + if A::Direction::IS_BACKWARD { + self.seek_to_block_entry(block) + } else { + self.seek_after(self.body.terminator_loc(block), Effect::Primary) + } + } + + /// Advances the cursor to hold the dataflow state at `target` before its "primary" effect is + /// applied. + /// + /// The "before" effect at the target location *will be* applied. + pub fn seek_before_primary_effect(&mut self, target: Location) { + self.seek_after(target, Effect::Before) + } + + /// Advances the cursor to hold the dataflow state at `target` after its "primary" effect is + /// applied. + /// + /// The "before" effect at the target location will be applied as well. 
+ pub fn seek_after_primary_effect(&mut self, target: Location) { + self.seek_after(target, Effect::Primary) + } + + fn seek_after(&mut self, target: Location, effect: Effect) { + assert!(target <= self.body.terminator_loc(target.block)); + + // Reset to the entry of the target block if any of the following are true: + // - A custom effect has been applied to the cursor state. + // - We are in a different block than the target. + // - We are in the same block but have advanced past the target effect. + if self.state_needs_reset || self.pos.block != target.block { + self.seek_to_block_entry(target.block); + } else if let Some(curr_effect) = self.pos.curr_effect_index { + let mut ord = curr_effect.statement_index.cmp(&target.statement_index); + if A::Direction::IS_BACKWARD { + ord = ord.reverse() + } + + match ord.then_with(|| curr_effect.effect.cmp(&effect)) { + Ordering::Equal => return, + Ordering::Greater => self.seek_to_block_entry(target.block), + Ordering::Less => {} + } + } + + // At this point, the cursor is in the same block as the target location at an earlier + // statement. + debug_assert_eq!(target.block, self.pos.block); + + let block_data = &self.body[target.block]; + let next_effect = if A::Direction::IS_FORWARD { + #[rustfmt::skip] + self.pos.curr_effect_index.map_or_else( + || Effect::Before.at_index(0), + EffectIndex::next_in_forward_order, + ) + } else { + self.pos.curr_effect_index.map_or_else( + || Effect::Before.at_index(block_data.statements.len()), + EffectIndex::next_in_backward_order, + ) + }; + + let analysis = &self.results.borrow().analysis; + let target_effect_index = effect.at_index(target.statement_index); + + A::Direction::apply_effects_in_range( + analysis, + &mut self.state, + target.block, + block_data, + next_effect..=target_effect_index, + ); + + self.pos = + CursorPosition { block: target.block, curr_effect_index: Some(target_effect_index) }; + } + + /// Applies `f` to the cursor's internal state. + /// + /// This can be used, e.g., to apply the call return effect directly to the cursor without + /// creating an extra copy of the dataflow state. 
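A typical use of the cursor inside a compiler pass looks roughly like the sketch below. It is written against the `rustc_mir_dataflow` API of this snapshot and is not compilable standalone; `MyAnalysis` and the in-scope `tcx`/`body` are hypothetical stand-ins:

```rust
// Sketch: assumes `tcx: TyCtxt<'tcx>`, `body: &Body<'tcx>`, and some
// `MyAnalysis` implementing `Analysis<'tcx>` are in scope.
let mut cursor = MyAnalysis
    .into_engine(tcx, body)
    .iterate_to_fixpoint()
    .into_results_cursor(body);

for (block, data) in body.basic_blocks.iter_enumerated() {
    for statement_index in 0..data.statements.len() {
        cursor.seek_before_primary_effect(Location { block, statement_index });
        let _state = cursor.get();
        // ... consult the dataflow state at this statement ...
    }
}
```

Seeking in the analysis's own direction keeps the traversal linear per block, as the cursor docs above note.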
+ pub fn apply_custom_effect(&mut self, f: impl FnOnce(&A, &mut A::Domain)) { + f(&self.results.borrow().analysis, &mut self.state); + self.state_needs_reset = true; + } +} + +impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R> +where + A: crate::GenKillAnalysis<'tcx>, + A::Domain: BitSetExt<A::Idx>, + R: Borrow<Results<'tcx, A>>, +{ + pub fn contains(&self, elem: A::Idx) -> bool { + self.get().contains(elem) + } +} + +#[derive(Clone, Copy, Debug)] +struct CursorPosition { + block: BasicBlock, + curr_effect_index: Option<EffectIndex>, +} + +impl CursorPosition { + fn block_entry(block: BasicBlock) -> CursorPosition { + CursorPosition { block, curr_effect_index: None } + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/direction.rs b/compiler/rustc_mir_dataflow/src/framework/direction.rs new file mode 100644 index 000000000..5c77f3ea3 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/direction.rs @@ -0,0 +1,656 @@ +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::{self, BasicBlock, Location, SwitchTargets}; +use rustc_middle::ty::TyCtxt; +use std::ops::RangeInclusive; + +use super::visitor::{ResultsVisitable, ResultsVisitor}; +use super::{ + Analysis, CallReturnPlaces, Effect, EffectIndex, GenKillAnalysis, GenKillSet, SwitchIntTarget, +}; + +pub trait Direction { + const IS_FORWARD: bool; + + const IS_BACKWARD: bool = !Self::IS_FORWARD; + + /// Applies all effects between the given `EffectIndex`s. + /// + /// `effects.start()` must precede or equal `effects.end()` in this direction. + fn apply_effects_in_range<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + effects: RangeInclusive<EffectIndex>, + ) where + A: Analysis<'tcx>; + + fn apply_effects_in_block<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: Analysis<'tcx>; + + fn gen_kill_effects_in_block<'tcx, A>( + analysis: &A, + trans: &mut GenKillSet<A::Idx>, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: GenKillAnalysis<'tcx>; + + fn visit_results_in_block<'mir, 'tcx, F, R>( + state: &mut F, + block: BasicBlock, + block_data: &'mir mir::BasicBlockData<'tcx>, + results: &R, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, + ) where + R: ResultsVisitable<'tcx, FlowState = F>; + + fn join_state_into_successors_of<'tcx, A>( + analysis: &A, + tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + dead_unwinds: Option<&BitSet<BasicBlock>>, + exit_state: &mut A::Domain, + block: (BasicBlock, &'_ mir::BasicBlockData<'tcx>), + propagate: impl FnMut(BasicBlock, &A::Domain), + ) where + A: Analysis<'tcx>; +} + +/// Dataflow that runs from the exit of a block (the terminator), to its entry (the first statement). 
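The `gen_kill_effects_in_block` hooks fold an entire block into one cumulative gen/kill pair, which can then be applied to any entry state cheaply. The algebra being exploited, shown on a toy 64-bit "bitset" rather than the rustc types:

```rust
/// Toy counterpart of applying a block's cumulative transfer function:
/// state' = (state \ kill) ∪ gen.
fn apply_trans(state: &mut u64, gen_bits: u64, kill_bits: u64) {
    *state &= !kill_bits;
    *state |= gen_bits;
}

fn main() {
    let mut live = 0b0110;
    apply_trans(&mut live, 0b0001, 0b0100);
    assert_eq!(live, 0b0011); // bit 2 killed, bit 0 generated
}
```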
+pub struct Backward; + +impl Direction for Backward { + const IS_FORWARD: bool = false; + + fn apply_effects_in_block<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: Analysis<'tcx>, + { + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.apply_before_terminator_effect(state, terminator, location); + analysis.apply_terminator_effect(state, terminator, location); + + for (statement_index, statement) in block_data.statements.iter().enumerate().rev() { + let location = Location { block, statement_index }; + analysis.apply_before_statement_effect(state, statement, location); + analysis.apply_statement_effect(state, statement, location); + } + } + + fn gen_kill_effects_in_block<'tcx, A>( + analysis: &A, + trans: &mut GenKillSet<A::Idx>, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: GenKillAnalysis<'tcx>, + { + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.before_terminator_effect(trans, terminator, location); + analysis.terminator_effect(trans, terminator, location); + + for (statement_index, statement) in block_data.statements.iter().enumerate().rev() { + let location = Location { block, statement_index }; + analysis.before_statement_effect(trans, statement, location); + analysis.statement_effect(trans, statement, location); + } + } + + fn apply_effects_in_range<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + effects: RangeInclusive<EffectIndex>, + ) where + A: Analysis<'tcx>, + { + let (from, to) = (*effects.start(), *effects.end()); + let terminator_index = block_data.statements.len(); + + assert!(from.statement_index <= terminator_index); + assert!(!to.precedes_in_backward_order(from)); + + // Handle the statement (or terminator) at `from`. + + let next_effect = match from.effect { + // If we need to apply the terminator effect in all or in part, do so now. + _ if from.statement_index == terminator_index => { + let location = Location { block, statement_index: from.statement_index }; + let terminator = block_data.terminator(); + + if from.effect == Effect::Before { + analysis.apply_before_terminator_effect(state, terminator, location); + if to == Effect::Before.at_index(terminator_index) { + return; + } + } + + analysis.apply_terminator_effect(state, terminator, location); + if to == Effect::Primary.at_index(terminator_index) { + return; + } + + // If `from.statement_index` is `0`, we will have hit one of the earlier comparisons + // with `to`. + from.statement_index - 1 + } + + Effect::Primary => { + let location = Location { block, statement_index: from.statement_index }; + let statement = &block_data.statements[from.statement_index]; + + analysis.apply_statement_effect(state, statement, location); + if to == Effect::Primary.at_index(from.statement_index) { + return; + } + + from.statement_index - 1 + } + + Effect::Before => from.statement_index, + }; + + // Handle all statements between `first_unapplied_idx` and `to.statement_index`. 
+ + for statement_index in (to.statement_index..next_effect).rev().map(|i| i + 1) { + let location = Location { block, statement_index }; + let statement = &block_data.statements[statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + analysis.apply_statement_effect(state, statement, location); + } + + // Handle the statement at `to`. + + let location = Location { block, statement_index: to.statement_index }; + let statement = &block_data.statements[to.statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + + if to.effect == Effect::Before { + return; + } + + analysis.apply_statement_effect(state, statement, location); + } + + fn visit_results_in_block<'mir, 'tcx, F, R>( + state: &mut F, + block: BasicBlock, + block_data: &'mir mir::BasicBlockData<'tcx>, + results: &R, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, + ) where + R: ResultsVisitable<'tcx, FlowState = F>, + { + results.reset_to_block_entry(state, block); + + vis.visit_block_end(&state, block_data, block); + + // Terminator + let loc = Location { block, statement_index: block_data.statements.len() }; + let term = block_data.terminator(); + results.reconstruct_before_terminator_effect(state, term, loc); + vis.visit_terminator_before_primary_effect(state, term, loc); + results.reconstruct_terminator_effect(state, term, loc); + vis.visit_terminator_after_primary_effect(state, term, loc); + + for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() { + let loc = Location { block, statement_index }; + results.reconstruct_before_statement_effect(state, stmt, loc); + vis.visit_statement_before_primary_effect(state, stmt, loc); + results.reconstruct_statement_effect(state, stmt, loc); + vis.visit_statement_after_primary_effect(state, stmt, loc); + } + + vis.visit_block_start(state, block_data, block); + } + + fn join_state_into_successors_of<'tcx, A>( + analysis: &A, + _tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + dead_unwinds: Option<&BitSet<BasicBlock>>, + exit_state: &mut A::Domain, + (bb, _bb_data): (BasicBlock, &'_ mir::BasicBlockData<'tcx>), + mut propagate: impl FnMut(BasicBlock, &A::Domain), + ) where + A: Analysis<'tcx>, + { + for pred in body.basic_blocks.predecessors()[bb].iter().copied() { + match body[pred].terminator().kind { + // Apply terminator-specific edge effects. + // + // FIXME(ecstaticmorse): Avoid cloning the exit state unconditionally. + mir::TerminatorKind::Call { destination, target: Some(dest), .. } if dest == bb => { + let mut tmp = exit_state.clone(); + analysis.apply_call_return_effect( + &mut tmp, + pred, + CallReturnPlaces::Call(destination), + ); + propagate(pred, &tmp); + } + + mir::TerminatorKind::InlineAsm { + destination: Some(dest), ref operands, .. + } if dest == bb => { + let mut tmp = exit_state.clone(); + analysis.apply_call_return_effect( + &mut tmp, + pred, + CallReturnPlaces::InlineAsm(operands), + ); + propagate(pred, &tmp); + } + + mir::TerminatorKind::Yield { resume, resume_arg, .. 
} if resume == bb => { + let mut tmp = exit_state.clone(); + analysis.apply_yield_resume_effect(&mut tmp, resume, resume_arg); + propagate(pred, &tmp); + } + + mir::TerminatorKind::SwitchInt { targets: _, ref discr, switch_ty: _ } => { + let mut applier = BackwardSwitchIntEdgeEffectsApplier { + body, + pred, + exit_state, + bb, + propagate: &mut propagate, + effects_applied: false, + }; + + analysis.apply_switch_int_edge_effects(pred, discr, &mut applier); + + if !applier.effects_applied { + propagate(pred, exit_state) + } + } + + // Ignore dead unwinds. + mir::TerminatorKind::Call { cleanup: Some(unwind), .. } + | mir::TerminatorKind::Assert { cleanup: Some(unwind), .. } + | mir::TerminatorKind::Drop { unwind: Some(unwind), .. } + | mir::TerminatorKind::DropAndReplace { unwind: Some(unwind), .. } + | mir::TerminatorKind::FalseUnwind { unwind: Some(unwind), .. } + | mir::TerminatorKind::InlineAsm { cleanup: Some(unwind), .. } + if unwind == bb => + { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(pred, exit_state); + } + } + + _ => propagate(pred, exit_state), + } + } + } +} + +struct BackwardSwitchIntEdgeEffectsApplier<'a, 'tcx, D, F> { + body: &'a mir::Body<'tcx>, + pred: BasicBlock, + exit_state: &'a mut D, + bb: BasicBlock, + propagate: &'a mut F, + + effects_applied: bool, +} + +impl<D, F> super::SwitchIntEdgeEffects<D> for BackwardSwitchIntEdgeEffectsApplier<'_, '_, D, F> +where + D: Clone, + F: FnMut(BasicBlock, &D), +{ + fn apply(&mut self, mut apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)) { + assert!(!self.effects_applied); + + let values = &self.body.basic_blocks.switch_sources()[&(self.bb, self.pred)]; + let targets = values.iter().map(|&value| SwitchIntTarget { value, target: self.bb }); + + let mut tmp = None; + for target in targets { + let tmp = opt_clone_from_or_clone(&mut tmp, self.exit_state); + apply_edge_effect(tmp, target); + (self.propagate)(self.pred, tmp); + } + + self.effects_applied = true; + } +} + +/// Dataflow that runs from the entry of a block (the first statement), to its exit (terminator). 
+pub struct Forward;
+
+impl Direction for Forward {
+    const IS_FORWARD: bool = true;
+
+    fn apply_effects_in_block<'tcx, A>(
+        analysis: &A,
+        state: &mut A::Domain,
+        block: BasicBlock,
+        block_data: &mir::BasicBlockData<'tcx>,
+    ) where
+        A: Analysis<'tcx>,
+    {
+        for (statement_index, statement) in block_data.statements.iter().enumerate() {
+            let location = Location { block, statement_index };
+            analysis.apply_before_statement_effect(state, statement, location);
+            analysis.apply_statement_effect(state, statement, location);
+        }
+
+        let terminator = block_data.terminator();
+        let location = Location { block, statement_index: block_data.statements.len() };
+        analysis.apply_before_terminator_effect(state, terminator, location);
+        analysis.apply_terminator_effect(state, terminator, location);
+    }
+
+    fn gen_kill_effects_in_block<'tcx, A>(
+        analysis: &A,
+        trans: &mut GenKillSet<A::Idx>,
+        block: BasicBlock,
+        block_data: &mir::BasicBlockData<'tcx>,
+    ) where
+        A: GenKillAnalysis<'tcx>,
+    {
+        for (statement_index, statement) in block_data.statements.iter().enumerate() {
+            let location = Location { block, statement_index };
+            analysis.before_statement_effect(trans, statement, location);
+            analysis.statement_effect(trans, statement, location);
+        }
+
+        let terminator = block_data.terminator();
+        let location = Location { block, statement_index: block_data.statements.len() };
+        analysis.before_terminator_effect(trans, terminator, location);
+        analysis.terminator_effect(trans, terminator, location);
+    }
+
+    fn apply_effects_in_range<'tcx, A>(
+        analysis: &A,
+        state: &mut A::Domain,
+        block: BasicBlock,
+        block_data: &mir::BasicBlockData<'tcx>,
+        effects: RangeInclusive<EffectIndex>,
+    ) where
+        A: Analysis<'tcx>,
+    {
+        let (from, to) = (*effects.start(), *effects.end());
+        let terminator_index = block_data.statements.len();
+
+        assert!(to.statement_index <= terminator_index);
+        assert!(!to.precedes_in_forward_order(from));
+
+        // If we have applied the before effect of the statement or terminator at `from` but not
+        // its primary effect, do so now and start the loop below from the next statement.
+
+        let first_unapplied_index = match from.effect {
+            Effect::Before => from.statement_index,
+
+            Effect::Primary if from.statement_index == terminator_index => {
+                debug_assert_eq!(from, to);
+
+                let location = Location { block, statement_index: terminator_index };
+                let terminator = block_data.terminator();
+                analysis.apply_terminator_effect(state, terminator, location);
+                return;
+            }
+
+            Effect::Primary => {
+                let location = Location { block, statement_index: from.statement_index };
+                let statement = &block_data.statements[from.statement_index];
+                analysis.apply_statement_effect(state, statement, location);
+
+                // If we only needed to apply the primary effect of the statement at
+                // `from.statement_index`, we are done.
+                if from == to {
+                    return;
+                }
+
+                from.statement_index + 1
+            }
+        };
+
+        // Handle all statements between `from` and `to` whose effects must be applied in full.
+
+        for statement_index in first_unapplied_index..to.statement_index {
+            let location = Location { block, statement_index };
+            let statement = &block_data.statements[statement_index];
+            analysis.apply_before_statement_effect(state, statement, location);
+            analysis.apply_statement_effect(state, statement, location);
+        }
+
+        // Handle the statement or terminator at `to`.
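+        //
+        // Editor's illustration (not from the original source): if `to` is
+        // `Effect::Before` at statement `4`, only
+        // `apply_before_statement_effect` runs for statement `4`; if `to` is
+        // `Effect::Primary`, `apply_statement_effect` runs afterwards as well.
+        // The same split applies to the terminator case below.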
+ + let location = Location { block, statement_index: to.statement_index }; + if to.statement_index == terminator_index { + let terminator = block_data.terminator(); + analysis.apply_before_terminator_effect(state, terminator, location); + + if to.effect == Effect::Primary { + analysis.apply_terminator_effect(state, terminator, location); + } + } else { + let statement = &block_data.statements[to.statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + + if to.effect == Effect::Primary { + analysis.apply_statement_effect(state, statement, location); + } + } + } + + fn visit_results_in_block<'mir, 'tcx, F, R>( + state: &mut F, + block: BasicBlock, + block_data: &'mir mir::BasicBlockData<'tcx>, + results: &R, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, + ) where + R: ResultsVisitable<'tcx, FlowState = F>, + { + results.reset_to_block_entry(state, block); + + vis.visit_block_start(state, block_data, block); + + for (statement_index, stmt) in block_data.statements.iter().enumerate() { + let loc = Location { block, statement_index }; + results.reconstruct_before_statement_effect(state, stmt, loc); + vis.visit_statement_before_primary_effect(state, stmt, loc); + results.reconstruct_statement_effect(state, stmt, loc); + vis.visit_statement_after_primary_effect(state, stmt, loc); + } + + let loc = Location { block, statement_index: block_data.statements.len() }; + let term = block_data.terminator(); + results.reconstruct_before_terminator_effect(state, term, loc); + vis.visit_terminator_before_primary_effect(state, term, loc); + results.reconstruct_terminator_effect(state, term, loc); + vis.visit_terminator_after_primary_effect(state, term, loc); + + vis.visit_block_end(state, block_data, block); + } + + fn join_state_into_successors_of<'tcx, A>( + analysis: &A, + _tcx: TyCtxt<'tcx>, + _body: &mir::Body<'tcx>, + dead_unwinds: Option<&BitSet<BasicBlock>>, + exit_state: &mut A::Domain, + (bb, bb_data): (BasicBlock, &'_ mir::BasicBlockData<'tcx>), + mut propagate: impl FnMut(BasicBlock, &A::Domain), + ) where + A: Analysis<'tcx>, + { + use mir::TerminatorKind::*; + match bb_data.terminator().kind { + Return | Resume | Abort | GeneratorDrop | Unreachable => {} + + Goto { target } => propagate(target, exit_state), + + Assert { target, cleanup: unwind, expected: _, msg: _, cond: _ } + | Drop { target, unwind, place: _ } + | DropAndReplace { target, unwind, value: _, place: _ } + | FalseUnwind { real_target: target, unwind } => { + if let Some(unwind) = unwind { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(unwind, exit_state); + } + } + + propagate(target, exit_state); + } + + FalseEdge { real_target, imaginary_target } => { + propagate(real_target, exit_state); + propagate(imaginary_target, exit_state); + } + + Yield { resume: target, drop, resume_arg, value: _ } => { + if let Some(drop) = drop { + propagate(drop, exit_state); + } + + analysis.apply_yield_resume_effect(exit_state, target, resume_arg); + propagate(target, exit_state); + } + + Call { + cleanup, + destination, + target, + func: _, + args: _, + from_hir_call: _, + fn_span: _, + } => { + if let Some(unwind) = cleanup { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(unwind, exit_state); + } + } + + if let Some(target) = target { + // N.B.: This must be done *last*, otherwise the unwind path will see the call + // return effect. 
+ analysis.apply_call_return_effect( + exit_state, + bb, + CallReturnPlaces::Call(destination), + ); + propagate(target, exit_state); + } + } + + InlineAsm { + template: _, + ref operands, + options: _, + line_spans: _, + destination, + cleanup, + } => { + if let Some(unwind) = cleanup { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(unwind, exit_state); + } + } + + if let Some(target) = destination { + // N.B.: This must be done *last*, otherwise the unwind path will see the call + // return effect. + analysis.apply_call_return_effect( + exit_state, + bb, + CallReturnPlaces::InlineAsm(operands), + ); + propagate(target, exit_state); + } + } + + SwitchInt { ref targets, ref discr, switch_ty: _ } => { + let mut applier = ForwardSwitchIntEdgeEffectsApplier { + exit_state, + targets, + propagate, + effects_applied: false, + }; + + analysis.apply_switch_int_edge_effects(bb, discr, &mut applier); + + let ForwardSwitchIntEdgeEffectsApplier { + exit_state, + mut propagate, + effects_applied, + .. + } = applier; + + if !effects_applied { + for target in targets.all_targets() { + propagate(*target, exit_state); + } + } + } + } + } +} + +struct ForwardSwitchIntEdgeEffectsApplier<'a, D, F> { + exit_state: &'a mut D, + targets: &'a SwitchTargets, + propagate: F, + + effects_applied: bool, +} + +impl<D, F> super::SwitchIntEdgeEffects<D> for ForwardSwitchIntEdgeEffectsApplier<'_, D, F> +where + D: Clone, + F: FnMut(BasicBlock, &D), +{ + fn apply(&mut self, mut apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)) { + assert!(!self.effects_applied); + + let mut tmp = None; + for (value, target) in self.targets.iter() { + let tmp = opt_clone_from_or_clone(&mut tmp, self.exit_state); + apply_edge_effect(tmp, SwitchIntTarget { value: Some(value), target }); + (self.propagate)(target, tmp); + } + + // Once we get to the final, "otherwise" branch, there is no need to preserve `exit_state`, + // so pass it directly to `apply_edge_effect` to save a clone of the dataflow state. + let otherwise = self.targets.otherwise(); + apply_edge_effect(self.exit_state, SwitchIntTarget { value: None, target: otherwise }); + (self.propagate)(otherwise, self.exit_state); + + self.effects_applied = true; + } +} + +/// An analogue of `Option::get_or_insert_with` that stores a clone of `val` into `opt`, but uses +/// the more efficient `clone_from` if `opt` was `Some`. +/// +/// Returns a mutable reference to the new clone that resides in `opt`. +// +// FIXME: Figure out how to express this using `Option::clone_from`, or maybe lift it into the +// standard library? +fn opt_clone_from_or_clone<'a, T: Clone>(opt: &'a mut Option<T>, val: &T) -> &'a mut T { + if opt.is_some() { + let ret = opt.as_mut().unwrap(); + ret.clone_from(val); + ret + } else { + *opt = Some(val.clone()); + opt.as_mut().unwrap() + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/engine.rs b/compiler/rustc_mir_dataflow/src/framework/engine.rs new file mode 100644 index 000000000..f374658ce --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/engine.rs @@ -0,0 +1,413 @@ +//! A solver for dataflow problems. 
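+//!
+//! Illustrative sketch (editor addition, not part of the original source;
+//! `MyAnalysis` stands in for a hypothetical `Analysis` implementation) of how
+//! this solver is typically driven:
+//!
+//! ```ignore (illustrative)
+//! let results = Engine::new_generic(tcx, body, MyAnalysis::new())
+//!     .pass_name("my_pass")
+//!     .iterate_to_fixpoint();
+//! let mut cursor = results.into_results_cursor(body);
+//! ```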
+ +use crate::framework::BitSetExt; + +use std::ffi::OsString; +use std::path::PathBuf; + +use rustc_ast as ast; +use rustc_data_structures::work_queue::WorkQueue; +use rustc_graphviz as dot; +use rustc_hir::def_id::DefId; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::mir::{self, traversal, BasicBlock}; +use rustc_middle::mir::{create_dump_file, dump_enabled}; +use rustc_middle::ty::TyCtxt; +use rustc_span::symbol::{sym, Symbol}; + +use super::fmt::DebugWithContext; +use super::graphviz; +use super::{ + visit_results, Analysis, Direction, GenKill, GenKillAnalysis, GenKillSet, JoinSemiLattice, + ResultsCursor, ResultsVisitor, +}; + +/// A dataflow analysis that has converged to fixpoint. +pub struct Results<'tcx, A> +where + A: Analysis<'tcx>, +{ + pub analysis: A, + pub(super) entry_sets: IndexVec<BasicBlock, A::Domain>, +} + +impl<'tcx, A> Results<'tcx, A> +where + A: Analysis<'tcx>, +{ + /// Creates a `ResultsCursor` that can inspect these `Results`. + pub fn into_results_cursor<'mir>( + self, + body: &'mir mir::Body<'tcx>, + ) -> ResultsCursor<'mir, 'tcx, A> { + ResultsCursor::new(body, self) + } + + /// Gets the dataflow state for the given block. + pub fn entry_set_for_block(&self, block: BasicBlock) -> &A::Domain { + &self.entry_sets[block] + } + + pub fn visit_with<'mir>( + &self, + body: &'mir mir::Body<'tcx>, + blocks: impl IntoIterator<Item = BasicBlock>, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = A::Domain>, + ) { + visit_results(body, blocks, self, vis) + } + + pub fn visit_reachable_with<'mir>( + &self, + body: &'mir mir::Body<'tcx>, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = A::Domain>, + ) { + let blocks = mir::traversal::reachable(body); + visit_results(body, blocks.map(|(bb, _)| bb), self, vis) + } +} + +/// A solver for dataflow problems. +pub struct Engine<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + tcx: TyCtxt<'tcx>, + body: &'a mir::Body<'tcx>, + dead_unwinds: Option<&'a BitSet<BasicBlock>>, + entry_sets: IndexVec<BasicBlock, A::Domain>, + pass_name: Option<&'static str>, + analysis: A, + + /// Cached, cumulative transfer functions for each block. + // + // FIXME(ecstaticmorse): This boxed `Fn` trait object is invoked inside a tight loop for + // gen/kill problems on cyclic CFGs. This is not ideal, but it doesn't seem to degrade + // performance in practice. I've tried a few ways to avoid this, but they have downsides. See + // the message for the commit that added this FIXME for more information. + apply_trans_for_block: Option<Box<dyn Fn(BasicBlock, &mut A::Domain)>>, +} + +impl<'a, 'tcx, A, D, T> Engine<'a, 'tcx, A> +where + A: GenKillAnalysis<'tcx, Idx = T, Domain = D>, + D: Clone + JoinSemiLattice + GenKill<T> + BitSetExt<T>, + T: Idx, +{ + /// Creates a new `Engine` to solve a gen-kill dataflow problem. + pub fn new_gen_kill(tcx: TyCtxt<'tcx>, body: &'a mir::Body<'tcx>, analysis: A) -> Self { + // If there are no back-edges in the control-flow graph, we only ever need to apply the + // transfer function for each block exactly once (assuming that we process blocks in RPO). + // + // In this case, there's no need to compute the block transfer functions ahead of time. + if !body.basic_blocks.is_cfg_cyclic() { + return Self::new(tcx, body, analysis, None); + } + + // Otherwise, compute and store the cumulative transfer function for each block. 
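+        //
+        // Editor's illustration (not from the original source): composing
+        // per-statement gen/kill effects into one per-block `GenKillSet` is
+        // sound because later effects override earlier ones. For a block whose
+        // statements perform `gen(a); kill(b); gen(b);`, the cumulative set is
+        // `gen = {a, b}, kill = {}`, and applying it once to any entry state
+        // matches applying the three effects in order.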
+ + let identity = GenKillSet::identity(analysis.bottom_value(body).domain_size()); + let mut trans_for_block = IndexVec::from_elem(identity, body.basic_blocks()); + + for (block, block_data) in body.basic_blocks().iter_enumerated() { + let trans = &mut trans_for_block[block]; + A::Direction::gen_kill_effects_in_block(&analysis, trans, block, block_data); + } + + let apply_trans = Box::new(move |bb: BasicBlock, state: &mut A::Domain| { + trans_for_block[bb].apply(state); + }); + + Self::new(tcx, body, analysis, Some(apply_trans as Box<_>)) + } +} + +impl<'a, 'tcx, A, D> Engine<'a, 'tcx, A> +where + A: Analysis<'tcx, Domain = D>, + D: Clone + JoinSemiLattice, +{ + /// Creates a new `Engine` to solve a dataflow problem with an arbitrary transfer + /// function. + /// + /// Gen-kill problems should use `new_gen_kill`, which will coalesce transfer functions for + /// better performance. + pub fn new_generic(tcx: TyCtxt<'tcx>, body: &'a mir::Body<'tcx>, analysis: A) -> Self { + Self::new(tcx, body, analysis, None) + } + + fn new( + tcx: TyCtxt<'tcx>, + body: &'a mir::Body<'tcx>, + analysis: A, + apply_trans_for_block: Option<Box<dyn Fn(BasicBlock, &mut A::Domain)>>, + ) -> Self { + let bottom_value = analysis.bottom_value(body); + let mut entry_sets = IndexVec::from_elem(bottom_value.clone(), body.basic_blocks()); + analysis.initialize_start_block(body, &mut entry_sets[mir::START_BLOCK]); + + if A::Direction::IS_BACKWARD && entry_sets[mir::START_BLOCK] != bottom_value { + bug!("`initialize_start_block` is not yet supported for backward dataflow analyses"); + } + + Engine { + analysis, + tcx, + body, + dead_unwinds: None, + pass_name: None, + entry_sets, + apply_trans_for_block, + } + } + + /// Signals that we do not want dataflow state to propagate across unwind edges for these + /// `BasicBlock`s. + /// + /// You must take care that `dead_unwinds` does not contain a `BasicBlock` that *can* actually + /// unwind during execution. Otherwise, your dataflow results will not be correct. + pub fn dead_unwinds(mut self, dead_unwinds: &'a BitSet<BasicBlock>) -> Self { + self.dead_unwinds = Some(dead_unwinds); + self + } + + /// Adds an identifier to the graphviz output for this particular run of a dataflow analysis. + /// + /// Some analyses are run multiple times in the compilation pipeline. Give them a `pass_name` + /// to differentiate them. Otherwise, only the results for the latest run will be saved. + pub fn pass_name(mut self, name: &'static str) -> Self { + self.pass_name = Some(name); + self + } + + /// Computes the fixpoint for this dataflow problem and returns it. + pub fn iterate_to_fixpoint(self) -> Results<'tcx, A> + where + A::Domain: DebugWithContext<A>, + { + let Engine { + analysis, + body, + dead_unwinds, + mut entry_sets, + tcx, + apply_trans_for_block, + pass_name, + .. + } = self; + + let mut dirty_queue: WorkQueue<BasicBlock> = + WorkQueue::with_none(body.basic_blocks().len()); + + if A::Direction::IS_FORWARD { + for (bb, _) in traversal::reverse_postorder(body) { + dirty_queue.insert(bb); + } + } else { + // Reverse post-order on the reverse CFG may generate a better iteration order for + // backward dataflow analyses, but probably not enough to matter. + for (bb, _) in traversal::postorder(body) { + dirty_queue.insert(bb); + } + } + + // `state` is not actually used between iterations; + // this is just an optimization to avoid reallocating + // every iteration. 
+ let mut state = analysis.bottom_value(body); + while let Some(bb) = dirty_queue.pop() { + let bb_data = &body[bb]; + + // Set the state to the entry state of the block. + // This is equivalent to `state = entry_sets[bb].clone()`, + // but it saves an allocation, thus improving compile times. + state.clone_from(&entry_sets[bb]); + + // Apply the block transfer function, using the cached one if it exists. + match &apply_trans_for_block { + Some(apply) => apply(bb, &mut state), + None => A::Direction::apply_effects_in_block(&analysis, &mut state, bb, bb_data), + } + + A::Direction::join_state_into_successors_of( + &analysis, + tcx, + body, + dead_unwinds, + &mut state, + (bb, bb_data), + |target: BasicBlock, state: &A::Domain| { + let set_changed = entry_sets[target].join(state); + if set_changed { + dirty_queue.insert(target); + } + }, + ); + } + + let results = Results { analysis, entry_sets }; + + let res = write_graphviz_results(tcx, &body, &results, pass_name); + if let Err(e) = res { + error!("Failed to write graphviz dataflow results: {}", e); + } + + results + } +} + +// Graphviz + +/// Writes a DOT file containing the results of a dataflow analysis if the user requested it via +/// `rustc_mir` attributes. +fn write_graphviz_results<'tcx, A>( + tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + results: &Results<'tcx, A>, + pass_name: Option<&'static str>, +) -> std::io::Result<()> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + use std::fs; + use std::io::{self, Write}; + + let def_id = body.source.def_id(); + let Ok(attrs) = RustcMirAttrs::parse(tcx, def_id) else { + // Invalid `rustc_mir` attrs are reported in `RustcMirAttrs::parse` + return Ok(()); + }; + + let mut file = match attrs.output_path(A::NAME) { + Some(path) => { + debug!("printing dataflow results for {:?} to {}", def_id, path.display()); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + io::BufWriter::new(fs::File::create(&path)?) + } + + None if tcx.sess.opts.unstable_opts.dump_mir_dataflow + && dump_enabled(tcx, A::NAME, def_id) => + { + create_dump_file( + tcx, + ".dot", + None, + A::NAME, + &pass_name.unwrap_or("-----"), + body.source, + )? 
+            }
+
+        _ => return Ok(()),
+    };
+
+    let style = match attrs.formatter {
+        Some(sym::two_phase) => graphviz::OutputStyle::BeforeAndAfter,
+        _ => graphviz::OutputStyle::AfterOnly,
+    };
+
+    let mut buf = Vec::new();
+
+    let graphviz = graphviz::Formatter::new(body, results, style);
+    let mut render_opts =
+        vec![dot::RenderOption::Fontname(tcx.sess.opts.unstable_opts.graphviz_font.clone())];
+    if tcx.sess.opts.unstable_opts.graphviz_dark_mode {
+        render_opts.push(dot::RenderOption::DarkTheme);
+    }
+    dot::render_opts(&graphviz, &mut buf, &render_opts)?;
+
+    file.write_all(&buf)?;
+
+    Ok(())
+}
+
+#[derive(Default)]
+struct RustcMirAttrs {
+    basename_and_suffix: Option<PathBuf>,
+    formatter: Option<Symbol>,
+}
+
+impl RustcMirAttrs {
+    fn parse(tcx: TyCtxt<'_>, def_id: DefId) -> Result<Self, ()> {
+        let mut result = Ok(());
+        let mut ret = RustcMirAttrs::default();
+
+        let rustc_mir_attrs = tcx
+            .get_attrs(def_id, sym::rustc_mir)
+            .flat_map(|attr| attr.meta_item_list().into_iter().flat_map(|v| v.into_iter()));
+
+        for attr in rustc_mir_attrs {
+            let attr_result = if attr.has_name(sym::borrowck_graphviz_postflow) {
+                Self::set_field(&mut ret.basename_and_suffix, tcx, &attr, |s| {
+                    let path = PathBuf::from(s.to_string());
+                    match path.file_name() {
+                        Some(_) => Ok(path),
+                        None => {
+                            tcx.sess.span_err(attr.span(), "path must end in a filename");
+                            Err(())
+                        }
+                    }
+                })
+            } else if attr.has_name(sym::borrowck_graphviz_format) {
+                Self::set_field(&mut ret.formatter, tcx, &attr, |s| match s {
+                    sym::gen_kill | sym::two_phase => Ok(s),
+                    _ => {
+                        tcx.sess.span_err(attr.span(), "unknown formatter");
+                        Err(())
+                    }
+                })
+            } else {
+                Ok(())
+            };
+
+            result = result.and(attr_result);
+        }
+
+        result.map(|()| ret)
+    }
+
+    fn set_field<T>(
+        field: &mut Option<T>,
+        tcx: TyCtxt<'_>,
+        attr: &ast::NestedMetaItem,
+        mapper: impl FnOnce(Symbol) -> Result<T, ()>,
+    ) -> Result<(), ()> {
+        if field.is_some() {
+            tcx.sess
+                .span_err(attr.span(), &format!("duplicate values for `{}`", attr.name_or_empty()));
+
+            return Err(());
+        }
+
+        if let Some(s) = attr.value_str() {
+            *field = Some(mapper(s)?);
+            Ok(())
+        } else {
+            tcx.sess
+                .span_err(attr.span(), &format!("`{}` requires an argument", attr.name_or_empty()));
+            Err(())
+        }
+    }
+
+    /// Returns the path where dataflow results should be written, or `None` if
+    /// `borrowck_graphviz_postflow` was not specified.
+    ///
+    /// This performs the following transformation to the argument of `borrowck_graphviz_postflow`:
+    ///
+    /// "path/suffix.dot" -> "path/analysis_name_suffix.dot"
+    fn output_path(&self, analysis_name: &str) -> Option<PathBuf> {
+        let mut ret = self.basename_and_suffix.as_ref().cloned()?;
+        let suffix = ret.file_name().unwrap(); // Checked when parsing attrs
+
+        let mut file_name: OsString = analysis_name.into();
+        file_name.push("_");
+        file_name.push(suffix);
+        ret.set_file_name(file_name);
+
+        Some(ret)
+    }
+}
diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs
new file mode 100644
index 000000000..209e6f7ac
--- /dev/null
+++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs
@@ -0,0 +1,211 @@
+//! Custom formatting traits used when outputting Graphviz diagrams with the results of a dataflow
+//! analysis.
+
+use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet};
+use rustc_index::vec::Idx;
+use std::fmt;
+
+/// An extension to `fmt::Debug` for data that can be better printed with some auxiliary data `C`.
+pub trait DebugWithContext<C>: Eq + fmt::Debug {
+    fn fmt_with(&self, _ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(self, f)
+    }
+
+    /// Print the difference between `self` and `old`.
+    ///
+    /// This should print nothing if `self == old`.
+    ///
+    /// `+` and `-` are typically used to indicate differences. However, these characters are
+    /// fairly common and may be needed to print a type's representation. If using them to indicate
+    /// a diff, prefix them with the "Unit Separator" control character (␟  U+001F).
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self == old {
+            return Ok(());
+        }
+
+        write!(f, "\u{001f}+")?;
+        self.fmt_with(ctxt, f)?;
+
+        if f.alternate() {
+            write!(f, "\n")?;
+        } else {
+            write!(f, "\t")?;
+        }
+
+        write!(f, "\u{001f}-")?;
+        old.fmt_with(ctxt, f)
+    }
+}
+
+/// Implements `fmt::Debug` by deferring to `<T as DebugWithContext<C>>::fmt_with`.
+pub struct DebugWithAdapter<'a, T, C> {
+    pub this: T,
+    pub ctxt: &'a C,
+}
+
+impl<T, C> fmt::Debug for DebugWithAdapter<'_, T, C>
+where
+    T: DebugWithContext<C>,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.this.fmt_with(self.ctxt, f)
+    }
+}
+
+/// Implements `fmt::Debug` by deferring to `<T as DebugWithContext<C>>::fmt_diff_with`.
+pub struct DebugDiffWithAdapter<'a, T, C> {
+    pub new: T,
+    pub old: T,
+    pub ctxt: &'a C,
+}
+
+impl<T, C> fmt::Debug for DebugDiffWithAdapter<'_, T, C>
+where
+    T: DebugWithContext<C>,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.new.fmt_diff_with(&self.old, self.ctxt, f)
+    }
+}
+
+// Impls
+
+impl<T, C> DebugWithContext<C> for BitSet<T>
+where
+    T: Idx + DebugWithContext<C>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_set().entries(self.iter().map(|i| DebugWithAdapter { this: i, ctxt })).finish()
+    }
+
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let size = self.domain_size();
+        assert_eq!(size, old.domain_size());
+
+        let mut set_in_self = HybridBitSet::new_empty(size);
+        let mut cleared_in_self = HybridBitSet::new_empty(size);
+
+        for i in (0..size).map(T::new) {
+            match (self.contains(i), old.contains(i)) {
+                (true, false) => set_in_self.insert(i),
+                (false, true) => cleared_in_self.insert(i),
+                _ => continue,
+            };
+        }
+
+        fmt_diff(&set_in_self, &cleared_in_self, ctxt, f)
+    }
+}
+
+impl<T, C> DebugWithContext<C> for ChunkedBitSet<T>
+where
+    T: Idx + DebugWithContext<C>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_set().entries(self.iter().map(|i| DebugWithAdapter { this: i, ctxt })).finish()
+    }
+
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let size = self.domain_size();
+        assert_eq!(size, old.domain_size());
+
+        let mut set_in_self = HybridBitSet::new_empty(size);
+        let mut cleared_in_self = HybridBitSet::new_empty(size);
+
+        for i in (0..size).map(T::new) {
+            match (self.contains(i), old.contains(i)) {
+                (true, false) => set_in_self.insert(i),
+                (false, true) => cleared_in_self.insert(i),
+                _ => continue,
+            };
+        }
+
+        fmt_diff(&set_in_self, &cleared_in_self, ctxt, f)
+    }
+}
+
+fn fmt_diff<T, C>(
+    inserted: &HybridBitSet<T>,
+    removed: &HybridBitSet<T>,
+    ctxt: &C,
+    f: &mut fmt::Formatter<'_>,
+) -> fmt::Result
+where
+    T: Idx + DebugWithContext<C>,
+{
+    let mut first = true;
+    for idx in inserted.iter() {
+        let delim = if first {
+            "\u{001f}+"
+        } else if f.alternate() {
+            "\n\u{001f}+"
+        } else {
+            ", "
+        };
+
+        write!(f, "{}", delim)?;
+        idx.fmt_with(ctxt, f)?;
+        first = false;
+    }
+
+    if !f.alternate() {
+        first = true;
+        if !inserted.is_empty() && !removed.is_empty() {
+            write!(f, "\t")?;
+        }
+    }
+
+    for idx in removed.iter() {
+        let delim = if first {
+            "\u{001f}-"
+        } else if f.alternate() {
+            "\n\u{001f}-"
+        } else {
+            ", "
+        };
+
+        write!(f, "{}", delim)?;
+        idx.fmt_with(ctxt, f)?;
+        first = false;
+    }
+
+    Ok(())
+}
+
+impl<T, C> DebugWithContext<C> for &'_ T
+where
+    T: DebugWithContext<C>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        (*self).fmt_with(ctxt, f)
+    }
+
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        (*self).fmt_diff_with(*old, ctxt, f)
+    }
+}
+
+impl<C> DebugWithContext<C> for rustc_middle::mir::Local {}
+impl<C> DebugWithContext<C> for crate::move_paths::InitIndex {}
+
+impl<'tcx, C> DebugWithContext<C> for crate::move_paths::MovePathIndex
+where
+    C: crate::move_paths::HasMoveData<'tcx>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", ctxt.move_data().move_paths[*self])
+    }
+}
+
+impl<T, C> DebugWithContext<C> for crate::lattice::Dual<T>
+where
+    T: DebugWithContext<C>,
+{
+    fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        (self.0).fmt_with(ctxt, f)
+    }
+
+    fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        (self.0).fmt_diff_with(&old.0, ctxt, f)
+    }
+}
diff --git a/compiler/rustc_mir_dataflow/src/framework/graphviz.rs b/compiler/rustc_mir_dataflow/src/framework/graphviz.rs
new file mode 100644
index 000000000..c94198c56
--- /dev/null
+++ b/compiler/rustc_mir_dataflow/src/framework/graphviz.rs
@@ -0,0 +1,667 @@
+//! A helpful diagram for debugging dataflow problems.
+
+use std::borrow::Cow;
+use std::sync::OnceLock;
+use std::{io, ops, str};
+
+use regex::Regex;
+use rustc_graphviz as dot;
+use rustc_middle::mir::graphviz_safe_def_name;
+use rustc_middle::mir::{self, BasicBlock, Body, Location};
+
+use super::fmt::{DebugDiffWithAdapter, DebugWithAdapter, DebugWithContext};
+use super::{Analysis, CallReturnPlaces, Direction, Results, ResultsRefCursor, ResultsVisitor};
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum OutputStyle {
+    AfterOnly,
+    BeforeAndAfter,
+}
+
+impl OutputStyle {
+    fn num_state_columns(&self) -> usize {
+        match self {
+            Self::AfterOnly => 1,
+            Self::BeforeAndAfter => 2,
+        }
+    }
+}
+
+pub struct Formatter<'a, 'tcx, A>
+where
+    A: Analysis<'tcx>,
+{
+    body: &'a Body<'tcx>,
+    results: &'a Results<'tcx, A>,
+    style: OutputStyle,
+}
+
+impl<'a, 'tcx, A> Formatter<'a, 'tcx, A>
+where
+    A: Analysis<'tcx>,
+{
+    pub fn new(body: &'a Body<'tcx>, results: &'a Results<'tcx, A>, style: OutputStyle) -> Self {
+        Formatter { body, results, style }
+    }
+}
+
+/// A pair of a basic block and an index into that basic block's `successors`.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct CfgEdge { + source: BasicBlock, + index: usize, +} + +fn dataflow_successors(body: &Body<'_>, bb: BasicBlock) -> Vec<CfgEdge> { + body[bb] + .terminator() + .successors() + .enumerate() + .map(|(index, _)| CfgEdge { source: bb, index }) + .collect() +} + +impl<'tcx, A> dot::Labeller<'_> for Formatter<'_, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + type Node = BasicBlock; + type Edge = CfgEdge; + + fn graph_id(&self) -> dot::Id<'_> { + let name = graphviz_safe_def_name(self.body.source.def_id()); + dot::Id::new(format!("graph_for_def_id_{}", name)).unwrap() + } + + fn node_id(&self, n: &Self::Node) -> dot::Id<'_> { + dot::Id::new(format!("bb_{}", n.index())).unwrap() + } + + fn node_label(&self, block: &Self::Node) -> dot::LabelText<'_> { + let mut label = Vec::new(); + let mut fmt = BlockFormatter { + results: ResultsRefCursor::new(self.body, self.results), + style: self.style, + bg: Background::Light, + }; + + fmt.write_node_label(&mut label, self.body, *block).unwrap(); + dot::LabelText::html(String::from_utf8(label).unwrap()) + } + + fn node_shape(&self, _n: &Self::Node) -> Option<dot::LabelText<'_>> { + Some(dot::LabelText::label("none")) + } + + fn edge_label(&self, e: &Self::Edge) -> dot::LabelText<'_> { + let label = &self.body[e.source].terminator().kind.fmt_successor_labels()[e.index]; + dot::LabelText::label(label.clone()) + } +} + +impl<'a, 'tcx, A> dot::GraphWalk<'a> for Formatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + type Node = BasicBlock; + type Edge = CfgEdge; + + fn nodes(&self) -> dot::Nodes<'_, Self::Node> { + self.body.basic_blocks().indices().collect::<Vec<_>>().into() + } + + fn edges(&self) -> dot::Edges<'_, Self::Edge> { + self.body + .basic_blocks() + .indices() + .flat_map(|bb| dataflow_successors(self.body, bb)) + .collect::<Vec<_>>() + .into() + } + + fn source(&self, edge: &Self::Edge) -> Self::Node { + edge.source + } + + fn target(&self, edge: &Self::Edge) -> Self::Node { + self.body[edge.source].terminator().successors().nth(edge.index).unwrap() + } +} + +struct BlockFormatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + results: ResultsRefCursor<'a, 'a, 'tcx, A>, + bg: Background, + style: OutputStyle, +} + +impl<'a, 'tcx, A> BlockFormatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + const HEADER_COLOR: &'static str = "#a0a0a0"; + + fn toggle_background(&mut self) -> Background { + let bg = self.bg; + self.bg = !bg; + bg + } + + fn write_node_label( + &mut self, + w: &mut impl io::Write, + body: &'a Body<'tcx>, + block: BasicBlock, + ) -> io::Result<()> { + // Sample output: + // +-+-----------------------------------------------+ + // A | bb4 | + // +-+----------------------------------+------------+ + // B | MIR | STATE | + // +-+----------------------------------+------------+ + // C | | (on entry) | {_0,_2,_3} | + // +-+----------------------------------+------------+ + // D |0| StorageLive(_7) | | + // +-+----------------------------------+------------+ + // |1| StorageLive(_8) | | + // +-+----------------------------------+------------+ + // |2| _8 = &mut _1 | +_8 | + // +-+----------------------------------+------------+ + // E |T| _4 = const Foo::twiddle(move _2) | -_2 | + // +-+----------------------------------+------------+ + // F | | (on unwind) | {_0,_3,_8} | + // +-+----------------------------------+------------+ + // | | (on successful return) | +_4 | + // +-+----------------------------------+------------+ + 
+
+        // N.B., Some attributes (`align`, `balign`) are repeated on parent elements and their
+        // children. This is because `xdot` seemed to have a hard time correctly propagating
+        // attributes. Make sure to test the output before trying to remove the redundancy.
+        // Notably, `align` was found to have no effect when applied only to <table>.
+
+        let table_fmt = concat!(
+            " border=\"1\"",
+            " cellborder=\"1\"",
+            " cellspacing=\"0\"",
+            " cellpadding=\"3\"",
+            " sides=\"rb\"",
+        );
+        write!(w, r#"<table{fmt}>"#, fmt = table_fmt)?;
+
+        // A + B: Block header
+        match self.style {
+            OutputStyle::AfterOnly => self.write_block_header_simple(w, block)?,
+            OutputStyle::BeforeAndAfter => {
+                self.write_block_header_with_state_columns(w, block, &["BEFORE", "AFTER"])?
+            }
+        }
+
+        // C: State at start of block
+        self.bg = Background::Light;
+        self.results.seek_to_block_start(block);
+        let block_start_state = self.results.get().clone();
+        self.write_row_with_full_state(w, "", "(on entry)")?;
+
+        // D + E: Statement and terminator transfer functions
+        self.write_statements_and_terminator(w, body, block)?;
+
+        // F: State at end of block
+
+        let terminator = body[block].terminator();
+
+        // Write the full dataflow state immediately after the terminator if it differs from the
+        // state at block entry.
+        self.results.seek_to_block_end(block);
+        if self.results.get() != &block_start_state || A::Direction::IS_BACKWARD {
+            let after_terminator_name = match terminator.kind {
+                mir::TerminatorKind::Call { target: Some(_), .. } => "(on unwind)",
+                _ => "(on end)",
+            };
+
+            self.write_row_with_full_state(w, "", after_terminator_name)?;
+        }
+
+        // Write any changes caused by terminator-specific effects.
+        //
+        // FIXME: These should really be printed as part of each outgoing edge rather than the node
+        // for the basic block itself. That way, we could display terminator-specific effects for
+        // backward dataflow analyses as well as effects for `SwitchInt` terminators.
+        match terminator.kind {
+            mir::TerminatorKind::Call { destination, .. } => {
+                self.write_row(w, "", "(on successful return)", |this, w, fmt| {
+                    let state_on_unwind = this.results.get().clone();
+                    this.results.apply_custom_effect(|analysis, state| {
+                        analysis.apply_call_return_effect(
+                            state,
+                            block,
+                            CallReturnPlaces::Call(destination),
+                        );
+                    });
+
+                    write!(
+                        w,
+                        r#"<td balign="left" colspan="{colspan}" {fmt} align="left">{diff}</td>"#,
+                        colspan = this.style.num_state_columns(),
+                        fmt = fmt,
+                        diff = diff_pretty(
+                            this.results.get(),
+                            &state_on_unwind,
+                            this.results.analysis()
+                        ),
+                    )
+                })?;
+            }
+
+            mir::TerminatorKind::Yield { resume, resume_arg, .. } => {
+                self.write_row(w, "", "(on yield resume)", |this, w, fmt| {
+                    let state_on_generator_drop = this.results.get().clone();
+                    this.results.apply_custom_effect(|analysis, state| {
+                        analysis.apply_yield_resume_effect(state, resume, resume_arg);
+                    });
+
+                    write!(
+                        w,
+                        r#"<td balign="left" colspan="{colspan}" {fmt} align="left">{diff}</td>"#,
+                        colspan = this.style.num_state_columns(),
+                        fmt = fmt,
+                        diff = diff_pretty(
+                            this.results.get(),
+                            &state_on_generator_drop,
+                            this.results.analysis()
+                        ),
+                    )
+                })?;
+            }
+
+            mir::TerminatorKind::InlineAsm { destination: Some(_), ref operands, ..
} => { + self.write_row(w, "", "(on successful return)", |this, w, fmt| { + let state_on_unwind = this.results.get().clone(); + this.results.apply_custom_effect(|analysis, state| { + analysis.apply_call_return_effect( + state, + block, + CallReturnPlaces::InlineAsm(operands), + ); + }); + + write!( + w, + r#"<td balign="left" colspan="{colspan}" {fmt} align="left">{diff}</td>"#, + colspan = this.style.num_state_columns(), + fmt = fmt, + diff = diff_pretty( + this.results.get(), + &state_on_unwind, + this.results.analysis() + ), + ) + })?; + } + + _ => {} + }; + + write!(w, "</table>") + } + + fn write_block_header_simple( + &mut self, + w: &mut impl io::Write, + block: BasicBlock, + ) -> io::Result<()> { + // +-------------------------------------------------+ + // A | bb4 | + // +-----------------------------------+-------------+ + // B | MIR | STATE | + // +-+---------------------------------+-------------+ + // | | ... | | + + // A + write!( + w, + concat!("<tr>", r#"<td colspan="3" sides="tl">bb{block_id}</td>"#, "</tr>",), + block_id = block.index(), + )?; + + // B + write!( + w, + concat!( + "<tr>", + r#"<td colspan="2" {fmt}>MIR</td>"#, + r#"<td {fmt}>STATE</td>"#, + "</tr>", + ), + fmt = format!("bgcolor=\"{}\" sides=\"tl\"", Self::HEADER_COLOR), + ) + } + + fn write_block_header_with_state_columns( + &mut self, + w: &mut impl io::Write, + block: BasicBlock, + state_column_names: &[&str], + ) -> io::Result<()> { + // +------------------------------------+-------------+ + // A | bb4 | STATE | + // +------------------------------------+------+------+ + // B | MIR | GEN | KILL | + // +-+----------------------------------+------+------+ + // | | ... | | | + + // A + write!( + w, + concat!( + "<tr>", + r#"<td {fmt} colspan="2">bb{block_id}</td>"#, + r#"<td {fmt} colspan="{num_state_cols}">STATE</td>"#, + "</tr>", + ), + fmt = "sides=\"tl\"", + num_state_cols = state_column_names.len(), + block_id = block.index(), + )?; + + // B + let fmt = format!("bgcolor=\"{}\" sides=\"tl\"", Self::HEADER_COLOR); + write!(w, concat!("<tr>", r#"<td colspan="2" {fmt}>MIR</td>"#,), fmt = fmt,)?; + + for name in state_column_names { + write!(w, "<td {fmt}>{name}</td>", fmt = fmt, name = name)?; + } + + write!(w, "</tr>") + } + + fn write_statements_and_terminator( + &mut self, + w: &mut impl io::Write, + body: &'a Body<'tcx>, + block: BasicBlock, + ) -> io::Result<()> { + let diffs = StateDiffCollector::run(body, block, self.results.results(), self.style); + + let mut befores = diffs.before.map(|v| v.into_iter()); + let mut afters = diffs.after.into_iter(); + + let next_in_dataflow_order = |it: &mut std::vec::IntoIter<_>| { + if A::Direction::IS_FORWARD { it.next().unwrap() } else { it.next_back().unwrap() } + }; + + for (i, statement) in body[block].statements.iter().enumerate() { + let statement_str = format!("{:?}", statement); + let index_str = format!("{}", i); + + let after = next_in_dataflow_order(&mut afters); + let before = befores.as_mut().map(next_in_dataflow_order); + + self.write_row(w, &index_str, &statement_str, |_this, w, fmt| { + if let Some(before) = before { + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = before)?; + } + + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = after) + })?; + } + + let after = next_in_dataflow_order(&mut afters); + let before = befores.as_mut().map(next_in_dataflow_order); + + assert!(afters.is_empty()); + assert!(befores.as_ref().map_or(true, ExactSizeIterator::is_empty)); + + let terminator = 
body[block].terminator(); + let mut terminator_str = String::new(); + terminator.kind.fmt_head(&mut terminator_str).unwrap(); + + self.write_row(w, "T", &terminator_str, |_this, w, fmt| { + if let Some(before) = before { + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = before)?; + } + + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = after) + }) + } + + /// Write a row with the given index and MIR, using the function argument to fill in the + /// "STATE" column(s). + fn write_row<W: io::Write>( + &mut self, + w: &mut W, + i: &str, + mir: &str, + f: impl FnOnce(&mut Self, &mut W, &str) -> io::Result<()>, + ) -> io::Result<()> { + let bg = self.toggle_background(); + let valign = if mir.starts_with("(on ") && mir != "(on entry)" { "bottom" } else { "top" }; + + let fmt = format!("valign=\"{}\" sides=\"tl\" {}", valign, bg.attr()); + + write!( + w, + concat!( + "<tr>", + r#"<td {fmt} align="right">{i}</td>"#, + r#"<td {fmt} align="left">{mir}</td>"#, + ), + i = i, + fmt = fmt, + mir = dot::escape_html(mir), + )?; + + f(self, w, &fmt)?; + write!(w, "</tr>") + } + + fn write_row_with_full_state( + &mut self, + w: &mut impl io::Write, + i: &str, + mir: &str, + ) -> io::Result<()> { + self.write_row(w, i, mir, |this, w, fmt| { + let state = this.results.get(); + let analysis = this.results.analysis(); + + // FIXME: The full state vector can be quite long. It would be nice to split on commas + // and use some text wrapping algorithm. + write!( + w, + r#"<td colspan="{colspan}" {fmt} align="left">{state}</td>"#, + colspan = this.style.num_state_columns(), + fmt = fmt, + state = format!("{:?}", DebugWithAdapter { this: state, ctxt: analysis }), + ) + }) + } +} + +struct StateDiffCollector<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + analysis: &'a A, + prev_state: A::Domain, + before: Option<Vec<String>>, + after: Vec<String>, +} + +impl<'a, 'tcx, A> StateDiffCollector<'a, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + fn run( + body: &'a mir::Body<'tcx>, + block: BasicBlock, + results: &'a Results<'tcx, A>, + style: OutputStyle, + ) -> Self { + let mut collector = StateDiffCollector { + analysis: &results.analysis, + prev_state: results.analysis.bottom_value(body), + after: vec![], + before: (style == OutputStyle::BeforeAndAfter).then_some(vec![]), + }; + + results.visit_with(body, std::iter::once(block), &mut collector); + collector + } +} + +impl<'a, 'tcx, A> ResultsVisitor<'a, 'tcx> for StateDiffCollector<'a, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + type FlowState = A::Domain; + + fn visit_block_start( + &mut self, + state: &Self::FlowState, + _block_data: &mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + if A::Direction::IS_FORWARD { + self.prev_state.clone_from(state); + } + } + + fn visit_block_end( + &mut self, + state: &Self::FlowState, + _block_data: &mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + if A::Direction::IS_BACKWARD { + self.prev_state.clone_from(state); + } + } + + fn visit_statement_before_primary_effect( + &mut self, + state: &Self::FlowState, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + if let Some(before) = self.before.as_mut() { + before.push(diff_pretty(state, &self.prev_state, self.analysis)); + self.prev_state.clone_from(state) + } + } + + fn visit_statement_after_primary_effect( + &mut self, + state: &Self::FlowState, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + self.after.push(diff_pretty(state, 
&self.prev_state, self.analysis));
+        self.prev_state.clone_from(state)
+    }
+
+    fn visit_terminator_before_primary_effect(
+        &mut self,
+        state: &Self::FlowState,
+        _terminator: &mir::Terminator<'tcx>,
+        _location: Location,
+    ) {
+        if let Some(before) = self.before.as_mut() {
+            before.push(diff_pretty(state, &self.prev_state, self.analysis));
+            self.prev_state.clone_from(state)
+        }
+    }
+
+    fn visit_terminator_after_primary_effect(
+        &mut self,
+        state: &Self::FlowState,
+        _terminator: &mir::Terminator<'tcx>,
+        _location: Location,
+    ) {
+        self.after.push(diff_pretty(state, &self.prev_state, self.analysis));
+        self.prev_state.clone_from(state)
+    }
+}
+
+macro_rules! regex {
+    ($re:literal $(,)?) => {{
+        static RE: OnceLock<regex::Regex> = OnceLock::new();
+        RE.get_or_init(|| Regex::new($re).unwrap())
+    }};
+}
+
+fn diff_pretty<T, C>(new: T, old: T, ctxt: &C) -> String
+where
+    T: DebugWithContext<C>,
+{
+    if new == old {
+        return String::new();
+    }
+
+    let re = regex!("\t?\u{001f}([+-])");
+
+    let raw_diff = format!("{:#?}", DebugDiffWithAdapter { new, old, ctxt });
+
+    // Replace newlines in the `Debug` output with `<br/>`
+    let raw_diff = raw_diff.replace('\n', r#"<br align="left"/>"#);
+
+    let mut inside_font_tag = false;
+    let html_diff = re.replace_all(&raw_diff, |captures: &regex::Captures<'_>| {
+        let mut ret = String::new();
+        if inside_font_tag {
+            ret.push_str(r#"</font>"#);
+        }
+
+        let tag = match &captures[1] {
+            "+" => r#"<font color="darkgreen">+"#,
+            "-" => r#"<font color="red">-"#,
+            _ => unreachable!(),
+        };
+
+        inside_font_tag = true;
+        ret.push_str(tag);
+        ret
+    });
+
+    let Cow::Owned(mut html_diff) = html_diff else {
+        return raw_diff;
+    };
+
+    if inside_font_tag {
+        html_diff.push_str("</font>");
+    }
+
+    html_diff
+}
+
+/// The background color used for zebra-striping the table.
+#[derive(Clone, Copy)]
+enum Background {
+    Light,
+    Dark,
+}
+
+impl Background {
+    fn attr(self) -> &'static str {
+        match self {
+            Self::Dark => "bgcolor=\"#f0f0f0\"",
+            Self::Light => "",
+        }
+    }
+}
+
+impl ops::Not for Background {
+    type Output = Self;
+
+    fn not(self) -> Self {
+        match self {
+            Self::Light => Self::Dark,
+            Self::Dark => Self::Light,
+        }
+    }
+}
diff --git a/compiler/rustc_mir_dataflow/src/framework/lattice.rs b/compiler/rustc_mir_dataflow/src/framework/lattice.rs
new file mode 100644
index 000000000..d6b89eb82
--- /dev/null
+++ b/compiler/rustc_mir_dataflow/src/framework/lattice.rs
@@ -0,0 +1,252 @@
+//! Traits used to represent [lattices] for use as the domain of a dataflow analysis.
+//!
+//! # Overview
+//!
+//! The most common lattice is a powerset of some set `S`, ordered by [set inclusion]. The [Hasse
+//! diagram] for the powerset of a set with two elements (`X` and `Y`) is shown below. Note that
+//! distinct elements at the same height in a Hasse diagram (e.g. `{X}` and `{Y}`) are
+//! *incomparable*, not equal.
+//!
+//! ```text
+//!      {X, Y}    <- top
+//!       /  \
+//!    {X}    {Y}
+//!       \  /
+//!        {}      <- bottom
+//!
+//! ```
+//!
+//! The defining characteristic of a lattice—the one that differentiates it from a [partially
+//! ordered set][poset]—is the existence of a *unique* least upper and greatest lower bound for
+//! every pair of elements. The lattice join operator (`∨`) returns the least upper bound, and the
+//! lattice meet operator (`∧`) returns the greatest lower bound. Types that implement one operator
+//! but not the other are known as semilattices. Dataflow analysis only uses the join operator and
will work with any join-semilattice, but both should be specified when possible. +//! +//! ## `PartialOrd` +//! +//! Given that they represent partially ordered sets, you may be surprised that [`JoinSemiLattice`] +//! and [`MeetSemiLattice`] do not have [`PartialOrd`][std::cmp::PartialOrd] as a supertrait. This +//! is because most standard library types use lexicographic ordering instead of set inclusion for +//! their `PartialOrd` impl. Since we do not actually need to compare lattice elements to run a +//! dataflow analysis, there's no need for a newtype wrapper with a custom `PartialOrd` impl. The +//! only benefit would be the ability to check that the least upper (or greatest lower) bound +//! returned by the lattice join (or meet) operator was in fact greater (or lower) than the inputs. +//! +//! [lattices]: https://en.wikipedia.org/wiki/Lattice_(order) +//! [set inclusion]: https://en.wikipedia.org/wiki/Subset +//! [Hasse diagram]: https://en.wikipedia.org/wiki/Hasse_diagram +//! [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set + +use crate::framework::BitSetExt; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; +use rustc_index::vec::{Idx, IndexVec}; +use std::iter; + +/// A [partially ordered set][poset] that has a [least upper bound][lub] for any pair of elements +/// in the set. +/// +/// [lub]: https://en.wikipedia.org/wiki/Infimum_and_supremum +/// [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set +pub trait JoinSemiLattice: Eq { + /// Computes the least upper bound of two elements, storing the result in `self` and returning + /// `true` if `self` has changed. + /// + /// The lattice join operator is abbreviated as `∨`. + fn join(&mut self, other: &Self) -> bool; +} + +/// A [partially ordered set][poset] that has a [greatest lower bound][glb] for any pair of +/// elements in the set. +/// +/// Dataflow analyses only require that their domains implement [`JoinSemiLattice`], not +/// `MeetSemiLattice`. However, types that will be used as dataflow domains should implement both +/// so that they can be used with [`Dual`]. +/// +/// [glb]: https://en.wikipedia.org/wiki/Infimum_and_supremum +/// [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set +pub trait MeetSemiLattice: Eq { + /// Computes the greatest lower bound of two elements, storing the result in `self` and + /// returning `true` if `self` has changed. + /// + /// The lattice meet operator is abbreviated as `∧`. + fn meet(&mut self, other: &Self) -> bool; +} + +/// A `bool` is a "two-point" lattice with `true` as the top element and `false` as the bottom: +/// +/// ```text +/// true +/// | +/// false +/// ``` +impl JoinSemiLattice for bool { + fn join(&mut self, other: &Self) -> bool { + if let (false, true) = (*self, *other) { + *self = true; + return true; + } + + false + } +} + +impl MeetSemiLattice for bool { + fn meet(&mut self, other: &Self) -> bool { + if let (true, false) = (*self, *other) { + *self = false; + return true; + } + + false + } +} + +/// A tuple (or list) of lattices is itself a lattice whose least upper bound is the concatenation +/// of the least upper bounds of each element of the tuple (or list). 
+/// +/// In other words: +/// (A₀, A₁, ..., Aₙ) ∨ (B₀, B₁, ..., Bₙ) = (A₀∨B₀, A₁∨B₁, ..., Aₙ∨Bₙ) +impl<I: Idx, T: JoinSemiLattice> JoinSemiLattice for IndexVec<I, T> { + fn join(&mut self, other: &Self) -> bool { + assert_eq!(self.len(), other.len()); + + let mut changed = false; + for (a, b) in iter::zip(self, other) { + changed |= a.join(b); + } + changed + } +} + +impl<I: Idx, T: MeetSemiLattice> MeetSemiLattice for IndexVec<I, T> { + fn meet(&mut self, other: &Self) -> bool { + assert_eq!(self.len(), other.len()); + + let mut changed = false; + for (a, b) in iter::zip(self, other) { + changed |= a.meet(b); + } + changed + } +} + +/// A `BitSet` represents the lattice formed by the powerset of all possible values of +/// the index type `T` ordered by inclusion. Equivalently, it is a tuple of "two-point" lattices, +/// one for each possible value of `T`. +impl<T: Idx> JoinSemiLattice for BitSet<T> { + fn join(&mut self, other: &Self) -> bool { + self.union(other) + } +} + +impl<T: Idx> MeetSemiLattice for BitSet<T> { + fn meet(&mut self, other: &Self) -> bool { + self.intersect(other) + } +} + +impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> { + fn join(&mut self, other: &Self) -> bool { + self.union(other) + } +} + +impl<T: Idx> MeetSemiLattice for ChunkedBitSet<T> { + fn meet(&mut self, other: &Self) -> bool { + self.intersect(other) + } +} + +/// The counterpart of a given semilattice `T` using the [inverse order]. +/// +/// The dual of a join-semilattice is a meet-semilattice and vice versa. For example, the dual of a +/// powerset has the empty set as its top element and the full set as its bottom element and uses +/// set *intersection* as its join operator. +/// +/// [inverse order]: https://en.wikipedia.org/wiki/Duality_(order_theory) +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Dual<T>(pub T); + +impl<T: Idx> BitSetExt<T> for Dual<BitSet<T>> { + fn domain_size(&self) -> usize { + self.0.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.0.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet<T>) { + self.0.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet<T>) { + self.0.subtract(other); + } +} + +impl<T: MeetSemiLattice> JoinSemiLattice for Dual<T> { + fn join(&mut self, other: &Self) -> bool { + self.0.meet(&other.0) + } +} + +impl<T: JoinSemiLattice> MeetSemiLattice for Dual<T> { + fn meet(&mut self, other: &Self) -> bool { + self.0.join(&other.0) + } +} + +/// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no +/// value of `T` is comparable with any other. +/// +/// A flat set has the following [Hasse diagram]: +/// +/// ```text +/// top +/// / ... / / \ \ ... \ +/// all possible values of `T` +/// \ ... \ \ / / ... 
/ +/// bottom +/// ``` +/// +/// [Hasse diagram]: https://en.wikipedia.org/wiki/Hasse_diagram +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum FlatSet<T> { + Bottom, + Elem(T), + Top, +} + +impl<T: Clone + Eq> JoinSemiLattice for FlatSet<T> { + fn join(&mut self, other: &Self) -> bool { + let result = match (&*self, other) { + (Self::Top, _) | (_, Self::Bottom) => return false, + (Self::Elem(a), Self::Elem(b)) if a == b => return false, + + (Self::Bottom, Self::Elem(x)) => Self::Elem(x.clone()), + + _ => Self::Top, + }; + + *self = result; + true + } +} + +impl<T: Clone + Eq> MeetSemiLattice for FlatSet<T> { + fn meet(&mut self, other: &Self) -> bool { + let result = match (&*self, other) { + (Self::Bottom, _) | (_, Self::Top) => return false, + (Self::Elem(ref a), Self::Elem(ref b)) if a == b => return false, + + (Self::Top, Self::Elem(ref x)) => Self::Elem(x.clone()), + + _ => Self::Bottom, + }; + + *self = result; + true + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs new file mode 100644 index 000000000..f9fd6c9c5 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs @@ -0,0 +1,624 @@ +//! A framework that can express both [gen-kill] and generic dataflow problems. +//! +//! To use this framework, implement either the [`Analysis`] or the +//! [`GenKillAnalysis`] trait. If your transfer function can be expressed with only gen/kill +//! operations, prefer `GenKillAnalysis` since it will run faster while iterating to fixpoint. The +//! `impls` module contains several examples of gen/kill dataflow analyses. +//! +//! Create an `Engine` for your analysis using the `into_engine` method on the `Analysis` trait, +//! then call `iterate_to_fixpoint`. From there, you can use a `ResultsCursor` to inspect the +//! fixpoint solution to your dataflow problem, or implement the `ResultsVisitor` interface and use +//! `visit_results`. The following example uses the `ResultsCursor` approach. +//! +//! ```ignore (cross-crate-imports) +//! use rustc_const_eval::dataflow::Analysis; // Makes `into_engine` available. +//! +//! fn do_my_analysis(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) { +//! let analysis = MyAnalysis::new() +//! .into_engine(tcx, body) +//! .iterate_to_fixpoint() +//! .into_results_cursor(body); +//! +//! // Print the dataflow state *after* each statement in the start block. +//! for (_, statement_index) in body.block_data[START_BLOCK].statements.iter_enumerated() { +//! cursor.seek_after(Location { block: START_BLOCK, statement_index }); +//! let state = cursor.get(); +//! println!("{:?}", state); +//! } +//! } +//! ``` +//! +//! [gen-kill]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems + +use std::cmp::Ordering; + +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; +use rustc_index::vec::Idx; +use rustc_middle::mir::{self, BasicBlock, Location}; +use rustc_middle::ty::TyCtxt; + +mod cursor; +mod direction; +mod engine; +pub mod fmt; +pub mod graphviz; +pub mod lattice; +mod visitor; + +pub use self::cursor::{ResultsCursor, ResultsRefCursor}; +pub use self::direction::{Backward, Direction, Forward}; +pub use self::engine::{Engine, Results}; +pub use self::lattice::{JoinSemiLattice, MeetSemiLattice}; +pub use self::visitor::{visit_results, ResultsVisitable, ResultsVisitor}; + +/// Analysis domains are all bitsets of various kinds. This trait holds +/// operations needed by all of them. 
+pub trait BitSetExt<T> { + fn domain_size(&self) -> usize; + fn contains(&self, elem: T) -> bool; + fn union(&mut self, other: &HybridBitSet<T>); + fn subtract(&mut self, other: &HybridBitSet<T>); +} + +impl<T: Idx> BitSetExt<T> for BitSet<T> { + fn domain_size(&self) -> usize { + self.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet<T>) { + self.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet<T>) { + self.subtract(other); + } +} + +impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> { + fn domain_size(&self) -> usize { + self.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet<T>) { + self.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet<T>) { + self.subtract(other); + } +} + +/// Defines the domain of a dataflow problem. +/// +/// This trait specifies the lattice on which this analysis operates (the domain) as well as its +/// initial value at the entry point of each basic block. +pub trait AnalysisDomain<'tcx> { + /// The type that holds the dataflow state at any given point in the program. + type Domain: Clone + JoinSemiLattice; + + /// The direction of this analysis. Either `Forward` or `Backward`. + type Direction: Direction = Forward; + + /// A descriptive name for this analysis. Used only for debugging. + /// + /// This name should be brief and contain no spaces, periods or other characters that are not + /// suitable as part of a filename. + const NAME: &'static str; + + /// Returns the initial value of the dataflow state upon entry to each basic block. + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain; + + /// Mutates the initial value of the dataflow state upon entry to the `START_BLOCK`. + /// + /// For backward analyses, initial state (besides the bottom value) is not yet supported. Trying + /// to mutate the initial state will result in a panic. + // + // FIXME: For backward dataflow analyses, the initial state should be applied to every basic + // block where control flow could exit the MIR body (e.g., those terminated with `return` or + // `resume`). It's not obvious how to handle `yield` points in generators, however. + fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain); +} + +/// A dataflow problem with an arbitrarily complex transfer function. +/// +/// # Convergence +/// +/// When implementing this trait directly (not via [`GenKillAnalysis`]), it's possible to choose a +/// transfer function such that the analysis does not reach fixpoint. To guarantee convergence, +/// your transfer functions must maintain the following invariant: +/// +/// > If the dataflow state **before** some point in the program changes to be greater +/// than the prior state **before** that point, the dataflow state **after** that point must +/// also change to be greater than the prior state **after** that point. +/// +/// This invariant guarantees that the dataflow state at a given point in the program increases +/// monotonically until fixpoint is reached. Note that this monotonicity requirement only applies +/// to the same point in the program at different points in time. The dataflow state at a given +/// point in the program may or may not be greater than the state at any preceding point. +pub trait Analysis<'tcx>: AnalysisDomain<'tcx> { + /// Updates the current dataflow state with the effect of evaluating a statement. 
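+    ///
+    /// A minimal sketch of an implementation, assuming a domain of `BitSet<Local>` (the
+    /// particular statement handled here is illustrative only):
+    ///
+    /// ```ignore (illustrative-sketch)
+    /// fn apply_statement_effect(
+    ///     &self,
+    ///     state: &mut Self::Domain,
+    ///     statement: &mir::Statement<'tcx>,
+    ///     _location: Location,
+    /// ) {
+    ///     // E.g., stop tracking a local once its storage is dead.
+    ///     if let mir::StatementKind::StorageDead(local) = statement.kind {
+    ///         state.remove(local);
+    ///     }
+    /// }
+    /// ```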
+ fn apply_statement_effect( + &self, + state: &mut Self::Domain, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + /// Updates the current dataflow state with an effect that occurs immediately *before* the + /// given statement. + /// + /// This method is useful if the consumer of the results of this analysis only needs to observe + /// *part* of the effect of a statement (e.g. for two-phase borrows). As a general rule, + /// analyses should not implement this without also implementing `apply_statement_effect`. + fn apply_before_statement_effect( + &self, + _state: &mut Self::Domain, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// Updates the current dataflow state with the effect of evaluating a terminator. + /// + /// The effect of a successful return from a `Call` terminator should **not** be accounted for + /// in this function. That should go in `apply_call_return_effect`. For example, in the + /// `InitializedPlaces` analyses, the return place for a function call is not marked as + /// initialized here. + fn apply_terminator_effect( + &self, + state: &mut Self::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); + + /// Updates the current dataflow state with an effect that occurs immediately *before* the + /// given terminator. + /// + /// This method is useful if the consumer of the results of this analysis needs only to observe + /// *part* of the effect of a terminator (e.g. for two-phase borrows). As a general rule, + /// analyses should not implement this without also implementing `apply_terminator_effect`. + fn apply_before_terminator_effect( + &self, + _state: &mut Self::Domain, + _terminator: &mir::Terminator<'tcx>, + _location: Location, + ) { + } + + /* Edge-specific effects */ + + /// Updates the current dataflow state with the effect of a successful return from a `Call` + /// terminator. + /// + /// This is separate from `apply_terminator_effect` to properly track state across unwind + /// edges. + fn apply_call_return_effect( + &self, + state: &mut Self::Domain, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ); + + /// Updates the current dataflow state with the effect of resuming from a `Yield` terminator. + /// + /// This is similar to `apply_call_return_effect` in that it only takes place after the + /// generator is resumed, not when it is dropped. + /// + /// By default, no effects happen. + fn apply_yield_resume_effect( + &self, + _state: &mut Self::Domain, + _resume_block: BasicBlock, + _resume_place: mir::Place<'tcx>, + ) { + } + + /// Updates the current dataflow state with the effect of taking a particular branch in a + /// `SwitchInt` terminator. + /// + /// Unlike the other edge-specific effects, which are allowed to mutate `Self::Domain` + /// directly, overriders of this method must pass a callback to + /// `SwitchIntEdgeEffects::apply`. The callback will be run once for each outgoing edge and + /// will have access to the dataflow state that will be propagated along that edge. + /// + /// This interface is somewhat more complex than the other visitor-like "effect" methods. + /// However, it is both more ergonomic—callers don't need to recompute or cache information + /// about a given `SwitchInt` terminator for each one of its edges—and more efficient—the + /// engine doesn't need to clone the exit state for a block unless + /// `SwitchIntEdgeEffects::apply` is actually called. 
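+    ///
+    /// A sketch of an override; how `state` is refined from the discriminant value is up
+    /// to the particular analysis:
+    ///
+    /// ```ignore (illustrative-sketch)
+    /// fn apply_switch_int_edge_effects(
+    ///     &self,
+    ///     _block: BasicBlock,
+    ///     _discr: &mir::Operand<'tcx>,
+    ///     apply_edge_effects: &mut impl SwitchIntEdgeEffects<Self::Domain>,
+    /// ) {
+    ///     apply_edge_effects.apply(|state, target| {
+    ///         if let Some(value) = target.value {
+    ///             // Refine `state` with the fact that `discr == value` along the
+    ///             // edge to `target.target`.
+    ///         }
+    ///     });
+    /// }
+    /// ```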
+ fn apply_switch_int_edge_effects( + &self, + _block: BasicBlock, + _discr: &mir::Operand<'tcx>, + _apply_edge_effects: &mut impl SwitchIntEdgeEffects<Self::Domain>, + ) { + } + + /* Extension methods */ + + /// Creates an `Engine` to find the fixpoint for this dataflow problem. + /// + /// You shouldn't need to override this outside this module, since the combination of the + /// default impl and the one for all `A: GenKillAnalysis` will do the right thing. + /// Its purpose is to enable method chaining like so: + /// + /// ```ignore (cross-crate-imports) + /// let results = MyAnalysis::new(tcx, body) + /// .into_engine(tcx, body, def_id) + /// .iterate_to_fixpoint() + /// .into_results_cursor(body); + /// ``` + fn into_engine<'mir>( + self, + tcx: TyCtxt<'tcx>, + body: &'mir mir::Body<'tcx>, + ) -> Engine<'mir, 'tcx, Self> + where + Self: Sized, + { + Engine::new_generic(tcx, body, self) + } +} + +/// A gen/kill dataflow problem. +/// +/// Each method in this trait has a corresponding one in `Analysis`. However, these methods only +/// allow modification of the dataflow state via "gen" and "kill" operations. By defining transfer +/// functions for each statement in this way, the transfer function for an entire basic block can +/// be computed efficiently. +/// +/// `Analysis` is automatically implemented for all implementers of `GenKillAnalysis`. +pub trait GenKillAnalysis<'tcx>: Analysis<'tcx> { + type Idx: Idx; + + /// See `Analysis::apply_statement_effect`. + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + /// See `Analysis::apply_before_statement_effect`. + fn before_statement_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// See `Analysis::apply_terminator_effect`. + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); + + /// See `Analysis::apply_before_terminator_effect`. + fn before_terminator_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _terminator: &mir::Terminator<'tcx>, + _location: Location, + ) { + } + + /* Edge-specific effects */ + + /// See `Analysis::apply_call_return_effect`. + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ); + + /// See `Analysis::apply_yield_resume_effect`. + fn yield_resume_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _resume_block: BasicBlock, + _resume_place: mir::Place<'tcx>, + ) { + } + + /// See `Analysis::apply_switch_int_edge_effects`. 
+ fn switch_int_edge_effects<G: GenKill<Self::Idx>>( + &self, + _block: BasicBlock, + _discr: &mir::Operand<'tcx>, + _edge_effects: &mut impl SwitchIntEdgeEffects<G>, + ) { + } +} + +impl<'tcx, A> Analysis<'tcx> for A +where + A: GenKillAnalysis<'tcx>, + A::Domain: GenKill<A::Idx> + BitSetExt<A::Idx>, +{ + fn apply_statement_effect( + &self, + state: &mut A::Domain, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + self.statement_effect(state, statement, location); + } + + fn apply_before_statement_effect( + &self, + state: &mut A::Domain, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + self.before_statement_effect(state, statement, location); + } + + fn apply_terminator_effect( + &self, + state: &mut A::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + self.terminator_effect(state, terminator, location); + } + + fn apply_before_terminator_effect( + &self, + state: &mut A::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + self.before_terminator_effect(state, terminator, location); + } + + /* Edge-specific effects */ + + fn apply_call_return_effect( + &self, + state: &mut A::Domain, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + self.call_return_effect(state, block, return_places); + } + + fn apply_yield_resume_effect( + &self, + state: &mut A::Domain, + resume_block: BasicBlock, + resume_place: mir::Place<'tcx>, + ) { + self.yield_resume_effect(state, resume_block, resume_place); + } + + fn apply_switch_int_edge_effects( + &self, + block: BasicBlock, + discr: &mir::Operand<'tcx>, + edge_effects: &mut impl SwitchIntEdgeEffects<A::Domain>, + ) { + self.switch_int_edge_effects(block, discr, edge_effects); + } + + /* Extension methods */ + + fn into_engine<'mir>( + self, + tcx: TyCtxt<'tcx>, + body: &'mir mir::Body<'tcx>, + ) -> Engine<'mir, 'tcx, Self> + where + Self: Sized, + { + Engine::new_gen_kill(tcx, body, self) + } +} + +/// The legal operations for a transfer function in a gen/kill problem. +/// +/// This abstraction exists because there are two different contexts in which we call the methods in +/// `GenKillAnalysis`. Sometimes we need to store a single transfer function that can be efficiently +/// applied multiple times, such as when computing the cumulative transfer function for each block. +/// These cases require a `GenKillSet`, which in turn requires two `BitSet`s of storage. Oftentimes, +/// however, we only need to apply an effect once. In *these* cases, it is more efficient to pass the +/// `BitSet` representing the state vector directly into the `*_effect` methods as opposed to +/// building up a `GenKillSet` and then throwing it away. +pub trait GenKill<T> { + /// Inserts `elem` into the state vector. + fn gen(&mut self, elem: T); + + /// Removes `elem` from the state vector. + fn kill(&mut self, elem: T); + + /// Calls `gen` for each element in `elems`. + fn gen_all(&mut self, elems: impl IntoIterator<Item = T>) { + for elem in elems { + self.gen(elem); + } + } + + /// Calls `kill` for each element in `elems`. + fn kill_all(&mut self, elems: impl IntoIterator<Item = T>) { + for elem in elems { + self.kill(elem); + } + } +} + +/// Stores a transfer function for a gen/kill problem. +/// +/// Calling `gen`/`kill` on a `GenKillSet` will "build up" a transfer function so that it can be +/// applied multiple times efficiently. When there are multiple calls to `gen` and/or `kill` for +/// the same element, the most recent one takes precedence. 
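+///
+/// For example (a sketch; `domain_size` and `i` are placeholders):
+///
+/// ```ignore (illustrative-sketch)
+/// let mut trans = GenKillSet::identity(domain_size);
+/// trans.gen(i); // `i` is inserted into the gen set...
+/// trans.kill(i); // ...and this moves it to the kill set instead.
+/// // `trans.apply(&mut state)` now clears `i`, whatever its prior value was.
+/// ```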
+#[derive(Clone)] +pub struct GenKillSet<T> { + gen: HybridBitSet<T>, + kill: HybridBitSet<T>, +} + +impl<T: Idx> GenKillSet<T> { + /// Creates a new transfer function that will leave the dataflow state unchanged. + pub fn identity(universe: usize) -> Self { + GenKillSet { + gen: HybridBitSet::new_empty(universe), + kill: HybridBitSet::new_empty(universe), + } + } + + pub fn apply(&self, state: &mut impl BitSetExt<T>) { + state.union(&self.gen); + state.subtract(&self.kill); + } +} + +impl<T: Idx> GenKill<T> for GenKillSet<T> { + fn gen(&mut self, elem: T) { + self.gen.insert(elem); + self.kill.remove(elem); + } + + fn kill(&mut self, elem: T) { + self.kill.insert(elem); + self.gen.remove(elem); + } +} + +impl<T: Idx> GenKill<T> for BitSet<T> { + fn gen(&mut self, elem: T) { + self.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.remove(elem); + } +} + +impl<T: Idx> GenKill<T> for ChunkedBitSet<T> { + fn gen(&mut self, elem: T) { + self.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.remove(elem); + } +} + +impl<T: Idx> GenKill<T> for lattice::Dual<BitSet<T>> { + fn gen(&mut self, elem: T) { + self.0.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.0.remove(elem); + } +} + +// NOTE: DO NOT CHANGE VARIANT ORDER. The derived `Ord` impls rely on the current order. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Effect { + /// The "before" effect (e.g., `apply_before_statement_effect`) for a statement (or + /// terminator). + Before, + + /// The "primary" effect (e.g., `apply_statement_effect`) for a statement (or terminator). + Primary, +} + +impl Effect { + pub const fn at_index(self, statement_index: usize) -> EffectIndex { + EffectIndex { effect: self, statement_index } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct EffectIndex { + statement_index: usize, + effect: Effect, +} + +impl EffectIndex { + fn next_in_forward_order(self) -> Self { + match self.effect { + Effect::Before => Effect::Primary.at_index(self.statement_index), + Effect::Primary => Effect::Before.at_index(self.statement_index + 1), + } + } + + fn next_in_backward_order(self) -> Self { + match self.effect { + Effect::Before => Effect::Primary.at_index(self.statement_index), + Effect::Primary => Effect::Before.at_index(self.statement_index - 1), + } + } + + /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other` + /// in forward order. + fn precedes_in_forward_order(self, other: Self) -> bool { + let ord = self + .statement_index + .cmp(&other.statement_index) + .then_with(|| self.effect.cmp(&other.effect)); + ord == Ordering::Less + } + + /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other` + /// in backward order. + fn precedes_in_backward_order(self, other: Self) -> bool { + let ord = other + .statement_index + .cmp(&self.statement_index) + .then_with(|| self.effect.cmp(&other.effect)); + ord == Ordering::Less + } +} + +pub struct SwitchIntTarget { + pub value: Option<u128>, + pub target: BasicBlock, +} + +/// A type that records the edge-specific effects for a `SwitchInt` terminator. +pub trait SwitchIntEdgeEffects<D> { + /// Calls `apply_edge_effect` for each outgoing edge from a `SwitchInt` terminator and + /// records the results. + fn apply(&mut self, apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)); +} + +/// List of places that are written to after a successful (non-unwind) return +/// from a `Call` or `InlineAsm`. 
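+///
+/// Consumers typically just iterate over the destination places, as the gen/kill
+/// analyses in `impls` do:
+///
+/// ```ignore (illustrative-sketch)
+/// return_places.for_each(|place| trans.gen(place.local));
+/// ```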
+pub enum CallReturnPlaces<'a, 'tcx> { + Call(mir::Place<'tcx>), + InlineAsm(&'a [mir::InlineAsmOperand<'tcx>]), +} + +impl<'tcx> CallReturnPlaces<'_, 'tcx> { + pub fn for_each(&self, mut f: impl FnMut(mir::Place<'tcx>)) { + match *self { + Self::Call(place) => f(place), + Self::InlineAsm(operands) => { + for op in operands { + match *op { + mir::InlineAsmOperand::Out { place: Some(place), .. } + | mir::InlineAsmOperand::InOut { out_place: Some(place), .. } => f(place), + _ => {} + } + } + } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_mir_dataflow/src/framework/tests.rs b/compiler/rustc_mir_dataflow/src/framework/tests.rs new file mode 100644 index 000000000..d9461fd3a --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/tests.rs @@ -0,0 +1,322 @@ +//! A test for the logic that updates the state in a `ResultsCursor` during seek. + +use std::marker::PhantomData; + +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::{self, BasicBlock, Location}; +use rustc_middle::ty; +use rustc_span::DUMMY_SP; + +use super::*; + +/// Creates a `mir::Body` with a few disconnected basic blocks. +/// +/// This is the `Body` that will be used by the `MockAnalysis` below. The shape of its CFG is not +/// important. +fn mock_body<'tcx>() -> mir::Body<'tcx> { + let source_info = mir::SourceInfo::outermost(DUMMY_SP); + + let mut blocks = IndexVec::new(); + let mut block = |n, kind| { + let nop = mir::Statement { source_info, kind: mir::StatementKind::Nop }; + + blocks.push(mir::BasicBlockData { + statements: std::iter::repeat(&nop).cloned().take(n).collect(), + terminator: Some(mir::Terminator { source_info, kind }), + is_cleanup: false, + }) + }; + + let dummy_place = mir::Place { local: mir::RETURN_PLACE, projection: ty::List::empty() }; + + block(4, mir::TerminatorKind::Return); + block(1, mir::TerminatorKind::Return); + block( + 2, + mir::TerminatorKind::Call { + func: mir::Operand::Copy(dummy_place.clone()), + args: vec![], + destination: dummy_place.clone(), + target: Some(mir::START_BLOCK), + cleanup: None, + from_hir_call: false, + fn_span: DUMMY_SP, + }, + ); + block(3, mir::TerminatorKind::Return); + block(0, mir::TerminatorKind::Return); + block( + 4, + mir::TerminatorKind::Call { + func: mir::Operand::Copy(dummy_place.clone()), + args: vec![], + destination: dummy_place.clone(), + target: Some(mir::START_BLOCK), + cleanup: None, + from_hir_call: false, + fn_span: DUMMY_SP, + }, + ); + + mir::Body::new_cfg_only(blocks) +} + +/// A dataflow analysis whose state is unique at every possible `SeekTarget`. +/// +/// Uniqueness is achieved by having a *locally* unique effect before and after each statement and +/// terminator (see `effect_at_target`) while ensuring that the entry set for each block is +/// *globally* unique (see `mock_entry_set`). +/// +/// For example, a `BasicBlock` with ID `2` and a `Call` terminator has the following state at each +/// location ("+x" indicates that "x" is added to the state). +/// +/// | Location | Before | After | +/// |------------------------|-------------------|--------| +/// | (on_entry) | {102} || +/// | statement 0 | +0 | +1 | +/// | statement 1 | +2 | +3 | +/// | `Call` terminator | +4 | +5 | +/// | (on unwind) | {102,0,1,2,3,4,5} || +/// +/// The `102` in the block's entry set is derived from the basic block index and ensures that the +/// expected state is unique across all basic blocks. Remember, it is generated by +/// `mock_entry_sets`, not from actually running `MockAnalysis` to fixpoint. 
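+///
+/// The per-location effect indices follow a simple encoding (see `effect` below): the
+/// "before" effect of the statement or terminator at index `i` is `2 * i`, and the
+/// primary effect is `2 * i + 1`. For instance (a sketch):
+///
+/// ```ignore (illustrative-sketch)
+/// assert_eq!(self.effect(Effect::Before.at_index(2)), 4);
+/// assert_eq!(self.effect(Effect::Primary.at_index(2)), 5);
+/// ```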
+struct MockAnalysis<'tcx, D> {
+    body: &'tcx mir::Body<'tcx>,
+    dir: PhantomData<D>,
+}
+
+impl<D: Direction> MockAnalysis<'_, D> {
+    const BASIC_BLOCK_OFFSET: usize = 100;
+
+    /// The entry set for each `BasicBlock` is the ID of that block offset by a fixed amount to
+    /// avoid colliding with the statement/terminator effects.
+    fn mock_entry_set(&self, bb: BasicBlock) -> BitSet<usize> {
+        let mut ret = self.bottom_value(self.body);
+        ret.insert(Self::BASIC_BLOCK_OFFSET + bb.index());
+        ret
+    }
+
+    fn mock_entry_sets(&self) -> IndexVec<BasicBlock, BitSet<usize>> {
+        let empty = self.bottom_value(self.body);
+        let mut ret = IndexVec::from_elem(empty, &self.body.basic_blocks());
+
+        for (bb, _) in self.body.basic_blocks().iter_enumerated() {
+            ret[bb] = self.mock_entry_set(bb);
+        }
+
+        ret
+    }
+
+    /// Returns the index that should be added to the dataflow state at the given target.
+    fn effect(&self, loc: EffectIndex) -> usize {
+        let idx = match loc.effect {
+            Effect::Before => loc.statement_index * 2,
+            Effect::Primary => loc.statement_index * 2 + 1,
+        };
+
+        assert!(idx < Self::BASIC_BLOCK_OFFSET, "Too many statements in basic block");
+        idx
+    }
+
+    /// Returns the expected state at the given `SeekTarget`.
+    ///
+    /// This is the union of the index of the target basic block, the index assigned to the
+    /// target statement or terminator, and the indices of all preceding statements in the target
+    /// basic block.
+    ///
+    /// For example, the expected state when calling
+    /// `seek_before_primary_effect(Location { block: 2, statement_index: 2 })`
+    /// would be `[102, 0, 1, 2, 3, 4]`.
+    fn expected_state_at_target(&self, target: SeekTarget) -> BitSet<usize> {
+        let block = target.block();
+        let mut ret = self.bottom_value(self.body);
+        ret.insert(Self::BASIC_BLOCK_OFFSET + block.index());
+
+        let target = match target {
+            SeekTarget::BlockEntry { .. } => return ret,
+            SeekTarget::Before(loc) => Effect::Before.at_index(loc.statement_index),
+            SeekTarget::After(loc) => Effect::Primary.at_index(loc.statement_index),
+        };
+
+        let mut pos = if D::IS_FORWARD {
+            Effect::Before.at_index(0)
+        } else {
+            Effect::Before.at_index(self.body[block].statements.len())
+        };
+
+        loop {
+            ret.insert(self.effect(pos));
+
+            if pos == target {
+                return ret;
+            }
+
+            if D::IS_FORWARD {
+                pos = pos.next_in_forward_order();
+            } else {
+                pos = pos.next_in_backward_order();
+            }
+        }
+    }
+}
+
+impl<'tcx, D: Direction> AnalysisDomain<'tcx> for MockAnalysis<'tcx, D> {
+    type Domain = BitSet<usize>;
+    type Direction = D;
+
+    const NAME: &'static str = "mock";
+
+    fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain {
+        BitSet::new_empty(Self::BASIC_BLOCK_OFFSET + body.basic_blocks().len())
+    }
+
+    fn initialize_start_block(&self, _: &mir::Body<'tcx>, _: &mut Self::Domain) {
+        unimplemented!("This is never called since `MockAnalysis` is never iterated to fixpoint");
+    }
+}
+
+impl<'tcx, D: Direction> Analysis<'tcx> for MockAnalysis<'tcx, D> {
+    fn apply_statement_effect(
+        &self,
+        state: &mut Self::Domain,
+        _statement: &mir::Statement<'tcx>,
+        location: Location,
+    ) {
+        let idx = self.effect(Effect::Primary.at_index(location.statement_index));
+        assert!(state.insert(idx));
+    }
+
+    fn apply_before_statement_effect(
+        &self,
+        state: &mut Self::Domain,
+        _statement: &mir::Statement<'tcx>,
+        location: Location,
+    ) {
+        let idx = self.effect(Effect::Before.at_index(location.statement_index));
+        assert!(state.insert(idx));
+    }
+
+    fn apply_terminator_effect(
+        &self,
+        state: &mut Self::Domain,
+        _terminator: &mir::Terminator<'tcx>,
+        location: Location,
+    ) {
+        let idx = self.effect(Effect::Primary.at_index(location.statement_index));
+        assert!(state.insert(idx));
+    }
+
+    fn apply_before_terminator_effect(
+        &self,
+        state: &mut Self::Domain,
+        _terminator: &mir::Terminator<'tcx>,
+        location: Location,
+    ) {
+        let idx = self.effect(Effect::Before.at_index(location.statement_index));
+        assert!(state.insert(idx));
+    }
+
+    fn apply_call_return_effect(
+        &self,
+        _state: &mut Self::Domain,
+        _block: BasicBlock,
+        _return_places: CallReturnPlaces<'_, 'tcx>,
+    ) {
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum SeekTarget {
+    BlockEntry(BasicBlock),
+    Before(Location),
+    After(Location),
+}
+
+impl SeekTarget {
+    fn block(&self) -> BasicBlock {
+        use SeekTarget::*;
+
+        match *self {
+            BlockEntry(block) => block,
+            Before(loc) | After(loc) => loc.block,
+        }
+    }
+
+    /// An iterator over all possible `SeekTarget`s in a given block in order, starting with
+    /// `BlockEntry`.
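+    ///
+    /// For a block `bb` with two statements and a terminator, the order is sketched by:
+    ///
+    /// ```text
+    /// BlockEntry(bb),
+    /// Before(bb[0]), After(bb[0]), // statement 0
+    /// Before(bb[1]), After(bb[1]), // statement 1
+    /// Before(bb[2]), After(bb[2]), // terminator (statement_index == statements.len())
+    /// ```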
+ fn iter_in_block(body: &mir::Body<'_>, block: BasicBlock) -> impl Iterator<Item = Self> { + let statements_and_terminator = (0..=body[block].statements.len()) + .flat_map(|i| (0..2).map(move |j| (i, j))) + .map(move |(i, kind)| { + let loc = Location { block, statement_index: i }; + match kind { + 0 => SeekTarget::Before(loc), + 1 => SeekTarget::After(loc), + _ => unreachable!(), + } + }); + + std::iter::once(SeekTarget::BlockEntry(block)).chain(statements_and_terminator) + } +} + +fn test_cursor<D: Direction>(analysis: MockAnalysis<'_, D>) { + let body = analysis.body; + + let mut cursor = + Results { entry_sets: analysis.mock_entry_sets(), analysis }.into_results_cursor(body); + + cursor.allow_unreachable(); + + let every_target = || { + body.basic_blocks() + .iter_enumerated() + .flat_map(|(bb, _)| SeekTarget::iter_in_block(body, bb)) + }; + + let mut seek_to_target = |targ| { + use SeekTarget::*; + + match targ { + BlockEntry(block) => cursor.seek_to_block_entry(block), + Before(loc) => cursor.seek_before_primary_effect(loc), + After(loc) => cursor.seek_after_primary_effect(loc), + } + + assert_eq!(cursor.get(), &cursor.analysis().expected_state_at_target(targ)); + }; + + // Seek *to* every possible `SeekTarget` *from* every possible `SeekTarget`. + // + // By resetting the cursor to `from` each time it changes, we end up checking some edges twice. + // What we really want is an Eulerian cycle for the complete digraph over all possible + // `SeekTarget`s, but it's not worth spending the time to compute it. + for from in every_target() { + seek_to_target(from); + + for to in every_target() { + dbg!(from); + dbg!(to); + seek_to_target(to); + seek_to_target(from); + } + } +} + +#[test] +fn backward_cursor() { + let body = mock_body(); + let body = &body; + let analysis = MockAnalysis { body, dir: PhantomData::<Backward> }; + test_cursor(analysis) +} + +#[test] +fn forward_cursor() { + let body = mock_body(); + let body = &body; + let analysis = MockAnalysis { body, dir: PhantomData::<Forward> }; + test_cursor(analysis) +} diff --git a/compiler/rustc_mir_dataflow/src/framework/visitor.rs b/compiler/rustc_mir_dataflow/src/framework/visitor.rs new file mode 100644 index 000000000..75b4e150a --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/visitor.rs @@ -0,0 +1,187 @@ +use rustc_middle::mir::{self, BasicBlock, Location}; + +use super::{Analysis, Direction, Results}; + +/// Calls the corresponding method in `ResultsVisitor` for every location in a `mir::Body` with the +/// dataflow state at that location. +pub fn visit_results<'mir, 'tcx, F, V>( + body: &'mir mir::Body<'tcx>, + blocks: impl IntoIterator<Item = BasicBlock>, + results: &V, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, +) where + V: ResultsVisitable<'tcx, FlowState = F>, +{ + let mut state = results.new_flow_state(body); + + #[cfg(debug_assertions)] + let reachable_blocks = mir::traversal::reachable_as_bitset(body); + + for block in blocks { + #[cfg(debug_assertions)] + assert!(reachable_blocks.contains(block)); + + let block_data = &body[block]; + V::Direction::visit_results_in_block(&mut state, block, block_data, results, vis); + } +} + +pub trait ResultsVisitor<'mir, 'tcx> { + type FlowState; + + fn visit_block_start( + &mut self, + _state: &Self::FlowState, + _block_data: &'mir mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + } + + /// Called with the `before_statement_effect` of the given statement applied to `state` but not + /// its `statement_effect`. 
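+    ///
+    /// A sketch of a visitor built on this hook; the `Collector` type and its `states`
+    /// field are hypothetical:
+    ///
+    /// ```ignore (illustrative-sketch)
+    /// impl<'mir, 'tcx> ResultsVisitor<'mir, 'tcx> for Collector {
+    ///     type FlowState = BitSet<Local>;
+    ///
+    ///     fn visit_statement_before_primary_effect(
+    ///         &mut self,
+    ///         state: &Self::FlowState,
+    ///         _statement: &'mir mir::Statement<'tcx>,
+    ///         location: Location,
+    ///     ) {
+    ///         // Record the state observed just before each statement's primary effect.
+    ///         self.states.push((location, state.clone()));
+    ///     }
+    /// }
+    /// ```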
+ fn visit_statement_before_primary_effect( + &mut self, + _state: &Self::FlowState, + _statement: &'mir mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// Called with both the `before_statement_effect` and the `statement_effect` of the given + /// statement applied to `state`. + fn visit_statement_after_primary_effect( + &mut self, + _state: &Self::FlowState, + _statement: &'mir mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// Called with the `before_terminator_effect` of the given terminator applied to `state` but not + /// its `terminator_effect`. + fn visit_terminator_before_primary_effect( + &mut self, + _state: &Self::FlowState, + _terminator: &'mir mir::Terminator<'tcx>, + _location: Location, + ) { + } + + /// Called with both the `before_terminator_effect` and the `terminator_effect` of the given + /// terminator applied to `state`. + /// + /// The `call_return_effect` (if one exists) will *not* be applied to `state`. + fn visit_terminator_after_primary_effect( + &mut self, + _state: &Self::FlowState, + _terminator: &'mir mir::Terminator<'tcx>, + _location: Location, + ) { + } + + fn visit_block_end( + &mut self, + _state: &Self::FlowState, + _block_data: &'mir mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + } +} + +/// Things that can be visited by a `ResultsVisitor`. +/// +/// This trait exists so that we can visit the results of multiple dataflow analyses simultaneously. +/// DO NOT IMPLEMENT MANUALLY. Instead, use the `impl_visitable` macro below. +pub trait ResultsVisitable<'tcx> { + type Direction: Direction; + type FlowState; + + /// Creates an empty `FlowState` to hold the transient state for these dataflow results. + /// + /// The value of the newly created `FlowState` will be overwritten by `reset_to_block_entry` + /// before it can be observed by a `ResultsVisitor`. 
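+    ///
+    /// For the blanket impl for `Results` below, this is simply the analysis bottom value:
+    ///
+    /// ```ignore (illustrative-sketch)
+    /// self.analysis.bottom_value(body)
+    /// ```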
+ fn new_flow_state(&self, body: &mir::Body<'tcx>) -> Self::FlowState; + + fn reset_to_block_entry(&self, state: &mut Self::FlowState, block: BasicBlock); + + fn reconstruct_before_statement_effect( + &self, + state: &mut Self::FlowState, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + fn reconstruct_statement_effect( + &self, + state: &mut Self::FlowState, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + fn reconstruct_before_terminator_effect( + &self, + state: &mut Self::FlowState, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); + + fn reconstruct_terminator_effect( + &self, + state: &mut Self::FlowState, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); +} + +impl<'tcx, A> ResultsVisitable<'tcx> for Results<'tcx, A> +where + A: Analysis<'tcx>, +{ + type FlowState = A::Domain; + + type Direction = A::Direction; + + fn new_flow_state(&self, body: &mir::Body<'tcx>) -> Self::FlowState { + self.analysis.bottom_value(body) + } + + fn reset_to_block_entry(&self, state: &mut Self::FlowState, block: BasicBlock) { + state.clone_from(&self.entry_set_for_block(block)); + } + + fn reconstruct_before_statement_effect( + &self, + state: &mut Self::FlowState, + stmt: &mir::Statement<'tcx>, + loc: Location, + ) { + self.analysis.apply_before_statement_effect(state, stmt, loc); + } + + fn reconstruct_statement_effect( + &self, + state: &mut Self::FlowState, + stmt: &mir::Statement<'tcx>, + loc: Location, + ) { + self.analysis.apply_statement_effect(state, stmt, loc); + } + + fn reconstruct_before_terminator_effect( + &self, + state: &mut Self::FlowState, + term: &mir::Terminator<'tcx>, + loc: Location, + ) { + self.analysis.apply_before_terminator_effect(state, term, loc); + } + + fn reconstruct_terminator_effect( + &self, + state: &mut Self::FlowState, + term: &mir::Terminator<'tcx>, + loc: Location, + ) { + self.analysis.apply_terminator_effect(state, term, loc); + } +} diff --git a/compiler/rustc_mir_dataflow/src/impls/borrowed_locals.rs b/compiler/rustc_mir_dataflow/src/impls/borrowed_locals.rs new file mode 100644 index 000000000..0f8e86d1d --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/impls/borrowed_locals.rs @@ -0,0 +1,162 @@ +use super::*; + +use crate::{AnalysisDomain, CallReturnPlaces, GenKill, GenKillAnalysis}; +use rustc_middle::mir::visit::Visitor; +use rustc_middle::mir::*; + +/// A dataflow analysis that tracks whether a pointer or reference could possibly exist that points +/// to a given local. +/// +/// At present, this is used as a very limited form of alias analysis. For example, +/// `MaybeBorrowedLocals` is used to compute which locals are live during a yield expression for +/// immovable generators. 
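+///
+/// For example (a sketch; the sets on the right show the locals that are maybe borrowed
+/// after each statement):
+///
+/// ```ignore (illustrative-sketch)
+/// let mut a = 0;   // {}
+/// let p = &mut a;  // {a}  a pointer or reference to `a` may now exist
+/// // ...           // {a}  `a` stays in the set until `StorageDead(a)` kills it
+/// ```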
+pub struct MaybeBorrowedLocals; + +impl MaybeBorrowedLocals { + fn transfer_function<'a, T>(&'a self, trans: &'a mut T) -> TransferFunction<'a, T> { + TransferFunction { trans } + } +} + +impl<'tcx> AnalysisDomain<'tcx> for MaybeBorrowedLocals { + type Domain = BitSet<Local>; + const NAME: &'static str = "maybe_borrowed_locals"; + + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain { + // bottom = unborrowed + BitSet::new_empty(body.local_decls().len()) + } + + fn initialize_start_block(&self, _: &mir::Body<'tcx>, _: &mut Self::Domain) { + // No locals are aliased on function entry + } +} + +impl<'tcx> GenKillAnalysis<'tcx> for MaybeBorrowedLocals { + type Idx = Local; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + self.transfer_function(trans).visit_statement(statement, location); + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + self.transfer_function(trans).visit_terminator(terminator, location); + } + + fn call_return_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _block: mir::BasicBlock, + _return_places: CallReturnPlaces<'_, 'tcx>, + ) { + } +} + +/// A `Visitor` that defines the transfer function for `MaybeBorrowedLocals`. +struct TransferFunction<'a, T> { + trans: &'a mut T, +} + +impl<'tcx, T> Visitor<'tcx> for TransferFunction<'_, T> +where + T: GenKill<Local>, +{ + fn visit_statement(&mut self, stmt: &Statement<'tcx>, location: Location) { + self.super_statement(stmt, location); + + // When we reach a `StorageDead` statement, we can assume that any pointers to this memory + // are now invalid. + if let StatementKind::StorageDead(local) = stmt.kind { + self.trans.kill(local); + } + } + + fn visit_rvalue(&mut self, rvalue: &mir::Rvalue<'tcx>, location: Location) { + self.super_rvalue(rvalue, location); + + match rvalue { + mir::Rvalue::AddressOf(_, borrowed_place) | mir::Rvalue::Ref(_, _, borrowed_place) => { + if !borrowed_place.is_indirect() { + self.trans.gen(borrowed_place.local); + } + } + + mir::Rvalue::Cast(..) + | mir::Rvalue::ShallowInitBox(..) + | mir::Rvalue::Use(..) + | mir::Rvalue::ThreadLocalRef(..) + | mir::Rvalue::Repeat(..) + | mir::Rvalue::Len(..) + | mir::Rvalue::BinaryOp(..) + | mir::Rvalue::CheckedBinaryOp(..) + | mir::Rvalue::NullaryOp(..) + | mir::Rvalue::UnaryOp(..) + | mir::Rvalue::Discriminant(..) + | mir::Rvalue::Aggregate(..) + | mir::Rvalue::CopyForDeref(..) => {} + } + } + + fn visit_terminator(&mut self, terminator: &mir::Terminator<'tcx>, location: Location) { + self.super_terminator(terminator, location); + + match terminator.kind { + mir::TerminatorKind::Drop { place: dropped_place, .. } + | mir::TerminatorKind::DropAndReplace { place: dropped_place, .. } => { + // Drop terminators may call custom drop glue (`Drop::drop`), which takes `&mut + // self` as a parameter. In the general case, a drop impl could launder that + // reference into the surrounding environment through a raw pointer, thus creating + // a valid `*mut` pointing to the dropped local. We are not yet willing to declare + // this particular case UB, so we must treat all dropped locals as mutably borrowed + // for now. See discussion on [#61069]. + // + // [#61069]: https://github.com/rust-lang/rust/pull/61069 + self.trans.gen(dropped_place.local); + } + + TerminatorKind::Abort + | TerminatorKind::Assert { .. } + | TerminatorKind::Call { .. 
} + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::FalseUnwind { .. } + | TerminatorKind::GeneratorDrop + | TerminatorKind::Goto { .. } + | TerminatorKind::InlineAsm { .. } + | TerminatorKind::Resume + | TerminatorKind::Return + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::Unreachable + | TerminatorKind::Yield { .. } => {} + } + } +} + +/// The set of locals that are borrowed at some point in the MIR body. +pub fn borrowed_locals(body: &Body<'_>) -> BitSet<Local> { + struct Borrowed(BitSet<Local>); + + impl GenKill<Local> for Borrowed { + #[inline] + fn gen(&mut self, elem: Local) { + self.0.gen(elem) + } + #[inline] + fn kill(&mut self, _: Local) { + // Ignore borrow invalidation. + } + } + + let mut borrowed = Borrowed(BitSet::new_empty(body.local_decls.len())); + TransferFunction { trans: &mut borrowed }.visit_body(body); + borrowed.0 +} diff --git a/compiler/rustc_mir_dataflow/src/impls/init_locals.rs b/compiler/rustc_mir_dataflow/src/impls/init_locals.rs new file mode 100644 index 000000000..83ce4c44b --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/impls/init_locals.rs @@ -0,0 +1,122 @@ +//! A less precise version of `MaybeInitializedPlaces` whose domain is entire locals. +//! +//! A local will be maybe initialized if *any* projections of that local might be initialized. + +use crate::{CallReturnPlaces, GenKill}; + +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::visit::{PlaceContext, Visitor}; +use rustc_middle::mir::{self, BasicBlock, Local, Location}; + +pub struct MaybeInitializedLocals; + +impl<'tcx> crate::AnalysisDomain<'tcx> for MaybeInitializedLocals { + type Domain = BitSet<Local>; + + const NAME: &'static str = "maybe_init_locals"; + + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain { + // bottom = uninit + BitSet::new_empty(body.local_decls.len()) + } + + fn initialize_start_block(&self, body: &mir::Body<'tcx>, entry_set: &mut Self::Domain) { + // Function arguments are initialized to begin with. + for arg in body.args_iter() { + entry_set.insert(arg); + } + } +} + +impl<'tcx> crate::GenKillAnalysis<'tcx> for MaybeInitializedLocals { + type Idx = Local; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + statement: &mir::Statement<'tcx>, + loc: Location, + ) { + TransferFunction { trans }.visit_statement(statement, loc) + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + loc: Location, + ) { + TransferFunction { trans }.visit_terminator(terminator, loc) + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| trans.gen(place.local)); + } + + /// See `Analysis::apply_yield_resume_effect`. + fn yield_resume_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _resume_block: BasicBlock, + resume_place: mir::Place<'tcx>, + ) { + trans.gen(resume_place.local) + } +} + +struct TransferFunction<'a, T> { + trans: &'a mut T, +} + +impl<T> Visitor<'_> for TransferFunction<'_, T> +where + T: GenKill<Local>, +{ + // FIXME: Using `visit_local` here is a bug. For example, on `move _5.field` we mark `_5` as + // deinitialized, although clearly it is only partially deinitialized. This analysis is not + // actually used anywhere at the moment, so this is not critical, but this does need to be fixed + // before it starts being used again. 
+ fn visit_local(&mut self, local: Local, context: PlaceContext, _: Location) { + use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, NonUseContext}; + match context { + // These are handled specially in `call_return_effect` and `yield_resume_effect`. + PlaceContext::MutatingUse( + MutatingUseContext::Call + | MutatingUseContext::AsmOutput + | MutatingUseContext::Yield, + ) => {} + + // If it's deinitialized, it's no longer init + PlaceContext::MutatingUse(MutatingUseContext::Deinit) => self.trans.kill(local), + + // Otherwise, when a place is mutated, we must consider it possibly initialized. + PlaceContext::MutatingUse(_) => self.trans.gen(local), + + // If the local is moved out of, or if it gets marked `StorageDead`, consider it no + // longer initialized. + PlaceContext::NonUse(NonUseContext::StorageDead) + | PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) => self.trans.kill(local), + + // All other uses do not affect this analysis. + PlaceContext::NonUse( + NonUseContext::StorageLive + | NonUseContext::AscribeUserTy + | NonUseContext::VarDebugInfo, + ) + | PlaceContext::NonMutatingUse( + NonMutatingUseContext::Inspect + | NonMutatingUseContext::Copy + | NonMutatingUseContext::SharedBorrow + | NonMutatingUseContext::ShallowBorrow + | NonMutatingUseContext::UniqueBorrow + | NonMutatingUseContext::AddressOf + | NonMutatingUseContext::Projection, + ) => {} + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/impls/liveness.rs b/compiler/rustc_mir_dataflow/src/impls/liveness.rs new file mode 100644 index 000000000..21132eb99 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/impls/liveness.rs @@ -0,0 +1,297 @@ +use rustc_index::bit_set::{BitSet, ChunkedBitSet}; +use rustc_middle::mir::visit::{MutatingUseContext, NonMutatingUseContext, PlaceContext, Visitor}; +use rustc_middle::mir::{self, Local, Location, Place, StatementKind}; + +use crate::{Analysis, AnalysisDomain, Backward, CallReturnPlaces, GenKill, GenKillAnalysis}; + +/// A [live-variable dataflow analysis][liveness]. +/// +/// This analysis considers references as being used only at the point of the +/// borrow. In other words, this analysis does not track uses because of references that already +/// exist. See [this `mir-dataflow` test][flow-test] for an example. You almost never want to use +/// this analysis without also looking at the results of [`MaybeBorrowedLocals`]. +/// +/// ## Field-(in)sensitivity +/// +/// As the name suggests, this analysis is field insensitive. If a projection of a variable `x` is +/// assigned to (e.g. `x.0 = 42`), it does not "define" `x` as far as liveness is concerned. In fact, +/// such an assignment is currently marked as a "use" of `x` in an attempt to be maximally +/// conservative. 
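+///
+/// A sketch (liveness is computed backwards; the comments describe each statement's
+/// gen/kill effect on the tuple-typed local `x`):
+///
+/// ```ignore (illustrative-sketch)
+/// x = (0, 1); // kill: assigning the whole local is a "def" of `x`
+/// y = x.0;    // gen: reading a projection is a "use" of `x`
+/// ```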
+/// +/// [`MaybeBorrowedLocals`]: super::MaybeBorrowedLocals +/// [flow-test]: https://github.com/rust-lang/rust/blob/a08c47310c7d49cbdc5d7afb38408ba519967ecd/src/test/ui/mir-dataflow/liveness-ptr.rs +/// [liveness]: https://en.wikipedia.org/wiki/Live_variable_analysis +pub struct MaybeLiveLocals; + +impl MaybeLiveLocals { + fn transfer_function<'a, T>(&self, trans: &'a mut T) -> TransferFunction<'a, T> { + TransferFunction(trans) + } +} + +impl<'tcx> AnalysisDomain<'tcx> for MaybeLiveLocals { + type Domain = ChunkedBitSet<Local>; + type Direction = Backward; + + const NAME: &'static str = "liveness"; + + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain { + // bottom = not live + ChunkedBitSet::new_empty(body.local_decls.len()) + } + + fn initialize_start_block(&self, _: &mir::Body<'tcx>, _: &mut Self::Domain) { + // No variables are live until we observe a use + } +} + +impl<'tcx> GenKillAnalysis<'tcx> for MaybeLiveLocals { + type Idx = Local; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + self.transfer_function(trans).visit_statement(statement, location); + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + self.transfer_function(trans).visit_terminator(terminator, location); + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _block: mir::BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| { + if let Some(local) = place.as_local() { + trans.kill(local); + } + }); + } + + fn yield_resume_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _resume_block: mir::BasicBlock, + resume_place: mir::Place<'tcx>, + ) { + if let Some(local) = resume_place.as_local() { + trans.kill(local); + } + } +} + +struct TransferFunction<'a, T>(&'a mut T); + +impl<'tcx, T> Visitor<'tcx> for TransferFunction<'_, T> +where + T: GenKill<Local>, +{ + fn visit_place(&mut self, place: &mir::Place<'tcx>, context: PlaceContext, location: Location) { + let local = place.local; + + // We purposefully do not call `super_place` here to avoid calling `visit_local` for this + // place with one of the `Projection` variants of `PlaceContext`. + self.visit_projection(place.as_ref(), context, location); + + match DefUse::for_place(*place, context) { + Some(DefUse::Def) => self.0.kill(local), + Some(DefUse::Use) => self.0.gen(local), + None => {} + } + } + + fn visit_local(&mut self, local: Local, context: PlaceContext, _: Location) { + // Because we do not call `super_place` above, `visit_local` is only called for locals that + // do not appear as part of a `Place` in the MIR. This handles cases like the implicit use + // of the return place in a `Return` terminator or the index in an `Index` projection. + match DefUse::for_place(local.into(), context) { + Some(DefUse::Def) => self.0.kill(local), + Some(DefUse::Use) => self.0.gen(local), + None => {} + } + } +} + +#[derive(Eq, PartialEq, Clone)] +enum DefUse { + Def, + Use, +} + +impl DefUse { + fn for_place<'tcx>(place: Place<'tcx>, context: PlaceContext) -> Option<DefUse> { + match context { + PlaceContext::NonUse(_) => None, + + PlaceContext::MutatingUse(MutatingUseContext::Store | MutatingUseContext::Deinit) => { + if place.is_indirect() { + // Treat derefs as a use of the base local. `*p = 4` is not a def of `p` but a + // use. 
+                    Some(DefUse::Use)
+                } else if place.projection.is_empty() {
+                    Some(DefUse::Def)
+                } else {
+                    None
+                }
+            }
+
+            // Setting the discriminant is not a use because it does no reading, but it is also not
+            // a def because it does not overwrite the whole place
+            PlaceContext::MutatingUse(MutatingUseContext::SetDiscriminant) => {
+                place.is_indirect().then_some(DefUse::Use)
+            }
+
+            // For the associated terminators, this is only a `Def` when the terminator returns
+            // "successfully." As such, we handle this case separately in `call_return_effect`
+            // above. However, if the place looks like `*_5`, this is still unconditionally a use of
+            // `_5`.
+            PlaceContext::MutatingUse(
+                MutatingUseContext::Call
+                | MutatingUseContext::Yield
+                | MutatingUseContext::AsmOutput,
+            ) => place.is_indirect().then_some(DefUse::Use),
+
+            // All other contexts are uses...
+            PlaceContext::MutatingUse(
+                MutatingUseContext::AddressOf
+                | MutatingUseContext::Borrow
+                | MutatingUseContext::Drop
+                | MutatingUseContext::Retag,
+            )
+            | PlaceContext::NonMutatingUse(
+                NonMutatingUseContext::AddressOf
+                | NonMutatingUseContext::Copy
+                | NonMutatingUseContext::Inspect
+                | NonMutatingUseContext::Move
+                | NonMutatingUseContext::ShallowBorrow
+                | NonMutatingUseContext::SharedBorrow
+                | NonMutatingUseContext::UniqueBorrow,
+            ) => Some(DefUse::Use),
+
+            PlaceContext::MutatingUse(MutatingUseContext::Projection)
+            | PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection) => {
+                unreachable!("A projection could be a def or a use and must be handled separately")
+            }
+        }
+    }
+}
+
+/// Like `MaybeLiveLocals`, but does not mark locals as live if they are used in a dead assignment.
+///
+/// This is basically written for dead store elimination and nothing else.
+///
+/// All of the caveats of `MaybeLiveLocals` apply.
+pub struct MaybeTransitiveLiveLocals<'a> {
+    always_live: &'a BitSet<Local>,
+}
+
+impl<'a> MaybeTransitiveLiveLocals<'a> {
+    /// The `always_live` set is the set of locals to which all stores should unconditionally be
+    /// considered live.
+    ///
+    /// This should include at least all locals that are ever borrowed.
+    pub fn new(always_live: &'a BitSet<Local>) -> Self {
+        MaybeTransitiveLiveLocals { always_live }
+    }
+}
+
+impl<'a, 'tcx> AnalysisDomain<'tcx> for MaybeTransitiveLiveLocals<'a> {
+    type Domain = ChunkedBitSet<Local>;
+    type Direction = Backward;
+
+    const NAME: &'static str = "transitive liveness";
+
+    fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain {
+        // bottom = not live
+        ChunkedBitSet::new_empty(body.local_decls.len())
+    }
+
+    fn initialize_start_block(&self, _: &mir::Body<'tcx>, _: &mut Self::Domain) {
+        // No variables are live until we observe a use
+    }
+}
+
+impl<'a, 'tcx> Analysis<'tcx> for MaybeTransitiveLiveLocals<'a> {
+    fn apply_statement_effect(
+        &self,
+        trans: &mut Self::Domain,
+        statement: &mir::Statement<'tcx>,
+        location: Location,
+    ) {
+        // Compute the place that we are storing to, if any
+        let destination = match &statement.kind {
+            StatementKind::Assign(assign) => {
+                if assign.1.is_safe_to_remove() {
+                    Some(assign.0)
+                } else {
+                    None
+                }
+            }
+            StatementKind::SetDiscriminant { place, .. } | StatementKind::Deinit(place) => {
+                Some(**place)
+            }
+            StatementKind::FakeRead(_)
+            | StatementKind::StorageLive(_)
+            | StatementKind::StorageDead(_)
+            | StatementKind::Retag(..)
+            | StatementKind::AscribeUserType(..)
+            | StatementKind::Coverage(..)
+            | StatementKind::CopyNonOverlapping(..)
+ | StatementKind::Nop => None, + }; + if let Some(destination) = destination { + if !destination.is_indirect() + && !trans.contains(destination.local) + && !self.always_live.contains(destination.local) + { + // This store is dead + return; + } + } + TransferFunction(trans).visit_statement(statement, location); + } + + fn apply_terminator_effect( + &self, + trans: &mut Self::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + TransferFunction(trans).visit_terminator(terminator, location); + } + + fn apply_call_return_effect( + &self, + trans: &mut Self::Domain, + _block: mir::BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| { + if let Some(local) = place.as_local() { + trans.remove(local); + } + }); + } + + fn apply_yield_resume_effect( + &self, + trans: &mut Self::Domain, + _resume_block: mir::BasicBlock, + resume_place: mir::Place<'tcx>, + ) { + if let Some(local) = resume_place.as_local() { + trans.remove(local); + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/impls/mod.rs b/compiler/rustc_mir_dataflow/src/impls/mod.rs new file mode 100644 index 000000000..fd1e49277 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/impls/mod.rs @@ -0,0 +1,766 @@ +//! Dataflow analyses are built upon some interpretation of the +//! bitvectors attached to each basic block, represented via a +//! zero-sized structure. + +use rustc_index::bit_set::{BitSet, ChunkedBitSet}; +use rustc_index::vec::Idx; +use rustc_middle::mir::visit::{MirVisitable, Visitor}; +use rustc_middle::mir::{self, Body, Location}; +use rustc_middle::ty::{self, TyCtxt}; + +use crate::drop_flag_effects_for_function_entry; +use crate::drop_flag_effects_for_location; +use crate::elaborate_drops::DropFlagState; +use crate::framework::{CallReturnPlaces, SwitchIntEdgeEffects}; +use crate::move_paths::{HasMoveData, InitIndex, InitKind, LookupResult, MoveData, MovePathIndex}; +use crate::on_lookup_result_bits; +use crate::MoveDataParamEnv; +use crate::{drop_flag_effects, on_all_children_bits}; +use crate::{lattice, AnalysisDomain, GenKill, GenKillAnalysis}; + +mod borrowed_locals; +mod init_locals; +mod liveness; +mod storage_liveness; + +pub use self::borrowed_locals::borrowed_locals; +pub use self::borrowed_locals::MaybeBorrowedLocals; +pub use self::init_locals::MaybeInitializedLocals; +pub use self::liveness::MaybeLiveLocals; +pub use self::liveness::MaybeTransitiveLiveLocals; +pub use self::storage_liveness::{MaybeRequiresStorage, MaybeStorageLive}; + +/// `MaybeInitializedPlaces` tracks all places that might be +/// initialized upon reaching a particular point in the control flow +/// for a function. +/// +/// For example, in code like the following, we have corresponding +/// dataflow information shown in the right-hand comments. +/// +/// ```rust +/// struct S; +/// fn foo(pred: bool) { // maybe-init: +/// // {} +/// let a = S; let mut b = S; let c; let d; // {a, b} +/// +/// if pred { +/// drop(a); // { b} +/// b = S; // { b} +/// +/// } else { +/// drop(b); // {a} +/// d = S; // {a, d} +/// +/// } // {a, b, d} +/// +/// c = S; // {a, b, c, d} +/// } +/// ``` +/// +/// To determine whether a place *must* be initialized at a +/// particular control-flow point, one can take the set-difference +/// between this data and the data from `MaybeUninitializedPlaces` at the +/// corresponding control-flow point. 
+/// +/// Similarly, at a given `drop` statement, the set-intersection +/// between this data and `MaybeUninitializedPlaces` yields the set of +/// places that would require a dynamic drop-flag at that statement. +pub struct MaybeInitializedPlaces<'a, 'tcx> { + tcx: TyCtxt<'tcx>, + body: &'a Body<'tcx>, + mdpe: &'a MoveDataParamEnv<'tcx>, +} + +impl<'a, 'tcx> MaybeInitializedPlaces<'a, 'tcx> { + pub fn new(tcx: TyCtxt<'tcx>, body: &'a Body<'tcx>, mdpe: &'a MoveDataParamEnv<'tcx>) -> Self { + MaybeInitializedPlaces { tcx, body, mdpe } + } +} + +impl<'a, 'tcx> HasMoveData<'tcx> for MaybeInitializedPlaces<'a, 'tcx> { + fn move_data(&self) -> &MoveData<'tcx> { + &self.mdpe.move_data + } +} + +/// `MaybeUninitializedPlaces` tracks all places that might be +/// uninitialized upon reaching a particular point in the control flow +/// for a function. +/// +/// For example, in code like the following, we have corresponding +/// dataflow information shown in the right-hand comments. +/// +/// ```rust +/// struct S; +/// fn foo(pred: bool) { // maybe-uninit: +/// // {a, b, c, d} +/// let a = S; let mut b = S; let c; let d; // { c, d} +/// +/// if pred { +/// drop(a); // {a, c, d} +/// b = S; // {a, c, d} +/// +/// } else { +/// drop(b); // { b, c, d} +/// d = S; // { b, c } +/// +/// } // {a, b, c, d} +/// +/// c = S; // {a, b, d} +/// } +/// ``` +/// +/// To determine whether a place *must* be uninitialized at a +/// particular control-flow point, one can take the set-difference +/// between this data and the data from `MaybeInitializedPlaces` at the +/// corresponding control-flow point. +/// +/// Similarly, at a given `drop` statement, the set-intersection +/// between this data and `MaybeInitializedPlaces` yields the set of +/// places that would require a dynamic drop-flag at that statement. +pub struct MaybeUninitializedPlaces<'a, 'tcx> { + tcx: TyCtxt<'tcx>, + body: &'a Body<'tcx>, + mdpe: &'a MoveDataParamEnv<'tcx>, + + mark_inactive_variants_as_uninit: bool, +} + +impl<'a, 'tcx> MaybeUninitializedPlaces<'a, 'tcx> { + pub fn new(tcx: TyCtxt<'tcx>, body: &'a Body<'tcx>, mdpe: &'a MoveDataParamEnv<'tcx>) -> Self { + MaybeUninitializedPlaces { tcx, body, mdpe, mark_inactive_variants_as_uninit: false } + } + + /// Causes inactive enum variants to be marked as "maybe uninitialized" after a switch on an + /// enum discriminant. + /// + /// This is correct in a vacuum but is not the default because it causes problems in the borrow + /// checker, where this information gets propagated along `FakeEdge`s. + pub fn mark_inactive_variants_as_uninit(mut self) -> Self { + self.mark_inactive_variants_as_uninit = true; + self + } +} + +impl<'a, 'tcx> HasMoveData<'tcx> for MaybeUninitializedPlaces<'a, 'tcx> { + fn move_data(&self) -> &MoveData<'tcx> { + &self.mdpe.move_data + } +} + +/// `DefinitelyInitializedPlaces` tracks all places that are definitely +/// initialized upon reaching a particular point in the control flow +/// for a function. +/// +/// For example, in code like the following, we have corresponding +/// dataflow information shown in the right-hand comments. 
+/// +/// ```rust +/// struct S; +/// fn foo(pred: bool) { // definite-init: +/// // { } +/// let a = S; let mut b = S; let c; let d; // {a, b } +/// +/// if pred { +/// drop(a); // { b, } +/// b = S; // { b, } +/// +/// } else { +/// drop(b); // {a, } +/// d = S; // {a, d} +/// +/// } // { } +/// +/// c = S; // { c } +/// } +/// ``` +/// +/// To determine whether a place *may* be uninitialized at a +/// particular control-flow point, one can take the set-complement +/// of this data. +/// +/// Similarly, at a given `drop` statement, the set-difference between +/// this data and `MaybeInitializedPlaces` yields the set of places +/// that would require a dynamic drop-flag at that statement. +pub struct DefinitelyInitializedPlaces<'a, 'tcx> { + tcx: TyCtxt<'tcx>, + body: &'a Body<'tcx>, + mdpe: &'a MoveDataParamEnv<'tcx>, +} + +impl<'a, 'tcx> DefinitelyInitializedPlaces<'a, 'tcx> { + pub fn new(tcx: TyCtxt<'tcx>, body: &'a Body<'tcx>, mdpe: &'a MoveDataParamEnv<'tcx>) -> Self { + DefinitelyInitializedPlaces { tcx, body, mdpe } + } +} + +impl<'a, 'tcx> HasMoveData<'tcx> for DefinitelyInitializedPlaces<'a, 'tcx> { + fn move_data(&self) -> &MoveData<'tcx> { + &self.mdpe.move_data + } +} + +/// `EverInitializedPlaces` tracks all places that might have ever been +/// initialized upon reaching a particular point in the control flow +/// for a function, without an intervening `StorageDead`. +/// +/// This dataflow is used to determine if an immutable local variable may +/// be assigned to. +/// +/// For example, in code like the following, we have corresponding +/// dataflow information shown in the right-hand comments. +/// +/// ```rust +/// struct S; +/// fn foo(pred: bool) { // ever-init: +/// // { } +/// let a = S; let mut b = S; let c; let d; // {a, b } +/// +/// if pred { +/// drop(a); // {a, b, } +/// b = S; // {a, b, } +/// +/// } else { +/// drop(b); // {a, b, } +/// d = S; // {a, b, d } +/// +/// } // {a, b, d } +/// +/// c = S; // {a, b, c, d } +/// } +/// ``` +pub struct EverInitializedPlaces<'a, 'tcx> { + #[allow(dead_code)] + tcx: TyCtxt<'tcx>, + body: &'a Body<'tcx>, + mdpe: &'a MoveDataParamEnv<'tcx>, +} + +impl<'a, 'tcx> EverInitializedPlaces<'a, 'tcx> { + pub fn new(tcx: TyCtxt<'tcx>, body: &'a Body<'tcx>, mdpe: &'a MoveDataParamEnv<'tcx>) -> Self { + EverInitializedPlaces { tcx, body, mdpe } + } +} + +impl<'a, 'tcx> HasMoveData<'tcx> for EverInitializedPlaces<'a, 'tcx> { + fn move_data(&self) -> &MoveData<'tcx> { + &self.mdpe.move_data + } +} + +impl<'a, 'tcx> MaybeInitializedPlaces<'a, 'tcx> { + fn update_bits( + trans: &mut impl GenKill<MovePathIndex>, + path: MovePathIndex, + state: DropFlagState, + ) { + match state { + DropFlagState::Absent => trans.kill(path), + DropFlagState::Present => trans.gen(path), + } + } +} + +impl<'a, 'tcx> MaybeUninitializedPlaces<'a, 'tcx> { + fn update_bits( + trans: &mut impl GenKill<MovePathIndex>, + path: MovePathIndex, + state: DropFlagState, + ) { + match state { + DropFlagState::Absent => trans.gen(path), + DropFlagState::Present => trans.kill(path), + } + } +} + +impl<'a, 'tcx> DefinitelyInitializedPlaces<'a, 'tcx> { + fn update_bits( + trans: &mut impl GenKill<MovePathIndex>, + path: MovePathIndex, + state: DropFlagState, + ) { + match state { + DropFlagState::Absent => trans.kill(path), + DropFlagState::Present => trans.gen(path), + } + } +} + +impl<'tcx> AnalysisDomain<'tcx> for MaybeInitializedPlaces<'_, 'tcx> { + type Domain = ChunkedBitSet<MovePathIndex>; + const NAME: &'static str = "maybe_init"; + + fn bottom_value(&self, _: 
&mir::Body<'tcx>) -> Self::Domain { + // bottom = uninitialized + ChunkedBitSet::new_empty(self.move_data().move_paths.len()) + } + + fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { + drop_flag_effects_for_function_entry(self.tcx, self.body, self.mdpe, |path, s| { + assert!(s == DropFlagState::Present); + state.insert(path); + }); + } +} + +impl<'tcx> GenKillAnalysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> { + type Idx = MovePathIndex; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + drop_flag_effects_for_location(self.tcx, self.body, self.mdpe, location, |path, s| { + Self::update_bits(trans, path, s) + }); + + if !self.tcx.sess.opts.unstable_opts.precise_enum_drop_elaboration { + return; + } + + // Mark all places as "maybe init" if they are mutably borrowed. See #90752. + for_each_mut_borrow(statement, location, |place| { + let LookupResult::Exact(mpi) = self.move_data().rev_lookup.find(place.as_ref()) else { return }; + on_all_children_bits(self.tcx, self.body, self.move_data(), mpi, |child| { + trans.gen(child); + }) + }) + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + drop_flag_effects_for_location(self.tcx, self.body, self.mdpe, location, |path, s| { + Self::update_bits(trans, path, s) + }); + + if !self.tcx.sess.opts.unstable_opts.precise_enum_drop_elaboration { + return; + } + + for_each_mut_borrow(terminator, location, |place| { + let LookupResult::Exact(mpi) = self.move_data().rev_lookup.find(place.as_ref()) else { return }; + on_all_children_bits(self.tcx, self.body, self.move_data(), mpi, |child| { + trans.gen(child); + }) + }) + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _block: mir::BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| { + // when a call returns successfully, that means we need to set + // the bits for that dest_place to 1 (initialized). + on_lookup_result_bits( + self.tcx, + self.body, + self.move_data(), + self.move_data().rev_lookup.find(place.as_ref()), + |mpi| { + trans.gen(mpi); + }, + ); + }); + } + + fn switch_int_edge_effects<G: GenKill<Self::Idx>>( + &self, + block: mir::BasicBlock, + discr: &mir::Operand<'tcx>, + edge_effects: &mut impl SwitchIntEdgeEffects<G>, + ) { + if !self.tcx.sess.opts.unstable_opts.precise_enum_drop_elaboration { + return; + } + + let enum_ = discr.place().and_then(|discr| { + switch_on_enum_discriminant(self.tcx, &self.body, &self.body[block], discr) + }); + + let Some((enum_place, enum_def)) = enum_ else { + return; + }; + + let mut discriminants = enum_def.discriminants(self.tcx); + edge_effects.apply(|trans, edge| { + let Some(value) = edge.value else { + return; + }; + + // MIR building adds discriminants to the `values` array in the same order as they + // are yielded by `AdtDef::discriminants`. We rely on this to match each + // discriminant in `values` to its corresponding variant in linear time. + let (variant, _) = discriminants + .find(|&(_, discr)| discr.val == value) + .expect("Order of `AdtDef::discriminants` differed from `SwitchInt::values`"); + + // Kill all move paths that correspond to variants we know to be inactive along this + // particular outgoing edge of a `SwitchInt`. 
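+ //
+ // A sketch of the effect on hypothetical source (`x: Option<String>`):
+ //
+ // match x { // SwitchInt on discriminant(x)
+ // None => {} // on this edge `(x as Some).0` can never be
+ // // initialized, so its bit is killed
+ // Some(s) => drop(s), // on this edge it remains maybe-init
+ // }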
+ drop_flag_effects::on_all_inactive_variants( + self.tcx, + self.body, + self.move_data(), + enum_place, + variant, + |mpi| trans.kill(mpi), + ); + }); + } +} + +impl<'tcx> AnalysisDomain<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> { + type Domain = ChunkedBitSet<MovePathIndex>; + + const NAME: &'static str = "maybe_uninit"; + + fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain { + // bottom = initialized (start_block_effect counters this at outset) + ChunkedBitSet::new_empty(self.move_data().move_paths.len()) + } + + // sets on_entry bits for Arg places + fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { + // set all bits to 1 (uninit) before gathering counter-evidence + state.insert_all(); + + drop_flag_effects_for_function_entry(self.tcx, self.body, self.mdpe, |path, s| { + assert!(s == DropFlagState::Present); + state.remove(path); + }); + } +} + +impl<'tcx> GenKillAnalysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> { + type Idx = MovePathIndex; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _statement: &mir::Statement<'tcx>, + location: Location, + ) { + drop_flag_effects_for_location(self.tcx, self.body, self.mdpe, location, |path, s| { + Self::update_bits(trans, path, s) + }); + + // Unlike in `MaybeInitializedPlaces` above, we don't need to change the state when a + // mutable borrow occurs. Places cannot become uninitialized through a mutable reference. + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + drop_flag_effects_for_location(self.tcx, self.body, self.mdpe, location, |path, s| { + Self::update_bits(trans, path, s) + }); + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _block: mir::BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| { + // when a call returns successfully, that means we need to set + // the bits for that dest_place to 0 (initialized). + on_lookup_result_bits( + self.tcx, + self.body, + self.move_data(), + self.move_data().rev_lookup.find(place.as_ref()), + |mpi| { + trans.kill(mpi); + }, + ); + }); + } + + fn switch_int_edge_effects<G: GenKill<Self::Idx>>( + &self, + block: mir::BasicBlock, + discr: &mir::Operand<'tcx>, + edge_effects: &mut impl SwitchIntEdgeEffects<G>, + ) { + if !self.tcx.sess.opts.unstable_opts.precise_enum_drop_elaboration { + return; + } + + if !self.mark_inactive_variants_as_uninit { + return; + } + + let enum_ = discr.place().and_then(|discr| { + switch_on_enum_discriminant(self.tcx, &self.body, &self.body[block], discr) + }); + + let Some((enum_place, enum_def)) = enum_ else { + return; + }; + + let mut discriminants = enum_def.discriminants(self.tcx); + edge_effects.apply(|trans, edge| { + let Some(value) = edge.value else { + return; + }; + + // MIR building adds discriminants to the `values` array in the same order as they + // are yielded by `AdtDef::discriminants`. We rely on this to match each + // discriminant in `values` to its corresponding variant in linear time. + let (variant, _) = discriminants + .find(|&(_, discr)| discr.val == value) + .expect("Order of `AdtDef::discriminants` differed from `SwitchInt::values`"); + + // Mark all move paths that correspond to variants other than this one as maybe + // uninitialized (in reality, they are *definitely* uninitialized). 
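+ //
+ // Gen'ing the bit is the sound direction for this lattice: it only grows
+ // the maybe-uninitialized set, even though "definitely uninitialized" is
+ // the stronger fact that holds for those inactive variants.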
+ drop_flag_effects::on_all_inactive_variants( + self.tcx, + self.body, + self.move_data(), + enum_place, + variant, + |mpi| trans.gen(mpi), + ); + }); + } +} + +impl<'a, 'tcx> AnalysisDomain<'tcx> for DefinitelyInitializedPlaces<'a, 'tcx> { + /// Use set intersection as the join operator. + type Domain = lattice::Dual<BitSet<MovePathIndex>>; + + const NAME: &'static str = "definite_init"; + + fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain { + // bottom = initialized (start_block_effect counters this at outset) + lattice::Dual(BitSet::new_filled(self.move_data().move_paths.len())) + } + + // sets on_entry bits for Arg places + fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { + state.0.clear(); + + drop_flag_effects_for_function_entry(self.tcx, self.body, self.mdpe, |path, s| { + assert!(s == DropFlagState::Present); + state.0.insert(path); + }); + } +} + +impl<'tcx> GenKillAnalysis<'tcx> for DefinitelyInitializedPlaces<'_, 'tcx> { + type Idx = MovePathIndex; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _statement: &mir::Statement<'tcx>, + location: Location, + ) { + drop_flag_effects_for_location(self.tcx, self.body, self.mdpe, location, |path, s| { + Self::update_bits(trans, path, s) + }) + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + drop_flag_effects_for_location(self.tcx, self.body, self.mdpe, location, |path, s| { + Self::update_bits(trans, path, s) + }) + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _block: mir::BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| { + // when a call returns successfully, that means we need to set + // the bits for that dest_place to 1 (initialized). + on_lookup_result_bits( + self.tcx, + self.body, + self.move_data(), + self.move_data().rev_lookup.find(place.as_ref()), + |mpi| { + trans.gen(mpi); + }, + ); + }); + } +} + +impl<'tcx> AnalysisDomain<'tcx> for EverInitializedPlaces<'_, 'tcx> { + type Domain = ChunkedBitSet<InitIndex>; + + const NAME: &'static str = "ever_init"; + + fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain { + // bottom = no initialized variables by default + ChunkedBitSet::new_empty(self.move_data().inits.len()) + } + + fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain) { + for arg_init in 0..body.arg_count { + state.insert(InitIndex::new(arg_init)); + } + } +} + +impl<'tcx> GenKillAnalysis<'tcx> for EverInitializedPlaces<'_, 'tcx> { + type Idx = InitIndex; + + #[instrument(skip(self, trans), level = "debug")] + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + stmt: &mir::Statement<'tcx>, + location: Location, + ) { + let move_data = self.move_data(); + let init_path_map = &move_data.init_path_map; + let init_loc_map = &move_data.init_loc_map; + let rev_lookup = &move_data.rev_lookup; + + debug!("initializes move_indexes {:?}", &init_loc_map[location]); + trans.gen_all(init_loc_map[location].iter().copied()); + + if let mir::StatementKind::StorageDead(local) = stmt.kind { + // End inits for StorageDead, so that an immutable variable can + // be reinitialized on the next iteration of the loop. 
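+ //
+ // A sketch of the pattern this enables, on hypothetical source:
+ //
+ // loop {
+ // let x; // StorageLive(x): fresh storage each iteration
+ // x = f(); // allowed: the previous init ended at StorageDead(x)
+ // } // StorageDead(x) kills the inits of x here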
+ let move_path_index = rev_lookup.find_local(local); + debug!("clears the ever initialized status of {:?}", init_path_map[move_path_index]); + trans.kill_all(init_path_map[move_path_index].iter().copied()); + } + } + + #[instrument(skip(self, trans, _terminator), level = "debug")] + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + let (body, move_data) = (self.body, self.move_data()); + let term = body[location.block].terminator(); + let init_loc_map = &move_data.init_loc_map; + debug!(?term); + debug!("initializes move_indexes {:?}", init_loc_map[location]); + trans.gen_all( + init_loc_map[location] + .iter() + .filter(|init_index| { + move_data.inits[**init_index].kind != InitKind::NonPanicPathOnly + }) + .copied(), + ); + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + block: mir::BasicBlock, + _return_places: CallReturnPlaces<'_, 'tcx>, + ) { + let move_data = self.move_data(); + let init_loc_map = &move_data.init_loc_map; + + let call_loc = self.body.terminator_loc(block); + for init_index in &init_loc_map[call_loc] { + trans.gen(*init_index); + } + } +} + +/// Inspect a `SwitchInt`-terminated basic block to see if the condition of that `SwitchInt` is +/// an enum discriminant. +/// +/// We expect such blocks to have a call to `discriminant` as their last statement like so: +/// +/// ```text +/// ... +/// _42 = discriminant(_1) +/// SwitchInt(_42, ..) +/// ``` +/// +/// If the basic block matches this pattern, this function returns the place corresponding to the +/// enum (`_1` in the example above) as well as the `AdtDef` of that enum. +fn switch_on_enum_discriminant<'mir, 'tcx>( + tcx: TyCtxt<'tcx>, + body: &'mir mir::Body<'tcx>, + block: &'mir mir::BasicBlockData<'tcx>, + switch_on: mir::Place<'tcx>, +) -> Option<(mir::Place<'tcx>, ty::AdtDef<'tcx>)> { + for statement in block.statements.iter().rev() { + match &statement.kind { + mir::StatementKind::Assign(box (lhs, mir::Rvalue::Discriminant(discriminated))) + if *lhs == switch_on => + { + match discriminated.ty(body, tcx).ty.kind() { + ty::Adt(def, _) => return Some((*discriminated, *def)), + + // `Rvalue::Discriminant` is also used to get the active yield point for a + // generator, but we do not need edge-specific effects in that case. This may + // change in the future. + ty::Generator(..) => return None, + + t => bug!("`discriminant` called on unexpected type {:?}", t), + } + } + mir::StatementKind::Coverage(_) => continue, + _ => return None, + } + } + None +} + +struct OnMutBorrow<F>(F); + +impl<F> Visitor<'_> for OnMutBorrow<F> +where + F: FnMut(&mir::Place<'_>), +{ + fn visit_rvalue(&mut self, rvalue: &mir::Rvalue<'_>, location: Location) { + // FIXME: Does `&raw const foo` allow mutation? See #90413. + match rvalue { + mir::Rvalue::Ref(_, mir::BorrowKind::Mut { .. }, place) + | mir::Rvalue::AddressOf(_, place) => (self.0)(place), + + _ => {} + } + + self.super_rvalue(rvalue, location) + } +} + +/// Calls `f` for each mutable borrow or raw reference in the program. +/// +/// This DOES NOT call `f` for a shared borrow of a type with interior mutability. That's okay for +/// initializedness, because we cannot move from an `UnsafeCell` (outside of `core::cell`), but +/// other analyses will likely need to check for `!Freeze`. 
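+///
+/// A sketch of what is and is not reported, on hypothetical source:
+///
+/// ```rust,ignore
+/// let mut x = 0;
+/// let a = &mut x; // reported: `Rvalue::Ref` with `BorrowKind::Mut`
+/// let b = std::ptr::addr_of_mut!(x); // reported: `Rvalue::AddressOf`
+/// let c = &x; // not reported: shared borrow
+/// ```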
+fn for_each_mut_borrow<'tcx>( + mir: &impl MirVisitable<'tcx>, + location: Location, + f: impl FnMut(&mir::Place<'_>), +) { + let mut vis = OnMutBorrow(f); + + mir.apply(location, &mut vis); +} diff --git a/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs new file mode 100644 index 000000000..f6b5af90a --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/impls/storage_liveness.rs @@ -0,0 +1,300 @@ +pub use super::*; + +use crate::{CallReturnPlaces, GenKill, Results, ResultsRefCursor}; +use rustc_middle::mir::visit::{NonMutatingUseContext, PlaceContext, Visitor}; +use rustc_middle::mir::*; +use std::cell::RefCell; + +#[derive(Clone)] +pub struct MaybeStorageLive { + always_live_locals: BitSet<Local>, +} + +impl MaybeStorageLive { + pub fn new(always_live_locals: BitSet<Local>) -> Self { + MaybeStorageLive { always_live_locals } + } +} + +impl<'tcx> crate::AnalysisDomain<'tcx> for MaybeStorageLive { + type Domain = BitSet<Local>; + + const NAME: &'static str = "maybe_storage_live"; + + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain { + // bottom = dead + BitSet::new_empty(body.local_decls.len()) + } + + fn initialize_start_block(&self, body: &mir::Body<'tcx>, on_entry: &mut Self::Domain) { + assert_eq!(body.local_decls.len(), self.always_live_locals.domain_size()); + for local in self.always_live_locals.iter() { + on_entry.insert(local); + } + + for arg in body.args_iter() { + on_entry.insert(arg); + } + } +} + +impl<'tcx> crate::GenKillAnalysis<'tcx> for MaybeStorageLive { + type Idx = Local; + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + stmt: &mir::Statement<'tcx>, + _: Location, + ) { + match stmt.kind { + StatementKind::StorageLive(l) => trans.gen(l), + StatementKind::StorageDead(l) => trans.kill(l), + _ => (), + } + } + + fn terminator_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _: &mir::Terminator<'tcx>, + _: Location, + ) { + // Terminators have no effect + } + + fn call_return_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _block: BasicBlock, + _return_places: CallReturnPlaces<'_, 'tcx>, + ) { + // Nothing to do when a call returns successfully + } +} + +type BorrowedLocalsResults<'a, 'tcx> = ResultsRefCursor<'a, 'a, 'tcx, MaybeBorrowedLocals>; + +/// Dataflow analysis that determines whether each local requires storage at a +/// given location; i.e. whether its storage can go away without being observed. 
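+///
+/// The motivating consumer is generator layout: a local only needs a slot in
+/// the generator state if it requires storage across a suspension point.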
+pub struct MaybeRequiresStorage<'mir, 'tcx> { + body: &'mir Body<'tcx>, + borrowed_locals: RefCell<BorrowedLocalsResults<'mir, 'tcx>>, +} + +impl<'mir, 'tcx> MaybeRequiresStorage<'mir, 'tcx> { + pub fn new( + body: &'mir Body<'tcx>, + borrowed_locals: &'mir Results<'tcx, MaybeBorrowedLocals>, + ) -> Self { + MaybeRequiresStorage { + body, + borrowed_locals: RefCell::new(ResultsRefCursor::new(&body, borrowed_locals)), + } + } +} + +impl<'mir, 'tcx> crate::AnalysisDomain<'tcx> for MaybeRequiresStorage<'mir, 'tcx> { + type Domain = BitSet<Local>; + + const NAME: &'static str = "requires_storage"; + + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain { + // bottom = dead + BitSet::new_empty(body.local_decls.len()) + } + + fn initialize_start_block(&self, body: &mir::Body<'tcx>, on_entry: &mut Self::Domain) { + // The resume argument is live on function entry (we don't care about + // the `self` argument) + for arg in body.args_iter().skip(1) { + on_entry.insert(arg); + } + } +} + +impl<'mir, 'tcx> crate::GenKillAnalysis<'tcx> for MaybeRequiresStorage<'mir, 'tcx> { + type Idx = Local; + + fn before_statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + stmt: &mir::Statement<'tcx>, + loc: Location, + ) { + // If a place is borrowed in a statement, it needs storage for that statement. + self.borrowed_locals.borrow().analysis().statement_effect(trans, stmt, loc); + + match &stmt.kind { + StatementKind::StorageDead(l) => trans.kill(*l), + + // If a place is assigned to in a statement, it needs storage for that statement. + StatementKind::Assign(box (place, _)) + | StatementKind::SetDiscriminant { box place, .. } + | StatementKind::Deinit(box place) => { + trans.gen(place.local); + } + + // Nothing to do for these. Match exhaustively so this fails to compile when new + // variants are added. + StatementKind::AscribeUserType(..) + | StatementKind::Coverage(..) + | StatementKind::FakeRead(..) + | StatementKind::Nop + | StatementKind::Retag(..) + | StatementKind::CopyNonOverlapping(..) + | StatementKind::StorageLive(..) => {} + } + } + + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _: &mir::Statement<'tcx>, + loc: Location, + ) { + // If we move from a place then it only stops needing storage *after* + // that statement. + self.check_for_move(trans, loc); + } + + fn before_terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + loc: Location, + ) { + // If a place is borrowed in a terminator, it needs storage for that terminator. + self.borrowed_locals.borrow().analysis().terminator_effect(trans, terminator, loc); + + match &terminator.kind { + TerminatorKind::Call { destination, .. } => { + trans.gen(destination.local); + } + + // Note that we do *not* gen the `resume_arg` of `Yield` terminators. The reason for + // that is that a `yield` will return from the function, and `resume_arg` is written + // only when the generator is later resumed. Unlike `Call`, this doesn't require the + // place to have storage *before* the yield, only after. + TerminatorKind::Yield { .. } => {} + + TerminatorKind::InlineAsm { operands, .. } => { + for op in operands { + match op { + InlineAsmOperand::Out { place, .. } + | InlineAsmOperand::InOut { out_place: place, .. } => { + if let Some(place) = place { + trans.gen(place.local); + } + } + InlineAsmOperand::In { .. } + | InlineAsmOperand::Const { .. } + | InlineAsmOperand::SymFn { .. } + | InlineAsmOperand::SymStatic { .. 
} => {} + } + } + } + + // Nothing to do for these. Match exhaustively so this fails to compile when new + // variants are added. + TerminatorKind::Abort + | TerminatorKind::Assert { .. } + | TerminatorKind::Drop { .. } + | TerminatorKind::DropAndReplace { .. } + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::FalseUnwind { .. } + | TerminatorKind::GeneratorDrop + | TerminatorKind::Goto { .. } + | TerminatorKind::Resume + | TerminatorKind::Return + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::Unreachable => {} + } + } + + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + loc: Location, + ) { + match terminator.kind { + // For call terminators the destination requires storage for the call + // and after the call returns successfully, but not after a panic. + // Since `propagate_call_unwind` doesn't exist, we have to kill the + // destination here, and then gen it again in `call_return_effect`. + TerminatorKind::Call { destination, .. } => { + trans.kill(destination.local); + } + + // The same applies to InlineAsm outputs. + TerminatorKind::InlineAsm { ref operands, .. } => { + CallReturnPlaces::InlineAsm(operands).for_each(|place| trans.kill(place.local)); + } + + // Nothing to do for these. Match exhaustively so this fails to compile when new + // variants are added. + TerminatorKind::Yield { .. } + | TerminatorKind::Abort + | TerminatorKind::Assert { .. } + | TerminatorKind::Drop { .. } + | TerminatorKind::DropAndReplace { .. } + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::FalseUnwind { .. } + | TerminatorKind::GeneratorDrop + | TerminatorKind::Goto { .. } + | TerminatorKind::Resume + | TerminatorKind::Return + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::Unreachable => {} + } + + self.check_for_move(trans, loc); + } + + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + return_places.for_each(|place| trans.gen(place.local)); + } + + fn yield_resume_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + _resume_block: BasicBlock, + resume_place: mir::Place<'tcx>, + ) { + trans.gen(resume_place.local); + } +} + +impl<'mir, 'tcx> MaybeRequiresStorage<'mir, 'tcx> { + /// Kill locals that are fully moved and have not been borrowed. 
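+ ///
+ /// Sketch: once `consume(s)` has moved out of `s` and no borrow of `s` is
+ /// live, nothing can observe `s`'s storage, so its bit can be killed at
+ /// that location.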
+ fn check_for_move(&self, trans: &mut impl GenKill<Local>, loc: Location) { + let mut visitor = MoveVisitor { trans, borrowed_locals: &self.borrowed_locals }; + visitor.visit_location(&self.body, loc); + } +} + +struct MoveVisitor<'a, 'mir, 'tcx, T> { + borrowed_locals: &'a RefCell<BorrowedLocalsResults<'mir, 'tcx>>, + trans: &'a mut T, +} + +impl<'a, 'mir, 'tcx, T> Visitor<'tcx> for MoveVisitor<'a, 'mir, 'tcx, T> +where + T: GenKill<Local>, +{ + fn visit_local(&mut self, local: Local, context: PlaceContext, loc: Location) { + if PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) == context { + let mut borrowed_locals = self.borrowed_locals.borrow_mut(); + borrowed_locals.seek_before_primary_effect(loc); + if !borrowed_locals.contains(local) { + self.trans.kill(local); + } + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/lib.rs b/compiler/rustc_mir_dataflow/src/lib.rs new file mode 100644 index 000000000..5793a286b --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/lib.rs @@ -0,0 +1,63 @@ +#![feature(associated_type_defaults)] +#![feature(box_patterns)] +#![feature(exact_size_is_empty)] +#![feature(let_else)] +#![feature(min_specialization)] +#![feature(once_cell)] +#![feature(stmt_expr_attributes)] +#![feature(trusted_step)] +#![recursion_limit = "256"] + +#[macro_use] +extern crate tracing; +#[macro_use] +extern crate rustc_middle; + +use rustc_ast::MetaItem; +use rustc_hir::def_id::DefId; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_span::symbol::{sym, Symbol}; + +pub use self::drop_flag_effects::{ + drop_flag_effects_for_function_entry, drop_flag_effects_for_location, + move_path_children_matching, on_all_children_bits, on_all_drop_children_bits, + on_lookup_result_bits, +}; +pub use self::framework::{ + fmt, graphviz, lattice, visit_results, Analysis, AnalysisDomain, Backward, CallReturnPlaces, + Direction, Engine, Forward, GenKill, GenKillAnalysis, JoinSemiLattice, Results, ResultsCursor, + ResultsRefCursor, ResultsVisitable, ResultsVisitor, SwitchIntEdgeEffects, +}; + +use self::move_paths::MoveData; + +pub mod drop_flag_effects; +pub mod elaborate_drops; +mod framework; +pub mod impls; +pub mod move_paths; +pub mod rustc_peek; +pub mod storage; +pub mod un_derefer; + +pub(crate) mod indexes { + pub(crate) use super::move_paths::MovePathIndex; +} + +pub struct MoveDataParamEnv<'tcx> { + pub move_data: MoveData<'tcx>, + pub param_env: ty::ParamEnv<'tcx>, +} + +pub fn has_rustc_mir_with(tcx: TyCtxt<'_>, def_id: DefId, name: Symbol) -> Option<MetaItem> { + for attr in tcx.get_attrs(def_id, sym::rustc_mir) { + let items = attr.meta_item_list(); + for item in items.iter().flat_map(|l| l.iter()) { + match item.meta_item() { + Some(mi) if mi.has_name(name) => return Some(mi.clone()), + _ => continue, + } + } + } + None +} diff --git a/compiler/rustc_mir_dataflow/src/move_paths/abs_domain.rs b/compiler/rustc_mir_dataflow/src/move_paths/abs_domain.rs new file mode 100644 index 000000000..28936274b --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/move_paths/abs_domain.rs @@ -0,0 +1,61 @@ +//! The move-analysis portion of borrowck needs to work in an abstract +//! domain of lifted `Place`s. Most of the `Place` variants fall into a +//! one-to-one mapping between the concrete and abstract (e.g., a +//! field-deref on a local variable, `x.field`, has the same meaning +//! in both domains). Indexed projections are the exception: `a[x]` +//! needs to be treated as mapping to the same move path as `a[y]` as +//! well as `a[13]`, etc. +//! +//! 
(In theory, the analysis could be extended to work with sets of
+//! paths, so that `a[0]` and `a[13]` could be kept distinct, while
+//! `a[x]` would still overlap them both. But that is not what this
+//! representation does today.)
+
+use rustc_middle::mir::{Local, Operand, PlaceElem, ProjectionElem};
+use rustc_middle::ty::Ty;
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub struct AbstractOperand;
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub struct AbstractType;
+pub type AbstractElem = ProjectionElem<AbstractOperand, AbstractType>;
+
+pub trait Lift {
+ type Abstract;
+ fn lift(&self) -> Self::Abstract;
+}
+impl<'tcx> Lift for Operand<'tcx> {
+ type Abstract = AbstractOperand;
+ fn lift(&self) -> Self::Abstract {
+ AbstractOperand
+ }
+}
+impl Lift for Local {
+ type Abstract = AbstractOperand;
+ fn lift(&self) -> Self::Abstract {
+ AbstractOperand
+ }
+}
+impl<'tcx> Lift for Ty<'tcx> {
+ type Abstract = AbstractType;
+ fn lift(&self) -> Self::Abstract {
+ AbstractType
+ }
+}
+impl<'tcx> Lift for PlaceElem<'tcx> {
+ type Abstract = AbstractElem;
+ fn lift(&self) -> Self::Abstract {
+ match *self {
+ ProjectionElem::Deref => ProjectionElem::Deref,
+ ProjectionElem::Field(f, ty) => ProjectionElem::Field(f, ty.lift()),
+ ProjectionElem::Index(ref i) => ProjectionElem::Index(i.lift()),
+ ProjectionElem::Subslice { from, to, from_end } => {
+ ProjectionElem::Subslice { from, to, from_end }
+ }
+ ProjectionElem::ConstantIndex { offset, min_length, from_end } => {
+ ProjectionElem::ConstantIndex { offset, min_length, from_end }
+ }
+ ProjectionElem::Downcast(a, u) => ProjectionElem::Downcast(a, u),
+ }
+ }
+}
diff --git a/compiler/rustc_mir_dataflow/src/move_paths/builder.rs b/compiler/rustc_mir_dataflow/src/move_paths/builder.rs
new file mode 100644
index 000000000..116e5c1f3
--- /dev/null
+++ b/compiler/rustc_mir_dataflow/src/move_paths/builder.rs
@@ -0,0 +1,559 @@
+use crate::move_paths::FxHashMap;
+use crate::un_derefer::UnDerefer;
+use rustc_index::vec::IndexVec;
+use rustc_middle::mir::tcx::RvalueInitializationState;
+use rustc_middle::mir::*;
+use rustc_middle::ty::{self, TyCtxt};
+use smallvec::{smallvec, SmallVec};
+
+use std::mem;
+
+use super::abs_domain::Lift;
+use super::IllegalMoveOriginKind::*;
+use super::{Init, InitIndex, InitKind, InitLocation, LookupResult, MoveError};
+use super::{
+ LocationMap, MoveData, MoveOut, MoveOutIndex, MovePath, MovePathIndex, MovePathLookup,
+};
+
+struct MoveDataBuilder<'a, 'tcx> {
+ body: &'a Body<'tcx>,
+ tcx: TyCtxt<'tcx>,
+ param_env: ty::ParamEnv<'tcx>,
+ data: MoveData<'tcx>,
+ errors: Vec<(Place<'tcx>, MoveError<'tcx>)>,
+ un_derefer: UnDerefer<'tcx>,
+}
+
+impl<'a, 'tcx> MoveDataBuilder<'a, 'tcx> {
+ fn new(body: &'a Body<'tcx>, tcx: TyCtxt<'tcx>, param_env: ty::ParamEnv<'tcx>) -> Self {
+ let mut move_paths = IndexVec::new();
+ let mut path_map = IndexVec::new();
+ let mut init_path_map = IndexVec::new();
+
+ MoveDataBuilder {
+ body,
+ tcx,
+ param_env,
+ errors: Vec::new(),
+ un_derefer: UnDerefer { tcx, derefer_sidetable: Default::default() },
+ data: MoveData {
+ moves: IndexVec::new(),
+ loc_map: LocationMap::new(body),
+ rev_lookup: MovePathLookup {
+ locals: body
+ .local_decls
+ .indices()
+ .map(|i| {
+ Self::new_move_path(
+ &mut move_paths,
+ &mut path_map,
+ &mut init_path_map,
+ None,
+ Place::from(i),
+ )
+ })
+ .collect(),
+ projections: Default::default(),
+ },
+ move_paths,
+ path_map,
+ inits: IndexVec::new(),
+ init_loc_map: LocationMap::new(body),
+ init_path_map,
+ },
+ }
+ }
+
+ fn 
new_move_path(
+ move_paths: &mut IndexVec<MovePathIndex, MovePath<'tcx>>,
+ path_map: &mut IndexVec<MovePathIndex, SmallVec<[MoveOutIndex; 4]>>,
+ init_path_map: &mut IndexVec<MovePathIndex, SmallVec<[InitIndex; 4]>>,
+ parent: Option<MovePathIndex>,
+ place: Place<'tcx>,
+ ) -> MovePathIndex {
+ let move_path =
+ move_paths.push(MovePath { next_sibling: None, first_child: None, parent, place });
+
+ if let Some(parent) = parent {
+ let next_sibling = mem::replace(&mut move_paths[parent].first_child, Some(move_path));
+ move_paths[move_path].next_sibling = next_sibling;
+ }
+
+ let path_map_ent = path_map.push(smallvec![]);
+ assert_eq!(path_map_ent, move_path);
+
+ let init_path_map_ent = init_path_map.push(smallvec![]);
+ assert_eq!(init_path_map_ent, move_path);
+
+ move_path
+ }
+}
+
+impl<'b, 'a, 'tcx> Gatherer<'b, 'a, 'tcx> {
+ /// This creates a MovePath for a given place, returning a `MoveError`
+ /// if that place can't be moved from.
+ ///
+ /// NOTE: places behind references *do not* get a move path, which is
+ /// problematic for borrowck.
+ ///
+ /// Maybe we should have separate "borrowck" and "moveck" modes.
+ fn move_path_for(&mut self, place: Place<'tcx>) -> Result<MovePathIndex, MoveError<'tcx>> {
+ if let Some(new_place) = self.builder.un_derefer.derefer(place.as_ref(), self.builder.body)
+ {
+ return self.move_path_for(new_place);
+ }
+
+ debug!("lookup({:?})", place);
+ let mut base = self.builder.data.rev_lookup.locals[place.local];
+
+ // The move path index of the first union that we find. Once this is
+ // `Some` we stop creating child move paths, since moves from unions
+ // move the whole thing.
+ // We continue looking for other move errors though so that moving
+ // from `*(u.f: &_)` isn't allowed.
+ let mut union_path = None;
+
+ for (i, elem) in place.projection.iter().enumerate() {
+ let proj_base = &place.projection[..i];
+ let body = self.builder.body;
+ let tcx = self.builder.tcx;
+ let place_ty = Place::ty_from(place.local, proj_base, body, tcx).ty;
+ match place_ty.kind() {
+ ty::Ref(..) | ty::RawPtr(..) => {
+ let proj = &place.projection[..i + 1];
+ return Err(MoveError::cannot_move_out_of(
+ self.loc,
+ BorrowedContent {
+ target_place: Place {
+ local: place.local,
+ projection: tcx.intern_place_elems(proj),
+ },
+ },
+ ));
+ }
+ ty::Adt(adt, _) if adt.has_dtor(tcx) && !adt.is_box() => {
+ return Err(MoveError::cannot_move_out_of(
+ self.loc,
+ InteriorOfTypeWithDestructor { container_ty: place_ty },
+ ));
+ }
+ ty::Adt(adt, _) if adt.is_union() => {
+ union_path.get_or_insert(base);
+ }
+ ty::Slice(_) => {
+ return Err(MoveError::cannot_move_out_of(
+ self.loc,
+ InteriorOfSliceOrArray {
+ ty: place_ty,
+ is_index: matches!(elem, ProjectionElem::Index(..)),
+ },
+ ));
+ }
+
+ ty::Array(..) => {
+ if let ProjectionElem::Index(..) = elem {
+ return Err(MoveError::cannot_move_out_of(
+ self.loc,
+ InteriorOfSliceOrArray { ty: place_ty, is_index: true },
+ ));
+ }
+ }
+
+ _ => {}
+ };
+
+ if union_path.is_none() {
+ base = self.add_move_path(base, elem, |tcx| Place {
+ local: place.local,
+ projection: tcx.intern_place_elems(&place.projection[..i + 1]),
+ });
+ }
+ }
+
+ if let Some(base) = union_path {
+ // Move out of union - always move the entire union.
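+ //
+ // Sketch: given `union U { s: ManuallyDrop<String>, n: u64 }`, a move out
+ // of `u.s` is recorded against the path of `u` itself, because the fields
+ // overlap and cannot be tracked as independent move paths.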
+ Err(MoveError::UnionMove { path: base })
+ } else {
+ Ok(base)
+ }
+ }
+
+ fn add_move_path(
+ &mut self,
+ base: MovePathIndex,
+ elem: PlaceElem<'tcx>,
+ mk_place: impl FnOnce(TyCtxt<'tcx>) -> Place<'tcx>,
+ ) -> MovePathIndex {
+ let MoveDataBuilder {
+ data: MoveData { rev_lookup, move_paths, path_map, init_path_map, .. },
+ tcx,
+ ..
+ } = self.builder;
+ *rev_lookup.projections.entry((base, elem.lift())).or_insert_with(move || {
+ MoveDataBuilder::new_move_path(
+ move_paths,
+ path_map,
+ init_path_map,
+ Some(base),
+ mk_place(*tcx),
+ )
+ })
+ }
+
+ fn create_move_path(&mut self, place: Place<'tcx>) {
+ // This is a non-moving access (such as an overwrite or
+ // drop), so it is fine if this is not a valid move path.
+ let _ = self.move_path_for(place);
+ }
+}
+
+pub type MoveDat<'tcx> = Result<
+ (FxHashMap<Local, Place<'tcx>>, MoveData<'tcx>),
+ (MoveData<'tcx>, Vec<(Place<'tcx>, MoveError<'tcx>)>),
+>;
+
+impl<'a, 'tcx> MoveDataBuilder<'a, 'tcx> {
+ fn finalize(self) -> MoveDat<'tcx> {
+ debug!("{}", {
+ debug!("moves for {:?}:", self.body.span);
+ for (j, mo) in self.data.moves.iter_enumerated() {
+ debug!(" {:?} = {:?}", j, mo);
+ }
+ debug!("move paths for {:?}:", self.body.span);
+ for (j, path) in self.data.move_paths.iter_enumerated() {
+ debug!(" {:?} = {:?}", j, path);
+ }
+ "done dumping moves"
+ });
+
+ if self.errors.is_empty() {
+ Ok((self.un_derefer.derefer_sidetable, self.data))
+ } else {
+ Err((self.data, self.errors))
+ }
+ }
+}
+
+pub(super) fn gather_moves<'tcx>(
+ body: &Body<'tcx>,
+ tcx: TyCtxt<'tcx>,
+ param_env: ty::ParamEnv<'tcx>,
+) -> MoveDat<'tcx> {
+ let mut builder = MoveDataBuilder::new(body, tcx, param_env);
+
+ builder.gather_args();
+
+ for (bb, block) in body.basic_blocks().iter_enumerated() {
+ for (i, stmt) in block.statements.iter().enumerate() {
+ let source = Location { block: bb, statement_index: i };
+ builder.gather_statement(source, stmt);
+ }
+
+ let terminator_loc = Location { block: bb, statement_index: block.statements.len() };
+ builder.gather_terminator(terminator_loc, block.terminator());
+ }
+
+ builder.finalize()
+}
+
+impl<'a, 'tcx> MoveDataBuilder<'a, 'tcx> {
+ fn gather_args(&mut self) {
+ for arg in self.body.args_iter() {
+ let path = self.data.rev_lookup.locals[arg];
+
+ let init = self.data.inits.push(Init {
+ path,
+ kind: InitKind::Deep,
+ location: InitLocation::Argument(arg),
+ });
+
+ debug!("gather_args: adding init {:?} of {:?} for argument {:?}", init, path, arg);
+
+ self.data.init_path_map[path].push(init);
+ }
+ }
+
+ fn gather_statement(&mut self, loc: Location, stmt: &Statement<'tcx>) {
+ debug!("gather_statement({:?}, {:?})", loc, stmt);
+ (Gatherer { builder: self, loc }).gather_statement(stmt);
+ }
+
+ fn gather_terminator(&mut self, loc: Location, term: &Terminator<'tcx>) {
+ debug!("gather_terminator({:?}, {:?})", loc, term);
+ (Gatherer { builder: self, loc }).gather_terminator(term);
+ }
+}
+
+struct Gatherer<'b, 'a, 'tcx> {
+ builder: &'b mut MoveDataBuilder<'a, 'tcx>,
+ loc: Location,
+}
+
+impl<'b, 'a, 'tcx> Gatherer<'b, 'a, 'tcx> {
+ fn gather_statement(&mut self, stmt: &Statement<'tcx>) {
+ match &stmt.kind {
+ StatementKind::Assign(box (place, Rvalue::CopyForDeref(reffed))) => {
+ assert!(place.projection.is_empty());
+ if self.builder.body.local_decls[place.local].is_deref_temp() {
+ self.builder.un_derefer.derefer_sidetable.insert(place.local, *reffed);
+ }
+ }
+ StatementKind::Assign(box (place, rval)) => {
+ self.create_move_path(*place);
+ if let RvalueInitializationState::Shallow = 
rval.initialization_state() { + // Box starts out uninitialized - need to create a separate + // move-path for the interior so it will be separate from + // the exterior. + self.create_move_path(self.builder.tcx.mk_place_deref(*place)); + self.gather_init(place.as_ref(), InitKind::Shallow); + } else { + self.gather_init(place.as_ref(), InitKind::Deep); + } + self.gather_rvalue(rval); + } + StatementKind::FakeRead(box (_, place)) => { + self.create_move_path(*place); + } + StatementKind::StorageLive(_) => {} + StatementKind::StorageDead(local) => { + // DerefTemp locals (results of CopyForDeref) don't actually move anything. + if !self.builder.un_derefer.derefer_sidetable.contains_key(&local) { + self.gather_move(Place::from(*local)); + } + } + StatementKind::SetDiscriminant { .. } | StatementKind::Deinit(..) => { + span_bug!( + stmt.source_info.span, + "SetDiscriminant/Deinit should not exist during borrowck" + ); + } + StatementKind::Retag { .. } + | StatementKind::AscribeUserType(..) + | StatementKind::Coverage(..) + | StatementKind::CopyNonOverlapping(..) + | StatementKind::Nop => {} + } + } + + fn gather_rvalue(&mut self, rvalue: &Rvalue<'tcx>) { + match *rvalue { + Rvalue::ThreadLocalRef(_) => {} // not-a-move + Rvalue::Use(ref operand) + | Rvalue::Repeat(ref operand, _) + | Rvalue::Cast(_, ref operand, _) + | Rvalue::ShallowInitBox(ref operand, _) + | Rvalue::UnaryOp(_, ref operand) => self.gather_operand(operand), + Rvalue::BinaryOp(ref _binop, box (ref lhs, ref rhs)) + | Rvalue::CheckedBinaryOp(ref _binop, box (ref lhs, ref rhs)) => { + self.gather_operand(lhs); + self.gather_operand(rhs); + } + Rvalue::Aggregate(ref _kind, ref operands) => { + for operand in operands { + self.gather_operand(operand); + } + } + Rvalue::CopyForDeref(..) => unreachable!(), + Rvalue::Ref(..) + | Rvalue::AddressOf(..) + | Rvalue::Discriminant(..) + | Rvalue::Len(..) + | Rvalue::NullaryOp(NullOp::SizeOf | NullOp::AlignOf, _) => {} + } + } + + fn gather_terminator(&mut self, term: &Terminator<'tcx>) { + match term.kind { + TerminatorKind::Goto { target: _ } + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::FalseUnwind { .. } + // In some sense returning moves the return place into the current + // call's destination, however, since there are no statements after + // this that could possibly access the return place, this doesn't + // need recording. + | TerminatorKind::Return + | TerminatorKind::Resume + | TerminatorKind::Abort + | TerminatorKind::GeneratorDrop + | TerminatorKind::Unreachable => {} + + TerminatorKind::Assert { ref cond, .. } => { + self.gather_operand(cond); + } + + TerminatorKind::SwitchInt { ref discr, .. } => { + self.gather_operand(discr); + } + + TerminatorKind::Yield { ref value, resume_arg: place, .. } => { + self.gather_operand(value); + self.create_move_path(place); + self.gather_init(place.as_ref(), InitKind::Deep); + } + + TerminatorKind::Drop { place, target: _, unwind: _ } => { + self.gather_move(place); + } + TerminatorKind::DropAndReplace { place, ref value, .. 
} => { + self.create_move_path(place); + self.gather_operand(value); + self.gather_init(place.as_ref(), InitKind::Deep); + } + TerminatorKind::Call { + ref func, + ref args, + destination, + target, + cleanup: _, + from_hir_call: _, + fn_span: _, + } => { + self.gather_operand(func); + for arg in args { + self.gather_operand(arg); + } + if let Some(_bb) = target { + self.create_move_path(destination); + self.gather_init(destination.as_ref(), InitKind::NonPanicPathOnly); + } + } + TerminatorKind::InlineAsm { + template: _, + ref operands, + options: _, + line_spans: _, + destination: _, + cleanup: _, + } => { + for op in operands { + match *op { + InlineAsmOperand::In { reg: _, ref value } + => { + self.gather_operand(value); + } + InlineAsmOperand::Out { reg: _, late: _, place, .. } => { + if let Some(place) = place { + self.create_move_path(place); + self.gather_init(place.as_ref(), InitKind::Deep); + } + } + InlineAsmOperand::InOut { reg: _, late: _, ref in_value, out_place } => { + self.gather_operand(in_value); + if let Some(out_place) = out_place { + self.create_move_path(out_place); + self.gather_init(out_place.as_ref(), InitKind::Deep); + } + } + InlineAsmOperand::Const { value: _ } + | InlineAsmOperand::SymFn { value: _ } + | InlineAsmOperand::SymStatic { def_id: _ } => {} + } + } + } + } + } + + fn gather_operand(&mut self, operand: &Operand<'tcx>) { + match *operand { + Operand::Constant(..) | Operand::Copy(..) => {} // not-a-move + Operand::Move(place) => { + // a move + self.gather_move(place); + } + } + } + + fn gather_move(&mut self, place: Place<'tcx>) { + debug!("gather_move({:?}, {:?})", self.loc, place); + if let Some(new_place) = self.builder.un_derefer.derefer(place.as_ref(), self.builder.body) + { + self.gather_move(new_place); + return; + } + + if let [ref base @ .., ProjectionElem::Subslice { from, to, from_end: false }] = + **place.projection + { + // Split `Subslice` patterns into the corresponding list of + // `ConstIndex` patterns. This is done to ensure that all move paths + // are disjoint, which is expected by drop elaboration. + let base_place = + Place { local: place.local, projection: self.builder.tcx.intern_place_elems(base) }; + let base_path = match self.move_path_for(base_place) { + Ok(path) => path, + Err(MoveError::UnionMove { path }) => { + self.record_move(place, path); + return; + } + Err(error @ MoveError::IllegalMove { .. }) => { + self.builder.errors.push((base_place, error)); + return; + } + }; + let base_ty = base_place.ty(self.builder.body, self.builder.tcx).ty; + let len: u64 = match base_ty.kind() { + ty::Array(_, size) => size.eval_usize(self.builder.tcx, self.builder.param_env), + _ => bug!("from_end: false slice pattern of non-array type"), + }; + for offset in from..to { + let elem = + ProjectionElem::ConstantIndex { offset, min_length: len, from_end: false }; + let path = + self.add_move_path(base_path, elem, |tcx| tcx.mk_place_elem(base_place, elem)); + self.record_move(place, path); + } + } else { + match self.move_path_for(place) { + Ok(path) | Err(MoveError::UnionMove { path }) => self.record_move(place, path), + Err(error @ MoveError::IllegalMove { .. 
}) => { + self.builder.errors.push((place, error)); + } + }; + } + } + + fn record_move(&mut self, place: Place<'tcx>, path: MovePathIndex) { + let move_out = self.builder.data.moves.push(MoveOut { path, source: self.loc }); + debug!( + "gather_move({:?}, {:?}): adding move {:?} of {:?}", + self.loc, place, move_out, path + ); + self.builder.data.path_map[path].push(move_out); + self.builder.data.loc_map[self.loc].push(move_out); + } + + fn gather_init(&mut self, place: PlaceRef<'tcx>, kind: InitKind) { + debug!("gather_init({:?}, {:?})", self.loc, place); + + if let Some(new_place) = self.builder.un_derefer.derefer(place, self.builder.body) { + self.gather_init(new_place.as_ref(), kind); + return; + } + + let mut place = place; + + // Check if we are assigning into a field of a union, if so, lookup the place + // of the union so it is marked as initialized again. + if let Some((place_base, ProjectionElem::Field(_, _))) = place.last_projection() { + if place_base.ty(self.builder.body, self.builder.tcx).ty.is_union() { + place = place_base; + } + } + + if let LookupResult::Exact(path) = self.builder.data.rev_lookup.find(place) { + let init = self.builder.data.inits.push(Init { + location: InitLocation::Statement(self.loc), + path, + kind, + }); + + debug!( + "gather_init({:?}, {:?}): adding init {:?} of {:?}", + self.loc, place, init, path + ); + + self.builder.data.init_path_map[path].push(init); + self.builder.data.init_loc_map[self.loc].push(init); + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/move_paths/mod.rs b/compiler/rustc_mir_dataflow/src/move_paths/mod.rs new file mode 100644 index 000000000..a951c5b0b --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/move_paths/mod.rs @@ -0,0 +1,422 @@ +use crate::move_paths::builder::MoveDat; +use rustc_data_structures::fx::FxHashMap; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::*; +use rustc_middle::ty::{ParamEnv, Ty, TyCtxt}; +use rustc_span::Span; +use smallvec::SmallVec; + +use std::fmt; +use std::ops::{Index, IndexMut}; + +use self::abs_domain::{AbstractElem, Lift}; + +mod abs_domain; + +rustc_index::newtype_index! { + pub struct MovePathIndex { + DEBUG_FORMAT = "mp{}" + } +} + +impl polonius_engine::Atom for MovePathIndex { + fn index(self) -> usize { + rustc_index::vec::Idx::index(self) + } +} + +rustc_index::newtype_index! { + pub struct MoveOutIndex { + DEBUG_FORMAT = "mo{}" + } +} + +rustc_index::newtype_index! { + pub struct InitIndex { + DEBUG_FORMAT = "in{}" + } +} + +impl MoveOutIndex { + pub fn move_path_index(self, move_data: &MoveData<'_>) -> MovePathIndex { + move_data.moves[self].path + } +} + +/// `MovePath` is a canonicalized representation of a path that is +/// moved or assigned to. +/// +/// It follows a tree structure. +/// +/// Given `struct X { m: M, n: N }` and `x: X`, moves like `drop x.m;` +/// move *out* of the place `x.m`. +/// +/// The MovePaths representing `x.m` and `x.n` are siblings (that is, +/// one of them will link to the other via the `next_sibling` field, +/// and the other will have no entry in its `next_sibling` field), and +/// they both have the MovePath representing `x` as their parent. +#[derive(Clone)] +pub struct MovePath<'tcx> { + pub next_sibling: Option<MovePathIndex>, + pub first_child: Option<MovePathIndex>, + pub parent: Option<MovePathIndex>, + pub place: Place<'tcx>, +} + +impl<'tcx> MovePath<'tcx> { + /// Returns an iterator over the parents of `self`. 
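+ ///
+ /// For the path of `x.m.n`, say, this yields the paths of `x.m` and then of `x`.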
+ pub fn parents<'a>( + &self, + move_paths: &'a IndexVec<MovePathIndex, MovePath<'tcx>>, + ) -> impl 'a + Iterator<Item = (MovePathIndex, &'a MovePath<'tcx>)> { + let first = self.parent.map(|mpi| (mpi, &move_paths[mpi])); + MovePathLinearIter { + next: first, + fetch_next: move |_, parent: &MovePath<'_>| { + parent.parent.map(|mpi| (mpi, &move_paths[mpi])) + }, + } + } + + /// Returns an iterator over the immediate children of `self`. + pub fn children<'a>( + &self, + move_paths: &'a IndexVec<MovePathIndex, MovePath<'tcx>>, + ) -> impl 'a + Iterator<Item = (MovePathIndex, &'a MovePath<'tcx>)> { + let first = self.first_child.map(|mpi| (mpi, &move_paths[mpi])); + MovePathLinearIter { + next: first, + fetch_next: move |_, child: &MovePath<'_>| { + child.next_sibling.map(|mpi| (mpi, &move_paths[mpi])) + }, + } + } + + /// Finds the closest descendant of `self` for which `f` returns `true` using a breadth-first + /// search. + /// + /// `f` will **not** be called on `self`. + pub fn find_descendant( + &self, + move_paths: &IndexVec<MovePathIndex, MovePath<'_>>, + f: impl Fn(MovePathIndex) -> bool, + ) -> Option<MovePathIndex> { + let mut todo = if let Some(child) = self.first_child { + vec![child] + } else { + return None; + }; + + while let Some(mpi) = todo.pop() { + if f(mpi) { + return Some(mpi); + } + + let move_path = &move_paths[mpi]; + if let Some(child) = move_path.first_child { + todo.push(child); + } + + // After we've processed the original `mpi`, we should always + // traverse the siblings of any of its children. + if let Some(sibling) = move_path.next_sibling { + todo.push(sibling); + } + } + + None + } +} + +impl<'tcx> fmt::Debug for MovePath<'tcx> { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(w, "MovePath {{")?; + if let Some(parent) = self.parent { + write!(w, " parent: {:?},", parent)?; + } + if let Some(first_child) = self.first_child { + write!(w, " first_child: {:?},", first_child)?; + } + if let Some(next_sibling) = self.next_sibling { + write!(w, " next_sibling: {:?}", next_sibling)?; + } + write!(w, " place: {:?} }}", self.place) + } +} + +impl<'tcx> fmt::Display for MovePath<'tcx> { + fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(w, "{:?}", self.place) + } +} + +struct MovePathLinearIter<'a, 'tcx, F> { + next: Option<(MovePathIndex, &'a MovePath<'tcx>)>, + fetch_next: F, +} + +impl<'a, 'tcx, F> Iterator for MovePathLinearIter<'a, 'tcx, F> +where + F: FnMut(MovePathIndex, &'a MovePath<'tcx>) -> Option<(MovePathIndex, &'a MovePath<'tcx>)>, +{ + type Item = (MovePathIndex, &'a MovePath<'tcx>); + + fn next(&mut self) -> Option<Self::Item> { + let ret = self.next.take()?; + self.next = (self.fetch_next)(ret.0, ret.1); + Some(ret) + } +} + +#[derive(Debug)] +pub struct MoveData<'tcx> { + pub move_paths: IndexVec<MovePathIndex, MovePath<'tcx>>, + pub moves: IndexVec<MoveOutIndex, MoveOut>, + /// Each Location `l` is mapped to the MoveOut's that are effects + /// of executing the code at `l`. (There can be multiple MoveOut's + /// for a given `l` because each MoveOut is associated with one + /// particular path being moved.) + pub loc_map: LocationMap<SmallVec<[MoveOutIndex; 4]>>, + pub path_map: IndexVec<MovePathIndex, SmallVec<[MoveOutIndex; 4]>>, + pub rev_lookup: MovePathLookup, + pub inits: IndexVec<InitIndex, Init>, + /// Each Location `l` is mapped to the Inits that are effects + /// of executing the code at `l`. 
+ pub init_loc_map: LocationMap<SmallVec<[InitIndex; 4]>>,
+ pub init_path_map: IndexVec<MovePathIndex, SmallVec<[InitIndex; 4]>>,
+}
+
+pub trait HasMoveData<'tcx> {
+ fn move_data(&self) -> &MoveData<'tcx>;
+}
+
+#[derive(Debug)]
+pub struct LocationMap<T> {
+ /// Location-indexed (BasicBlock for outer index, index within BB
+ /// for inner index) map.
+ pub(crate) map: IndexVec<BasicBlock, Vec<T>>,
+}
+
+impl<T> Index<Location> for LocationMap<T> {
+ type Output = T;
+ fn index(&self, index: Location) -> &Self::Output {
+ &self.map[index.block][index.statement_index]
+ }
+}
+
+impl<T> IndexMut<Location> for LocationMap<T> {
+ fn index_mut(&mut self, index: Location) -> &mut Self::Output {
+ &mut self.map[index.block][index.statement_index]
+ }
+}
+
+impl<T> LocationMap<T>
+where
+ T: Default + Clone,
+{
+ fn new(body: &Body<'_>) -> Self {
+ LocationMap {
+ map: body
+ .basic_blocks()
+ .iter()
+ .map(|block| vec![T::default(); block.statements.len() + 1])
+ .collect(),
+ }
+ }
+}
+
+/// `MoveOut` represents a point in a program that moves out of some
+/// L-value; i.e., "creates" uninitialized memory.
+///
+/// With respect to dataflow analysis:
+/// - Generated by moves and declarations of uninitialized variables.
+/// - Killed by assignments to the memory.
+#[derive(Copy, Clone)]
+pub struct MoveOut {
+ /// path being moved
+ pub path: MovePathIndex,
+ /// location of move
+ pub source: Location,
+}
+
+impl fmt::Debug for MoveOut {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(fmt, "{:?}@{:?}", self.path, self.source)
+ }
+}
+
+/// `Init` represents a point in a program that initializes some L-value.
+#[derive(Copy, Clone)]
+pub struct Init {
+ /// path being initialized
+ pub path: MovePathIndex,
+ /// location of initialization
+ pub location: InitLocation,
+ /// Extra information about this initialization
+ pub kind: InitKind,
+}
+
+/// Initializations can be from an argument or from a statement. Arguments
+/// do not have locations; in those cases, the `Local` is kept.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum InitLocation {
+ Argument(Local),
+ Statement(Location),
+}
+
+/// Additional information about the initialization.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum InitKind {
+ /// Deep init, even on panic
+ Deep,
+ /// Only does a shallow init
+ Shallow,
+ /// This doesn't initialize the variable on panic (and a panic is possible).
+ NonPanicPathOnly,
+}
+
+impl fmt::Debug for Init {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(fmt, "{:?}@{:?} ({:?})", self.path, self.location, self.kind)
+ }
+}
+
+impl Init {
+ pub fn span<'tcx>(&self, body: &Body<'tcx>) -> Span {
+ match self.location {
+ InitLocation::Argument(local) => body.local_decls[local].source_info.span,
+ InitLocation::Statement(location) => body.source_info(location).span,
+ }
+ }
+}
+
+/// Tables mapping from a place to its MovePathIndex.
+#[derive(Debug)]
+pub struct MovePathLookup {
+ locals: IndexVec<Local, MovePathIndex>,
+
+ /// Projections are made from a base place and a projection elem. The
+ /// base place has a unique `MovePathIndex`; we map the pair of that
+ /// index and the (lifted) projection elem to the `MovePathIndex` of
+ /// the projected place.
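+ ///
+ /// Sketch: a lookup of `x.f.g` starts at `locals[x]`, then follows
+ /// `projections[(path(x), f)]` and `projections[(path(x.f), g)]`.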
+ projections: FxHashMap<(MovePathIndex, AbstractElem), MovePathIndex>,
+}
+
+mod builder;
+
+#[derive(Copy, Clone, Debug)]
+pub enum LookupResult {
+ Exact(MovePathIndex),
+ Parent(Option<MovePathIndex>),
+}
+
+impl MovePathLookup {
+ // Unlike `fn move_path_for` in the builder, this lookup will *not*
+ // create a MovePath on the fly for an unknown place, but will rather
+ // return the nearest available parent.
+ pub fn find(&self, place: PlaceRef<'_>) -> LookupResult {
+ let mut result = self.locals[place.local];
+
+ for elem in place.projection.iter() {
+ if let Some(&subpath) = self.projections.get(&(result, elem.lift())) {
+ result = subpath;
+ } else {
+ return LookupResult::Parent(Some(result));
+ }
+ }
+
+ LookupResult::Exact(result)
+ }
+
+ pub fn find_local(&self, local: Local) -> MovePathIndex {
+ self.locals[local]
+ }
+
+ /// An enumerated iterator of `local`s and their associated
+ /// `MovePathIndex`es.
+ pub fn iter_locals_enumerated(
+ &self,
+ ) -> impl DoubleEndedIterator<Item = (Local, MovePathIndex)> + ExactSizeIterator + '_ {
+ self.locals.iter_enumerated().map(|(l, &idx)| (l, idx))
+ }
+}
+
+#[derive(Debug)]
+pub struct IllegalMoveOrigin<'tcx> {
+ pub location: Location,
+ pub kind: IllegalMoveOriginKind<'tcx>,
+}
+
+#[derive(Debug)]
+pub enum IllegalMoveOriginKind<'tcx> {
+ /// Illegal move due to attempt to move from behind a reference.
+ BorrowedContent {
+ /// The place the reference refers to: if erroneous code was trying to
+ /// move from `(*x).f` this will be `*x`.
+ target_place: Place<'tcx>,
+ },
+
+ /// Illegal move due to attempt to move from a field of an ADT that
+ /// implements `Drop`. Rust maintains the invariant that all `Drop`
+ /// ADTs remain fully initialized so that the user-defined destructor
+ /// can safely read from all of the ADT's fields.
+ InteriorOfTypeWithDestructor { container_ty: Ty<'tcx> },
+
+ /// Illegal move due to attempt to move out of a slice or array.
+ InteriorOfSliceOrArray { ty: Ty<'tcx>, is_index: bool },
+}
+
+#[derive(Debug)]
+pub enum MoveError<'tcx> {
+ IllegalMove { cannot_move_out_of: IllegalMoveOrigin<'tcx> },
+ UnionMove { path: MovePathIndex },
+}
+
+impl<'tcx> MoveError<'tcx> {
+ fn cannot_move_out_of(location: Location, kind: IllegalMoveOriginKind<'tcx>) -> Self {
+ let origin = IllegalMoveOrigin { location, kind };
+ MoveError::IllegalMove { cannot_move_out_of: origin }
+ }
+}
+
+impl<'tcx> MoveData<'tcx> {
+ pub fn gather_moves(
+ body: &Body<'tcx>,
+ tcx: TyCtxt<'tcx>,
+ param_env: ParamEnv<'tcx>,
+ ) -> MoveDat<'tcx> {
+ builder::gather_moves(body, tcx, param_env)
+ }
+
+ /// For the move path `mpi`, returns the root local variable (if any) that starts the path.
+ /// (e.g., for a path like `a.b.c` returns `Some(a)`) + pub fn base_local(&self, mut mpi: MovePathIndex) -> Option<Local> { + loop { + let path = &self.move_paths[mpi]; + if let Some(l) = path.place.as_local() { + return Some(l); + } + if let Some(parent) = path.parent { + mpi = parent; + continue; + } else { + return None; + } + } + } + + pub fn find_in_move_path_or_its_descendants( + &self, + root: MovePathIndex, + pred: impl Fn(MovePathIndex) -> bool, + ) -> Option<MovePathIndex> { + if pred(root) { + return Some(root); + } + + self.move_paths[root].find_descendant(&self.move_paths, pred) + } +} diff --git a/compiler/rustc_mir_dataflow/src/rustc_peek.rs b/compiler/rustc_mir_dataflow/src/rustc_peek.rs new file mode 100644 index 000000000..f2471f37a --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/rustc_peek.rs @@ -0,0 +1,287 @@ +use rustc_span::symbol::sym; +use rustc_span::Span; + +use rustc_index::bit_set::ChunkedBitSet; +use rustc_middle::mir::MirPass; +use rustc_middle::mir::{self, Body, Local, Location}; +use rustc_middle::ty::{self, Ty, TyCtxt}; + +use crate::framework::BitSetExt; +use crate::impls::{ + DefinitelyInitializedPlaces, MaybeInitializedPlaces, MaybeLiveLocals, MaybeUninitializedPlaces, +}; +use crate::move_paths::{HasMoveData, MoveData}; +use crate::move_paths::{LookupResult, MovePathIndex}; +use crate::MoveDataParamEnv; +use crate::{Analysis, JoinSemiLattice, Results, ResultsCursor}; + +pub struct SanityCheck; + +// FIXME: This should be a `MirLint`, but it needs to be moved back to `rustc_mir_transform` first. +impl<'tcx> MirPass<'tcx> for SanityCheck { + fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { + use crate::has_rustc_mir_with; + let def_id = body.source.def_id(); + if !tcx.has_attr(def_id, sym::rustc_mir) { + debug!("skipping rustc_peek::SanityCheck on {}", tcx.def_path_str(def_id)); + return; + } else { + debug!("running rustc_peek::SanityCheck on {}", tcx.def_path_str(def_id)); + } + + let param_env = tcx.param_env(def_id); + let (_, move_data) = MoveData::gather_moves(body, tcx, param_env).unwrap(); + let mdpe = MoveDataParamEnv { move_data, param_env }; + + if has_rustc_mir_with(tcx, def_id, sym::rustc_peek_maybe_init).is_some() { + let flow_inits = MaybeInitializedPlaces::new(tcx, body, &mdpe) + .into_engine(tcx, body) + .iterate_to_fixpoint(); + + sanity_check_via_rustc_peek(tcx, body, &flow_inits); + } + + if has_rustc_mir_with(tcx, def_id, sym::rustc_peek_maybe_uninit).is_some() { + let flow_uninits = MaybeUninitializedPlaces::new(tcx, body, &mdpe) + .into_engine(tcx, body) + .iterate_to_fixpoint(); + + sanity_check_via_rustc_peek(tcx, body, &flow_uninits); + } + + if has_rustc_mir_with(tcx, def_id, sym::rustc_peek_definite_init).is_some() { + let flow_def_inits = DefinitelyInitializedPlaces::new(tcx, body, &mdpe) + .into_engine(tcx, body) + .iterate_to_fixpoint(); + + sanity_check_via_rustc_peek(tcx, body, &flow_def_inits); + } + + if has_rustc_mir_with(tcx, def_id, sym::rustc_peek_liveness).is_some() { + let flow_liveness = MaybeLiveLocals.into_engine(tcx, body).iterate_to_fixpoint(); + + sanity_check_via_rustc_peek(tcx, body, &flow_liveness); + } + + if has_rustc_mir_with(tcx, def_id, sym::stop_after_dataflow).is_some() { + tcx.sess.fatal("stop_after_dataflow ended compilation"); + } + } +} + +/// This function scans `mir` for all calls to the intrinsic +/// `rustc_peek` that have the expression form `rustc_peek(&expr)`. 
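+///
+/// Such a call site might look like this (a sketch in the style of the
+/// `mir-dataflow` UI tests, not a verbatim test):
+///
+/// ```rust,ignore
+/// #[rustc_mir(rustc_peek_maybe_init, stop_after_dataflow)]
+/// fn test(x: i32) {
+/// let y: i32;
+/// unsafe { rustc_peek(&y) }; //~ ERROR rustc_peek: bit not set
+/// y = x;
+/// unsafe { rustc_peek(&y) }; // bit is set: no error
+/// }
+/// ```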
+///
+/// For each such call, determines what the dataflow bit-state is for
+/// the L-value corresponding to `expr`; if the bit-state is a 1, then
+/// that call to `rustc_peek` is ignored by the sanity check. If the
+/// bit-state is a 0, then this pass emits an error message saying
+/// "rustc_peek: bit not set".
+///
+/// The intention is that one can write unit tests for dataflow by
+/// putting code into a UI test and using `rustc_peek` to
+/// make observations about the results of dataflow static analyses.
+///
+/// (If there are any calls to `rustc_peek` that do not match the
+/// expression form above, then that emits an error as well, but those
+/// errors are not intended to be used for unit tests.)
+pub fn sanity_check_via_rustc_peek<'tcx, A>(
+    tcx: TyCtxt<'tcx>,
+    body: &Body<'tcx>,
+    results: &Results<'tcx, A>,
+) where
+    A: RustcPeekAt<'tcx>,
+{
+    let def_id = body.source.def_id();
+    debug!("sanity_check_via_rustc_peek def_id: {:?}", def_id);
+
+    let mut cursor = ResultsCursor::new(body, results);
+
+    let peek_calls = body.basic_blocks().iter_enumerated().filter_map(|(bb, block_data)| {
+        PeekCall::from_terminator(tcx, block_data.terminator()).map(|call| (bb, block_data, call))
+    });
+
+    for (bb, block_data, call) in peek_calls {
+        // Look for a sequence like the following to indicate that we should be peeking at `_1`:
+        //    _2 = &_1;
+        //    rustc_peek(_2);
+        //
+        //    /* or */
+        //
+        //    _2 = _1;
+        //    rustc_peek(_2);
+        let (statement_index, peek_rval) = block_data
+            .statements
+            .iter()
+            .enumerate()
+            .find_map(|(i, stmt)| value_assigned_to_local(stmt, call.arg).map(|rval| (i, rval)))
+            .expect(
+                "call to rustc_peek should be preceded by \
+                 assignment to temporary holding its argument",
+            );
+
+        match (call.kind, peek_rval) {
+            (PeekCallKind::ByRef, mir::Rvalue::Ref(_, _, place))
+            | (
+                PeekCallKind::ByVal,
+                mir::Rvalue::Use(mir::Operand::Move(place) | mir::Operand::Copy(place)),
+            ) => {
+                let loc = Location { block: bb, statement_index };
+                cursor.seek_before_primary_effect(loc);
+                let state = cursor.get();
+                results.analysis.peek_at(tcx, *place, state, call);
+            }
+
+            _ => {
+                let msg = "rustc_peek: argument expression \
+                           must be either `place` or `&place`";
+                tcx.sess.span_err(call.span, msg);
+            }
+        }
+    }
+}
+
+/// If `stmt` is an assignment where the LHS is the given local (with no projections), returns the
+/// RHS of the assignment.
+fn value_assigned_to_local<'a, 'tcx>(
+    stmt: &'a mir::Statement<'tcx>,
+    local: Local,
+) -> Option<&'a mir::Rvalue<'tcx>> {
+    if let mir::StatementKind::Assign(box (place, rvalue)) = &stmt.kind {
+        if let Some(l) = place.as_local() {
+            if local == l {
+                return Some(&*rvalue);
+            }
+        }
+    }
+
+    None
+}
+
+#[derive(Clone, Copy, Debug)]
+enum PeekCallKind {
+    ByVal,
+    ByRef,
+}
+
+impl PeekCallKind {
+    fn from_arg_ty(arg: Ty<'_>) -> Self {
+        match arg.kind() {
+            ty::Ref(_, _, _) => PeekCallKind::ByRef,
+            _ => PeekCallKind::ByVal,
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct PeekCall {
+    arg: Local,
+    kind: PeekCallKind,
+    span: Span,
+}
+
+impl PeekCall {
+    fn from_terminator<'tcx>(
+        tcx: TyCtxt<'tcx>,
+        terminator: &mir::Terminator<'tcx>,
+    ) -> Option<Self> {
+        use mir::Operand;
+
+        let span = terminator.source_info.span;
+        if let mir::TerminatorKind::Call { func: Operand::Constant(func), args, .. } =
+            &terminator.kind
+        {
+            if let ty::FnDef(def_id, substs) = *func.literal.ty().kind() {
+                let name = tcx.item_name(def_id);
+                if !tcx.is_intrinsic(def_id) || name != sym::rustc_peek {
+                    return None;
+                }
+
+                assert_eq!(args.len(), 1);
+                let kind = PeekCallKind::from_arg_ty(substs.type_at(0));
+                let arg = match &args[0] {
+                    Operand::Copy(place) | Operand::Move(place) => {
+                        if let Some(local) = place.as_local() {
+                            local
+                        } else {
+                            tcx.sess.diagnostic().span_err(
+                                span,
+                                "dataflow::sanity_check cannot feed a non-temp to rustc_peek.",
+                            );
+                            return None;
+                        }
+                    }
+                    _ => {
+                        tcx.sess.diagnostic().span_err(
+                            span,
+                            "dataflow::sanity_check cannot feed a non-temp to rustc_peek.",
+                        );
+                        return None;
+                    }
+                };
+
+                return Some(PeekCall { arg, kind, span });
+            }
+        }
+
+        None
+    }
+}
+
+pub trait RustcPeekAt<'tcx>: Analysis<'tcx> {
+    fn peek_at(
+        &self,
+        tcx: TyCtxt<'tcx>,
+        place: mir::Place<'tcx>,
+        flow_state: &Self::Domain,
+        call: PeekCall,
+    );
+}
+
+impl<'tcx, A, D> RustcPeekAt<'tcx> for A
+where
+    A: Analysis<'tcx, Domain = D> + HasMoveData<'tcx>,
+    D: JoinSemiLattice + Clone + BitSetExt<MovePathIndex>,
+{
+    fn peek_at(
+        &self,
+        tcx: TyCtxt<'tcx>,
+        place: mir::Place<'tcx>,
+        flow_state: &Self::Domain,
+        call: PeekCall,
+    ) {
+        match self.move_data().rev_lookup.find(place.as_ref()) {
+            LookupResult::Exact(peek_mpi) => {
+                let bit_state = flow_state.contains(peek_mpi);
+                debug!("rustc_peek({:?} = &{:?}) bit_state: {}", call.arg, place, bit_state);
+                if !bit_state {
+                    tcx.sess.span_err(call.span, "rustc_peek: bit not set");
+                }
+            }
+
+            LookupResult::Parent(..) => {
+                tcx.sess.span_err(call.span, "rustc_peek: argument untracked");
+            }
+        }
+    }
+}
+
+impl<'tcx> RustcPeekAt<'tcx> for MaybeLiveLocals {
+    fn peek_at(
+        &self,
+        tcx: TyCtxt<'tcx>,
+        place: mir::Place<'tcx>,
+        flow_state: &ChunkedBitSet<Local>,
+        call: PeekCall,
+    ) {
+        info!(?place, "peek_at");
+        let Some(local) = place.as_local() else {
+            tcx.sess.span_err(call.span, "rustc_peek: argument was not a local");
+            return;
+        };
+
+        if !flow_state.contains(local) {
+            tcx.sess.span_err(call.span, "rustc_peek: bit not set");
+        }
+    }
+}
diff --git a/compiler/rustc_mir_dataflow/src/storage.rs b/compiler/rustc_mir_dataflow/src/storage.rs
new file mode 100644
index 000000000..c909648ea
--- /dev/null
+++ b/compiler/rustc_mir_dataflow/src/storage.rs
@@ -0,0 +1,20 @@
+use rustc_index::bit_set::BitSet;
+use rustc_middle::mir::{self, Local};
+
+/// The set of locals in a MIR body that do not have `StorageLive`/`StorageDead` annotations.
+///
+/// These locals have fixed storage for the duration of the body.
+pub fn always_storage_live_locals(body: &mir::Body<'_>) -> BitSet<Local> {
+    let mut always_live_locals = BitSet::new_filled(body.local_decls.len());
+
+    for block in body.basic_blocks() {
+        for statement in &block.statements {
+            use mir::StatementKind::{StorageDead, StorageLive};
+            if let StorageLive(l) | StorageDead(l) = statement.kind {
+                always_live_locals.remove(l);
+            }
+        }
+    }
+
+    always_live_locals
+}
diff --git a/compiler/rustc_mir_dataflow/src/un_derefer.rs b/compiler/rustc_mir_dataflow/src/un_derefer.rs
new file mode 100644
index 000000000..7e6e25cc6
--- /dev/null
+++ b/compiler/rustc_mir_dataflow/src/un_derefer.rs
@@ -0,0 +1,22 @@
+use rustc_data_structures::fx::FxHashMap;
+use rustc_middle::mir::*;
+use rustc_middle::ty::TyCtxt;
+
+/// Used for reverting changes made by `DerefSeparator`.
+pub struct UnDerefer<'tcx> {
+    pub tcx: TyCtxt<'tcx>,
+    pub derefer_sidetable: FxHashMap<Local, Place<'tcx>>,
+}
+
+impl<'tcx> UnDerefer<'tcx> {
+    #[inline]
+    pub fn derefer(&self, place: PlaceRef<'tcx>, body: &Body<'tcx>) -> Option<Place<'tcx>> {
+        let reffed = self.derefer_sidetable.get(&place.local)?;
+
+        let new_place = reffed.project_deeper(place.projection, self.tcx);
+        if body.local_decls[new_place.local].is_deref_temp() {
+            return self.derefer(new_place.as_ref(), body);
+        }
+        Some(new_place)
+    }
+}
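The doc comment on `sanity_check_via_rustc_peek` above says that dataflow analyses are unit-tested by writing UI tests that call the `rustc_peek` intrinsic. A minimal sketch of what such a test looks like, modeled on the `mir-dataflow` UI tests in the rustc tree; the exact feature gates and attribute spelling here are assumptions drawn from those tests, not part of this patch:

    #![feature(core_intrinsics, rustc_attrs)]

    use std::intrinsics::rustc_peek;

    // `rustc_peek_maybe_init` selects the `MaybeInitializedPlaces` analysis;
    // `stop_after_dataflow` makes `SanityCheck` abort compilation once the
    // peeks have been checked.
    #[rustc_mir(rustc_peek_maybe_init, stop_after_dataflow)]
    fn test(x: i32) {
        let y = x;
        // `y` is initialized at this point, so the maybe-init bit for its
        // move path is set and this peek passes silently; peeking at an
        // uninitialized place would instead report "rustc_peek: bit not set".
        unsafe { rustc_peek(&y) };
    }

    fn main() {
        test(0);
    }

Note that `rustc_peek(&y)` follows the required `rustc_peek(&expr)` form: the argument is a reference, so `PeekCall` is classified as `ByRef` and the preceding `_2 = &_1` assignment identifies the peeked place. Even when every peek succeeds, compilation still ends with the "stop_after_dataflow ended compilation" fatal error emitted by `SanityCheck::run_pass` above, so such a UI test asserts that error (plus any "bit not set" diagnostics) in its expected-stderr file.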