diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /compiler/rustc_mir_dataflow/src/framework | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_mir_dataflow/src/framework')
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/cursor.rs | 235 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/direction.rs | 656 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/engine.rs | 413 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/fmt.rs | 211 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/graphviz.rs | 667 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/lattice.rs | 252 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/mod.rs | 624 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/tests.rs | 322 | ||||
-rw-r--r-- | compiler/rustc_mir_dataflow/src/framework/visitor.rs | 187 |
9 files changed, 3567 insertions, 0 deletions
diff --git a/compiler/rustc_mir_dataflow/src/framework/cursor.rs b/compiler/rustc_mir_dataflow/src/framework/cursor.rs new file mode 100644 index 000000000..f3b5544aa --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/cursor.rs @@ -0,0 +1,235 @@ +//! Random access inspection of the results of a dataflow analysis. + +use crate::framework::BitSetExt; + +use std::borrow::Borrow; +use std::cmp::Ordering; + +#[cfg(debug_assertions)] +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::{self, BasicBlock, Location}; + +use super::{Analysis, Direction, Effect, EffectIndex, Results}; + +/// A `ResultsCursor` that borrows the underlying `Results`. +pub type ResultsRefCursor<'a, 'mir, 'tcx, A> = ResultsCursor<'mir, 'tcx, A, &'a Results<'tcx, A>>; + +/// Allows random access inspection of the results of a dataflow analysis. +/// +/// This cursor only has linear performance within a basic block when its statements are visited in +/// the same order as the `DIRECTION` of the analysis. In the worst case—when statements are +/// visited in *reverse* order—performance will be quadratic in the number of statements in the +/// block. The order in which basic blocks are inspected has no impact on performance. +/// +/// A `ResultsCursor` can either own (the default) or borrow the dataflow results it inspects. The +/// type of ownership is determined by `R` (see `ResultsRefCursor` above). +pub struct ResultsCursor<'mir, 'tcx, A, R = Results<'tcx, A>> +where + A: Analysis<'tcx>, +{ + body: &'mir mir::Body<'tcx>, + results: R, + state: A::Domain, + + pos: CursorPosition, + + /// Indicates that `state` has been modified with a custom effect. + /// + /// When this flag is set, we need to reset to an entry set before doing a seek. + state_needs_reset: bool, + + #[cfg(debug_assertions)] + reachable_blocks: BitSet<BasicBlock>, +} + +impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R> +where + A: Analysis<'tcx>, + R: Borrow<Results<'tcx, A>>, +{ + /// Returns a new cursor that can inspect `results`. + pub fn new(body: &'mir mir::Body<'tcx>, results: R) -> Self { + let bottom_value = results.borrow().analysis.bottom_value(body); + ResultsCursor { + body, + results, + + // Initialize to the `bottom_value` and set `state_needs_reset` to tell the cursor that + // it needs to reset to block entry before the first seek. The cursor position is + // immaterial. + state_needs_reset: true, + state: bottom_value, + pos: CursorPosition::block_entry(mir::START_BLOCK), + + #[cfg(debug_assertions)] + reachable_blocks: mir::traversal::reachable_as_bitset(body), + } + } + + /// Allows inspection of unreachable basic blocks even with `debug_assertions` enabled. + #[cfg(test)] + pub(crate) fn allow_unreachable(&mut self) { + #[cfg(debug_assertions)] + self.reachable_blocks.insert_all() + } + + /// Returns the underlying `Results`. + pub fn results(&self) -> &Results<'tcx, A> { + &self.results.borrow() + } + + /// Returns the `Analysis` used to generate the underlying `Results`. + pub fn analysis(&self) -> &A { + &self.results.borrow().analysis + } + + /// Returns the dataflow state at the current location. + pub fn get(&self) -> &A::Domain { + &self.state + } + + /// Resets the cursor to hold the entry set for the given basic block. + /// + /// For forward dataflow analyses, this is the dataflow state prior to the first statement. + /// + /// For backward dataflow analyses, this is the dataflow state after the terminator. + pub(super) fn seek_to_block_entry(&mut self, block: BasicBlock) { + #[cfg(debug_assertions)] + assert!(self.reachable_blocks.contains(block)); + + self.state.clone_from(&self.results.borrow().entry_set_for_block(block)); + self.pos = CursorPosition::block_entry(block); + self.state_needs_reset = false; + } + + /// Resets the cursor to hold the state prior to the first statement in a basic block. + /// + /// For forward analyses, this is the entry set for the given block. + /// + /// For backward analyses, this is the state that will be propagated to its + /// predecessors (ignoring edge-specific effects). + pub fn seek_to_block_start(&mut self, block: BasicBlock) { + if A::Direction::IS_FORWARD { + self.seek_to_block_entry(block) + } else { + self.seek_after(Location { block, statement_index: 0 }, Effect::Primary) + } + } + + /// Resets the cursor to hold the state after the terminator in a basic block. + /// + /// For backward analyses, this is the entry set for the given block. + /// + /// For forward analyses, this is the state that will be propagated to its + /// successors (ignoring edge-specific effects). + pub fn seek_to_block_end(&mut self, block: BasicBlock) { + if A::Direction::IS_BACKWARD { + self.seek_to_block_entry(block) + } else { + self.seek_after(self.body.terminator_loc(block), Effect::Primary) + } + } + + /// Advances the cursor to hold the dataflow state at `target` before its "primary" effect is + /// applied. + /// + /// The "before" effect at the target location *will be* applied. + pub fn seek_before_primary_effect(&mut self, target: Location) { + self.seek_after(target, Effect::Before) + } + + /// Advances the cursor to hold the dataflow state at `target` after its "primary" effect is + /// applied. + /// + /// The "before" effect at the target location will be applied as well. + pub fn seek_after_primary_effect(&mut self, target: Location) { + self.seek_after(target, Effect::Primary) + } + + fn seek_after(&mut self, target: Location, effect: Effect) { + assert!(target <= self.body.terminator_loc(target.block)); + + // Reset to the entry of the target block if any of the following are true: + // - A custom effect has been applied to the cursor state. + // - We are in a different block than the target. + // - We are in the same block but have advanced past the target effect. + if self.state_needs_reset || self.pos.block != target.block { + self.seek_to_block_entry(target.block); + } else if let Some(curr_effect) = self.pos.curr_effect_index { + let mut ord = curr_effect.statement_index.cmp(&target.statement_index); + if A::Direction::IS_BACKWARD { + ord = ord.reverse() + } + + match ord.then_with(|| curr_effect.effect.cmp(&effect)) { + Ordering::Equal => return, + Ordering::Greater => self.seek_to_block_entry(target.block), + Ordering::Less => {} + } + } + + // At this point, the cursor is in the same block as the target location at an earlier + // statement. + debug_assert_eq!(target.block, self.pos.block); + + let block_data = &self.body[target.block]; + let next_effect = if A::Direction::IS_FORWARD { + #[rustfmt::skip] + self.pos.curr_effect_index.map_or_else( + || Effect::Before.at_index(0), + EffectIndex::next_in_forward_order, + ) + } else { + self.pos.curr_effect_index.map_or_else( + || Effect::Before.at_index(block_data.statements.len()), + EffectIndex::next_in_backward_order, + ) + }; + + let analysis = &self.results.borrow().analysis; + let target_effect_index = effect.at_index(target.statement_index); + + A::Direction::apply_effects_in_range( + analysis, + &mut self.state, + target.block, + block_data, + next_effect..=target_effect_index, + ); + + self.pos = + CursorPosition { block: target.block, curr_effect_index: Some(target_effect_index) }; + } + + /// Applies `f` to the cursor's internal state. + /// + /// This can be used, e.g., to apply the call return effect directly to the cursor without + /// creating an extra copy of the dataflow state. + pub fn apply_custom_effect(&mut self, f: impl FnOnce(&A, &mut A::Domain)) { + f(&self.results.borrow().analysis, &mut self.state); + self.state_needs_reset = true; + } +} + +impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R> +where + A: crate::GenKillAnalysis<'tcx>, + A::Domain: BitSetExt<A::Idx>, + R: Borrow<Results<'tcx, A>>, +{ + pub fn contains(&self, elem: A::Idx) -> bool { + self.get().contains(elem) + } +} + +#[derive(Clone, Copy, Debug)] +struct CursorPosition { + block: BasicBlock, + curr_effect_index: Option<EffectIndex>, +} + +impl CursorPosition { + fn block_entry(block: BasicBlock) -> CursorPosition { + CursorPosition { block, curr_effect_index: None } + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/direction.rs b/compiler/rustc_mir_dataflow/src/framework/direction.rs new file mode 100644 index 000000000..5c77f3ea3 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/direction.rs @@ -0,0 +1,656 @@ +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::{self, BasicBlock, Location, SwitchTargets}; +use rustc_middle::ty::TyCtxt; +use std::ops::RangeInclusive; + +use super::visitor::{ResultsVisitable, ResultsVisitor}; +use super::{ + Analysis, CallReturnPlaces, Effect, EffectIndex, GenKillAnalysis, GenKillSet, SwitchIntTarget, +}; + +pub trait Direction { + const IS_FORWARD: bool; + + const IS_BACKWARD: bool = !Self::IS_FORWARD; + + /// Applies all effects between the given `EffectIndex`s. + /// + /// `effects.start()` must precede or equal `effects.end()` in this direction. + fn apply_effects_in_range<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + effects: RangeInclusive<EffectIndex>, + ) where + A: Analysis<'tcx>; + + fn apply_effects_in_block<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: Analysis<'tcx>; + + fn gen_kill_effects_in_block<'tcx, A>( + analysis: &A, + trans: &mut GenKillSet<A::Idx>, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: GenKillAnalysis<'tcx>; + + fn visit_results_in_block<'mir, 'tcx, F, R>( + state: &mut F, + block: BasicBlock, + block_data: &'mir mir::BasicBlockData<'tcx>, + results: &R, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, + ) where + R: ResultsVisitable<'tcx, FlowState = F>; + + fn join_state_into_successors_of<'tcx, A>( + analysis: &A, + tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + dead_unwinds: Option<&BitSet<BasicBlock>>, + exit_state: &mut A::Domain, + block: (BasicBlock, &'_ mir::BasicBlockData<'tcx>), + propagate: impl FnMut(BasicBlock, &A::Domain), + ) where + A: Analysis<'tcx>; +} + +/// Dataflow that runs from the exit of a block (the terminator), to its entry (the first statement). +pub struct Backward; + +impl Direction for Backward { + const IS_FORWARD: bool = false; + + fn apply_effects_in_block<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: Analysis<'tcx>, + { + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.apply_before_terminator_effect(state, terminator, location); + analysis.apply_terminator_effect(state, terminator, location); + + for (statement_index, statement) in block_data.statements.iter().enumerate().rev() { + let location = Location { block, statement_index }; + analysis.apply_before_statement_effect(state, statement, location); + analysis.apply_statement_effect(state, statement, location); + } + } + + fn gen_kill_effects_in_block<'tcx, A>( + analysis: &A, + trans: &mut GenKillSet<A::Idx>, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: GenKillAnalysis<'tcx>, + { + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.before_terminator_effect(trans, terminator, location); + analysis.terminator_effect(trans, terminator, location); + + for (statement_index, statement) in block_data.statements.iter().enumerate().rev() { + let location = Location { block, statement_index }; + analysis.before_statement_effect(trans, statement, location); + analysis.statement_effect(trans, statement, location); + } + } + + fn apply_effects_in_range<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + effects: RangeInclusive<EffectIndex>, + ) where + A: Analysis<'tcx>, + { + let (from, to) = (*effects.start(), *effects.end()); + let terminator_index = block_data.statements.len(); + + assert!(from.statement_index <= terminator_index); + assert!(!to.precedes_in_backward_order(from)); + + // Handle the statement (or terminator) at `from`. + + let next_effect = match from.effect { + // If we need to apply the terminator effect in all or in part, do so now. + _ if from.statement_index == terminator_index => { + let location = Location { block, statement_index: from.statement_index }; + let terminator = block_data.terminator(); + + if from.effect == Effect::Before { + analysis.apply_before_terminator_effect(state, terminator, location); + if to == Effect::Before.at_index(terminator_index) { + return; + } + } + + analysis.apply_terminator_effect(state, terminator, location); + if to == Effect::Primary.at_index(terminator_index) { + return; + } + + // If `from.statement_index` is `0`, we will have hit one of the earlier comparisons + // with `to`. + from.statement_index - 1 + } + + Effect::Primary => { + let location = Location { block, statement_index: from.statement_index }; + let statement = &block_data.statements[from.statement_index]; + + analysis.apply_statement_effect(state, statement, location); + if to == Effect::Primary.at_index(from.statement_index) { + return; + } + + from.statement_index - 1 + } + + Effect::Before => from.statement_index, + }; + + // Handle all statements between `first_unapplied_idx` and `to.statement_index`. + + for statement_index in (to.statement_index..next_effect).rev().map(|i| i + 1) { + let location = Location { block, statement_index }; + let statement = &block_data.statements[statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + analysis.apply_statement_effect(state, statement, location); + } + + // Handle the statement at `to`. + + let location = Location { block, statement_index: to.statement_index }; + let statement = &block_data.statements[to.statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + + if to.effect == Effect::Before { + return; + } + + analysis.apply_statement_effect(state, statement, location); + } + + fn visit_results_in_block<'mir, 'tcx, F, R>( + state: &mut F, + block: BasicBlock, + block_data: &'mir mir::BasicBlockData<'tcx>, + results: &R, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, + ) where + R: ResultsVisitable<'tcx, FlowState = F>, + { + results.reset_to_block_entry(state, block); + + vis.visit_block_end(&state, block_data, block); + + // Terminator + let loc = Location { block, statement_index: block_data.statements.len() }; + let term = block_data.terminator(); + results.reconstruct_before_terminator_effect(state, term, loc); + vis.visit_terminator_before_primary_effect(state, term, loc); + results.reconstruct_terminator_effect(state, term, loc); + vis.visit_terminator_after_primary_effect(state, term, loc); + + for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() { + let loc = Location { block, statement_index }; + results.reconstruct_before_statement_effect(state, stmt, loc); + vis.visit_statement_before_primary_effect(state, stmt, loc); + results.reconstruct_statement_effect(state, stmt, loc); + vis.visit_statement_after_primary_effect(state, stmt, loc); + } + + vis.visit_block_start(state, block_data, block); + } + + fn join_state_into_successors_of<'tcx, A>( + analysis: &A, + _tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + dead_unwinds: Option<&BitSet<BasicBlock>>, + exit_state: &mut A::Domain, + (bb, _bb_data): (BasicBlock, &'_ mir::BasicBlockData<'tcx>), + mut propagate: impl FnMut(BasicBlock, &A::Domain), + ) where + A: Analysis<'tcx>, + { + for pred in body.basic_blocks.predecessors()[bb].iter().copied() { + match body[pred].terminator().kind { + // Apply terminator-specific edge effects. + // + // FIXME(ecstaticmorse): Avoid cloning the exit state unconditionally. + mir::TerminatorKind::Call { destination, target: Some(dest), .. } if dest == bb => { + let mut tmp = exit_state.clone(); + analysis.apply_call_return_effect( + &mut tmp, + pred, + CallReturnPlaces::Call(destination), + ); + propagate(pred, &tmp); + } + + mir::TerminatorKind::InlineAsm { + destination: Some(dest), ref operands, .. + } if dest == bb => { + let mut tmp = exit_state.clone(); + analysis.apply_call_return_effect( + &mut tmp, + pred, + CallReturnPlaces::InlineAsm(operands), + ); + propagate(pred, &tmp); + } + + mir::TerminatorKind::Yield { resume, resume_arg, .. } if resume == bb => { + let mut tmp = exit_state.clone(); + analysis.apply_yield_resume_effect(&mut tmp, resume, resume_arg); + propagate(pred, &tmp); + } + + mir::TerminatorKind::SwitchInt { targets: _, ref discr, switch_ty: _ } => { + let mut applier = BackwardSwitchIntEdgeEffectsApplier { + body, + pred, + exit_state, + bb, + propagate: &mut propagate, + effects_applied: false, + }; + + analysis.apply_switch_int_edge_effects(pred, discr, &mut applier); + + if !applier.effects_applied { + propagate(pred, exit_state) + } + } + + // Ignore dead unwinds. + mir::TerminatorKind::Call { cleanup: Some(unwind), .. } + | mir::TerminatorKind::Assert { cleanup: Some(unwind), .. } + | mir::TerminatorKind::Drop { unwind: Some(unwind), .. } + | mir::TerminatorKind::DropAndReplace { unwind: Some(unwind), .. } + | mir::TerminatorKind::FalseUnwind { unwind: Some(unwind), .. } + | mir::TerminatorKind::InlineAsm { cleanup: Some(unwind), .. } + if unwind == bb => + { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(pred, exit_state); + } + } + + _ => propagate(pred, exit_state), + } + } + } +} + +struct BackwardSwitchIntEdgeEffectsApplier<'a, 'tcx, D, F> { + body: &'a mir::Body<'tcx>, + pred: BasicBlock, + exit_state: &'a mut D, + bb: BasicBlock, + propagate: &'a mut F, + + effects_applied: bool, +} + +impl<D, F> super::SwitchIntEdgeEffects<D> for BackwardSwitchIntEdgeEffectsApplier<'_, '_, D, F> +where + D: Clone, + F: FnMut(BasicBlock, &D), +{ + fn apply(&mut self, mut apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)) { + assert!(!self.effects_applied); + + let values = &self.body.basic_blocks.switch_sources()[&(self.bb, self.pred)]; + let targets = values.iter().map(|&value| SwitchIntTarget { value, target: self.bb }); + + let mut tmp = None; + for target in targets { + let tmp = opt_clone_from_or_clone(&mut tmp, self.exit_state); + apply_edge_effect(tmp, target); + (self.propagate)(self.pred, tmp); + } + + self.effects_applied = true; + } +} + +/// Dataflow that runs from the entry of a block (the first statement), to its exit (terminator). +pub struct Forward; + +impl Direction for Forward { + const IS_FORWARD: bool = true; + + fn apply_effects_in_block<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: Analysis<'tcx>, + { + for (statement_index, statement) in block_data.statements.iter().enumerate() { + let location = Location { block, statement_index }; + analysis.apply_before_statement_effect(state, statement, location); + analysis.apply_statement_effect(state, statement, location); + } + + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.apply_before_terminator_effect(state, terminator, location); + analysis.apply_terminator_effect(state, terminator, location); + } + + fn gen_kill_effects_in_block<'tcx, A>( + analysis: &A, + trans: &mut GenKillSet<A::Idx>, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + ) where + A: GenKillAnalysis<'tcx>, + { + for (statement_index, statement) in block_data.statements.iter().enumerate() { + let location = Location { block, statement_index }; + analysis.before_statement_effect(trans, statement, location); + analysis.statement_effect(trans, statement, location); + } + + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.before_terminator_effect(trans, terminator, location); + analysis.terminator_effect(trans, terminator, location); + } + + fn apply_effects_in_range<'tcx, A>( + analysis: &A, + state: &mut A::Domain, + block: BasicBlock, + block_data: &mir::BasicBlockData<'tcx>, + effects: RangeInclusive<EffectIndex>, + ) where + A: Analysis<'tcx>, + { + let (from, to) = (*effects.start(), *effects.end()); + let terminator_index = block_data.statements.len(); + + assert!(to.statement_index <= terminator_index); + assert!(!to.precedes_in_forward_order(from)); + + // If we have applied the before affect of the statement or terminator at `from` but not its + // after effect, do so now and start the loop below from the next statement. + + let first_unapplied_index = match from.effect { + Effect::Before => from.statement_index, + + Effect::Primary if from.statement_index == terminator_index => { + debug_assert_eq!(from, to); + + let location = Location { block, statement_index: terminator_index }; + let terminator = block_data.terminator(); + analysis.apply_terminator_effect(state, terminator, location); + return; + } + + Effect::Primary => { + let location = Location { block, statement_index: from.statement_index }; + let statement = &block_data.statements[from.statement_index]; + analysis.apply_statement_effect(state, statement, location); + + // If we only needed to apply the after effect of the statement at `idx`, we are done. + if from == to { + return; + } + + from.statement_index + 1 + } + }; + + // Handle all statements between `from` and `to` whose effects must be applied in full. + + for statement_index in first_unapplied_index..to.statement_index { + let location = Location { block, statement_index }; + let statement = &block_data.statements[statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + analysis.apply_statement_effect(state, statement, location); + } + + // Handle the statement or terminator at `to`. + + let location = Location { block, statement_index: to.statement_index }; + if to.statement_index == terminator_index { + let terminator = block_data.terminator(); + analysis.apply_before_terminator_effect(state, terminator, location); + + if to.effect == Effect::Primary { + analysis.apply_terminator_effect(state, terminator, location); + } + } else { + let statement = &block_data.statements[to.statement_index]; + analysis.apply_before_statement_effect(state, statement, location); + + if to.effect == Effect::Primary { + analysis.apply_statement_effect(state, statement, location); + } + } + } + + fn visit_results_in_block<'mir, 'tcx, F, R>( + state: &mut F, + block: BasicBlock, + block_data: &'mir mir::BasicBlockData<'tcx>, + results: &R, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, + ) where + R: ResultsVisitable<'tcx, FlowState = F>, + { + results.reset_to_block_entry(state, block); + + vis.visit_block_start(state, block_data, block); + + for (statement_index, stmt) in block_data.statements.iter().enumerate() { + let loc = Location { block, statement_index }; + results.reconstruct_before_statement_effect(state, stmt, loc); + vis.visit_statement_before_primary_effect(state, stmt, loc); + results.reconstruct_statement_effect(state, stmt, loc); + vis.visit_statement_after_primary_effect(state, stmt, loc); + } + + let loc = Location { block, statement_index: block_data.statements.len() }; + let term = block_data.terminator(); + results.reconstruct_before_terminator_effect(state, term, loc); + vis.visit_terminator_before_primary_effect(state, term, loc); + results.reconstruct_terminator_effect(state, term, loc); + vis.visit_terminator_after_primary_effect(state, term, loc); + + vis.visit_block_end(state, block_data, block); + } + + fn join_state_into_successors_of<'tcx, A>( + analysis: &A, + _tcx: TyCtxt<'tcx>, + _body: &mir::Body<'tcx>, + dead_unwinds: Option<&BitSet<BasicBlock>>, + exit_state: &mut A::Domain, + (bb, bb_data): (BasicBlock, &'_ mir::BasicBlockData<'tcx>), + mut propagate: impl FnMut(BasicBlock, &A::Domain), + ) where + A: Analysis<'tcx>, + { + use mir::TerminatorKind::*; + match bb_data.terminator().kind { + Return | Resume | Abort | GeneratorDrop | Unreachable => {} + + Goto { target } => propagate(target, exit_state), + + Assert { target, cleanup: unwind, expected: _, msg: _, cond: _ } + | Drop { target, unwind, place: _ } + | DropAndReplace { target, unwind, value: _, place: _ } + | FalseUnwind { real_target: target, unwind } => { + if let Some(unwind) = unwind { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(unwind, exit_state); + } + } + + propagate(target, exit_state); + } + + FalseEdge { real_target, imaginary_target } => { + propagate(real_target, exit_state); + propagate(imaginary_target, exit_state); + } + + Yield { resume: target, drop, resume_arg, value: _ } => { + if let Some(drop) = drop { + propagate(drop, exit_state); + } + + analysis.apply_yield_resume_effect(exit_state, target, resume_arg); + propagate(target, exit_state); + } + + Call { + cleanup, + destination, + target, + func: _, + args: _, + from_hir_call: _, + fn_span: _, + } => { + if let Some(unwind) = cleanup { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(unwind, exit_state); + } + } + + if let Some(target) = target { + // N.B.: This must be done *last*, otherwise the unwind path will see the call + // return effect. + analysis.apply_call_return_effect( + exit_state, + bb, + CallReturnPlaces::Call(destination), + ); + propagate(target, exit_state); + } + } + + InlineAsm { + template: _, + ref operands, + options: _, + line_spans: _, + destination, + cleanup, + } => { + if let Some(unwind) = cleanup { + if dead_unwinds.map_or(true, |dead| !dead.contains(bb)) { + propagate(unwind, exit_state); + } + } + + if let Some(target) = destination { + // N.B.: This must be done *last*, otherwise the unwind path will see the call + // return effect. + analysis.apply_call_return_effect( + exit_state, + bb, + CallReturnPlaces::InlineAsm(operands), + ); + propagate(target, exit_state); + } + } + + SwitchInt { ref targets, ref discr, switch_ty: _ } => { + let mut applier = ForwardSwitchIntEdgeEffectsApplier { + exit_state, + targets, + propagate, + effects_applied: false, + }; + + analysis.apply_switch_int_edge_effects(bb, discr, &mut applier); + + let ForwardSwitchIntEdgeEffectsApplier { + exit_state, + mut propagate, + effects_applied, + .. + } = applier; + + if !effects_applied { + for target in targets.all_targets() { + propagate(*target, exit_state); + } + } + } + } + } +} + +struct ForwardSwitchIntEdgeEffectsApplier<'a, D, F> { + exit_state: &'a mut D, + targets: &'a SwitchTargets, + propagate: F, + + effects_applied: bool, +} + +impl<D, F> super::SwitchIntEdgeEffects<D> for ForwardSwitchIntEdgeEffectsApplier<'_, D, F> +where + D: Clone, + F: FnMut(BasicBlock, &D), +{ + fn apply(&mut self, mut apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)) { + assert!(!self.effects_applied); + + let mut tmp = None; + for (value, target) in self.targets.iter() { + let tmp = opt_clone_from_or_clone(&mut tmp, self.exit_state); + apply_edge_effect(tmp, SwitchIntTarget { value: Some(value), target }); + (self.propagate)(target, tmp); + } + + // Once we get to the final, "otherwise" branch, there is no need to preserve `exit_state`, + // so pass it directly to `apply_edge_effect` to save a clone of the dataflow state. + let otherwise = self.targets.otherwise(); + apply_edge_effect(self.exit_state, SwitchIntTarget { value: None, target: otherwise }); + (self.propagate)(otherwise, self.exit_state); + + self.effects_applied = true; + } +} + +/// An analogue of `Option::get_or_insert_with` that stores a clone of `val` into `opt`, but uses +/// the more efficient `clone_from` if `opt` was `Some`. +/// +/// Returns a mutable reference to the new clone that resides in `opt`. +// +// FIXME: Figure out how to express this using `Option::clone_from`, or maybe lift it into the +// standard library? +fn opt_clone_from_or_clone<'a, T: Clone>(opt: &'a mut Option<T>, val: &T) -> &'a mut T { + if opt.is_some() { + let ret = opt.as_mut().unwrap(); + ret.clone_from(val); + ret + } else { + *opt = Some(val.clone()); + opt.as_mut().unwrap() + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/engine.rs b/compiler/rustc_mir_dataflow/src/framework/engine.rs new file mode 100644 index 000000000..f374658ce --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/engine.rs @@ -0,0 +1,413 @@ +//! A solver for dataflow problems. + +use crate::framework::BitSetExt; + +use std::ffi::OsString; +use std::path::PathBuf; + +use rustc_ast as ast; +use rustc_data_structures::work_queue::WorkQueue; +use rustc_graphviz as dot; +use rustc_hir::def_id::DefId; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::mir::{self, traversal, BasicBlock}; +use rustc_middle::mir::{create_dump_file, dump_enabled}; +use rustc_middle::ty::TyCtxt; +use rustc_span::symbol::{sym, Symbol}; + +use super::fmt::DebugWithContext; +use super::graphviz; +use super::{ + visit_results, Analysis, Direction, GenKill, GenKillAnalysis, GenKillSet, JoinSemiLattice, + ResultsCursor, ResultsVisitor, +}; + +/// A dataflow analysis that has converged to fixpoint. +pub struct Results<'tcx, A> +where + A: Analysis<'tcx>, +{ + pub analysis: A, + pub(super) entry_sets: IndexVec<BasicBlock, A::Domain>, +} + +impl<'tcx, A> Results<'tcx, A> +where + A: Analysis<'tcx>, +{ + /// Creates a `ResultsCursor` that can inspect these `Results`. + pub fn into_results_cursor<'mir>( + self, + body: &'mir mir::Body<'tcx>, + ) -> ResultsCursor<'mir, 'tcx, A> { + ResultsCursor::new(body, self) + } + + /// Gets the dataflow state for the given block. + pub fn entry_set_for_block(&self, block: BasicBlock) -> &A::Domain { + &self.entry_sets[block] + } + + pub fn visit_with<'mir>( + &self, + body: &'mir mir::Body<'tcx>, + blocks: impl IntoIterator<Item = BasicBlock>, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = A::Domain>, + ) { + visit_results(body, blocks, self, vis) + } + + pub fn visit_reachable_with<'mir>( + &self, + body: &'mir mir::Body<'tcx>, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = A::Domain>, + ) { + let blocks = mir::traversal::reachable(body); + visit_results(body, blocks.map(|(bb, _)| bb), self, vis) + } +} + +/// A solver for dataflow problems. +pub struct Engine<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + tcx: TyCtxt<'tcx>, + body: &'a mir::Body<'tcx>, + dead_unwinds: Option<&'a BitSet<BasicBlock>>, + entry_sets: IndexVec<BasicBlock, A::Domain>, + pass_name: Option<&'static str>, + analysis: A, + + /// Cached, cumulative transfer functions for each block. + // + // FIXME(ecstaticmorse): This boxed `Fn` trait object is invoked inside a tight loop for + // gen/kill problems on cyclic CFGs. This is not ideal, but it doesn't seem to degrade + // performance in practice. I've tried a few ways to avoid this, but they have downsides. See + // the message for the commit that added this FIXME for more information. + apply_trans_for_block: Option<Box<dyn Fn(BasicBlock, &mut A::Domain)>>, +} + +impl<'a, 'tcx, A, D, T> Engine<'a, 'tcx, A> +where + A: GenKillAnalysis<'tcx, Idx = T, Domain = D>, + D: Clone + JoinSemiLattice + GenKill<T> + BitSetExt<T>, + T: Idx, +{ + /// Creates a new `Engine` to solve a gen-kill dataflow problem. + pub fn new_gen_kill(tcx: TyCtxt<'tcx>, body: &'a mir::Body<'tcx>, analysis: A) -> Self { + // If there are no back-edges in the control-flow graph, we only ever need to apply the + // transfer function for each block exactly once (assuming that we process blocks in RPO). + // + // In this case, there's no need to compute the block transfer functions ahead of time. + if !body.basic_blocks.is_cfg_cyclic() { + return Self::new(tcx, body, analysis, None); + } + + // Otherwise, compute and store the cumulative transfer function for each block. + + let identity = GenKillSet::identity(analysis.bottom_value(body).domain_size()); + let mut trans_for_block = IndexVec::from_elem(identity, body.basic_blocks()); + + for (block, block_data) in body.basic_blocks().iter_enumerated() { + let trans = &mut trans_for_block[block]; + A::Direction::gen_kill_effects_in_block(&analysis, trans, block, block_data); + } + + let apply_trans = Box::new(move |bb: BasicBlock, state: &mut A::Domain| { + trans_for_block[bb].apply(state); + }); + + Self::new(tcx, body, analysis, Some(apply_trans as Box<_>)) + } +} + +impl<'a, 'tcx, A, D> Engine<'a, 'tcx, A> +where + A: Analysis<'tcx, Domain = D>, + D: Clone + JoinSemiLattice, +{ + /// Creates a new `Engine` to solve a dataflow problem with an arbitrary transfer + /// function. + /// + /// Gen-kill problems should use `new_gen_kill`, which will coalesce transfer functions for + /// better performance. + pub fn new_generic(tcx: TyCtxt<'tcx>, body: &'a mir::Body<'tcx>, analysis: A) -> Self { + Self::new(tcx, body, analysis, None) + } + + fn new( + tcx: TyCtxt<'tcx>, + body: &'a mir::Body<'tcx>, + analysis: A, + apply_trans_for_block: Option<Box<dyn Fn(BasicBlock, &mut A::Domain)>>, + ) -> Self { + let bottom_value = analysis.bottom_value(body); + let mut entry_sets = IndexVec::from_elem(bottom_value.clone(), body.basic_blocks()); + analysis.initialize_start_block(body, &mut entry_sets[mir::START_BLOCK]); + + if A::Direction::IS_BACKWARD && entry_sets[mir::START_BLOCK] != bottom_value { + bug!("`initialize_start_block` is not yet supported for backward dataflow analyses"); + } + + Engine { + analysis, + tcx, + body, + dead_unwinds: None, + pass_name: None, + entry_sets, + apply_trans_for_block, + } + } + + /// Signals that we do not want dataflow state to propagate across unwind edges for these + /// `BasicBlock`s. + /// + /// You must take care that `dead_unwinds` does not contain a `BasicBlock` that *can* actually + /// unwind during execution. Otherwise, your dataflow results will not be correct. + pub fn dead_unwinds(mut self, dead_unwinds: &'a BitSet<BasicBlock>) -> Self { + self.dead_unwinds = Some(dead_unwinds); + self + } + + /// Adds an identifier to the graphviz output for this particular run of a dataflow analysis. + /// + /// Some analyses are run multiple times in the compilation pipeline. Give them a `pass_name` + /// to differentiate them. Otherwise, only the results for the latest run will be saved. + pub fn pass_name(mut self, name: &'static str) -> Self { + self.pass_name = Some(name); + self + } + + /// Computes the fixpoint for this dataflow problem and returns it. + pub fn iterate_to_fixpoint(self) -> Results<'tcx, A> + where + A::Domain: DebugWithContext<A>, + { + let Engine { + analysis, + body, + dead_unwinds, + mut entry_sets, + tcx, + apply_trans_for_block, + pass_name, + .. + } = self; + + let mut dirty_queue: WorkQueue<BasicBlock> = + WorkQueue::with_none(body.basic_blocks().len()); + + if A::Direction::IS_FORWARD { + for (bb, _) in traversal::reverse_postorder(body) { + dirty_queue.insert(bb); + } + } else { + // Reverse post-order on the reverse CFG may generate a better iteration order for + // backward dataflow analyses, but probably not enough to matter. + for (bb, _) in traversal::postorder(body) { + dirty_queue.insert(bb); + } + } + + // `state` is not actually used between iterations; + // this is just an optimization to avoid reallocating + // every iteration. + let mut state = analysis.bottom_value(body); + while let Some(bb) = dirty_queue.pop() { + let bb_data = &body[bb]; + + // Set the state to the entry state of the block. + // This is equivalent to `state = entry_sets[bb].clone()`, + // but it saves an allocation, thus improving compile times. + state.clone_from(&entry_sets[bb]); + + // Apply the block transfer function, using the cached one if it exists. + match &apply_trans_for_block { + Some(apply) => apply(bb, &mut state), + None => A::Direction::apply_effects_in_block(&analysis, &mut state, bb, bb_data), + } + + A::Direction::join_state_into_successors_of( + &analysis, + tcx, + body, + dead_unwinds, + &mut state, + (bb, bb_data), + |target: BasicBlock, state: &A::Domain| { + let set_changed = entry_sets[target].join(state); + if set_changed { + dirty_queue.insert(target); + } + }, + ); + } + + let results = Results { analysis, entry_sets }; + + let res = write_graphviz_results(tcx, &body, &results, pass_name); + if let Err(e) = res { + error!("Failed to write graphviz dataflow results: {}", e); + } + + results + } +} + +// Graphviz + +/// Writes a DOT file containing the results of a dataflow analysis if the user requested it via +/// `rustc_mir` attributes. +fn write_graphviz_results<'tcx, A>( + tcx: TyCtxt<'tcx>, + body: &mir::Body<'tcx>, + results: &Results<'tcx, A>, + pass_name: Option<&'static str>, +) -> std::io::Result<()> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + use std::fs; + use std::io::{self, Write}; + + let def_id = body.source.def_id(); + let Ok(attrs) = RustcMirAttrs::parse(tcx, def_id) else { + // Invalid `rustc_mir` attrs are reported in `RustcMirAttrs::parse` + return Ok(()); + }; + + let mut file = match attrs.output_path(A::NAME) { + Some(path) => { + debug!("printing dataflow results for {:?} to {}", def_id, path.display()); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + io::BufWriter::new(fs::File::create(&path)?) + } + + None if tcx.sess.opts.unstable_opts.dump_mir_dataflow + && dump_enabled(tcx, A::NAME, def_id) => + { + create_dump_file( + tcx, + ".dot", + None, + A::NAME, + &pass_name.unwrap_or("-----"), + body.source, + )? + } + + _ => return Ok(()), + }; + + let style = match attrs.formatter { + Some(sym::two_phase) => graphviz::OutputStyle::BeforeAndAfter, + _ => graphviz::OutputStyle::AfterOnly, + }; + + let mut buf = Vec::new(); + + let graphviz = graphviz::Formatter::new(body, results, style); + let mut render_opts = + vec![dot::RenderOption::Fontname(tcx.sess.opts.unstable_opts.graphviz_font.clone())]; + if tcx.sess.opts.unstable_opts.graphviz_dark_mode { + render_opts.push(dot::RenderOption::DarkTheme); + } + dot::render_opts(&graphviz, &mut buf, &render_opts)?; + + file.write_all(&buf)?; + + Ok(()) +} + +#[derive(Default)] +struct RustcMirAttrs { + basename_and_suffix: Option<PathBuf>, + formatter: Option<Symbol>, +} + +impl RustcMirAttrs { + fn parse(tcx: TyCtxt<'_>, def_id: DefId) -> Result<Self, ()> { + let mut result = Ok(()); + let mut ret = RustcMirAttrs::default(); + + let rustc_mir_attrs = tcx + .get_attrs(def_id, sym::rustc_mir) + .flat_map(|attr| attr.meta_item_list().into_iter().flat_map(|v| v.into_iter())); + + for attr in rustc_mir_attrs { + let attr_result = if attr.has_name(sym::borrowck_graphviz_postflow) { + Self::set_field(&mut ret.basename_and_suffix, tcx, &attr, |s| { + let path = PathBuf::from(s.to_string()); + match path.file_name() { + Some(_) => Ok(path), + None => { + tcx.sess.span_err(attr.span(), "path must end in a filename"); + Err(()) + } + } + }) + } else if attr.has_name(sym::borrowck_graphviz_format) { + Self::set_field(&mut ret.formatter, tcx, &attr, |s| match s { + sym::gen_kill | sym::two_phase => Ok(s), + _ => { + tcx.sess.span_err(attr.span(), "unknown formatter"); + Err(()) + } + }) + } else { + Ok(()) + }; + + result = result.and(attr_result); + } + + result.map(|()| ret) + } + + fn set_field<T>( + field: &mut Option<T>, + tcx: TyCtxt<'_>, + attr: &ast::NestedMetaItem, + mapper: impl FnOnce(Symbol) -> Result<T, ()>, + ) -> Result<(), ()> { + if field.is_some() { + tcx.sess + .span_err(attr.span(), &format!("duplicate values for `{}`", attr.name_or_empty())); + + return Err(()); + } + + if let Some(s) = attr.value_str() { + *field = Some(mapper(s)?); + Ok(()) + } else { + tcx.sess + .span_err(attr.span(), &format!("`{}` requires an argument", attr.name_or_empty())); + Err(()) + } + } + + /// Returns the path where dataflow results should be written, or `None` + /// `borrowck_graphviz_postflow` was not specified. + /// + /// This performs the following transformation to the argument of `borrowck_graphviz_postflow`: + /// + /// "path/suffix.dot" -> "path/analysis_name_suffix.dot" + fn output_path(&self, analysis_name: &str) -> Option<PathBuf> { + let mut ret = self.basename_and_suffix.as_ref().cloned()?; + let suffix = ret.file_name().unwrap(); // Checked when parsing attrs + + let mut file_name: OsString = analysis_name.into(); + file_name.push("_"); + file_name.push(suffix); + ret.set_file_name(file_name); + + Some(ret) + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs new file mode 100644 index 000000000..209e6f7ac --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs @@ -0,0 +1,211 @@ +//! Custom formatting traits used when outputting Graphviz diagrams with the results of a dataflow +//! analysis. + +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; +use rustc_index::vec::Idx; +use std::fmt; + +/// An extension to `fmt::Debug` for data that can be better printed with some auxiliary data `C`. +pub trait DebugWithContext<C>: Eq + fmt::Debug { + fn fmt_with(&self, _ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self, f) + } + + /// Print the difference between `self` and `old`. + /// + /// This should print nothing if `self == old`. + /// + /// `+` and `-` are typically used to indicate differences. However, these characters are + /// fairly common and may be needed to print a types representation. If using them to indicate + /// a diff, prefix them with the "Unit Separator" control character (␟ U+001F). + fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self == old { + return Ok(()); + } + + write!(f, "\u{001f}+")?; + self.fmt_with(ctxt, f)?; + + if f.alternate() { + write!(f, "\n")?; + } else { + write!(f, "\t")?; + } + + write!(f, "\u{001f}-")?; + old.fmt_with(ctxt, f) + } +} + +/// Implements `fmt::Debug` by deferring to `<T as DebugWithContext<C>>::fmt_with`. +pub struct DebugWithAdapter<'a, T, C> { + pub this: T, + pub ctxt: &'a C, +} + +impl<T, C> fmt::Debug for DebugWithAdapter<'_, T, C> +where + T: DebugWithContext<C>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.this.fmt_with(self.ctxt, f) + } +} + +/// Implements `fmt::Debug` by deferring to `<T as DebugWithContext<C>>::fmt_diff_with`. +pub struct DebugDiffWithAdapter<'a, T, C> { + pub new: T, + pub old: T, + pub ctxt: &'a C, +} + +impl<T, C> fmt::Debug for DebugDiffWithAdapter<'_, T, C> +where + T: DebugWithContext<C>, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.new.fmt_diff_with(&self.old, self.ctxt, f) + } +} + +// Impls + +impl<T, C> DebugWithContext<C> for BitSet<T> +where + T: Idx + DebugWithContext<C>, +{ + fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter().map(|i| DebugWithAdapter { this: i, ctxt })).finish() + } + + fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let size = self.domain_size(); + assert_eq!(size, old.domain_size()); + + let mut set_in_self = HybridBitSet::new_empty(size); + let mut cleared_in_self = HybridBitSet::new_empty(size); + + for i in (0..size).map(T::new) { + match (self.contains(i), old.contains(i)) { + (true, false) => set_in_self.insert(i), + (false, true) => cleared_in_self.insert(i), + _ => continue, + }; + } + + fmt_diff(&set_in_self, &cleared_in_self, ctxt, f) + } +} + +impl<T, C> DebugWithContext<C> for ChunkedBitSet<T> +where + T: Idx + DebugWithContext<C>, +{ + fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter().map(|i| DebugWithAdapter { this: i, ctxt })).finish() + } + + fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let size = self.domain_size(); + assert_eq!(size, old.domain_size()); + + let mut set_in_self = HybridBitSet::new_empty(size); + let mut cleared_in_self = HybridBitSet::new_empty(size); + + for i in (0..size).map(T::new) { + match (self.contains(i), old.contains(i)) { + (true, false) => set_in_self.insert(i), + (false, true) => cleared_in_self.insert(i), + _ => continue, + }; + } + + fmt_diff(&set_in_self, &cleared_in_self, ctxt, f) + } +} + +fn fmt_diff<T, C>( + inserted: &HybridBitSet<T>, + removed: &HybridBitSet<T>, + ctxt: &C, + f: &mut fmt::Formatter<'_>, +) -> fmt::Result +where + T: Idx + DebugWithContext<C>, +{ + let mut first = true; + for idx in inserted.iter() { + let delim = if first { + "\u{001f}+" + } else if f.alternate() { + "\n\u{001f}+" + } else { + ", " + }; + + write!(f, "{}", delim)?; + idx.fmt_with(ctxt, f)?; + first = false; + } + + if !f.alternate() { + first = true; + if !inserted.is_empty() && !removed.is_empty() { + write!(f, "\t")?; + } + } + + for idx in removed.iter() { + let delim = if first { + "\u{001f}-" + } else if f.alternate() { + "\n\u{001f}-" + } else { + ", " + }; + + write!(f, "{}", delim)?; + idx.fmt_with(ctxt, f)?; + first = false; + } + + Ok(()) +} + +impl<T, C> DebugWithContext<C> for &'_ T +where + T: DebugWithContext<C>, +{ + fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*self).fmt_with(ctxt, f) + } + + fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (*self).fmt_diff_with(*old, ctxt, f) + } +} + +impl<C> DebugWithContext<C> for rustc_middle::mir::Local {} +impl<C> DebugWithContext<C> for crate::move_paths::InitIndex {} + +impl<'tcx, C> DebugWithContext<C> for crate::move_paths::MovePathIndex +where + C: crate::move_paths::HasMoveData<'tcx>, +{ + fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", ctxt.move_data().move_paths[*self]) + } +} + +impl<T, C> DebugWithContext<C> for crate::lattice::Dual<T> +where + T: DebugWithContext<C>, +{ + fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (self.0).fmt_with(ctxt, f) + } + + fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result { + (self.0).fmt_diff_with(&old.0, ctxt, f) + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/graphviz.rs b/compiler/rustc_mir_dataflow/src/framework/graphviz.rs new file mode 100644 index 000000000..c94198c56 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/graphviz.rs @@ -0,0 +1,667 @@ +//! A helpful diagram for debugging dataflow problems. + +use std::borrow::Cow; +use std::sync::OnceLock; +use std::{io, ops, str}; + +use regex::Regex; +use rustc_graphviz as dot; +use rustc_middle::mir::graphviz_safe_def_name; +use rustc_middle::mir::{self, BasicBlock, Body, Location}; + +use super::fmt::{DebugDiffWithAdapter, DebugWithAdapter, DebugWithContext}; +use super::{Analysis, CallReturnPlaces, Direction, Results, ResultsRefCursor, ResultsVisitor}; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OutputStyle { + AfterOnly, + BeforeAndAfter, +} + +impl OutputStyle { + fn num_state_columns(&self) -> usize { + match self { + Self::AfterOnly => 1, + Self::BeforeAndAfter => 2, + } + } +} + +pub struct Formatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + body: &'a Body<'tcx>, + results: &'a Results<'tcx, A>, + style: OutputStyle, +} + +impl<'a, 'tcx, A> Formatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + pub fn new(body: &'a Body<'tcx>, results: &'a Results<'tcx, A>, style: OutputStyle) -> Self { + Formatter { body, results, style } + } +} + +/// A pair of a basic block and an index into that basic blocks `successors`. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct CfgEdge { + source: BasicBlock, + index: usize, +} + +fn dataflow_successors(body: &Body<'_>, bb: BasicBlock) -> Vec<CfgEdge> { + body[bb] + .terminator() + .successors() + .enumerate() + .map(|(index, _)| CfgEdge { source: bb, index }) + .collect() +} + +impl<'tcx, A> dot::Labeller<'_> for Formatter<'_, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + type Node = BasicBlock; + type Edge = CfgEdge; + + fn graph_id(&self) -> dot::Id<'_> { + let name = graphviz_safe_def_name(self.body.source.def_id()); + dot::Id::new(format!("graph_for_def_id_{}", name)).unwrap() + } + + fn node_id(&self, n: &Self::Node) -> dot::Id<'_> { + dot::Id::new(format!("bb_{}", n.index())).unwrap() + } + + fn node_label(&self, block: &Self::Node) -> dot::LabelText<'_> { + let mut label = Vec::new(); + let mut fmt = BlockFormatter { + results: ResultsRefCursor::new(self.body, self.results), + style: self.style, + bg: Background::Light, + }; + + fmt.write_node_label(&mut label, self.body, *block).unwrap(); + dot::LabelText::html(String::from_utf8(label).unwrap()) + } + + fn node_shape(&self, _n: &Self::Node) -> Option<dot::LabelText<'_>> { + Some(dot::LabelText::label("none")) + } + + fn edge_label(&self, e: &Self::Edge) -> dot::LabelText<'_> { + let label = &self.body[e.source].terminator().kind.fmt_successor_labels()[e.index]; + dot::LabelText::label(label.clone()) + } +} + +impl<'a, 'tcx, A> dot::GraphWalk<'a> for Formatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + type Node = BasicBlock; + type Edge = CfgEdge; + + fn nodes(&self) -> dot::Nodes<'_, Self::Node> { + self.body.basic_blocks().indices().collect::<Vec<_>>().into() + } + + fn edges(&self) -> dot::Edges<'_, Self::Edge> { + self.body + .basic_blocks() + .indices() + .flat_map(|bb| dataflow_successors(self.body, bb)) + .collect::<Vec<_>>() + .into() + } + + fn source(&self, edge: &Self::Edge) -> Self::Node { + edge.source + } + + fn target(&self, edge: &Self::Edge) -> Self::Node { + self.body[edge.source].terminator().successors().nth(edge.index).unwrap() + } +} + +struct BlockFormatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + results: ResultsRefCursor<'a, 'a, 'tcx, A>, + bg: Background, + style: OutputStyle, +} + +impl<'a, 'tcx, A> BlockFormatter<'a, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + const HEADER_COLOR: &'static str = "#a0a0a0"; + + fn toggle_background(&mut self) -> Background { + let bg = self.bg; + self.bg = !bg; + bg + } + + fn write_node_label( + &mut self, + w: &mut impl io::Write, + body: &'a Body<'tcx>, + block: BasicBlock, + ) -> io::Result<()> { + // Sample output: + // +-+-----------------------------------------------+ + // A | bb4 | + // +-+----------------------------------+------------+ + // B | MIR | STATE | + // +-+----------------------------------+------------+ + // C | | (on entry) | {_0,_2,_3} | + // +-+----------------------------------+------------+ + // D |0| StorageLive(_7) | | + // +-+----------------------------------+------------+ + // |1| StorageLive(_8) | | + // +-+----------------------------------+------------+ + // |2| _8 = &mut _1 | +_8 | + // +-+----------------------------------+------------+ + // E |T| _4 = const Foo::twiddle(move _2) | -_2 | + // +-+----------------------------------+------------+ + // F | | (on unwind) | {_0,_3,_8} | + // +-+----------------------------------+------------+ + // | | (on successful return) | +_4 | + // +-+----------------------------------+------------+ + + // N.B., Some attributes (`align`, `balign`) are repeated on parent elements and their + // children. This is because `xdot` seemed to have a hard time correctly propagating + // attributes. Make sure to test the output before trying to remove the redundancy. + // Notably, `align` was found to have no effect when applied only to <table>. + + let table_fmt = concat!( + " border=\"1\"", + " cellborder=\"1\"", + " cellspacing=\"0\"", + " cellpadding=\"3\"", + " sides=\"rb\"", + ); + write!(w, r#"<table{fmt}>"#, fmt = table_fmt)?; + + // A + B: Block header + match self.style { + OutputStyle::AfterOnly => self.write_block_header_simple(w, block)?, + OutputStyle::BeforeAndAfter => { + self.write_block_header_with_state_columns(w, block, &["BEFORE", "AFTER"])? + } + } + + // C: State at start of block + self.bg = Background::Light; + self.results.seek_to_block_start(block); + let block_start_state = self.results.get().clone(); + self.write_row_with_full_state(w, "", "(on start)")?; + + // D + E: Statement and terminator transfer functions + self.write_statements_and_terminator(w, body, block)?; + + // F: State at end of block + + let terminator = body[block].terminator(); + + // Write the full dataflow state immediately after the terminator if it differs from the + // state at block entry. + self.results.seek_to_block_end(block); + if self.results.get() != &block_start_state || A::Direction::IS_BACKWARD { + let after_terminator_name = match terminator.kind { + mir::TerminatorKind::Call { target: Some(_), .. } => "(on unwind)", + _ => "(on end)", + }; + + self.write_row_with_full_state(w, "", after_terminator_name)?; + } + + // Write any changes caused by terminator-specific effects. + // + // FIXME: These should really be printed as part of each outgoing edge rather than the node + // for the basic block itself. That way, we could display terminator-specific effects for + // backward dataflow analyses as well as effects for `SwitchInt` terminators. + match terminator.kind { + mir::TerminatorKind::Call { destination, .. } => { + self.write_row(w, "", "(on successful return)", |this, w, fmt| { + let state_on_unwind = this.results.get().clone(); + this.results.apply_custom_effect(|analysis, state| { + analysis.apply_call_return_effect( + state, + block, + CallReturnPlaces::Call(destination), + ); + }); + + write!( + w, + r#"<td balign="left" colspan="{colspan}" {fmt} align="left">{diff}</td>"#, + colspan = this.style.num_state_columns(), + fmt = fmt, + diff = diff_pretty( + this.results.get(), + &state_on_unwind, + this.results.analysis() + ), + ) + })?; + } + + mir::TerminatorKind::Yield { resume, resume_arg, .. } => { + self.write_row(w, "", "(on yield resume)", |this, w, fmt| { + let state_on_generator_drop = this.results.get().clone(); + this.results.apply_custom_effect(|analysis, state| { + analysis.apply_yield_resume_effect(state, resume, resume_arg); + }); + + write!( + w, + r#"<td balign="left" colspan="{colspan}" {fmt} align="left">{diff}</td>"#, + colspan = this.style.num_state_columns(), + fmt = fmt, + diff = diff_pretty( + this.results.get(), + &state_on_generator_drop, + this.results.analysis() + ), + ) + })?; + } + + mir::TerminatorKind::InlineAsm { destination: Some(_), ref operands, .. } => { + self.write_row(w, "", "(on successful return)", |this, w, fmt| { + let state_on_unwind = this.results.get().clone(); + this.results.apply_custom_effect(|analysis, state| { + analysis.apply_call_return_effect( + state, + block, + CallReturnPlaces::InlineAsm(operands), + ); + }); + + write!( + w, + r#"<td balign="left" colspan="{colspan}" {fmt} align="left">{diff}</td>"#, + colspan = this.style.num_state_columns(), + fmt = fmt, + diff = diff_pretty( + this.results.get(), + &state_on_unwind, + this.results.analysis() + ), + ) + })?; + } + + _ => {} + }; + + write!(w, "</table>") + } + + fn write_block_header_simple( + &mut self, + w: &mut impl io::Write, + block: BasicBlock, + ) -> io::Result<()> { + // +-------------------------------------------------+ + // A | bb4 | + // +-----------------------------------+-------------+ + // B | MIR | STATE | + // +-+---------------------------------+-------------+ + // | | ... | | + + // A + write!( + w, + concat!("<tr>", r#"<td colspan="3" sides="tl">bb{block_id}</td>"#, "</tr>",), + block_id = block.index(), + )?; + + // B + write!( + w, + concat!( + "<tr>", + r#"<td colspan="2" {fmt}>MIR</td>"#, + r#"<td {fmt}>STATE</td>"#, + "</tr>", + ), + fmt = format!("bgcolor=\"{}\" sides=\"tl\"", Self::HEADER_COLOR), + ) + } + + fn write_block_header_with_state_columns( + &mut self, + w: &mut impl io::Write, + block: BasicBlock, + state_column_names: &[&str], + ) -> io::Result<()> { + // +------------------------------------+-------------+ + // A | bb4 | STATE | + // +------------------------------------+------+------+ + // B | MIR | GEN | KILL | + // +-+----------------------------------+------+------+ + // | | ... | | | + + // A + write!( + w, + concat!( + "<tr>", + r#"<td {fmt} colspan="2">bb{block_id}</td>"#, + r#"<td {fmt} colspan="{num_state_cols}">STATE</td>"#, + "</tr>", + ), + fmt = "sides=\"tl\"", + num_state_cols = state_column_names.len(), + block_id = block.index(), + )?; + + // B + let fmt = format!("bgcolor=\"{}\" sides=\"tl\"", Self::HEADER_COLOR); + write!(w, concat!("<tr>", r#"<td colspan="2" {fmt}>MIR</td>"#,), fmt = fmt,)?; + + for name in state_column_names { + write!(w, "<td {fmt}>{name}</td>", fmt = fmt, name = name)?; + } + + write!(w, "</tr>") + } + + fn write_statements_and_terminator( + &mut self, + w: &mut impl io::Write, + body: &'a Body<'tcx>, + block: BasicBlock, + ) -> io::Result<()> { + let diffs = StateDiffCollector::run(body, block, self.results.results(), self.style); + + let mut befores = diffs.before.map(|v| v.into_iter()); + let mut afters = diffs.after.into_iter(); + + let next_in_dataflow_order = |it: &mut std::vec::IntoIter<_>| { + if A::Direction::IS_FORWARD { it.next().unwrap() } else { it.next_back().unwrap() } + }; + + for (i, statement) in body[block].statements.iter().enumerate() { + let statement_str = format!("{:?}", statement); + let index_str = format!("{}", i); + + let after = next_in_dataflow_order(&mut afters); + let before = befores.as_mut().map(next_in_dataflow_order); + + self.write_row(w, &index_str, &statement_str, |_this, w, fmt| { + if let Some(before) = before { + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = before)?; + } + + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = after) + })?; + } + + let after = next_in_dataflow_order(&mut afters); + let before = befores.as_mut().map(next_in_dataflow_order); + + assert!(afters.is_empty()); + assert!(befores.as_ref().map_or(true, ExactSizeIterator::is_empty)); + + let terminator = body[block].terminator(); + let mut terminator_str = String::new(); + terminator.kind.fmt_head(&mut terminator_str).unwrap(); + + self.write_row(w, "T", &terminator_str, |_this, w, fmt| { + if let Some(before) = before { + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = before)?; + } + + write!(w, r#"<td {fmt} align="left">{diff}</td>"#, fmt = fmt, diff = after) + }) + } + + /// Write a row with the given index and MIR, using the function argument to fill in the + /// "STATE" column(s). + fn write_row<W: io::Write>( + &mut self, + w: &mut W, + i: &str, + mir: &str, + f: impl FnOnce(&mut Self, &mut W, &str) -> io::Result<()>, + ) -> io::Result<()> { + let bg = self.toggle_background(); + let valign = if mir.starts_with("(on ") && mir != "(on entry)" { "bottom" } else { "top" }; + + let fmt = format!("valign=\"{}\" sides=\"tl\" {}", valign, bg.attr()); + + write!( + w, + concat!( + "<tr>", + r#"<td {fmt} align="right">{i}</td>"#, + r#"<td {fmt} align="left">{mir}</td>"#, + ), + i = i, + fmt = fmt, + mir = dot::escape_html(mir), + )?; + + f(self, w, &fmt)?; + write!(w, "</tr>") + } + + fn write_row_with_full_state( + &mut self, + w: &mut impl io::Write, + i: &str, + mir: &str, + ) -> io::Result<()> { + self.write_row(w, i, mir, |this, w, fmt| { + let state = this.results.get(); + let analysis = this.results.analysis(); + + // FIXME: The full state vector can be quite long. It would be nice to split on commas + // and use some text wrapping algorithm. + write!( + w, + r#"<td colspan="{colspan}" {fmt} align="left">{state}</td>"#, + colspan = this.style.num_state_columns(), + fmt = fmt, + state = format!("{:?}", DebugWithAdapter { this: state, ctxt: analysis }), + ) + }) + } +} + +struct StateDiffCollector<'a, 'tcx, A> +where + A: Analysis<'tcx>, +{ + analysis: &'a A, + prev_state: A::Domain, + before: Option<Vec<String>>, + after: Vec<String>, +} + +impl<'a, 'tcx, A> StateDiffCollector<'a, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + fn run( + body: &'a mir::Body<'tcx>, + block: BasicBlock, + results: &'a Results<'tcx, A>, + style: OutputStyle, + ) -> Self { + let mut collector = StateDiffCollector { + analysis: &results.analysis, + prev_state: results.analysis.bottom_value(body), + after: vec![], + before: (style == OutputStyle::BeforeAndAfter).then_some(vec![]), + }; + + results.visit_with(body, std::iter::once(block), &mut collector); + collector + } +} + +impl<'a, 'tcx, A> ResultsVisitor<'a, 'tcx> for StateDiffCollector<'a, 'tcx, A> +where + A: Analysis<'tcx>, + A::Domain: DebugWithContext<A>, +{ + type FlowState = A::Domain; + + fn visit_block_start( + &mut self, + state: &Self::FlowState, + _block_data: &mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + if A::Direction::IS_FORWARD { + self.prev_state.clone_from(state); + } + } + + fn visit_block_end( + &mut self, + state: &Self::FlowState, + _block_data: &mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + if A::Direction::IS_BACKWARD { + self.prev_state.clone_from(state); + } + } + + fn visit_statement_before_primary_effect( + &mut self, + state: &Self::FlowState, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + if let Some(before) = self.before.as_mut() { + before.push(diff_pretty(state, &self.prev_state, self.analysis)); + self.prev_state.clone_from(state) + } + } + + fn visit_statement_after_primary_effect( + &mut self, + state: &Self::FlowState, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + self.after.push(diff_pretty(state, &self.prev_state, self.analysis)); + self.prev_state.clone_from(state) + } + + fn visit_terminator_before_primary_effect( + &mut self, + state: &Self::FlowState, + _terminator: &mir::Terminator<'tcx>, + _location: Location, + ) { + if let Some(before) = self.before.as_mut() { + before.push(diff_pretty(state, &self.prev_state, self.analysis)); + self.prev_state.clone_from(state) + } + } + + fn visit_terminator_after_primary_effect( + &mut self, + state: &Self::FlowState, + _terminator: &mir::Terminator<'tcx>, + _location: Location, + ) { + self.after.push(diff_pretty(state, &self.prev_state, self.analysis)); + self.prev_state.clone_from(state) + } +} + +macro_rules! regex { + ($re:literal $(,)?) => {{ + static RE: OnceLock<regex::Regex> = OnceLock::new(); + RE.get_or_init(|| Regex::new($re).unwrap()) + }}; +} + +fn diff_pretty<T, C>(new: T, old: T, ctxt: &C) -> String +where + T: DebugWithContext<C>, +{ + if new == old { + return String::new(); + } + + let re = regex!("\t?\u{001f}([+-])"); + + let raw_diff = format!("{:#?}", DebugDiffWithAdapter { new, old, ctxt }); + + // Replace newlines in the `Debug` output with `<br/>` + let raw_diff = raw_diff.replace('\n', r#"<br align="left"/>"#); + + let mut inside_font_tag = false; + let html_diff = re.replace_all(&raw_diff, |captures: ®ex::Captures<'_>| { + let mut ret = String::new(); + if inside_font_tag { + ret.push_str(r#"</font>"#); + } + + let tag = match &captures[1] { + "+" => r#"<font color="darkgreen">+"#, + "-" => r#"<font color="red">-"#, + _ => unreachable!(), + }; + + inside_font_tag = true; + ret.push_str(tag); + ret + }); + + let Cow::Owned(mut html_diff) = html_diff else { + return raw_diff; + }; + + if inside_font_tag { + html_diff.push_str("</font>"); + } + + html_diff +} + +/// The background color used for zebra-striping the table. +#[derive(Clone, Copy)] +enum Background { + Light, + Dark, +} + +impl Background { + fn attr(self) -> &'static str { + match self { + Self::Dark => "bgcolor=\"#f0f0f0\"", + Self::Light => "", + } + } +} + +impl ops::Not for Background { + type Output = Self; + + fn not(self) -> Self { + match self { + Self::Light => Self::Dark, + Self::Dark => Self::Light, + } + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/lattice.rs b/compiler/rustc_mir_dataflow/src/framework/lattice.rs new file mode 100644 index 000000000..d6b89eb82 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/lattice.rs @@ -0,0 +1,252 @@ +//! Traits used to represent [lattices] for use as the domain of a dataflow analysis. +//! +//! # Overview +//! +//! The most common lattice is a powerset of some set `S`, ordered by [set inclusion]. The [Hasse +//! diagram] for the powerset of a set with two elements (`X` and `Y`) is shown below. Note that +//! distinct elements at the same height in a Hasse diagram (e.g. `{X}` and `{Y}`) are +//! *incomparable*, not equal. +//! +//! ```text +//! {X, Y} <- top +//! / \ +//! {X} {Y} +//! \ / +//! {} <- bottom +//! +//! ``` +//! +//! The defining characteristic of a lattice—the one that differentiates it from a [partially +//! ordered set][poset]—is the existence of a *unique* least upper and greatest lower bound for +//! every pair of elements. The lattice join operator (`∨`) returns the least upper bound, and the +//! lattice meet operator (`∧`) returns the greatest lower bound. Types that implement one operator +//! but not the other are known as semilattices. Dataflow analysis only uses the join operator and +//! will work with any join-semilattice, but both should be specified when possible. +//! +//! ## `PartialOrd` +//! +//! Given that they represent partially ordered sets, you may be surprised that [`JoinSemiLattice`] +//! and [`MeetSemiLattice`] do not have [`PartialOrd`][std::cmp::PartialOrd] as a supertrait. This +//! is because most standard library types use lexicographic ordering instead of set inclusion for +//! their `PartialOrd` impl. Since we do not actually need to compare lattice elements to run a +//! dataflow analysis, there's no need for a newtype wrapper with a custom `PartialOrd` impl. The +//! only benefit would be the ability to check that the least upper (or greatest lower) bound +//! returned by the lattice join (or meet) operator was in fact greater (or lower) than the inputs. +//! +//! [lattices]: https://en.wikipedia.org/wiki/Lattice_(order) +//! [set inclusion]: https://en.wikipedia.org/wiki/Subset +//! [Hasse diagram]: https://en.wikipedia.org/wiki/Hasse_diagram +//! [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set + +use crate::framework::BitSetExt; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; +use rustc_index::vec::{Idx, IndexVec}; +use std::iter; + +/// A [partially ordered set][poset] that has a [least upper bound][lub] for any pair of elements +/// in the set. +/// +/// [lub]: https://en.wikipedia.org/wiki/Infimum_and_supremum +/// [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set +pub trait JoinSemiLattice: Eq { + /// Computes the least upper bound of two elements, storing the result in `self` and returning + /// `true` if `self` has changed. + /// + /// The lattice join operator is abbreviated as `∨`. + fn join(&mut self, other: &Self) -> bool; +} + +/// A [partially ordered set][poset] that has a [greatest lower bound][glb] for any pair of +/// elements in the set. +/// +/// Dataflow analyses only require that their domains implement [`JoinSemiLattice`], not +/// `MeetSemiLattice`. However, types that will be used as dataflow domains should implement both +/// so that they can be used with [`Dual`]. +/// +/// [glb]: https://en.wikipedia.org/wiki/Infimum_and_supremum +/// [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set +pub trait MeetSemiLattice: Eq { + /// Computes the greatest lower bound of two elements, storing the result in `self` and + /// returning `true` if `self` has changed. + /// + /// The lattice meet operator is abbreviated as `∧`. + fn meet(&mut self, other: &Self) -> bool; +} + +/// A `bool` is a "two-point" lattice with `true` as the top element and `false` as the bottom: +/// +/// ```text +/// true +/// | +/// false +/// ``` +impl JoinSemiLattice for bool { + fn join(&mut self, other: &Self) -> bool { + if let (false, true) = (*self, *other) { + *self = true; + return true; + } + + false + } +} + +impl MeetSemiLattice for bool { + fn meet(&mut self, other: &Self) -> bool { + if let (true, false) = (*self, *other) { + *self = false; + return true; + } + + false + } +} + +/// A tuple (or list) of lattices is itself a lattice whose least upper bound is the concatenation +/// of the least upper bounds of each element of the tuple (or list). +/// +/// In other words: +/// (A₀, A₁, ..., Aₙ) ∨ (B₀, B₁, ..., Bₙ) = (A₀∨B₀, A₁∨B₁, ..., Aₙ∨Bₙ) +impl<I: Idx, T: JoinSemiLattice> JoinSemiLattice for IndexVec<I, T> { + fn join(&mut self, other: &Self) -> bool { + assert_eq!(self.len(), other.len()); + + let mut changed = false; + for (a, b) in iter::zip(self, other) { + changed |= a.join(b); + } + changed + } +} + +impl<I: Idx, T: MeetSemiLattice> MeetSemiLattice for IndexVec<I, T> { + fn meet(&mut self, other: &Self) -> bool { + assert_eq!(self.len(), other.len()); + + let mut changed = false; + for (a, b) in iter::zip(self, other) { + changed |= a.meet(b); + } + changed + } +} + +/// A `BitSet` represents the lattice formed by the powerset of all possible values of +/// the index type `T` ordered by inclusion. Equivalently, it is a tuple of "two-point" lattices, +/// one for each possible value of `T`. +impl<T: Idx> JoinSemiLattice for BitSet<T> { + fn join(&mut self, other: &Self) -> bool { + self.union(other) + } +} + +impl<T: Idx> MeetSemiLattice for BitSet<T> { + fn meet(&mut self, other: &Self) -> bool { + self.intersect(other) + } +} + +impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> { + fn join(&mut self, other: &Self) -> bool { + self.union(other) + } +} + +impl<T: Idx> MeetSemiLattice for ChunkedBitSet<T> { + fn meet(&mut self, other: &Self) -> bool { + self.intersect(other) + } +} + +/// The counterpart of a given semilattice `T` using the [inverse order]. +/// +/// The dual of a join-semilattice is a meet-semilattice and vice versa. For example, the dual of a +/// powerset has the empty set as its top element and the full set as its bottom element and uses +/// set *intersection* as its join operator. +/// +/// [inverse order]: https://en.wikipedia.org/wiki/Duality_(order_theory) +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Dual<T>(pub T); + +impl<T: Idx> BitSetExt<T> for Dual<BitSet<T>> { + fn domain_size(&self) -> usize { + self.0.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.0.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet<T>) { + self.0.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet<T>) { + self.0.subtract(other); + } +} + +impl<T: MeetSemiLattice> JoinSemiLattice for Dual<T> { + fn join(&mut self, other: &Self) -> bool { + self.0.meet(&other.0) + } +} + +impl<T: JoinSemiLattice> MeetSemiLattice for Dual<T> { + fn meet(&mut self, other: &Self) -> bool { + self.0.join(&other.0) + } +} + +/// Extends a type `T` with top and bottom elements to make it a partially ordered set in which no +/// value of `T` is comparable with any other. +/// +/// A flat set has the following [Hasse diagram]: +/// +/// ```text +/// top +/// / ... / / \ \ ... \ +/// all possible values of `T` +/// \ ... \ \ / / ... / +/// bottom +/// ``` +/// +/// [Hasse diagram]: https://en.wikipedia.org/wiki/Hasse_diagram +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum FlatSet<T> { + Bottom, + Elem(T), + Top, +} + +impl<T: Clone + Eq> JoinSemiLattice for FlatSet<T> { + fn join(&mut self, other: &Self) -> bool { + let result = match (&*self, other) { + (Self::Top, _) | (_, Self::Bottom) => return false, + (Self::Elem(a), Self::Elem(b)) if a == b => return false, + + (Self::Bottom, Self::Elem(x)) => Self::Elem(x.clone()), + + _ => Self::Top, + }; + + *self = result; + true + } +} + +impl<T: Clone + Eq> MeetSemiLattice for FlatSet<T> { + fn meet(&mut self, other: &Self) -> bool { + let result = match (&*self, other) { + (Self::Bottom, _) | (_, Self::Top) => return false, + (Self::Elem(ref a), Self::Elem(ref b)) if a == b => return false, + + (Self::Top, Self::Elem(ref x)) => Self::Elem(x.clone()), + + _ => Self::Bottom, + }; + + *self = result; + true + } +} diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs new file mode 100644 index 000000000..f9fd6c9c5 --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs @@ -0,0 +1,624 @@ +//! A framework that can express both [gen-kill] and generic dataflow problems. +//! +//! To use this framework, implement either the [`Analysis`] or the +//! [`GenKillAnalysis`] trait. If your transfer function can be expressed with only gen/kill +//! operations, prefer `GenKillAnalysis` since it will run faster while iterating to fixpoint. The +//! `impls` module contains several examples of gen/kill dataflow analyses. +//! +//! Create an `Engine` for your analysis using the `into_engine` method on the `Analysis` trait, +//! then call `iterate_to_fixpoint`. From there, you can use a `ResultsCursor` to inspect the +//! fixpoint solution to your dataflow problem, or implement the `ResultsVisitor` interface and use +//! `visit_results`. The following example uses the `ResultsCursor` approach. +//! +//! ```ignore (cross-crate-imports) +//! use rustc_const_eval::dataflow::Analysis; // Makes `into_engine` available. +//! +//! fn do_my_analysis(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) { +//! let analysis = MyAnalysis::new() +//! .into_engine(tcx, body) +//! .iterate_to_fixpoint() +//! .into_results_cursor(body); +//! +//! // Print the dataflow state *after* each statement in the start block. +//! for (_, statement_index) in body.block_data[START_BLOCK].statements.iter_enumerated() { +//! cursor.seek_after(Location { block: START_BLOCK, statement_index }); +//! let state = cursor.get(); +//! println!("{:?}", state); +//! } +//! } +//! ``` +//! +//! [gen-kill]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems + +use std::cmp::Ordering; + +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; +use rustc_index::vec::Idx; +use rustc_middle::mir::{self, BasicBlock, Location}; +use rustc_middle::ty::TyCtxt; + +mod cursor; +mod direction; +mod engine; +pub mod fmt; +pub mod graphviz; +pub mod lattice; +mod visitor; + +pub use self::cursor::{ResultsCursor, ResultsRefCursor}; +pub use self::direction::{Backward, Direction, Forward}; +pub use self::engine::{Engine, Results}; +pub use self::lattice::{JoinSemiLattice, MeetSemiLattice}; +pub use self::visitor::{visit_results, ResultsVisitable, ResultsVisitor}; + +/// Analysis domains are all bitsets of various kinds. This trait holds +/// operations needed by all of them. +pub trait BitSetExt<T> { + fn domain_size(&self) -> usize; + fn contains(&self, elem: T) -> bool; + fn union(&mut self, other: &HybridBitSet<T>); + fn subtract(&mut self, other: &HybridBitSet<T>); +} + +impl<T: Idx> BitSetExt<T> for BitSet<T> { + fn domain_size(&self) -> usize { + self.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet<T>) { + self.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet<T>) { + self.subtract(other); + } +} + +impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> { + fn domain_size(&self) -> usize { + self.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet<T>) { + self.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet<T>) { + self.subtract(other); + } +} + +/// Defines the domain of a dataflow problem. +/// +/// This trait specifies the lattice on which this analysis operates (the domain) as well as its +/// initial value at the entry point of each basic block. +pub trait AnalysisDomain<'tcx> { + /// The type that holds the dataflow state at any given point in the program. + type Domain: Clone + JoinSemiLattice; + + /// The direction of this analysis. Either `Forward` or `Backward`. + type Direction: Direction = Forward; + + /// A descriptive name for this analysis. Used only for debugging. + /// + /// This name should be brief and contain no spaces, periods or other characters that are not + /// suitable as part of a filename. + const NAME: &'static str; + + /// Returns the initial value of the dataflow state upon entry to each basic block. + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain; + + /// Mutates the initial value of the dataflow state upon entry to the `START_BLOCK`. + /// + /// For backward analyses, initial state (besides the bottom value) is not yet supported. Trying + /// to mutate the initial state will result in a panic. + // + // FIXME: For backward dataflow analyses, the initial state should be applied to every basic + // block where control flow could exit the MIR body (e.g., those terminated with `return` or + // `resume`). It's not obvious how to handle `yield` points in generators, however. + fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain); +} + +/// A dataflow problem with an arbitrarily complex transfer function. +/// +/// # Convergence +/// +/// When implementing this trait directly (not via [`GenKillAnalysis`]), it's possible to choose a +/// transfer function such that the analysis does not reach fixpoint. To guarantee convergence, +/// your transfer functions must maintain the following invariant: +/// +/// > If the dataflow state **before** some point in the program changes to be greater +/// than the prior state **before** that point, the dataflow state **after** that point must +/// also change to be greater than the prior state **after** that point. +/// +/// This invariant guarantees that the dataflow state at a given point in the program increases +/// monotonically until fixpoint is reached. Note that this monotonicity requirement only applies +/// to the same point in the program at different points in time. The dataflow state at a given +/// point in the program may or may not be greater than the state at any preceding point. +pub trait Analysis<'tcx>: AnalysisDomain<'tcx> { + /// Updates the current dataflow state with the effect of evaluating a statement. + fn apply_statement_effect( + &self, + state: &mut Self::Domain, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + /// Updates the current dataflow state with an effect that occurs immediately *before* the + /// given statement. + /// + /// This method is useful if the consumer of the results of this analysis only needs to observe + /// *part* of the effect of a statement (e.g. for two-phase borrows). As a general rule, + /// analyses should not implement this without also implementing `apply_statement_effect`. + fn apply_before_statement_effect( + &self, + _state: &mut Self::Domain, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// Updates the current dataflow state with the effect of evaluating a terminator. + /// + /// The effect of a successful return from a `Call` terminator should **not** be accounted for + /// in this function. That should go in `apply_call_return_effect`. For example, in the + /// `InitializedPlaces` analyses, the return place for a function call is not marked as + /// initialized here. + fn apply_terminator_effect( + &self, + state: &mut Self::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); + + /// Updates the current dataflow state with an effect that occurs immediately *before* the + /// given terminator. + /// + /// This method is useful if the consumer of the results of this analysis needs only to observe + /// *part* of the effect of a terminator (e.g. for two-phase borrows). As a general rule, + /// analyses should not implement this without also implementing `apply_terminator_effect`. + fn apply_before_terminator_effect( + &self, + _state: &mut Self::Domain, + _terminator: &mir::Terminator<'tcx>, + _location: Location, + ) { + } + + /* Edge-specific effects */ + + /// Updates the current dataflow state with the effect of a successful return from a `Call` + /// terminator. + /// + /// This is separate from `apply_terminator_effect` to properly track state across unwind + /// edges. + fn apply_call_return_effect( + &self, + state: &mut Self::Domain, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ); + + /// Updates the current dataflow state with the effect of resuming from a `Yield` terminator. + /// + /// This is similar to `apply_call_return_effect` in that it only takes place after the + /// generator is resumed, not when it is dropped. + /// + /// By default, no effects happen. + fn apply_yield_resume_effect( + &self, + _state: &mut Self::Domain, + _resume_block: BasicBlock, + _resume_place: mir::Place<'tcx>, + ) { + } + + /// Updates the current dataflow state with the effect of taking a particular branch in a + /// `SwitchInt` terminator. + /// + /// Unlike the other edge-specific effects, which are allowed to mutate `Self::Domain` + /// directly, overriders of this method must pass a callback to + /// `SwitchIntEdgeEffects::apply`. The callback will be run once for each outgoing edge and + /// will have access to the dataflow state that will be propagated along that edge. + /// + /// This interface is somewhat more complex than the other visitor-like "effect" methods. + /// However, it is both more ergonomic—callers don't need to recompute or cache information + /// about a given `SwitchInt` terminator for each one of its edges—and more efficient—the + /// engine doesn't need to clone the exit state for a block unless + /// `SwitchIntEdgeEffects::apply` is actually called. + fn apply_switch_int_edge_effects( + &self, + _block: BasicBlock, + _discr: &mir::Operand<'tcx>, + _apply_edge_effects: &mut impl SwitchIntEdgeEffects<Self::Domain>, + ) { + } + + /* Extension methods */ + + /// Creates an `Engine` to find the fixpoint for this dataflow problem. + /// + /// You shouldn't need to override this outside this module, since the combination of the + /// default impl and the one for all `A: GenKillAnalysis` will do the right thing. + /// Its purpose is to enable method chaining like so: + /// + /// ```ignore (cross-crate-imports) + /// let results = MyAnalysis::new(tcx, body) + /// .into_engine(tcx, body, def_id) + /// .iterate_to_fixpoint() + /// .into_results_cursor(body); + /// ``` + fn into_engine<'mir>( + self, + tcx: TyCtxt<'tcx>, + body: &'mir mir::Body<'tcx>, + ) -> Engine<'mir, 'tcx, Self> + where + Self: Sized, + { + Engine::new_generic(tcx, body, self) + } +} + +/// A gen/kill dataflow problem. +/// +/// Each method in this trait has a corresponding one in `Analysis`. However, these methods only +/// allow modification of the dataflow state via "gen" and "kill" operations. By defining transfer +/// functions for each statement in this way, the transfer function for an entire basic block can +/// be computed efficiently. +/// +/// `Analysis` is automatically implemented for all implementers of `GenKillAnalysis`. +pub trait GenKillAnalysis<'tcx>: Analysis<'tcx> { + type Idx: Idx; + + /// See `Analysis::apply_statement_effect`. + fn statement_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + /// See `Analysis::apply_before_statement_effect`. + fn before_statement_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _statement: &mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// See `Analysis::apply_terminator_effect`. + fn terminator_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); + + /// See `Analysis::apply_before_terminator_effect`. + fn before_terminator_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _terminator: &mir::Terminator<'tcx>, + _location: Location, + ) { + } + + /* Edge-specific effects */ + + /// See `Analysis::apply_call_return_effect`. + fn call_return_effect( + &self, + trans: &mut impl GenKill<Self::Idx>, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ); + + /// See `Analysis::apply_yield_resume_effect`. + fn yield_resume_effect( + &self, + _trans: &mut impl GenKill<Self::Idx>, + _resume_block: BasicBlock, + _resume_place: mir::Place<'tcx>, + ) { + } + + /// See `Analysis::apply_switch_int_edge_effects`. + fn switch_int_edge_effects<G: GenKill<Self::Idx>>( + &self, + _block: BasicBlock, + _discr: &mir::Operand<'tcx>, + _edge_effects: &mut impl SwitchIntEdgeEffects<G>, + ) { + } +} + +impl<'tcx, A> Analysis<'tcx> for A +where + A: GenKillAnalysis<'tcx>, + A::Domain: GenKill<A::Idx> + BitSetExt<A::Idx>, +{ + fn apply_statement_effect( + &self, + state: &mut A::Domain, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + self.statement_effect(state, statement, location); + } + + fn apply_before_statement_effect( + &self, + state: &mut A::Domain, + statement: &mir::Statement<'tcx>, + location: Location, + ) { + self.before_statement_effect(state, statement, location); + } + + fn apply_terminator_effect( + &self, + state: &mut A::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + self.terminator_effect(state, terminator, location); + } + + fn apply_before_terminator_effect( + &self, + state: &mut A::Domain, + terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + self.before_terminator_effect(state, terminator, location); + } + + /* Edge-specific effects */ + + fn apply_call_return_effect( + &self, + state: &mut A::Domain, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, + ) { + self.call_return_effect(state, block, return_places); + } + + fn apply_yield_resume_effect( + &self, + state: &mut A::Domain, + resume_block: BasicBlock, + resume_place: mir::Place<'tcx>, + ) { + self.yield_resume_effect(state, resume_block, resume_place); + } + + fn apply_switch_int_edge_effects( + &self, + block: BasicBlock, + discr: &mir::Operand<'tcx>, + edge_effects: &mut impl SwitchIntEdgeEffects<A::Domain>, + ) { + self.switch_int_edge_effects(block, discr, edge_effects); + } + + /* Extension methods */ + + fn into_engine<'mir>( + self, + tcx: TyCtxt<'tcx>, + body: &'mir mir::Body<'tcx>, + ) -> Engine<'mir, 'tcx, Self> + where + Self: Sized, + { + Engine::new_gen_kill(tcx, body, self) + } +} + +/// The legal operations for a transfer function in a gen/kill problem. +/// +/// This abstraction exists because there are two different contexts in which we call the methods in +/// `GenKillAnalysis`. Sometimes we need to store a single transfer function that can be efficiently +/// applied multiple times, such as when computing the cumulative transfer function for each block. +/// These cases require a `GenKillSet`, which in turn requires two `BitSet`s of storage. Oftentimes, +/// however, we only need to apply an effect once. In *these* cases, it is more efficient to pass the +/// `BitSet` representing the state vector directly into the `*_effect` methods as opposed to +/// building up a `GenKillSet` and then throwing it away. +pub trait GenKill<T> { + /// Inserts `elem` into the state vector. + fn gen(&mut self, elem: T); + + /// Removes `elem` from the state vector. + fn kill(&mut self, elem: T); + + /// Calls `gen` for each element in `elems`. + fn gen_all(&mut self, elems: impl IntoIterator<Item = T>) { + for elem in elems { + self.gen(elem); + } + } + + /// Calls `kill` for each element in `elems`. + fn kill_all(&mut self, elems: impl IntoIterator<Item = T>) { + for elem in elems { + self.kill(elem); + } + } +} + +/// Stores a transfer function for a gen/kill problem. +/// +/// Calling `gen`/`kill` on a `GenKillSet` will "build up" a transfer function so that it can be +/// applied multiple times efficiently. When there are multiple calls to `gen` and/or `kill` for +/// the same element, the most recent one takes precedence. +#[derive(Clone)] +pub struct GenKillSet<T> { + gen: HybridBitSet<T>, + kill: HybridBitSet<T>, +} + +impl<T: Idx> GenKillSet<T> { + /// Creates a new transfer function that will leave the dataflow state unchanged. + pub fn identity(universe: usize) -> Self { + GenKillSet { + gen: HybridBitSet::new_empty(universe), + kill: HybridBitSet::new_empty(universe), + } + } + + pub fn apply(&self, state: &mut impl BitSetExt<T>) { + state.union(&self.gen); + state.subtract(&self.kill); + } +} + +impl<T: Idx> GenKill<T> for GenKillSet<T> { + fn gen(&mut self, elem: T) { + self.gen.insert(elem); + self.kill.remove(elem); + } + + fn kill(&mut self, elem: T) { + self.kill.insert(elem); + self.gen.remove(elem); + } +} + +impl<T: Idx> GenKill<T> for BitSet<T> { + fn gen(&mut self, elem: T) { + self.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.remove(elem); + } +} + +impl<T: Idx> GenKill<T> for ChunkedBitSet<T> { + fn gen(&mut self, elem: T) { + self.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.remove(elem); + } +} + +impl<T: Idx> GenKill<T> for lattice::Dual<BitSet<T>> { + fn gen(&mut self, elem: T) { + self.0.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.0.remove(elem); + } +} + +// NOTE: DO NOT CHANGE VARIANT ORDER. The derived `Ord` impls rely on the current order. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Effect { + /// The "before" effect (e.g., `apply_before_statement_effect`) for a statement (or + /// terminator). + Before, + + /// The "primary" effect (e.g., `apply_statement_effect`) for a statement (or terminator). + Primary, +} + +impl Effect { + pub const fn at_index(self, statement_index: usize) -> EffectIndex { + EffectIndex { effect: self, statement_index } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct EffectIndex { + statement_index: usize, + effect: Effect, +} + +impl EffectIndex { + fn next_in_forward_order(self) -> Self { + match self.effect { + Effect::Before => Effect::Primary.at_index(self.statement_index), + Effect::Primary => Effect::Before.at_index(self.statement_index + 1), + } + } + + fn next_in_backward_order(self) -> Self { + match self.effect { + Effect::Before => Effect::Primary.at_index(self.statement_index), + Effect::Primary => Effect::Before.at_index(self.statement_index - 1), + } + } + + /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other` + /// in forward order. + fn precedes_in_forward_order(self, other: Self) -> bool { + let ord = self + .statement_index + .cmp(&other.statement_index) + .then_with(|| self.effect.cmp(&other.effect)); + ord == Ordering::Less + } + + /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other` + /// in backward order. + fn precedes_in_backward_order(self, other: Self) -> bool { + let ord = other + .statement_index + .cmp(&self.statement_index) + .then_with(|| self.effect.cmp(&other.effect)); + ord == Ordering::Less + } +} + +pub struct SwitchIntTarget { + pub value: Option<u128>, + pub target: BasicBlock, +} + +/// A type that records the edge-specific effects for a `SwitchInt` terminator. +pub trait SwitchIntEdgeEffects<D> { + /// Calls `apply_edge_effect` for each outgoing edge from a `SwitchInt` terminator and + /// records the results. + fn apply(&mut self, apply_edge_effect: impl FnMut(&mut D, SwitchIntTarget)); +} + +/// List of places that are written to after a successful (non-unwind) return +/// from a `Call` or `InlineAsm`. +pub enum CallReturnPlaces<'a, 'tcx> { + Call(mir::Place<'tcx>), + InlineAsm(&'a [mir::InlineAsmOperand<'tcx>]), +} + +impl<'tcx> CallReturnPlaces<'_, 'tcx> { + pub fn for_each(&self, mut f: impl FnMut(mir::Place<'tcx>)) { + match *self { + Self::Call(place) => f(place), + Self::InlineAsm(operands) => { + for op in operands { + match *op { + mir::InlineAsmOperand::Out { place: Some(place), .. } + | mir::InlineAsmOperand::InOut { out_place: Some(place), .. } => f(place), + _ => {} + } + } + } + } + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_mir_dataflow/src/framework/tests.rs b/compiler/rustc_mir_dataflow/src/framework/tests.rs new file mode 100644 index 000000000..d9461fd3a --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/tests.rs @@ -0,0 +1,322 @@ +//! A test for the logic that updates the state in a `ResultsCursor` during seek. + +use std::marker::PhantomData; + +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::{self, BasicBlock, Location}; +use rustc_middle::ty; +use rustc_span::DUMMY_SP; + +use super::*; + +/// Creates a `mir::Body` with a few disconnected basic blocks. +/// +/// This is the `Body` that will be used by the `MockAnalysis` below. The shape of its CFG is not +/// important. +fn mock_body<'tcx>() -> mir::Body<'tcx> { + let source_info = mir::SourceInfo::outermost(DUMMY_SP); + + let mut blocks = IndexVec::new(); + let mut block = |n, kind| { + let nop = mir::Statement { source_info, kind: mir::StatementKind::Nop }; + + blocks.push(mir::BasicBlockData { + statements: std::iter::repeat(&nop).cloned().take(n).collect(), + terminator: Some(mir::Terminator { source_info, kind }), + is_cleanup: false, + }) + }; + + let dummy_place = mir::Place { local: mir::RETURN_PLACE, projection: ty::List::empty() }; + + block(4, mir::TerminatorKind::Return); + block(1, mir::TerminatorKind::Return); + block( + 2, + mir::TerminatorKind::Call { + func: mir::Operand::Copy(dummy_place.clone()), + args: vec![], + destination: dummy_place.clone(), + target: Some(mir::START_BLOCK), + cleanup: None, + from_hir_call: false, + fn_span: DUMMY_SP, + }, + ); + block(3, mir::TerminatorKind::Return); + block(0, mir::TerminatorKind::Return); + block( + 4, + mir::TerminatorKind::Call { + func: mir::Operand::Copy(dummy_place.clone()), + args: vec![], + destination: dummy_place.clone(), + target: Some(mir::START_BLOCK), + cleanup: None, + from_hir_call: false, + fn_span: DUMMY_SP, + }, + ); + + mir::Body::new_cfg_only(blocks) +} + +/// A dataflow analysis whose state is unique at every possible `SeekTarget`. +/// +/// Uniqueness is achieved by having a *locally* unique effect before and after each statement and +/// terminator (see `effect_at_target`) while ensuring that the entry set for each block is +/// *globally* unique (see `mock_entry_set`). +/// +/// For example, a `BasicBlock` with ID `2` and a `Call` terminator has the following state at each +/// location ("+x" indicates that "x" is added to the state). +/// +/// | Location | Before | After | +/// |------------------------|-------------------|--------| +/// | (on_entry) | {102} || +/// | statement 0 | +0 | +1 | +/// | statement 1 | +2 | +3 | +/// | `Call` terminator | +4 | +5 | +/// | (on unwind) | {102,0,1,2,3,4,5} || +/// +/// The `102` in the block's entry set is derived from the basic block index and ensures that the +/// expected state is unique across all basic blocks. Remember, it is generated by +/// `mock_entry_sets`, not from actually running `MockAnalysis` to fixpoint. +struct MockAnalysis<'tcx, D> { + body: &'tcx mir::Body<'tcx>, + dir: PhantomData<D>, +} + +impl<D: Direction> MockAnalysis<'_, D> { + const BASIC_BLOCK_OFFSET: usize = 100; + + /// The entry set for each `BasicBlock` is the ID of that block offset by a fixed amount to + /// avoid colliding with the statement/terminator effects. + fn mock_entry_set(&self, bb: BasicBlock) -> BitSet<usize> { + let mut ret = self.bottom_value(self.body); + ret.insert(Self::BASIC_BLOCK_OFFSET + bb.index()); + ret + } + + fn mock_entry_sets(&self) -> IndexVec<BasicBlock, BitSet<usize>> { + let empty = self.bottom_value(self.body); + let mut ret = IndexVec::from_elem(empty, &self.body.basic_blocks()); + + for (bb, _) in self.body.basic_blocks().iter_enumerated() { + ret[bb] = self.mock_entry_set(bb); + } + + ret + } + + /// Returns the index that should be added to the dataflow state at the given target. + fn effect(&self, loc: EffectIndex) -> usize { + let idx = match loc.effect { + Effect::Before => loc.statement_index * 2, + Effect::Primary => loc.statement_index * 2 + 1, + }; + + assert!(idx < Self::BASIC_BLOCK_OFFSET, "Too many statements in basic block"); + idx + } + + /// Returns the expected state at the given `SeekTarget`. + /// + /// This is the union of index of the target basic block, the index assigned to the + /// target statement or terminator, and the indices of all preceding statements in the target + /// basic block. + /// + /// For example, the expected state when calling + /// `seek_before_primary_effect(Location { block: 2, statement_index: 2 })` + /// would be `[102, 0, 1, 2, 3, 4]`. + fn expected_state_at_target(&self, target: SeekTarget) -> BitSet<usize> { + let block = target.block(); + let mut ret = self.bottom_value(self.body); + ret.insert(Self::BASIC_BLOCK_OFFSET + block.index()); + + let target = match target { + SeekTarget::BlockEntry { .. } => return ret, + SeekTarget::Before(loc) => Effect::Before.at_index(loc.statement_index), + SeekTarget::After(loc) => Effect::Primary.at_index(loc.statement_index), + }; + + let mut pos = if D::IS_FORWARD { + Effect::Before.at_index(0) + } else { + Effect::Before.at_index(self.body[block].statements.len()) + }; + + loop { + ret.insert(self.effect(pos)); + + if pos == target { + return ret; + } + + if D::IS_FORWARD { + pos = pos.next_in_forward_order(); + } else { + pos = pos.next_in_backward_order(); + } + } + } +} + +impl<'tcx, D: Direction> AnalysisDomain<'tcx> for MockAnalysis<'tcx, D> { + type Domain = BitSet<usize>; + type Direction = D; + + const NAME: &'static str = "mock"; + + fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain { + BitSet::new_empty(Self::BASIC_BLOCK_OFFSET + body.basic_blocks().len()) + } + + fn initialize_start_block(&self, _: &mir::Body<'tcx>, _: &mut Self::Domain) { + unimplemented!("This is never called since `MockAnalysis` is never iterated to fixpoint"); + } +} + +impl<'tcx, D: Direction> Analysis<'tcx> for MockAnalysis<'tcx, D> { + fn apply_statement_effect( + &self, + state: &mut Self::Domain, + _statement: &mir::Statement<'tcx>, + location: Location, + ) { + let idx = self.effect(Effect::Primary.at_index(location.statement_index)); + assert!(state.insert(idx)); + } + + fn apply_before_statement_effect( + &self, + state: &mut Self::Domain, + _statement: &mir::Statement<'tcx>, + location: Location, + ) { + let idx = self.effect(Effect::Before.at_index(location.statement_index)); + assert!(state.insert(idx)); + } + + fn apply_terminator_effect( + &self, + state: &mut Self::Domain, + _terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + let idx = self.effect(Effect::Primary.at_index(location.statement_index)); + assert!(state.insert(idx)); + } + + fn apply_before_terminator_effect( + &self, + state: &mut Self::Domain, + _terminator: &mir::Terminator<'tcx>, + location: Location, + ) { + let idx = self.effect(Effect::Before.at_index(location.statement_index)); + assert!(state.insert(idx)); + } + + fn apply_call_return_effect( + &self, + _state: &mut Self::Domain, + _block: BasicBlock, + _return_places: CallReturnPlaces<'_, 'tcx>, + ) { + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum SeekTarget { + BlockEntry(BasicBlock), + Before(Location), + After(Location), +} + +impl SeekTarget { + fn block(&self) -> BasicBlock { + use SeekTarget::*; + + match *self { + BlockEntry(block) => block, + Before(loc) | After(loc) => loc.block, + } + } + + /// An iterator over all possible `SeekTarget`s in a given block in order, starting with + /// `BlockEntry`. + fn iter_in_block(body: &mir::Body<'_>, block: BasicBlock) -> impl Iterator<Item = Self> { + let statements_and_terminator = (0..=body[block].statements.len()) + .flat_map(|i| (0..2).map(move |j| (i, j))) + .map(move |(i, kind)| { + let loc = Location { block, statement_index: i }; + match kind { + 0 => SeekTarget::Before(loc), + 1 => SeekTarget::After(loc), + _ => unreachable!(), + } + }); + + std::iter::once(SeekTarget::BlockEntry(block)).chain(statements_and_terminator) + } +} + +fn test_cursor<D: Direction>(analysis: MockAnalysis<'_, D>) { + let body = analysis.body; + + let mut cursor = + Results { entry_sets: analysis.mock_entry_sets(), analysis }.into_results_cursor(body); + + cursor.allow_unreachable(); + + let every_target = || { + body.basic_blocks() + .iter_enumerated() + .flat_map(|(bb, _)| SeekTarget::iter_in_block(body, bb)) + }; + + let mut seek_to_target = |targ| { + use SeekTarget::*; + + match targ { + BlockEntry(block) => cursor.seek_to_block_entry(block), + Before(loc) => cursor.seek_before_primary_effect(loc), + After(loc) => cursor.seek_after_primary_effect(loc), + } + + assert_eq!(cursor.get(), &cursor.analysis().expected_state_at_target(targ)); + }; + + // Seek *to* every possible `SeekTarget` *from* every possible `SeekTarget`. + // + // By resetting the cursor to `from` each time it changes, we end up checking some edges twice. + // What we really want is an Eulerian cycle for the complete digraph over all possible + // `SeekTarget`s, but it's not worth spending the time to compute it. + for from in every_target() { + seek_to_target(from); + + for to in every_target() { + dbg!(from); + dbg!(to); + seek_to_target(to); + seek_to_target(from); + } + } +} + +#[test] +fn backward_cursor() { + let body = mock_body(); + let body = &body; + let analysis = MockAnalysis { body, dir: PhantomData::<Backward> }; + test_cursor(analysis) +} + +#[test] +fn forward_cursor() { + let body = mock_body(); + let body = &body; + let analysis = MockAnalysis { body, dir: PhantomData::<Forward> }; + test_cursor(analysis) +} diff --git a/compiler/rustc_mir_dataflow/src/framework/visitor.rs b/compiler/rustc_mir_dataflow/src/framework/visitor.rs new file mode 100644 index 000000000..75b4e150a --- /dev/null +++ b/compiler/rustc_mir_dataflow/src/framework/visitor.rs @@ -0,0 +1,187 @@ +use rustc_middle::mir::{self, BasicBlock, Location}; + +use super::{Analysis, Direction, Results}; + +/// Calls the corresponding method in `ResultsVisitor` for every location in a `mir::Body` with the +/// dataflow state at that location. +pub fn visit_results<'mir, 'tcx, F, V>( + body: &'mir mir::Body<'tcx>, + blocks: impl IntoIterator<Item = BasicBlock>, + results: &V, + vis: &mut impl ResultsVisitor<'mir, 'tcx, FlowState = F>, +) where + V: ResultsVisitable<'tcx, FlowState = F>, +{ + let mut state = results.new_flow_state(body); + + #[cfg(debug_assertions)] + let reachable_blocks = mir::traversal::reachable_as_bitset(body); + + for block in blocks { + #[cfg(debug_assertions)] + assert!(reachable_blocks.contains(block)); + + let block_data = &body[block]; + V::Direction::visit_results_in_block(&mut state, block, block_data, results, vis); + } +} + +pub trait ResultsVisitor<'mir, 'tcx> { + type FlowState; + + fn visit_block_start( + &mut self, + _state: &Self::FlowState, + _block_data: &'mir mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + } + + /// Called with the `before_statement_effect` of the given statement applied to `state` but not + /// its `statement_effect`. + fn visit_statement_before_primary_effect( + &mut self, + _state: &Self::FlowState, + _statement: &'mir mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// Called with both the `before_statement_effect` and the `statement_effect` of the given + /// statement applied to `state`. + fn visit_statement_after_primary_effect( + &mut self, + _state: &Self::FlowState, + _statement: &'mir mir::Statement<'tcx>, + _location: Location, + ) { + } + + /// Called with the `before_terminator_effect` of the given terminator applied to `state` but not + /// its `terminator_effect`. + fn visit_terminator_before_primary_effect( + &mut self, + _state: &Self::FlowState, + _terminator: &'mir mir::Terminator<'tcx>, + _location: Location, + ) { + } + + /// Called with both the `before_terminator_effect` and the `terminator_effect` of the given + /// terminator applied to `state`. + /// + /// The `call_return_effect` (if one exists) will *not* be applied to `state`. + fn visit_terminator_after_primary_effect( + &mut self, + _state: &Self::FlowState, + _terminator: &'mir mir::Terminator<'tcx>, + _location: Location, + ) { + } + + fn visit_block_end( + &mut self, + _state: &Self::FlowState, + _block_data: &'mir mir::BasicBlockData<'tcx>, + _block: BasicBlock, + ) { + } +} + +/// Things that can be visited by a `ResultsVisitor`. +/// +/// This trait exists so that we can visit the results of multiple dataflow analyses simultaneously. +/// DO NOT IMPLEMENT MANUALLY. Instead, use the `impl_visitable` macro below. +pub trait ResultsVisitable<'tcx> { + type Direction: Direction; + type FlowState; + + /// Creates an empty `FlowState` to hold the transient state for these dataflow results. + /// + /// The value of the newly created `FlowState` will be overwritten by `reset_to_block_entry` + /// before it can be observed by a `ResultsVisitor`. + fn new_flow_state(&self, body: &mir::Body<'tcx>) -> Self::FlowState; + + fn reset_to_block_entry(&self, state: &mut Self::FlowState, block: BasicBlock); + + fn reconstruct_before_statement_effect( + &self, + state: &mut Self::FlowState, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + fn reconstruct_statement_effect( + &self, + state: &mut Self::FlowState, + statement: &mir::Statement<'tcx>, + location: Location, + ); + + fn reconstruct_before_terminator_effect( + &self, + state: &mut Self::FlowState, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); + + fn reconstruct_terminator_effect( + &self, + state: &mut Self::FlowState, + terminator: &mir::Terminator<'tcx>, + location: Location, + ); +} + +impl<'tcx, A> ResultsVisitable<'tcx> for Results<'tcx, A> +where + A: Analysis<'tcx>, +{ + type FlowState = A::Domain; + + type Direction = A::Direction; + + fn new_flow_state(&self, body: &mir::Body<'tcx>) -> Self::FlowState { + self.analysis.bottom_value(body) + } + + fn reset_to_block_entry(&self, state: &mut Self::FlowState, block: BasicBlock) { + state.clone_from(&self.entry_set_for_block(block)); + } + + fn reconstruct_before_statement_effect( + &self, + state: &mut Self::FlowState, + stmt: &mir::Statement<'tcx>, + loc: Location, + ) { + self.analysis.apply_before_statement_effect(state, stmt, loc); + } + + fn reconstruct_statement_effect( + &self, + state: &mut Self::FlowState, + stmt: &mir::Statement<'tcx>, + loc: Location, + ) { + self.analysis.apply_statement_effect(state, stmt, loc); + } + + fn reconstruct_before_terminator_effect( + &self, + state: &mut Self::FlowState, + term: &mir::Terminator<'tcx>, + loc: Location, + ) { + self.analysis.apply_before_terminator_effect(state, term, loc); + } + + fn reconstruct_terminator_effect( + &self, + state: &mut Self::FlowState, + term: &mir::Terminator<'tcx>, + loc: Location, + ) { + self.analysis.apply_terminator_effect(state, term, loc); + } +} |