diff options
Diffstat (limited to '')
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/counters.rs | 614 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/debug.rs | 831 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/graph.rs | 753 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/mod.rs | 580 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/query.rs | 170 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/spans.rs | 892 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/test_macros/Cargo.toml | 8 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/test_macros/src/lib.rs | 6 | ||||
-rw-r--r-- | compiler/rustc_mir_transform/src/coverage/tests.rs | 710 |
9 files changed, 4564 insertions, 0 deletions
diff --git a/compiler/rustc_mir_transform/src/coverage/counters.rs b/compiler/rustc_mir_transform/src/coverage/counters.rs new file mode 100644 index 000000000..45de0c280 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/counters.rs @@ -0,0 +1,614 @@ +use super::Error; + +use super::debug; +use super::graph; +use super::spans; + +use debug::{DebugCounters, NESTED_INDENT}; +use graph::{BasicCoverageBlock, BcbBranch, CoverageGraph, TraverseCoverageGraphWithLoops}; +use spans::CoverageSpan; + +use rustc_data_structures::graph::WithNumNodes; +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::coverage::*; + +/// Manages the counter and expression indexes/IDs to generate `CoverageKind` components for MIR +/// `Coverage` statements. +pub(super) struct CoverageCounters { + function_source_hash: u64, + next_counter_id: u32, + num_expressions: u32, + pub debug_counters: DebugCounters, +} + +impl CoverageCounters { + pub fn new(function_source_hash: u64) -> Self { + Self { + function_source_hash, + next_counter_id: CounterValueReference::START.as_u32(), + num_expressions: 0, + debug_counters: DebugCounters::new(), + } + } + + /// Activate the `DebugCounters` data structures, to provide additional debug formatting + /// features when formatting `CoverageKind` (counter) values. + pub fn enable_debug(&mut self) { + self.debug_counters.enable(); + } + + /// Makes `CoverageKind` `Counter`s and `Expressions` for the `BasicCoverageBlock`s directly or + /// indirectly associated with `CoverageSpans`, and returns additional `Expression`s + /// representing intermediate values. + pub fn make_bcb_counters( + &mut self, + basic_coverage_blocks: &mut CoverageGraph, + coverage_spans: &[CoverageSpan], + ) -> Result<Vec<CoverageKind>, Error> { + let mut bcb_counters = BcbCounters::new(self, basic_coverage_blocks); + bcb_counters.make_bcb_counters(coverage_spans) + } + + fn make_counter<F>(&mut self, debug_block_label_fn: F) -> CoverageKind + where + F: Fn() -> Option<String>, + { + let counter = CoverageKind::Counter { + function_source_hash: self.function_source_hash, + id: self.next_counter(), + }; + if self.debug_counters.is_enabled() { + self.debug_counters.add_counter(&counter, (debug_block_label_fn)()); + } + counter + } + + fn make_expression<F>( + &mut self, + lhs: ExpressionOperandId, + op: Op, + rhs: ExpressionOperandId, + debug_block_label_fn: F, + ) -> CoverageKind + where + F: Fn() -> Option<String>, + { + let id = self.next_expression(); + let expression = CoverageKind::Expression { id, lhs, op, rhs }; + if self.debug_counters.is_enabled() { + self.debug_counters.add_counter(&expression, (debug_block_label_fn)()); + } + expression + } + + pub fn make_identity_counter(&mut self, counter_operand: ExpressionOperandId) -> CoverageKind { + let some_debug_block_label = if self.debug_counters.is_enabled() { + self.debug_counters.some_block_label(counter_operand).cloned() + } else { + None + }; + self.make_expression(counter_operand, Op::Add, ExpressionOperandId::ZERO, || { + some_debug_block_label.clone() + }) + } + + /// Counter IDs start from one and go up. + fn next_counter(&mut self) -> CounterValueReference { + assert!(self.next_counter_id < u32::MAX - self.num_expressions); + let next = self.next_counter_id; + self.next_counter_id += 1; + CounterValueReference::from(next) + } + + /// Expression IDs start from u32::MAX and go down because an Expression can reference + /// (add or subtract counts) of both Counter regions and Expression regions. The counter + /// expression operand IDs must be unique across both types. + fn next_expression(&mut self) -> InjectedExpressionId { + assert!(self.next_counter_id < u32::MAX - self.num_expressions); + let next = u32::MAX - self.num_expressions; + self.num_expressions += 1; + InjectedExpressionId::from(next) + } +} + +/// Traverse the `CoverageGraph` and add either a `Counter` or `Expression` to every BCB, to be +/// injected with `CoverageSpan`s. `Expressions` have no runtime overhead, so if a viable expression +/// (adding or subtracting two other counters or expressions) can compute the same result as an +/// embedded counter, an `Expression` should be used. +struct BcbCounters<'a> { + coverage_counters: &'a mut CoverageCounters, + basic_coverage_blocks: &'a mut CoverageGraph, +} + +impl<'a> BcbCounters<'a> { + fn new( + coverage_counters: &'a mut CoverageCounters, + basic_coverage_blocks: &'a mut CoverageGraph, + ) -> Self { + Self { coverage_counters, basic_coverage_blocks } + } + + /// If two `BasicCoverageBlock`s branch from another `BasicCoverageBlock`, one of the branches + /// can be counted by `Expression` by subtracting the other branch from the branching + /// block. Otherwise, the `BasicCoverageBlock` executed the least should have the `Counter`. + /// One way to predict which branch executes the least is by considering loops. A loop is exited + /// at a branch, so the branch that jumps to a `BasicCoverageBlock` outside the loop is almost + /// always executed less than the branch that does not exit the loop. + /// + /// Returns any non-code-span expressions created to represent intermediate values (such as to + /// add two counters so the result can be subtracted from another counter), or an Error with + /// message for subsequent debugging. + fn make_bcb_counters( + &mut self, + coverage_spans: &[CoverageSpan], + ) -> Result<Vec<CoverageKind>, Error> { + debug!("make_bcb_counters(): adding a counter or expression to each BasicCoverageBlock"); + let num_bcbs = self.basic_coverage_blocks.num_nodes(); + let mut collect_intermediate_expressions = Vec::with_capacity(num_bcbs); + + let mut bcbs_with_coverage = BitSet::new_empty(num_bcbs); + for covspan in coverage_spans { + bcbs_with_coverage.insert(covspan.bcb); + } + + // Walk the `CoverageGraph`. For each `BasicCoverageBlock` node with an associated + // `CoverageSpan`, add a counter. If the `BasicCoverageBlock` branches, add a counter or + // expression to each branch `BasicCoverageBlock` (if the branch BCB has only one incoming + // edge) or edge from the branching BCB to the branch BCB (if the branch BCB has multiple + // incoming edges). + // + // The `TraverseCoverageGraphWithLoops` traversal ensures that, when a loop is encountered, + // all `BasicCoverageBlock` nodes in the loop are visited before visiting any node outside + // the loop. The `traversal` state includes a `context_stack`, providing a way to know if + // the current BCB is in one or more nested loops or not. + let mut traversal = TraverseCoverageGraphWithLoops::new(&self.basic_coverage_blocks); + while let Some(bcb) = traversal.next(self.basic_coverage_blocks) { + if bcbs_with_coverage.contains(bcb) { + debug!("{:?} has at least one `CoverageSpan`. Get or make its counter", bcb); + let branching_counter_operand = + self.get_or_make_counter_operand(bcb, &mut collect_intermediate_expressions)?; + + if self.bcb_needs_branch_counters(bcb) { + self.make_branch_counters( + &mut traversal, + bcb, + branching_counter_operand, + &mut collect_intermediate_expressions, + )?; + } + } else { + debug!( + "{:?} does not have any `CoverageSpan`s. A counter will only be added if \ + and when a covered BCB has an expression dependency.", + bcb, + ); + } + } + + if traversal.is_complete() { + Ok(collect_intermediate_expressions) + } else { + Error::from_string(format!( + "`TraverseCoverageGraphWithLoops` missed some `BasicCoverageBlock`s: {:?}", + traversal.unvisited(), + )) + } + } + + fn make_branch_counters( + &mut self, + traversal: &mut TraverseCoverageGraphWithLoops, + branching_bcb: BasicCoverageBlock, + branching_counter_operand: ExpressionOperandId, + collect_intermediate_expressions: &mut Vec<CoverageKind>, + ) -> Result<(), Error> { + let branches = self.bcb_branches(branching_bcb); + debug!( + "{:?} has some branch(es) without counters:\n {}", + branching_bcb, + branches + .iter() + .map(|branch| { + format!("{:?}: {:?}", branch, branch.counter(&self.basic_coverage_blocks)) + }) + .collect::<Vec<_>>() + .join("\n "), + ); + + // Use the `traversal` state to decide if a subset of the branches exit a loop, making it + // likely that branch is executed less than branches that do not exit the same loop. In this + // case, any branch that does not exit the loop (and has not already been assigned a + // counter) should be counted by expression, if possible. (If a preferred expression branch + // is not selected based on the loop context, select any branch without an existing + // counter.) + let expression_branch = self.choose_preferred_expression_branch(traversal, &branches); + + // Assign a Counter or Expression to each branch, plus additional `Expression`s, as needed, + // to sum up intermediate results. + let mut some_sumup_counter_operand = None; + for branch in branches { + // Skip the selected `expression_branch`, if any. It's expression will be assigned after + // all others. + if branch != expression_branch { + let branch_counter_operand = if branch.is_only_path_to_target() { + debug!( + " {:?} has only one incoming edge (from {:?}), so adding a \ + counter", + branch, branching_bcb + ); + self.get_or_make_counter_operand( + branch.target_bcb, + collect_intermediate_expressions, + )? + } else { + debug!(" {:?} has multiple incoming edges, so adding an edge counter", branch); + self.get_or_make_edge_counter_operand( + branching_bcb, + branch.target_bcb, + collect_intermediate_expressions, + )? + }; + if let Some(sumup_counter_operand) = + some_sumup_counter_operand.replace(branch_counter_operand) + { + let intermediate_expression = self.coverage_counters.make_expression( + branch_counter_operand, + Op::Add, + sumup_counter_operand, + || None, + ); + debug!( + " [new intermediate expression: {}]", + self.format_counter(&intermediate_expression) + ); + let intermediate_expression_operand = intermediate_expression.as_operand_id(); + collect_intermediate_expressions.push(intermediate_expression); + some_sumup_counter_operand.replace(intermediate_expression_operand); + } + } + } + + // Assign the final expression to the `expression_branch` by subtracting the total of all + // other branches from the counter of the branching BCB. + let sumup_counter_operand = + some_sumup_counter_operand.expect("sumup_counter_operand should have a value"); + debug!( + "Making an expression for the selected expression_branch: {:?} \ + (expression_branch predecessors: {:?})", + expression_branch, + self.bcb_predecessors(expression_branch.target_bcb), + ); + let expression = self.coverage_counters.make_expression( + branching_counter_operand, + Op::Subtract, + sumup_counter_operand, + || Some(format!("{:?}", expression_branch)), + ); + debug!("{:?} gets an expression: {}", expression_branch, self.format_counter(&expression)); + let bcb = expression_branch.target_bcb; + if expression_branch.is_only_path_to_target() { + self.basic_coverage_blocks[bcb].set_counter(expression)?; + } else { + self.basic_coverage_blocks[bcb].set_edge_counter_from(branching_bcb, expression)?; + } + Ok(()) + } + + fn get_or_make_counter_operand( + &mut self, + bcb: BasicCoverageBlock, + collect_intermediate_expressions: &mut Vec<CoverageKind>, + ) -> Result<ExpressionOperandId, Error> { + self.recursive_get_or_make_counter_operand(bcb, collect_intermediate_expressions, 1) + } + + fn recursive_get_or_make_counter_operand( + &mut self, + bcb: BasicCoverageBlock, + collect_intermediate_expressions: &mut Vec<CoverageKind>, + debug_indent_level: usize, + ) -> Result<ExpressionOperandId, Error> { + // If the BCB already has a counter, return it. + if let Some(counter_kind) = self.basic_coverage_blocks[bcb].counter() { + debug!( + "{}{:?} already has a counter: {}", + NESTED_INDENT.repeat(debug_indent_level), + bcb, + self.format_counter(counter_kind), + ); + return Ok(counter_kind.as_operand_id()); + } + + // A BCB with only one incoming edge gets a simple `Counter` (via `make_counter()`). + // Also, a BCB that loops back to itself gets a simple `Counter`. This may indicate the + // program results in a tight infinite loop, but it should still compile. + let one_path_to_target = self.bcb_has_one_path_to_target(bcb); + if one_path_to_target || self.bcb_predecessors(bcb).contains(&bcb) { + let counter_kind = self.coverage_counters.make_counter(|| Some(format!("{:?}", bcb))); + if one_path_to_target { + debug!( + "{}{:?} gets a new counter: {}", + NESTED_INDENT.repeat(debug_indent_level), + bcb, + self.format_counter(&counter_kind), + ); + } else { + debug!( + "{}{:?} has itself as its own predecessor. It can't be part of its own \ + Expression sum, so it will get its own new counter: {}. (Note, the compiled \ + code will generate an infinite loop.)", + NESTED_INDENT.repeat(debug_indent_level), + bcb, + self.format_counter(&counter_kind), + ); + } + return self.basic_coverage_blocks[bcb].set_counter(counter_kind); + } + + // A BCB with multiple incoming edges can compute its count by `Expression`, summing up the + // counters and/or expressions of its incoming edges. This will recursively get or create + // counters for those incoming edges first, then call `make_expression()` to sum them up, + // with additional intermediate expressions as needed. + let mut predecessors = self.bcb_predecessors(bcb).to_owned().into_iter(); + debug!( + "{}{:?} has multiple incoming edges and will get an expression that sums them up...", + NESTED_INDENT.repeat(debug_indent_level), + bcb, + ); + let first_edge_counter_operand = self.recursive_get_or_make_edge_counter_operand( + predecessors.next().unwrap(), + bcb, + collect_intermediate_expressions, + debug_indent_level + 1, + )?; + let mut some_sumup_edge_counter_operand = None; + for predecessor in predecessors { + let edge_counter_operand = self.recursive_get_or_make_edge_counter_operand( + predecessor, + bcb, + collect_intermediate_expressions, + debug_indent_level + 1, + )?; + if let Some(sumup_edge_counter_operand) = + some_sumup_edge_counter_operand.replace(edge_counter_operand) + { + let intermediate_expression = self.coverage_counters.make_expression( + sumup_edge_counter_operand, + Op::Add, + edge_counter_operand, + || None, + ); + debug!( + "{}new intermediate expression: {}", + NESTED_INDENT.repeat(debug_indent_level), + self.format_counter(&intermediate_expression) + ); + let intermediate_expression_operand = intermediate_expression.as_operand_id(); + collect_intermediate_expressions.push(intermediate_expression); + some_sumup_edge_counter_operand.replace(intermediate_expression_operand); + } + } + let counter_kind = self.coverage_counters.make_expression( + first_edge_counter_operand, + Op::Add, + some_sumup_edge_counter_operand.unwrap(), + || Some(format!("{:?}", bcb)), + ); + debug!( + "{}{:?} gets a new counter (sum of predecessor counters): {}", + NESTED_INDENT.repeat(debug_indent_level), + bcb, + self.format_counter(&counter_kind) + ); + self.basic_coverage_blocks[bcb].set_counter(counter_kind) + } + + fn get_or_make_edge_counter_operand( + &mut self, + from_bcb: BasicCoverageBlock, + to_bcb: BasicCoverageBlock, + collect_intermediate_expressions: &mut Vec<CoverageKind>, + ) -> Result<ExpressionOperandId, Error> { + self.recursive_get_or_make_edge_counter_operand( + from_bcb, + to_bcb, + collect_intermediate_expressions, + 1, + ) + } + + fn recursive_get_or_make_edge_counter_operand( + &mut self, + from_bcb: BasicCoverageBlock, + to_bcb: BasicCoverageBlock, + collect_intermediate_expressions: &mut Vec<CoverageKind>, + debug_indent_level: usize, + ) -> Result<ExpressionOperandId, Error> { + // If the source BCB has only one successor (assumed to be the given target), an edge + // counter is unnecessary. Just get or make a counter for the source BCB. + let successors = self.bcb_successors(from_bcb).iter(); + if successors.len() == 1 { + return self.recursive_get_or_make_counter_operand( + from_bcb, + collect_intermediate_expressions, + debug_indent_level + 1, + ); + } + + // If the edge already has a counter, return it. + if let Some(counter_kind) = self.basic_coverage_blocks[to_bcb].edge_counter_from(from_bcb) { + debug!( + "{}Edge {:?}->{:?} already has a counter: {}", + NESTED_INDENT.repeat(debug_indent_level), + from_bcb, + to_bcb, + self.format_counter(counter_kind) + ); + return Ok(counter_kind.as_operand_id()); + } + + // Make a new counter to count this edge. + let counter_kind = + self.coverage_counters.make_counter(|| Some(format!("{:?}->{:?}", from_bcb, to_bcb))); + debug!( + "{}Edge {:?}->{:?} gets a new counter: {}", + NESTED_INDENT.repeat(debug_indent_level), + from_bcb, + to_bcb, + self.format_counter(&counter_kind) + ); + self.basic_coverage_blocks[to_bcb].set_edge_counter_from(from_bcb, counter_kind) + } + + /// Select a branch for the expression, either the recommended `reloop_branch`, or if none was + /// found, select any branch. + fn choose_preferred_expression_branch( + &self, + traversal: &TraverseCoverageGraphWithLoops, + branches: &[BcbBranch], + ) -> BcbBranch { + let branch_needs_a_counter = + |branch: &BcbBranch| branch.counter(&self.basic_coverage_blocks).is_none(); + + let some_reloop_branch = self.find_some_reloop_branch(traversal, &branches); + if let Some(reloop_branch_without_counter) = + some_reloop_branch.filter(branch_needs_a_counter) + { + debug!( + "Selecting reloop_branch={:?} that still needs a counter, to get the \ + `Expression`", + reloop_branch_without_counter + ); + reloop_branch_without_counter + } else { + let &branch_without_counter = branches + .iter() + .find(|&&branch| branch.counter(&self.basic_coverage_blocks).is_none()) + .expect( + "needs_branch_counters was `true` so there should be at least one \ + branch", + ); + debug!( + "Selecting any branch={:?} that still needs a counter, to get the \ + `Expression` because there was no `reloop_branch`, or it already had a \ + counter", + branch_without_counter + ); + branch_without_counter + } + } + + /// At most, one of the branches (or its edge, from the branching_bcb, if the branch has + /// multiple incoming edges) can have a counter computed by expression. + /// + /// If at least one of the branches leads outside of a loop (`found_loop_exit` is + /// true), and at least one other branch does not exit the loop (the first of which + /// is captured in `some_reloop_branch`), it's likely any reloop branch will be + /// executed far more often than loop exit branch, making the reloop branch a better + /// candidate for an expression. + fn find_some_reloop_branch( + &self, + traversal: &TraverseCoverageGraphWithLoops, + branches: &[BcbBranch], + ) -> Option<BcbBranch> { + let branch_needs_a_counter = + |branch: &BcbBranch| branch.counter(&self.basic_coverage_blocks).is_none(); + + let mut some_reloop_branch: Option<BcbBranch> = None; + for context in traversal.context_stack.iter().rev() { + if let Some((backedge_from_bcbs, _)) = &context.loop_backedges { + let mut found_loop_exit = false; + for &branch in branches.iter() { + if backedge_from_bcbs.iter().any(|&backedge_from_bcb| { + self.bcb_is_dominated_by(backedge_from_bcb, branch.target_bcb) + }) { + if let Some(reloop_branch) = some_reloop_branch { + if reloop_branch.counter(&self.basic_coverage_blocks).is_none() { + // we already found a candidate reloop_branch that still + // needs a counter + continue; + } + } + // The path from branch leads back to the top of the loop. Set this + // branch as the `reloop_branch`. If this branch already has a + // counter, and we find another reloop branch that doesn't have a + // counter yet, that branch will be selected as the `reloop_branch` + // instead. + some_reloop_branch = Some(branch); + } else { + // The path from branch leads outside this loop + found_loop_exit = true; + } + if found_loop_exit + && some_reloop_branch.filter(branch_needs_a_counter).is_some() + { + // Found both a branch that exits the loop and a branch that returns + // to the top of the loop (`reloop_branch`), and the `reloop_branch` + // doesn't already have a counter. + break; + } + } + if !found_loop_exit { + debug!( + "No branches exit the loop, so any branch without an existing \ + counter can have the `Expression`." + ); + break; + } + if some_reloop_branch.is_some() { + debug!( + "Found a branch that exits the loop and a branch the loops back to \ + the top of the loop (`reloop_branch`). The `reloop_branch` will \ + get the `Expression`, as long as it still needs a counter." + ); + break; + } + // else all branches exited this loop context, so run the same checks with + // the outer loop(s) + } + } + some_reloop_branch + } + + #[inline] + fn bcb_predecessors(&self, bcb: BasicCoverageBlock) -> &[BasicCoverageBlock] { + &self.basic_coverage_blocks.predecessors[bcb] + } + + #[inline] + fn bcb_successors(&self, bcb: BasicCoverageBlock) -> &[BasicCoverageBlock] { + &self.basic_coverage_blocks.successors[bcb] + } + + #[inline] + fn bcb_branches(&self, from_bcb: BasicCoverageBlock) -> Vec<BcbBranch> { + self.bcb_successors(from_bcb) + .iter() + .map(|&to_bcb| BcbBranch::from_to(from_bcb, to_bcb, &self.basic_coverage_blocks)) + .collect::<Vec<_>>() + } + + fn bcb_needs_branch_counters(&self, bcb: BasicCoverageBlock) -> bool { + let branch_needs_a_counter = + |branch: &BcbBranch| branch.counter(&self.basic_coverage_blocks).is_none(); + let branches = self.bcb_branches(bcb); + branches.len() > 1 && branches.iter().any(branch_needs_a_counter) + } + + /// Returns true if the BasicCoverageBlock has zero or one incoming edge. (If zero, it should be + /// the entry point for the function.) + #[inline] + fn bcb_has_one_path_to_target(&self, bcb: BasicCoverageBlock) -> bool { + self.bcb_predecessors(bcb).len() <= 1 + } + + #[inline] + fn bcb_is_dominated_by(&self, node: BasicCoverageBlock, dom: BasicCoverageBlock) -> bool { + self.basic_coverage_blocks.is_dominated_by(node, dom) + } + + #[inline] + fn format_counter(&self, counter_kind: &CoverageKind) -> String { + self.coverage_counters.debug_counters.format_counter(counter_kind) + } +} diff --git a/compiler/rustc_mir_transform/src/coverage/debug.rs b/compiler/rustc_mir_transform/src/coverage/debug.rs new file mode 100644 index 000000000..0f8679b0b --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/debug.rs @@ -0,0 +1,831 @@ +//! The `InstrumentCoverage` MIR pass implementation includes debugging tools and options +//! to help developers understand and/or improve the analysis and instrumentation of a MIR. +//! +//! To enable coverage, include the rustc command line option: +//! +//! * `-C instrument-coverage` +//! +//! MIR Dump Files, with additional `CoverageGraph` graphviz and `CoverageSpan` spanview +//! ------------------------------------------------------------------------------------ +//! +//! Additional debugging options include: +//! +//! * `-Z dump-mir=InstrumentCoverage` - Generate `.mir` files showing the state of the MIR, +//! before and after the `InstrumentCoverage` pass, for each compiled function. +//! +//! * `-Z dump-mir-graphviz` - If `-Z dump-mir` is also enabled for the current MIR node path, +//! each MIR dump is accompanied by a before-and-after graphical view of the MIR, in Graphviz +//! `.dot` file format (which can be visually rendered as a graph using any of a number of free +//! Graphviz viewers and IDE extensions). +//! +//! For the `InstrumentCoverage` pass, this option also enables generation of an additional +//! Graphviz `.dot` file for each function, rendering the `CoverageGraph`: the control flow +//! graph (CFG) of `BasicCoverageBlocks` (BCBs), as nodes, internally labeled to show the +//! `CoverageSpan`-based MIR elements each BCB represents (`BasicBlock`s, `Statement`s and +//! `Terminator`s), assigned coverage counters and/or expressions, and edge counters, as needed. +//! +//! (Note the additional option, `-Z graphviz-dark-mode`, can be added, to change the rendered +//! output from its default black-on-white background to a dark color theme, if desired.) +//! +//! * `-Z dump-mir-spanview` - If `-Z dump-mir` is also enabled for the current MIR node path, +//! each MIR dump is accompanied by a before-and-after `.html` document showing the function's +//! original source code, highlighted by it's MIR spans, at the `statement`-level (by default), +//! `terminator` only, or encompassing span for the `Terminator` plus all `Statement`s, in each +//! `block` (`BasicBlock`). +//! +//! For the `InstrumentCoverage` pass, this option also enables generation of an additional +//! spanview `.html` file for each function, showing the aggregated `CoverageSpan`s that will +//! require counters (or counter expressions) for accurate coverage analysis. +//! +//! Debug Logging +//! ------------- +//! +//! The `InstrumentCoverage` pass includes debug logging messages at various phases and decision +//! points, which can be enabled via environment variable: +//! +//! ```shell +//! RUSTC_LOG=rustc_mir_transform::transform::coverage=debug +//! ``` +//! +//! Other module paths with coverage-related debug logs may also be of interest, particularly for +//! debugging the coverage map data, injected as global variables in the LLVM IR (during rustc's +//! code generation pass). For example: +//! +//! ```shell +//! RUSTC_LOG=rustc_mir_transform::transform::coverage,rustc_codegen_ssa::coverageinfo,rustc_codegen_llvm::coverageinfo=debug +//! ``` +//! +//! Coverage Debug Options +//! --------------------------------- +//! +//! Additional debugging options can be enabled using the environment variable: +//! +//! ```shell +//! RUSTC_COVERAGE_DEBUG_OPTIONS=<options> +//! ``` +//! +//! These options are comma-separated, and specified in the format `option-name=value`. For example: +//! +//! ```shell +//! $ RUSTC_COVERAGE_DEBUG_OPTIONS=counter-format=id+operation,allow-unused-expressions=yes cargo build +//! ``` +//! +//! Coverage debug options include: +//! +//! * `allow-unused-expressions=yes` or `no` (default: `no`) +//! +//! The `InstrumentCoverage` algorithms _should_ only create and assign expressions to a +//! `BasicCoverageBlock`, or an incoming edge, if that expression is either (a) required to +//! count a `CoverageSpan`, or (b) a dependency of some other required counter expression. +//! +//! If an expression is generated that does not map to a `CoverageSpan` or dependency, this +//! probably indicates there was a bug in the algorithm that creates and assigns counters +//! and expressions. +//! +//! When this kind of bug is encountered, the rustc compiler will panic by default. Setting: +//! `allow-unused-expressions=yes` will log a warning message instead of panicking (effectively +//! ignoring the unused expressions), which may be helpful when debugging the root cause of +//! the problem. +//! +//! * `counter-format=<choices>`, where `<choices>` can be any plus-separated combination of `id`, +//! `block`, and/or `operation` (default: `block+operation`) +//! +//! This option effects both the `CoverageGraph` (graphviz `.dot` files) and debug logging, when +//! generating labels for counters and expressions. +//! +//! Depending on the values and combinations, counters can be labeled by: +//! +//! * `id` - counter or expression ID (ascending counter IDs, starting at 1, or descending +//! expression IDs, starting at `u32:MAX`) +//! * `block` - the `BasicCoverageBlock` label (for example, `bcb0`) or edge label (for +//! example `bcb0->bcb1`), for counters or expressions assigned to count a +//! `BasicCoverageBlock` or edge. Intermediate expressions (not directly associated with +//! a BCB or edge) will be labeled by their expression ID, unless `operation` is also +//! specified. +//! * `operation` - applied to expressions only, labels include the left-hand-side counter +//! or expression label (lhs operand), the operator (`+` or `-`), and the right-hand-side +//! counter or expression (rhs operand). Expression operand labels are generated +//! recursively, generating labels with nested operations, enclosed in parentheses +//! (for example: `bcb2 + (bcb0 - bcb1)`). + +use super::graph::{BasicCoverageBlock, BasicCoverageBlockData, CoverageGraph}; +use super::spans::CoverageSpan; + +use itertools::Itertools; +use rustc_middle::mir::create_dump_file; +use rustc_middle::mir::generic_graphviz::GraphvizWriter; +use rustc_middle::mir::spanview::{self, SpanViewable}; + +use rustc_data_structures::fx::FxHashMap; +use rustc_middle::mir::coverage::*; +use rustc_middle::mir::{self, BasicBlock, TerminatorKind}; +use rustc_middle::ty::TyCtxt; +use rustc_span::Span; + +use std::iter; +use std::ops::Deref; +use std::sync::OnceLock; + +pub const NESTED_INDENT: &str = " "; + +const RUSTC_COVERAGE_DEBUG_OPTIONS: &str = "RUSTC_COVERAGE_DEBUG_OPTIONS"; + +pub(super) fn debug_options<'a>() -> &'a DebugOptions { + static DEBUG_OPTIONS: OnceLock<DebugOptions> = OnceLock::new(); + + &DEBUG_OPTIONS.get_or_init(DebugOptions::from_env) +} + +/// Parses and maintains coverage-specific debug options captured from the environment variable +/// "RUSTC_COVERAGE_DEBUG_OPTIONS", if set. +#[derive(Debug, Clone)] +pub(super) struct DebugOptions { + pub allow_unused_expressions: bool, + counter_format: ExpressionFormat, +} + +impl DebugOptions { + fn from_env() -> Self { + let mut allow_unused_expressions = true; + let mut counter_format = ExpressionFormat::default(); + + if let Ok(env_debug_options) = std::env::var(RUSTC_COVERAGE_DEBUG_OPTIONS) { + for setting_str in env_debug_options.replace(' ', "").replace('-', "_").split(',') { + let (option, value) = match setting_str.split_once('=') { + None => (setting_str, None), + Some((k, v)) => (k, Some(v)), + }; + match option { + "allow_unused_expressions" => { + allow_unused_expressions = bool_option_val(option, value); + debug!( + "{} env option `allow_unused_expressions` is set to {}", + RUSTC_COVERAGE_DEBUG_OPTIONS, allow_unused_expressions + ); + } + "counter_format" => { + match value { + None => { + bug!( + "`{}` option in environment variable {} requires one or more \ + plus-separated choices (a non-empty subset of \ + `id+block+operation`)", + option, + RUSTC_COVERAGE_DEBUG_OPTIONS + ); + } + Some(val) => { + counter_format = counter_format_option_val(val); + debug!( + "{} env option `counter_format` is set to {:?}", + RUSTC_COVERAGE_DEBUG_OPTIONS, counter_format + ); + } + }; + } + _ => bug!( + "Unsupported setting `{}` in environment variable {}", + option, + RUSTC_COVERAGE_DEBUG_OPTIONS + ), + }; + } + } + + Self { allow_unused_expressions, counter_format } + } +} + +fn bool_option_val(option: &str, some_strval: Option<&str>) -> bool { + if let Some(val) = some_strval { + if vec!["yes", "y", "on", "true"].contains(&val) { + true + } else if vec!["no", "n", "off", "false"].contains(&val) { + false + } else { + bug!( + "Unsupported value `{}` for option `{}` in environment variable {}", + option, + val, + RUSTC_COVERAGE_DEBUG_OPTIONS + ) + } + } else { + true + } +} + +fn counter_format_option_val(strval: &str) -> ExpressionFormat { + let mut counter_format = ExpressionFormat { id: false, block: false, operation: false }; + let components = strval.splitn(3, '+'); + for component in components { + match component { + "id" => counter_format.id = true, + "block" => counter_format.block = true, + "operation" => counter_format.operation = true, + _ => bug!( + "Unsupported counter_format choice `{}` in environment variable {}", + component, + RUSTC_COVERAGE_DEBUG_OPTIONS + ), + } + } + counter_format +} + +#[derive(Debug, Clone)] +struct ExpressionFormat { + id: bool, + block: bool, + operation: bool, +} + +impl Default for ExpressionFormat { + fn default() -> Self { + Self { id: false, block: true, operation: true } + } +} + +/// If enabled, this struct maintains a map from `CoverageKind` IDs (as `ExpressionOperandId`) to +/// the `CoverageKind` data and optional label (normally, the counter's associated +/// `BasicCoverageBlock` format string, if any). +/// +/// Use `format_counter` to convert one of these `CoverageKind` counters to a debug output string, +/// as directed by the `DebugOptions`. This allows the format of counter labels in logs and dump +/// files (including the `CoverageGraph` graphviz file) to be changed at runtime, via environment +/// variable. +/// +/// `DebugCounters` supports a recursive rendering of `Expression` counters, so they can be +/// presented as nested expressions such as `(bcb3 - (bcb0 + bcb1))`. +pub(super) struct DebugCounters { + some_counters: Option<FxHashMap<ExpressionOperandId, DebugCounter>>, +} + +impl DebugCounters { + pub fn new() -> Self { + Self { some_counters: None } + } + + pub fn enable(&mut self) { + debug_assert!(!self.is_enabled()); + self.some_counters.replace(FxHashMap::default()); + } + + pub fn is_enabled(&self) -> bool { + self.some_counters.is_some() + } + + pub fn add_counter(&mut self, counter_kind: &CoverageKind, some_block_label: Option<String>) { + if let Some(counters) = &mut self.some_counters { + let id: ExpressionOperandId = match *counter_kind { + CoverageKind::Counter { id, .. } => id.into(), + CoverageKind::Expression { id, .. } => id.into(), + _ => bug!( + "the given `CoverageKind` is not an counter or expression: {:?}", + counter_kind + ), + }; + counters + .try_insert(id, DebugCounter::new(counter_kind.clone(), some_block_label)) + .expect("attempt to add the same counter_kind to DebugCounters more than once"); + } + } + + pub fn some_block_label(&self, operand: ExpressionOperandId) -> Option<&String> { + self.some_counters.as_ref().map_or(None, |counters| { + counters + .get(&operand) + .map_or(None, |debug_counter| debug_counter.some_block_label.as_ref()) + }) + } + + pub fn format_counter(&self, counter_kind: &CoverageKind) -> String { + match *counter_kind { + CoverageKind::Counter { .. } => { + format!("Counter({})", self.format_counter_kind(counter_kind)) + } + CoverageKind::Expression { .. } => { + format!("Expression({})", self.format_counter_kind(counter_kind)) + } + CoverageKind::Unreachable { .. } => "Unreachable".to_owned(), + } + } + + fn format_counter_kind(&self, counter_kind: &CoverageKind) -> String { + let counter_format = &debug_options().counter_format; + if let CoverageKind::Expression { id, lhs, op, rhs } = *counter_kind { + if counter_format.operation { + return format!( + "{}{} {} {}", + if counter_format.id || self.some_counters.is_none() { + format!("#{} = ", id.index()) + } else { + String::new() + }, + self.format_operand(lhs), + if op == Op::Add { "+" } else { "-" }, + self.format_operand(rhs), + ); + } + } + + let id: ExpressionOperandId = match *counter_kind { + CoverageKind::Counter { id, .. } => id.into(), + CoverageKind::Expression { id, .. } => id.into(), + _ => { + bug!("the given `CoverageKind` is not an counter or expression: {:?}", counter_kind) + } + }; + if self.some_counters.is_some() && (counter_format.block || !counter_format.id) { + let counters = self.some_counters.as_ref().unwrap(); + if let Some(DebugCounter { some_block_label: Some(block_label), .. }) = + counters.get(&id) + { + return if counter_format.id { + format!("{}#{}", block_label, id.index()) + } else { + block_label.to_string() + }; + } + } + format!("#{}", id.index()) + } + + fn format_operand(&self, operand: ExpressionOperandId) -> String { + if operand.index() == 0 { + return String::from("0"); + } + if let Some(counters) = &self.some_counters { + if let Some(DebugCounter { counter_kind, some_block_label }) = counters.get(&operand) { + if let CoverageKind::Expression { .. } = counter_kind { + if let Some(label) = some_block_label && debug_options().counter_format.block { + return format!( + "{}:({})", + label, + self.format_counter_kind(counter_kind) + ); + } + return format!("({})", self.format_counter_kind(counter_kind)); + } + return self.format_counter_kind(counter_kind); + } + } + format!("#{}", operand.index()) + } +} + +/// A non-public support class to `DebugCounters`. +#[derive(Debug)] +struct DebugCounter { + counter_kind: CoverageKind, + some_block_label: Option<String>, +} + +impl DebugCounter { + fn new(counter_kind: CoverageKind, some_block_label: Option<String>) -> Self { + Self { counter_kind, some_block_label } + } +} + +/// If enabled, this data structure captures additional debugging information used when generating +/// a Graphviz (.dot file) representation of the `CoverageGraph`, for debugging purposes. +pub(super) struct GraphvizData { + some_bcb_to_coverage_spans_with_counters: + Option<FxHashMap<BasicCoverageBlock, Vec<(CoverageSpan, CoverageKind)>>>, + some_bcb_to_dependency_counters: Option<FxHashMap<BasicCoverageBlock, Vec<CoverageKind>>>, + some_edge_to_counter: Option<FxHashMap<(BasicCoverageBlock, BasicBlock), CoverageKind>>, +} + +impl GraphvizData { + pub fn new() -> Self { + Self { + some_bcb_to_coverage_spans_with_counters: None, + some_bcb_to_dependency_counters: None, + some_edge_to_counter: None, + } + } + + pub fn enable(&mut self) { + debug_assert!(!self.is_enabled()); + self.some_bcb_to_coverage_spans_with_counters = Some(FxHashMap::default()); + self.some_bcb_to_dependency_counters = Some(FxHashMap::default()); + self.some_edge_to_counter = Some(FxHashMap::default()); + } + + pub fn is_enabled(&self) -> bool { + self.some_bcb_to_coverage_spans_with_counters.is_some() + } + + pub fn add_bcb_coverage_span_with_counter( + &mut self, + bcb: BasicCoverageBlock, + coverage_span: &CoverageSpan, + counter_kind: &CoverageKind, + ) { + if let Some(bcb_to_coverage_spans_with_counters) = + self.some_bcb_to_coverage_spans_with_counters.as_mut() + { + bcb_to_coverage_spans_with_counters + .entry(bcb) + .or_insert_with(Vec::new) + .push((coverage_span.clone(), counter_kind.clone())); + } + } + + pub fn get_bcb_coverage_spans_with_counters( + &self, + bcb: BasicCoverageBlock, + ) -> Option<&[(CoverageSpan, CoverageKind)]> { + if let Some(bcb_to_coverage_spans_with_counters) = + self.some_bcb_to_coverage_spans_with_counters.as_ref() + { + bcb_to_coverage_spans_with_counters.get(&bcb).map(Deref::deref) + } else { + None + } + } + + pub fn add_bcb_dependency_counter( + &mut self, + bcb: BasicCoverageBlock, + counter_kind: &CoverageKind, + ) { + if let Some(bcb_to_dependency_counters) = self.some_bcb_to_dependency_counters.as_mut() { + bcb_to_dependency_counters + .entry(bcb) + .or_insert_with(Vec::new) + .push(counter_kind.clone()); + } + } + + pub fn get_bcb_dependency_counters(&self, bcb: BasicCoverageBlock) -> Option<&[CoverageKind]> { + if let Some(bcb_to_dependency_counters) = self.some_bcb_to_dependency_counters.as_ref() { + bcb_to_dependency_counters.get(&bcb).map(Deref::deref) + } else { + None + } + } + + pub fn set_edge_counter( + &mut self, + from_bcb: BasicCoverageBlock, + to_bb: BasicBlock, + counter_kind: &CoverageKind, + ) { + if let Some(edge_to_counter) = self.some_edge_to_counter.as_mut() { + edge_to_counter + .try_insert((from_bcb, to_bb), counter_kind.clone()) + .expect("invalid attempt to insert more than one edge counter for the same edge"); + } + } + + pub fn get_edge_counter( + &self, + from_bcb: BasicCoverageBlock, + to_bb: BasicBlock, + ) -> Option<&CoverageKind> { + if let Some(edge_to_counter) = self.some_edge_to_counter.as_ref() { + edge_to_counter.get(&(from_bcb, to_bb)) + } else { + None + } + } +} + +/// If enabled, this struct captures additional data used to track whether expressions were used, +/// directly or indirectly, to compute the coverage counts for all `CoverageSpan`s, and any that are +/// _not_ used are retained in the `unused_expressions` Vec, to be included in debug output (logs +/// and/or a `CoverageGraph` graphviz output). +pub(super) struct UsedExpressions { + some_used_expression_operands: + Option<FxHashMap<ExpressionOperandId, Vec<InjectedExpressionId>>>, + some_unused_expressions: + Option<Vec<(CoverageKind, Option<BasicCoverageBlock>, BasicCoverageBlock)>>, +} + +impl UsedExpressions { + pub fn new() -> Self { + Self { some_used_expression_operands: None, some_unused_expressions: None } + } + + pub fn enable(&mut self) { + debug_assert!(!self.is_enabled()); + self.some_used_expression_operands = Some(FxHashMap::default()); + self.some_unused_expressions = Some(Vec::new()); + } + + pub fn is_enabled(&self) -> bool { + self.some_used_expression_operands.is_some() + } + + pub fn add_expression_operands(&mut self, expression: &CoverageKind) { + if let Some(used_expression_operands) = self.some_used_expression_operands.as_mut() { + if let CoverageKind::Expression { id, lhs, rhs, .. } = *expression { + used_expression_operands.entry(lhs).or_insert_with(Vec::new).push(id); + used_expression_operands.entry(rhs).or_insert_with(Vec::new).push(id); + } + } + } + + pub fn expression_is_used(&self, expression: &CoverageKind) -> bool { + if let Some(used_expression_operands) = self.some_used_expression_operands.as_ref() { + used_expression_operands.contains_key(&expression.as_operand_id()) + } else { + false + } + } + + pub fn add_unused_expression_if_not_found( + &mut self, + expression: &CoverageKind, + edge_from_bcb: Option<BasicCoverageBlock>, + target_bcb: BasicCoverageBlock, + ) { + if let Some(used_expression_operands) = self.some_used_expression_operands.as_ref() { + if !used_expression_operands.contains_key(&expression.as_operand_id()) { + self.some_unused_expressions.as_mut().unwrap().push(( + expression.clone(), + edge_from_bcb, + target_bcb, + )); + } + } + } + + /// Return the list of unused counters (if any) as a tuple with the counter (`CoverageKind`), + /// optional `from_bcb` (if it was an edge counter), and `target_bcb`. + pub fn get_unused_expressions( + &self, + ) -> Vec<(CoverageKind, Option<BasicCoverageBlock>, BasicCoverageBlock)> { + if let Some(unused_expressions) = self.some_unused_expressions.as_ref() { + unused_expressions.clone() + } else { + Vec::new() + } + } + + /// If enabled, validate that every BCB or edge counter not directly associated with a coverage + /// span is at least indirectly associated (it is a dependency of a BCB counter that _is_ + /// associated with a coverage span). + pub fn validate( + &mut self, + bcb_counters_without_direct_coverage_spans: &[( + Option<BasicCoverageBlock>, + BasicCoverageBlock, + CoverageKind, + )], + ) { + if self.is_enabled() { + let mut not_validated = bcb_counters_without_direct_coverage_spans + .iter() + .map(|(_, _, counter_kind)| counter_kind) + .collect::<Vec<_>>(); + let mut validating_count = 0; + while not_validated.len() != validating_count { + let to_validate = not_validated.split_off(0); + validating_count = to_validate.len(); + for counter_kind in to_validate { + if self.expression_is_used(counter_kind) { + self.add_expression_operands(counter_kind); + } else { + not_validated.push(counter_kind); + } + } + } + } + } + + pub fn alert_on_unused_expressions(&self, debug_counters: &DebugCounters) { + if let Some(unused_expressions) = self.some_unused_expressions.as_ref() { + for (counter_kind, edge_from_bcb, target_bcb) in unused_expressions { + let unused_counter_message = if let Some(from_bcb) = edge_from_bcb.as_ref() { + format!( + "non-coverage edge counter found without a dependent expression, in \ + {:?}->{:?}; counter={}", + from_bcb, + target_bcb, + debug_counters.format_counter(&counter_kind), + ) + } else { + format!( + "non-coverage counter found without a dependent expression, in {:?}; \ + counter={}", + target_bcb, + debug_counters.format_counter(&counter_kind), + ) + }; + + if debug_options().allow_unused_expressions { + debug!("WARNING: {}", unused_counter_message); + } else { + bug!("{}", unused_counter_message); + } + } + } + } +} + +/// Generates the MIR pass `CoverageSpan`-specific spanview dump file. +pub(super) fn dump_coverage_spanview<'tcx>( + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + basic_coverage_blocks: &CoverageGraph, + pass_name: &str, + body_span: Span, + coverage_spans: &[CoverageSpan], +) { + let mir_source = mir_body.source; + let def_id = mir_source.def_id(); + + let span_viewables = span_viewables(tcx, mir_body, basic_coverage_blocks, &coverage_spans); + let mut file = create_dump_file(tcx, "html", None, pass_name, &0, mir_source) + .expect("Unexpected error creating MIR spanview HTML file"); + let crate_name = tcx.crate_name(def_id.krate); + let item_name = tcx.def_path(def_id).to_filename_friendly_no_crate(); + let title = format!("{}.{} - Coverage Spans", crate_name, item_name); + spanview::write_document(tcx, body_span, span_viewables, &title, &mut file) + .expect("Unexpected IO error dumping coverage spans as HTML"); +} + +/// Converts the computed `BasicCoverageBlockData`s into `SpanViewable`s. +fn span_viewables<'tcx>( + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + basic_coverage_blocks: &CoverageGraph, + coverage_spans: &[CoverageSpan], +) -> Vec<SpanViewable> { + let mut span_viewables = Vec::new(); + for coverage_span in coverage_spans { + let tooltip = coverage_span.format_coverage_statements(tcx, mir_body); + let CoverageSpan { span, bcb, .. } = coverage_span; + let bcb_data = &basic_coverage_blocks[*bcb]; + let id = bcb_data.id(); + let leader_bb = bcb_data.leader_bb(); + span_viewables.push(SpanViewable { bb: leader_bb, span: *span, id, tooltip }); + } + span_viewables +} + +/// Generates the MIR pass coverage-specific graphviz dump file. +pub(super) fn dump_coverage_graphviz<'tcx>( + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + pass_name: &str, + basic_coverage_blocks: &CoverageGraph, + debug_counters: &DebugCounters, + graphviz_data: &GraphvizData, + intermediate_expressions: &[CoverageKind], + debug_used_expressions: &UsedExpressions, +) { + let mir_source = mir_body.source; + let def_id = mir_source.def_id(); + let node_content = |bcb| { + bcb_to_string_sections( + tcx, + mir_body, + debug_counters, + &basic_coverage_blocks[bcb], + graphviz_data.get_bcb_coverage_spans_with_counters(bcb), + graphviz_data.get_bcb_dependency_counters(bcb), + // intermediate_expressions are injected into the mir::START_BLOCK, so + // include them in the first BCB. + if bcb.index() == 0 { Some(&intermediate_expressions) } else { None }, + ) + }; + let edge_labels = |from_bcb| { + let from_bcb_data = &basic_coverage_blocks[from_bcb]; + let from_terminator = from_bcb_data.terminator(mir_body); + let mut edge_labels = from_terminator.kind.fmt_successor_labels(); + edge_labels.retain(|label| label != "unreachable"); + let edge_counters = from_terminator + .successors() + .map(|successor_bb| graphviz_data.get_edge_counter(from_bcb, successor_bb)); + iter::zip(&edge_labels, edge_counters) + .map(|(label, some_counter)| { + if let Some(counter) = some_counter { + format!("{}\n{}", label, debug_counters.format_counter(counter)) + } else { + label.to_string() + } + }) + .collect::<Vec<_>>() + }; + let graphviz_name = format!("Cov_{}_{}", def_id.krate.index(), def_id.index.index()); + let mut graphviz_writer = + GraphvizWriter::new(basic_coverage_blocks, &graphviz_name, node_content, edge_labels); + let unused_expressions = debug_used_expressions.get_unused_expressions(); + if unused_expressions.len() > 0 { + graphviz_writer.set_graph_label(&format!( + "Unused expressions:\n {}", + unused_expressions + .as_slice() + .iter() + .map(|(counter_kind, edge_from_bcb, target_bcb)| { + if let Some(from_bcb) = edge_from_bcb.as_ref() { + format!( + "{:?}->{:?}: {}", + from_bcb, + target_bcb, + debug_counters.format_counter(&counter_kind), + ) + } else { + format!( + "{:?}: {}", + target_bcb, + debug_counters.format_counter(&counter_kind), + ) + } + }) + .join("\n ") + )); + } + let mut file = create_dump_file(tcx, "dot", None, pass_name, &0, mir_source) + .expect("Unexpected error creating BasicCoverageBlock graphviz DOT file"); + graphviz_writer + .write_graphviz(tcx, &mut file) + .expect("Unexpected error writing BasicCoverageBlock graphviz DOT file"); +} + +fn bcb_to_string_sections<'tcx>( + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + debug_counters: &DebugCounters, + bcb_data: &BasicCoverageBlockData, + some_coverage_spans_with_counters: Option<&[(CoverageSpan, CoverageKind)]>, + some_dependency_counters: Option<&[CoverageKind]>, + some_intermediate_expressions: Option<&[CoverageKind]>, +) -> Vec<String> { + let len = bcb_data.basic_blocks.len(); + let mut sections = Vec::new(); + if let Some(collect_intermediate_expressions) = some_intermediate_expressions { + sections.push( + collect_intermediate_expressions + .iter() + .map(|expression| { + format!("Intermediate {}", debug_counters.format_counter(expression)) + }) + .join("\n"), + ); + } + if let Some(coverage_spans_with_counters) = some_coverage_spans_with_counters { + sections.push( + coverage_spans_with_counters + .iter() + .map(|(covspan, counter)| { + format!( + "{} at {}", + debug_counters.format_counter(counter), + covspan.format(tcx, mir_body) + ) + }) + .join("\n"), + ); + } + if let Some(dependency_counters) = some_dependency_counters { + sections.push(format!( + "Non-coverage counters:\n {}", + dependency_counters + .iter() + .map(|counter| debug_counters.format_counter(counter)) + .join(" \n"), + )); + } + if let Some(counter_kind) = &bcb_data.counter_kind { + sections.push(format!("{:?}", counter_kind)); + } + let non_term_blocks = bcb_data.basic_blocks[0..len - 1] + .iter() + .map(|&bb| format!("{:?}: {}", bb, term_type(&mir_body[bb].terminator().kind))) + .collect::<Vec<_>>(); + if non_term_blocks.len() > 0 { + sections.push(non_term_blocks.join("\n")); + } + sections.push(format!( + "{:?}: {}", + bcb_data.basic_blocks.last().unwrap(), + term_type(&bcb_data.terminator(mir_body).kind) + )); + sections +} + +/// Returns a simple string representation of a `TerminatorKind` variant, independent of any +/// values it might hold. +pub(super) fn term_type(kind: &TerminatorKind<'_>) -> &'static str { + match kind { + TerminatorKind::Goto { .. } => "Goto", + TerminatorKind::SwitchInt { .. } => "SwitchInt", + TerminatorKind::Resume => "Resume", + TerminatorKind::Abort => "Abort", + TerminatorKind::Return => "Return", + TerminatorKind::Unreachable => "Unreachable", + TerminatorKind::Drop { .. } => "Drop", + TerminatorKind::DropAndReplace { .. } => "DropAndReplace", + TerminatorKind::Call { .. } => "Call", + TerminatorKind::Assert { .. } => "Assert", + TerminatorKind::Yield { .. } => "Yield", + TerminatorKind::GeneratorDrop => "GeneratorDrop", + TerminatorKind::FalseEdge { .. } => "FalseEdge", + TerminatorKind::FalseUnwind { .. } => "FalseUnwind", + TerminatorKind::InlineAsm { .. } => "InlineAsm", + } +} diff --git a/compiler/rustc_mir_transform/src/coverage/graph.rs b/compiler/rustc_mir_transform/src/coverage/graph.rs new file mode 100644 index 000000000..759ea7cd3 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/graph.rs @@ -0,0 +1,753 @@ +use super::Error; + +use itertools::Itertools; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::graph::dominators::{self, Dominators}; +use rustc_data_structures::graph::{self, GraphSuccessors, WithNumNodes, WithStartNode}; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use rustc_middle::mir::coverage::*; +use rustc_middle::mir::{self, BasicBlock, BasicBlockData, Terminator, TerminatorKind}; + +use std::ops::{Index, IndexMut}; + +const ID_SEPARATOR: &str = ","; + +/// A coverage-specific simplification of the MIR control flow graph (CFG). The `CoverageGraph`s +/// nodes are `BasicCoverageBlock`s, which encompass one or more MIR `BasicBlock`s, plus a +/// `CoverageKind` counter (to be added by `CoverageCounters::make_bcb_counters`), and an optional +/// set of additional counters--if needed--to count incoming edges, if there are more than one. +/// (These "edge counters" are eventually converted into new MIR `BasicBlock`s.) +#[derive(Debug)] +pub(super) struct CoverageGraph { + bcbs: IndexVec<BasicCoverageBlock, BasicCoverageBlockData>, + bb_to_bcb: IndexVec<BasicBlock, Option<BasicCoverageBlock>>, + pub successors: IndexVec<BasicCoverageBlock, Vec<BasicCoverageBlock>>, + pub predecessors: IndexVec<BasicCoverageBlock, Vec<BasicCoverageBlock>>, + dominators: Option<Dominators<BasicCoverageBlock>>, +} + +impl CoverageGraph { + pub fn from_mir(mir_body: &mir::Body<'_>) -> Self { + let (bcbs, bb_to_bcb) = Self::compute_basic_coverage_blocks(mir_body); + + // Pre-transform MIR `BasicBlock` successors and predecessors into the BasicCoverageBlock + // equivalents. Note that since the BasicCoverageBlock graph has been fully simplified, the + // each predecessor of a BCB leader_bb should be in a unique BCB. It is possible for a + // `SwitchInt` to have multiple targets to the same destination `BasicBlock`, so + // de-duplication is required. This is done without reordering the successors. + + let bcbs_len = bcbs.len(); + let mut seen = IndexVec::from_elem_n(false, bcbs_len); + let successors = IndexVec::from_fn_n( + |bcb| { + for b in seen.iter_mut() { + *b = false; + } + let bcb_data = &bcbs[bcb]; + let mut bcb_successors = Vec::new(); + for successor in + bcb_filtered_successors(&mir_body, &bcb_data.terminator(mir_body).kind) + .filter_map(|successor_bb| bb_to_bcb[successor_bb]) + { + if !seen[successor] { + seen[successor] = true; + bcb_successors.push(successor); + } + } + bcb_successors + }, + bcbs.len(), + ); + + let mut predecessors = IndexVec::from_elem_n(Vec::new(), bcbs.len()); + for (bcb, bcb_successors) in successors.iter_enumerated() { + for &successor in bcb_successors { + predecessors[successor].push(bcb); + } + } + + let mut basic_coverage_blocks = + Self { bcbs, bb_to_bcb, successors, predecessors, dominators: None }; + let dominators = dominators::dominators(&basic_coverage_blocks); + basic_coverage_blocks.dominators = Some(dominators); + basic_coverage_blocks + } + + fn compute_basic_coverage_blocks( + mir_body: &mir::Body<'_>, + ) -> ( + IndexVec<BasicCoverageBlock, BasicCoverageBlockData>, + IndexVec<BasicBlock, Option<BasicCoverageBlock>>, + ) { + let num_basic_blocks = mir_body.basic_blocks.len(); + let mut bcbs = IndexVec::with_capacity(num_basic_blocks); + let mut bb_to_bcb = IndexVec::from_elem_n(None, num_basic_blocks); + + // Walk the MIR CFG using a Preorder traversal, which starts from `START_BLOCK` and follows + // each block terminator's `successors()`. Coverage spans must map to actual source code, + // so compiler generated blocks and paths can be ignored. To that end, the CFG traversal + // intentionally omits unwind paths. + // FIXME(#78544): MIR InstrumentCoverage: Improve coverage of `#[should_panic]` tests and + // `catch_unwind()` handlers. + let mir_cfg_without_unwind = ShortCircuitPreorder::new(&mir_body, bcb_filtered_successors); + + let mut basic_blocks = Vec::new(); + for (bb, data) in mir_cfg_without_unwind { + if let Some(last) = basic_blocks.last() { + let predecessors = &mir_body.basic_blocks.predecessors()[bb]; + if predecessors.len() > 1 || !predecessors.contains(last) { + // The `bb` has more than one _incoming_ edge, and should start its own + // `BasicCoverageBlockData`. (Note, the `basic_blocks` vector does not yet + // include `bb`; it contains a sequence of one or more sequential basic_blocks + // with no intermediate branches in or out. Save these as a new + // `BasicCoverageBlockData` before starting the new one.) + Self::add_basic_coverage_block( + &mut bcbs, + &mut bb_to_bcb, + basic_blocks.split_off(0), + ); + debug!( + " because {}", + if predecessors.len() > 1 { + "predecessors.len() > 1".to_owned() + } else { + format!("bb {} is not in precessors: {:?}", bb.index(), predecessors) + } + ); + } + } + basic_blocks.push(bb); + + let term = data.terminator(); + + match term.kind { + TerminatorKind::Return { .. } + | TerminatorKind::Abort + | TerminatorKind::Yield { .. } + | TerminatorKind::SwitchInt { .. } => { + // The `bb` has more than one _outgoing_ edge, or exits the function. Save the + // current sequence of `basic_blocks` gathered to this point, as a new + // `BasicCoverageBlockData`. + Self::add_basic_coverage_block( + &mut bcbs, + &mut bb_to_bcb, + basic_blocks.split_off(0), + ); + debug!(" because term.kind = {:?}", term.kind); + // Note that this condition is based on `TerminatorKind`, even though it + // theoretically boils down to `successors().len() != 1`; that is, either zero + // (e.g., `Return`, `Abort`) or multiple successors (e.g., `SwitchInt`), but + // since the BCB CFG ignores things like unwind branches (which exist in the + // `Terminator`s `successors()` list) checking the number of successors won't + // work. + } + + // The following `TerminatorKind`s are either not expected outside an unwind branch, + // or they should not (under normal circumstances) branch. Coverage graphs are + // simplified by assuring coverage results are accurate for program executions that + // don't panic. + // + // Programs that panic and unwind may record slightly inaccurate coverage results + // for a coverage region containing the `Terminator` that began the panic. This + // is as intended. (See Issue #78544 for a possible future option to support + // coverage in test programs that panic.) + TerminatorKind::Goto { .. } + | TerminatorKind::Resume + | TerminatorKind::Unreachable + | TerminatorKind::Drop { .. } + | TerminatorKind::DropAndReplace { .. } + | TerminatorKind::Call { .. } + | TerminatorKind::GeneratorDrop + | TerminatorKind::Assert { .. } + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::FalseUnwind { .. } + | TerminatorKind::InlineAsm { .. } => {} + } + } + + if !basic_blocks.is_empty() { + // process any remaining basic_blocks into a final `BasicCoverageBlockData` + Self::add_basic_coverage_block(&mut bcbs, &mut bb_to_bcb, basic_blocks.split_off(0)); + debug!(" because the end of the MIR CFG was reached while traversing"); + } + + (bcbs, bb_to_bcb) + } + + fn add_basic_coverage_block( + bcbs: &mut IndexVec<BasicCoverageBlock, BasicCoverageBlockData>, + bb_to_bcb: &mut IndexVec<BasicBlock, Option<BasicCoverageBlock>>, + basic_blocks: Vec<BasicBlock>, + ) { + let bcb = BasicCoverageBlock::from_usize(bcbs.len()); + for &bb in basic_blocks.iter() { + bb_to_bcb[bb] = Some(bcb); + } + let bcb_data = BasicCoverageBlockData::from(basic_blocks); + debug!("adding bcb{}: {:?}", bcb.index(), bcb_data); + bcbs.push(bcb_data); + } + + #[inline(always)] + pub fn iter_enumerated( + &self, + ) -> impl Iterator<Item = (BasicCoverageBlock, &BasicCoverageBlockData)> { + self.bcbs.iter_enumerated() + } + + #[inline(always)] + pub fn iter_enumerated_mut( + &mut self, + ) -> impl Iterator<Item = (BasicCoverageBlock, &mut BasicCoverageBlockData)> { + self.bcbs.iter_enumerated_mut() + } + + #[inline(always)] + pub fn bcb_from_bb(&self, bb: BasicBlock) -> Option<BasicCoverageBlock> { + if bb.index() < self.bb_to_bcb.len() { self.bb_to_bcb[bb] } else { None } + } + + #[inline(always)] + pub fn is_dominated_by(&self, node: BasicCoverageBlock, dom: BasicCoverageBlock) -> bool { + self.dominators.as_ref().unwrap().is_dominated_by(node, dom) + } + + #[inline(always)] + pub fn dominators(&self) -> &Dominators<BasicCoverageBlock> { + self.dominators.as_ref().unwrap() + } +} + +impl Index<BasicCoverageBlock> for CoverageGraph { + type Output = BasicCoverageBlockData; + + #[inline] + fn index(&self, index: BasicCoverageBlock) -> &BasicCoverageBlockData { + &self.bcbs[index] + } +} + +impl IndexMut<BasicCoverageBlock> for CoverageGraph { + #[inline] + fn index_mut(&mut self, index: BasicCoverageBlock) -> &mut BasicCoverageBlockData { + &mut self.bcbs[index] + } +} + +impl graph::DirectedGraph for CoverageGraph { + type Node = BasicCoverageBlock; +} + +impl graph::WithNumNodes for CoverageGraph { + #[inline] + fn num_nodes(&self) -> usize { + self.bcbs.len() + } +} + +impl graph::WithStartNode for CoverageGraph { + #[inline] + fn start_node(&self) -> Self::Node { + self.bcb_from_bb(mir::START_BLOCK) + .expect("mir::START_BLOCK should be in a BasicCoverageBlock") + } +} + +type BcbSuccessors<'graph> = std::slice::Iter<'graph, BasicCoverageBlock>; + +impl<'graph> graph::GraphSuccessors<'graph> for CoverageGraph { + type Item = BasicCoverageBlock; + type Iter = std::iter::Cloned<BcbSuccessors<'graph>>; +} + +impl graph::WithSuccessors for CoverageGraph { + #[inline] + fn successors(&self, node: Self::Node) -> <Self as GraphSuccessors<'_>>::Iter { + self.successors[node].iter().cloned() + } +} + +impl<'graph> graph::GraphPredecessors<'graph> for CoverageGraph { + type Item = BasicCoverageBlock; + type Iter = std::iter::Copied<std::slice::Iter<'graph, BasicCoverageBlock>>; +} + +impl graph::WithPredecessors for CoverageGraph { + #[inline] + fn predecessors(&self, node: Self::Node) -> <Self as graph::GraphPredecessors<'_>>::Iter { + self.predecessors[node].iter().copied() + } +} + +rustc_index::newtype_index! { + /// A node in the control-flow graph of CoverageGraph. + pub(super) struct BasicCoverageBlock { + DEBUG_FORMAT = "bcb{}", + const START_BCB = 0, + } +} + +/// `BasicCoverageBlockData` holds the data indexed by a `BasicCoverageBlock`. +/// +/// A `BasicCoverageBlock` (BCB) represents the maximal-length sequence of MIR `BasicBlock`s without +/// conditional branches, and form a new, simplified, coverage-specific Control Flow Graph, without +/// altering the original MIR CFG. +/// +/// Note that running the MIR `SimplifyCfg` transform is not sufficient (and therefore not +/// necessary). The BCB-based CFG is a more aggressive simplification. For example: +/// +/// * The BCB CFG ignores (trims) branches not relevant to coverage, such as unwind-related code, +/// that is injected by the Rust compiler but has no physical source code to count. This also +/// means a BasicBlock with a `Call` terminator can be merged into its primary successor target +/// block, in the same BCB. (But, note: Issue #78544: "MIR InstrumentCoverage: Improve coverage +/// of `#[should_panic]` tests and `catch_unwind()` handlers") +/// * Some BasicBlock terminators support Rust-specific concerns--like borrow-checking--that are +/// not relevant to coverage analysis. `FalseUnwind`, for example, can be treated the same as +/// a `Goto`, and merged with its successor into the same BCB. +/// +/// Each BCB with at least one computed `CoverageSpan` will have no more than one `Counter`. +/// In some cases, a BCB's execution count can be computed by `Expression`. Additional +/// disjoint `CoverageSpan`s in a BCB can also be counted by `Expression` (by adding `ZERO` +/// to the BCB's primary counter or expression). +/// +/// The BCB CFG is critical to simplifying the coverage analysis by ensuring graph path-based +/// queries (`is_dominated_by()`, `predecessors`, `successors`, etc.) have branch (control flow) +/// significance. +#[derive(Debug, Clone)] +pub(super) struct BasicCoverageBlockData { + pub basic_blocks: Vec<BasicBlock>, + pub counter_kind: Option<CoverageKind>, + edge_from_bcbs: Option<FxHashMap<BasicCoverageBlock, CoverageKind>>, +} + +impl BasicCoverageBlockData { + pub fn from(basic_blocks: Vec<BasicBlock>) -> Self { + assert!(basic_blocks.len() > 0); + Self { basic_blocks, counter_kind: None, edge_from_bcbs: None } + } + + #[inline(always)] + pub fn leader_bb(&self) -> BasicBlock { + self.basic_blocks[0] + } + + #[inline(always)] + pub fn last_bb(&self) -> BasicBlock { + *self.basic_blocks.last().unwrap() + } + + #[inline(always)] + pub fn terminator<'a, 'tcx>(&self, mir_body: &'a mir::Body<'tcx>) -> &'a Terminator<'tcx> { + &mir_body[self.last_bb()].terminator() + } + + pub fn set_counter( + &mut self, + counter_kind: CoverageKind, + ) -> Result<ExpressionOperandId, Error> { + debug_assert!( + // If the BCB has an edge counter (to be injected into a new `BasicBlock`), it can also + // have an expression (to be injected into an existing `BasicBlock` represented by this + // `BasicCoverageBlock`). + self.edge_from_bcbs.is_none() || counter_kind.is_expression(), + "attempt to add a `Counter` to a BCB target with existing incoming edge counters" + ); + let operand = counter_kind.as_operand_id(); + if let Some(replaced) = self.counter_kind.replace(counter_kind) { + Error::from_string(format!( + "attempt to set a BasicCoverageBlock coverage counter more than once; \ + {:?} already had counter {:?}", + self, replaced, + )) + } else { + Ok(operand) + } + } + + #[inline(always)] + pub fn counter(&self) -> Option<&CoverageKind> { + self.counter_kind.as_ref() + } + + #[inline(always)] + pub fn take_counter(&mut self) -> Option<CoverageKind> { + self.counter_kind.take() + } + + pub fn set_edge_counter_from( + &mut self, + from_bcb: BasicCoverageBlock, + counter_kind: CoverageKind, + ) -> Result<ExpressionOperandId, Error> { + if level_enabled!(tracing::Level::DEBUG) { + // If the BCB has an edge counter (to be injected into a new `BasicBlock`), it can also + // have an expression (to be injected into an existing `BasicBlock` represented by this + // `BasicCoverageBlock`). + if !self.counter_kind.as_ref().map_or(true, |c| c.is_expression()) { + return Error::from_string(format!( + "attempt to add an incoming edge counter from {:?} when the target BCB already \ + has a `Counter`", + from_bcb + )); + } + } + let operand = counter_kind.as_operand_id(); + if let Some(replaced) = + self.edge_from_bcbs.get_or_insert_default().insert(from_bcb, counter_kind) + { + Error::from_string(format!( + "attempt to set an edge counter more than once; from_bcb: \ + {:?} already had counter {:?}", + from_bcb, replaced, + )) + } else { + Ok(operand) + } + } + + #[inline] + pub fn edge_counter_from(&self, from_bcb: BasicCoverageBlock) -> Option<&CoverageKind> { + if let Some(edge_from_bcbs) = &self.edge_from_bcbs { + edge_from_bcbs.get(&from_bcb) + } else { + None + } + } + + #[inline] + pub fn take_edge_counters( + &mut self, + ) -> Option<impl Iterator<Item = (BasicCoverageBlock, CoverageKind)>> { + self.edge_from_bcbs.take().map(|m| m.into_iter()) + } + + pub fn id(&self) -> String { + format!("@{}", self.basic_blocks.iter().map(|bb| bb.index().to_string()).join(ID_SEPARATOR)) + } +} + +/// Represents a successor from a branching BasicCoverageBlock (such as the arms of a `SwitchInt`) +/// as either the successor BCB itself, if it has only one incoming edge, or the successor _plus_ +/// the specific branching BCB, representing the edge between the two. The latter case +/// distinguishes this incoming edge from other incoming edges to the same `target_bcb`. +#[derive(Clone, Copy, PartialEq, Eq)] +pub(super) struct BcbBranch { + pub edge_from_bcb: Option<BasicCoverageBlock>, + pub target_bcb: BasicCoverageBlock, +} + +impl BcbBranch { + pub fn from_to( + from_bcb: BasicCoverageBlock, + to_bcb: BasicCoverageBlock, + basic_coverage_blocks: &CoverageGraph, + ) -> Self { + let edge_from_bcb = if basic_coverage_blocks.predecessors[to_bcb].len() > 1 { + Some(from_bcb) + } else { + None + }; + Self { edge_from_bcb, target_bcb: to_bcb } + } + + pub fn counter<'a>( + &self, + basic_coverage_blocks: &'a CoverageGraph, + ) -> Option<&'a CoverageKind> { + if let Some(from_bcb) = self.edge_from_bcb { + basic_coverage_blocks[self.target_bcb].edge_counter_from(from_bcb) + } else { + basic_coverage_blocks[self.target_bcb].counter() + } + } + + pub fn is_only_path_to_target(&self) -> bool { + self.edge_from_bcb.is_none() + } +} + +impl std::fmt::Debug for BcbBranch { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(from_bcb) = self.edge_from_bcb { + write!(fmt, "{:?}->{:?}", from_bcb, self.target_bcb) + } else { + write!(fmt, "{:?}", self.target_bcb) + } + } +} + +// Returns the `Terminator`s non-unwind successors. +// FIXME(#78544): MIR InstrumentCoverage: Improve coverage of `#[should_panic]` tests and +// `catch_unwind()` handlers. +fn bcb_filtered_successors<'a, 'tcx>( + body: &'a mir::Body<'tcx>, + term_kind: &'a TerminatorKind<'tcx>, +) -> Box<dyn Iterator<Item = BasicBlock> + 'a> { + Box::new( + match &term_kind { + // SwitchInt successors are never unwind, and all of them should be traversed. + TerminatorKind::SwitchInt { ref targets, .. } => { + None.into_iter().chain(targets.all_targets().into_iter().copied()) + } + // For all other kinds, return only the first successor, if any, and ignore unwinds. + // NOTE: `chain(&[])` is required to coerce the `option::iter` (from + // `next().into_iter()`) into the `mir::Successors` aliased type. + _ => term_kind.successors().next().into_iter().chain((&[]).into_iter().copied()), + } + .filter(move |&successor| body[successor].terminator().kind != TerminatorKind::Unreachable), + ) +} + +/// Maintains separate worklists for each loop in the BasicCoverageBlock CFG, plus one for the +/// CoverageGraph outside all loops. This supports traversing the BCB CFG in a way that +/// ensures a loop is completely traversed before processing Blocks after the end of the loop. +#[derive(Debug)] +pub(super) struct TraversalContext { + /// From one or more backedges returning to a loop header. + pub loop_backedges: Option<(Vec<BasicCoverageBlock>, BasicCoverageBlock)>, + + /// worklist, to be traversed, of CoverageGraph in the loop with the given loop + /// backedges, such that the loop is the inner inner-most loop containing these + /// CoverageGraph + pub worklist: Vec<BasicCoverageBlock>, +} + +pub(super) struct TraverseCoverageGraphWithLoops { + pub backedges: IndexVec<BasicCoverageBlock, Vec<BasicCoverageBlock>>, + pub context_stack: Vec<TraversalContext>, + visited: BitSet<BasicCoverageBlock>, +} + +impl TraverseCoverageGraphWithLoops { + pub fn new(basic_coverage_blocks: &CoverageGraph) -> Self { + let start_bcb = basic_coverage_blocks.start_node(); + let backedges = find_loop_backedges(basic_coverage_blocks); + let context_stack = + vec![TraversalContext { loop_backedges: None, worklist: vec![start_bcb] }]; + // `context_stack` starts with a `TraversalContext` for the main function context (beginning + // with the `start` BasicCoverageBlock of the function). New worklists are pushed to the top + // of the stack as loops are entered, and popped off of the stack when a loop's worklist is + // exhausted. + let visited = BitSet::new_empty(basic_coverage_blocks.num_nodes()); + Self { backedges, context_stack, visited } + } + + pub fn next(&mut self, basic_coverage_blocks: &CoverageGraph) -> Option<BasicCoverageBlock> { + debug!( + "TraverseCoverageGraphWithLoops::next - context_stack: {:?}", + self.context_stack.iter().rev().collect::<Vec<_>>() + ); + while let Some(next_bcb) = { + // Strip contexts with empty worklists from the top of the stack + while self.context_stack.last().map_or(false, |context| context.worklist.is_empty()) { + self.context_stack.pop(); + } + // Pop the next bcb off of the current context_stack. If none, all BCBs were visited. + self.context_stack.last_mut().map_or(None, |context| context.worklist.pop()) + } { + if !self.visited.insert(next_bcb) { + debug!("Already visited: {:?}", next_bcb); + continue; + } + debug!("Visiting {:?}", next_bcb); + if self.backedges[next_bcb].len() > 0 { + debug!("{:?} is a loop header! Start a new TraversalContext...", next_bcb); + self.context_stack.push(TraversalContext { + loop_backedges: Some((self.backedges[next_bcb].clone(), next_bcb)), + worklist: Vec::new(), + }); + } + self.extend_worklist(basic_coverage_blocks, next_bcb); + return Some(next_bcb); + } + None + } + + pub fn extend_worklist( + &mut self, + basic_coverage_blocks: &CoverageGraph, + bcb: BasicCoverageBlock, + ) { + let successors = &basic_coverage_blocks.successors[bcb]; + debug!("{:?} has {} successors:", bcb, successors.len()); + for &successor in successors { + if successor == bcb { + debug!( + "{:?} has itself as its own successor. (Note, the compiled code will \ + generate an infinite loop.)", + bcb + ); + // Don't re-add this successor to the worklist. We are already processing it. + break; + } + for context in self.context_stack.iter_mut().rev() { + // Add successors of the current BCB to the appropriate context. Successors that + // stay within a loop are added to the BCBs context worklist. Successors that + // exit the loop (they are not dominated by the loop header) must be reachable + // from other BCBs outside the loop, and they will be added to a different + // worklist. + // + // Branching blocks (with more than one successor) must be processed before + // blocks with only one successor, to prevent unnecessarily complicating + // `Expression`s by creating a Counter in a `BasicCoverageBlock` that the + // branching block would have given an `Expression` (or vice versa). + let (some_successor_to_add, some_loop_header) = + if let Some((_, loop_header)) = context.loop_backedges { + if basic_coverage_blocks.is_dominated_by(successor, loop_header) { + (Some(successor), Some(loop_header)) + } else { + (None, None) + } + } else { + (Some(successor), None) + }; + if let Some(successor_to_add) = some_successor_to_add { + if basic_coverage_blocks.successors[successor_to_add].len() > 1 { + debug!( + "{:?} successor is branching. Prioritize it at the beginning of \ + the {}", + successor_to_add, + if let Some(loop_header) = some_loop_header { + format!("worklist for the loop headed by {:?}", loop_header) + } else { + String::from("non-loop worklist") + }, + ); + context.worklist.insert(0, successor_to_add); + } else { + debug!( + "{:?} successor is non-branching. Defer it to the end of the {}", + successor_to_add, + if let Some(loop_header) = some_loop_header { + format!("worklist for the loop headed by {:?}", loop_header) + } else { + String::from("non-loop worklist") + }, + ); + context.worklist.push(successor_to_add); + } + break; + } + } + } + } + + pub fn is_complete(&self) -> bool { + self.visited.count() == self.visited.domain_size() + } + + pub fn unvisited(&self) -> Vec<BasicCoverageBlock> { + let mut unvisited_set: BitSet<BasicCoverageBlock> = + BitSet::new_filled(self.visited.domain_size()); + unvisited_set.subtract(&self.visited); + unvisited_set.iter().collect::<Vec<_>>() + } +} + +pub(super) fn find_loop_backedges( + basic_coverage_blocks: &CoverageGraph, +) -> IndexVec<BasicCoverageBlock, Vec<BasicCoverageBlock>> { + let num_bcbs = basic_coverage_blocks.num_nodes(); + let mut backedges = IndexVec::from_elem_n(Vec::<BasicCoverageBlock>::new(), num_bcbs); + + // Identify loops by their backedges. + // + // The computational complexity is bounded by: n(s) x d where `n` is the number of + // `BasicCoverageBlock` nodes (the simplified/reduced representation of the CFG derived from the + // MIR); `s` is the average number of successors per node (which is most likely less than 2, and + // independent of the size of the function, so it can be treated as a constant); + // and `d` is the average number of dominators per node. + // + // The average number of dominators depends on the size and complexity of the function, and + // nodes near the start of the function's control flow graph typically have less dominators + // than nodes near the end of the CFG. Without doing a detailed mathematical analysis, I + // think the resulting complexity has the characteristics of O(n log n). + // + // The overall complexity appears to be comparable to many other MIR transform algorithms, and I + // don't expect that this function is creating a performance hot spot, but if this becomes an + // issue, there may be ways to optimize the `is_dominated_by` algorithm (as indicated by an + // existing `FIXME` comment in that code), or possibly ways to optimize it's usage here, perhaps + // by keeping track of results for visited `BasicCoverageBlock`s if they can be used to short + // circuit downstream `is_dominated_by` checks. + // + // For now, that kind of optimization seems unnecessarily complicated. + for (bcb, _) in basic_coverage_blocks.iter_enumerated() { + for &successor in &basic_coverage_blocks.successors[bcb] { + if basic_coverage_blocks.is_dominated_by(bcb, successor) { + let loop_header = successor; + let backedge_from_bcb = bcb; + debug!( + "Found BCB backedge: {:?} -> loop_header: {:?}", + backedge_from_bcb, loop_header + ); + backedges[loop_header].push(backedge_from_bcb); + } + } + } + backedges +} + +pub struct ShortCircuitPreorder< + 'a, + 'tcx, + F: Fn(&'a mir::Body<'tcx>, &'a TerminatorKind<'tcx>) -> Box<dyn Iterator<Item = BasicBlock> + 'a>, +> { + body: &'a mir::Body<'tcx>, + visited: BitSet<BasicBlock>, + worklist: Vec<BasicBlock>, + filtered_successors: F, +} + +impl< + 'a, + 'tcx, + F: Fn(&'a mir::Body<'tcx>, &'a TerminatorKind<'tcx>) -> Box<dyn Iterator<Item = BasicBlock> + 'a>, +> ShortCircuitPreorder<'a, 'tcx, F> +{ + pub fn new( + body: &'a mir::Body<'tcx>, + filtered_successors: F, + ) -> ShortCircuitPreorder<'a, 'tcx, F> { + let worklist = vec![mir::START_BLOCK]; + + ShortCircuitPreorder { + body, + visited: BitSet::new_empty(body.basic_blocks().len()), + worklist, + filtered_successors, + } + } +} + +impl< + 'a, + 'tcx, + F: Fn(&'a mir::Body<'tcx>, &'a TerminatorKind<'tcx>) -> Box<dyn Iterator<Item = BasicBlock> + 'a>, +> Iterator for ShortCircuitPreorder<'a, 'tcx, F> +{ + type Item = (BasicBlock, &'a BasicBlockData<'tcx>); + + fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> { + while let Some(idx) = self.worklist.pop() { + if !self.visited.insert(idx) { + continue; + } + + let data = &self.body[idx]; + + if let Some(ref term) = data.terminator { + self.worklist.extend((self.filtered_successors)(&self.body, &term.kind)); + } + + return Some((idx, data)); + } + + None + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let size = self.body.basic_blocks().len() - self.visited.count(); + (size, Some(size)) + } +} diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs new file mode 100644 index 000000000..2619626a5 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/mod.rs @@ -0,0 +1,580 @@ +pub mod query; + +mod counters; +mod debug; +mod graph; +mod spans; + +#[cfg(test)] +mod tests; + +use counters::CoverageCounters; +use graph::{BasicCoverageBlock, BasicCoverageBlockData, CoverageGraph}; +use spans::{CoverageSpan, CoverageSpans}; + +use crate::MirPass; + +use rustc_data_structures::graph::WithNumNodes; +use rustc_data_structures::sync::Lrc; +use rustc_index::vec::IndexVec; +use rustc_middle::hir; +use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; +use rustc_middle::mir::coverage::*; +use rustc_middle::mir::dump_enabled; +use rustc_middle::mir::{ + self, BasicBlock, BasicBlockData, Coverage, SourceInfo, Statement, StatementKind, Terminator, + TerminatorKind, +}; +use rustc_middle::ty::TyCtxt; +use rustc_span::def_id::DefId; +use rustc_span::source_map::SourceMap; +use rustc_span::{CharPos, ExpnKind, Pos, SourceFile, Span, Symbol}; + +/// A simple error message wrapper for `coverage::Error`s. +#[derive(Debug)] +struct Error { + message: String, +} + +impl Error { + pub fn from_string<T>(message: String) -> Result<T, Error> { + Err(Self { message }) + } +} + +/// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected +/// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen +/// to construct the coverage map. +pub struct InstrumentCoverage; + +impl<'tcx> MirPass<'tcx> for InstrumentCoverage { + fn is_enabled(&self, sess: &rustc_session::Session) -> bool { + sess.instrument_coverage() + } + + fn run_pass(&self, tcx: TyCtxt<'tcx>, mir_body: &mut mir::Body<'tcx>) { + let mir_source = mir_body.source; + + // If the InstrumentCoverage pass is called on promoted MIRs, skip them. + // See: https://github.com/rust-lang/rust/pull/73011#discussion_r438317601 + if mir_source.promoted.is_some() { + trace!( + "InstrumentCoverage skipped for {:?} (already promoted for Miri evaluation)", + mir_source.def_id() + ); + return; + } + + let is_fn_like = + tcx.hir().get_by_def_id(mir_source.def_id().expect_local()).fn_kind().is_some(); + + // Only instrument functions, methods, and closures (not constants since they are evaluated + // at compile time by Miri). + // FIXME(#73156): Handle source code coverage in const eval, but note, if and when const + // expressions get coverage spans, we will probably have to "carve out" space for const + // expressions from coverage spans in enclosing MIR's, like we do for closures. (That might + // be tricky if const expressions have no corresponding statements in the enclosing MIR. + // Closures are carved out by their initial `Assign` statement.) + if !is_fn_like { + trace!("InstrumentCoverage skipped for {:?} (not an fn-like)", mir_source.def_id()); + return; + } + + match mir_body.basic_blocks()[mir::START_BLOCK].terminator().kind { + TerminatorKind::Unreachable => { + trace!("InstrumentCoverage skipped for unreachable `START_BLOCK`"); + return; + } + _ => {} + } + + let codegen_fn_attrs = tcx.codegen_fn_attrs(mir_source.def_id()); + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_COVERAGE) { + return; + } + + trace!("InstrumentCoverage starting for {:?}", mir_source.def_id()); + Instrumentor::new(&self.name(), tcx, mir_body).inject_counters(); + trace!("InstrumentCoverage done for {:?}", mir_source.def_id()); + } +} + +struct Instrumentor<'a, 'tcx> { + pass_name: &'a str, + tcx: TyCtxt<'tcx>, + mir_body: &'a mut mir::Body<'tcx>, + source_file: Lrc<SourceFile>, + fn_sig_span: Span, + body_span: Span, + basic_coverage_blocks: CoverageGraph, + coverage_counters: CoverageCounters, +} + +impl<'a, 'tcx> Instrumentor<'a, 'tcx> { + fn new(pass_name: &'a str, tcx: TyCtxt<'tcx>, mir_body: &'a mut mir::Body<'tcx>) -> Self { + let source_map = tcx.sess.source_map(); + let def_id = mir_body.source.def_id(); + let (some_fn_sig, hir_body) = fn_sig_and_body(tcx, def_id); + + let body_span = get_body_span(tcx, hir_body, mir_body); + + let source_file = source_map.lookup_source_file(body_span.lo()); + let fn_sig_span = match some_fn_sig.filter(|fn_sig| { + fn_sig.span.eq_ctxt(body_span) + && Lrc::ptr_eq(&source_file, &source_map.lookup_source_file(fn_sig.span.lo())) + }) { + Some(fn_sig) => fn_sig.span.with_hi(body_span.lo()), + None => body_span.shrink_to_lo(), + }; + + debug!( + "instrumenting {}: {:?}, fn sig span: {:?}, body span: {:?}", + if tcx.is_closure(def_id) { "closure" } else { "function" }, + def_id, + fn_sig_span, + body_span + ); + + let function_source_hash = hash_mir_source(tcx, hir_body); + let basic_coverage_blocks = CoverageGraph::from_mir(mir_body); + Self { + pass_name, + tcx, + mir_body, + source_file, + fn_sig_span, + body_span, + basic_coverage_blocks, + coverage_counters: CoverageCounters::new(function_source_hash), + } + } + + fn inject_counters(&'a mut self) { + let tcx = self.tcx; + let mir_source = self.mir_body.source; + let def_id = mir_source.def_id(); + let fn_sig_span = self.fn_sig_span; + let body_span = self.body_span; + + let mut graphviz_data = debug::GraphvizData::new(); + let mut debug_used_expressions = debug::UsedExpressions::new(); + + let dump_mir = dump_enabled(tcx, self.pass_name, def_id); + let dump_graphviz = dump_mir && tcx.sess.opts.unstable_opts.dump_mir_graphviz; + let dump_spanview = dump_mir && tcx.sess.opts.unstable_opts.dump_mir_spanview.is_some(); + + if dump_graphviz { + graphviz_data.enable(); + self.coverage_counters.enable_debug(); + } + + if dump_graphviz || level_enabled!(tracing::Level::DEBUG) { + debug_used_expressions.enable(); + } + + //////////////////////////////////////////////////// + // Compute `CoverageSpan`s from the `CoverageGraph`. + let coverage_spans = CoverageSpans::generate_coverage_spans( + &self.mir_body, + fn_sig_span, + body_span, + &self.basic_coverage_blocks, + ); + + if dump_spanview { + debug::dump_coverage_spanview( + tcx, + self.mir_body, + &self.basic_coverage_blocks, + self.pass_name, + body_span, + &coverage_spans, + ); + } + + //////////////////////////////////////////////////// + // Create an optimized mix of `Counter`s and `Expression`s for the `CoverageGraph`. Ensure + // every `CoverageSpan` has a `Counter` or `Expression` assigned to its `BasicCoverageBlock` + // and all `Expression` dependencies (operands) are also generated, for any other + // `BasicCoverageBlock`s not already associated with a `CoverageSpan`. + // + // Intermediate expressions (used to compute other `Expression` values), which have no + // direct associate to any `BasicCoverageBlock`, are returned in the method `Result`. + let intermediate_expressions_or_error = self + .coverage_counters + .make_bcb_counters(&mut self.basic_coverage_blocks, &coverage_spans); + + let (result, intermediate_expressions) = match intermediate_expressions_or_error { + Ok(intermediate_expressions) => { + // If debugging, add any intermediate expressions (which are not associated with any + // BCB) to the `debug_used_expressions` map. + if debug_used_expressions.is_enabled() { + for intermediate_expression in &intermediate_expressions { + debug_used_expressions.add_expression_operands(intermediate_expression); + } + } + + //////////////////////////////////////////////////// + // Remove the counter or edge counter from of each `CoverageSpan`s associated + // `BasicCoverageBlock`, and inject a `Coverage` statement into the MIR. + // + // `Coverage` statements injected from `CoverageSpan`s will include the code regions + // (source code start and end positions) to be counted by the associated counter. + // + // These `CoverageSpan`-associated counters are removed from their associated + // `BasicCoverageBlock`s so that the only remaining counters in the `CoverageGraph` + // are indirect counters (to be injected next, without associated code regions). + self.inject_coverage_span_counters( + coverage_spans, + &mut graphviz_data, + &mut debug_used_expressions, + ); + + //////////////////////////////////////////////////// + // For any remaining `BasicCoverageBlock` counters (that were not associated with + // any `CoverageSpan`), inject `Coverage` statements (_without_ code region `Span`s) + // to ensure `BasicCoverageBlock` counters that other `Expression`s may depend on + // are in fact counted, even though they don't directly contribute to counting + // their own independent code region's coverage. + self.inject_indirect_counters(&mut graphviz_data, &mut debug_used_expressions); + + // Intermediate expressions will be injected as the final step, after generating + // debug output, if any. + //////////////////////////////////////////////////// + + (Ok(()), intermediate_expressions) + } + Err(e) => (Err(e), Vec::new()), + }; + + if graphviz_data.is_enabled() { + // Even if there was an error, a partial CoverageGraph can still generate a useful + // graphviz output. + debug::dump_coverage_graphviz( + tcx, + self.mir_body, + self.pass_name, + &self.basic_coverage_blocks, + &self.coverage_counters.debug_counters, + &graphviz_data, + &intermediate_expressions, + &debug_used_expressions, + ); + } + + if let Err(e) = result { + bug!("Error processing: {:?}: {:?}", self.mir_body.source.def_id(), e.message) + }; + + // Depending on current `debug_options()`, `alert_on_unused_expressions()` could panic, so + // this check is performed as late as possible, to allow other debug output (logs and dump + // files), which might be helpful in analyzing unused expressions, to still be generated. + debug_used_expressions.alert_on_unused_expressions(&self.coverage_counters.debug_counters); + + //////////////////////////////////////////////////// + // Finally, inject the intermediate expressions collected along the way. + for intermediate_expression in intermediate_expressions { + inject_intermediate_expression(self.mir_body, intermediate_expression); + } + } + + /// Inject a counter for each `CoverageSpan`. There can be multiple `CoverageSpan`s for a given + /// BCB, but only one actual counter needs to be incremented per BCB. `bb_counters` maps each + /// `bcb` to its `Counter`, when injected. Subsequent `CoverageSpan`s for a BCB that already has + /// a `Counter` will inject an `Expression` instead, and compute its value by adding `ZERO` to + /// the BCB `Counter` value. + /// + /// If debugging, add every BCB `Expression` associated with a `CoverageSpan`s to the + /// `used_expression_operands` map. + fn inject_coverage_span_counters( + &mut self, + coverage_spans: Vec<CoverageSpan>, + graphviz_data: &mut debug::GraphvizData, + debug_used_expressions: &mut debug::UsedExpressions, + ) { + let tcx = self.tcx; + let source_map = tcx.sess.source_map(); + let body_span = self.body_span; + let file_name = Symbol::intern(&self.source_file.name.prefer_remapped().to_string_lossy()); + + let mut bcb_counters = IndexVec::from_elem_n(None, self.basic_coverage_blocks.num_nodes()); + for covspan in coverage_spans { + let bcb = covspan.bcb; + let span = covspan.span; + let counter_kind = if let Some(&counter_operand) = bcb_counters[bcb].as_ref() { + self.coverage_counters.make_identity_counter(counter_operand) + } else if let Some(counter_kind) = self.bcb_data_mut(bcb).take_counter() { + bcb_counters[bcb] = Some(counter_kind.as_operand_id()); + debug_used_expressions.add_expression_operands(&counter_kind); + counter_kind + } else { + bug!("Every BasicCoverageBlock should have a Counter or Expression"); + }; + graphviz_data.add_bcb_coverage_span_with_counter(bcb, &covspan, &counter_kind); + + debug!( + "Calling make_code_region(file_name={}, source_file={:?}, span={}, body_span={})", + file_name, + self.source_file, + source_map.span_to_diagnostic_string(span), + source_map.span_to_diagnostic_string(body_span) + ); + + inject_statement( + self.mir_body, + counter_kind, + self.bcb_leader_bb(bcb), + Some(make_code_region(source_map, file_name, &self.source_file, span, body_span)), + ); + } + } + + /// `inject_coverage_span_counters()` looped through the `CoverageSpan`s and injected the + /// counter from the `CoverageSpan`s `BasicCoverageBlock`, removing it from the BCB in the + /// process (via `take_counter()`). + /// + /// Any other counter associated with a `BasicCoverageBlock`, or its incoming edge, but not + /// associated with a `CoverageSpan`, should only exist if the counter is an `Expression` + /// dependency (one of the expression operands). Collect them, and inject the additional + /// counters into the MIR, without a reportable coverage span. + fn inject_indirect_counters( + &mut self, + graphviz_data: &mut debug::GraphvizData, + debug_used_expressions: &mut debug::UsedExpressions, + ) { + let mut bcb_counters_without_direct_coverage_spans = Vec::new(); + for (target_bcb, target_bcb_data) in self.basic_coverage_blocks.iter_enumerated_mut() { + if let Some(counter_kind) = target_bcb_data.take_counter() { + bcb_counters_without_direct_coverage_spans.push((None, target_bcb, counter_kind)); + } + if let Some(edge_counters) = target_bcb_data.take_edge_counters() { + for (from_bcb, counter_kind) in edge_counters { + bcb_counters_without_direct_coverage_spans.push(( + Some(from_bcb), + target_bcb, + counter_kind, + )); + } + } + } + + // If debug is enabled, validate that every BCB or edge counter not directly associated + // with a coverage span is at least indirectly associated (it is a dependency of a BCB + // counter that _is_ associated with a coverage span). + debug_used_expressions.validate(&bcb_counters_without_direct_coverage_spans); + + for (edge_from_bcb, target_bcb, counter_kind) in bcb_counters_without_direct_coverage_spans + { + debug_used_expressions.add_unused_expression_if_not_found( + &counter_kind, + edge_from_bcb, + target_bcb, + ); + + match counter_kind { + CoverageKind::Counter { .. } => { + let inject_to_bb = if let Some(from_bcb) = edge_from_bcb { + // The MIR edge starts `from_bb` (the outgoing / last BasicBlock in + // `from_bcb`) and ends at `to_bb` (the incoming / first BasicBlock in the + // `target_bcb`; also called the `leader_bb`). + let from_bb = self.bcb_last_bb(from_bcb); + let to_bb = self.bcb_leader_bb(target_bcb); + + let new_bb = inject_edge_counter_basic_block(self.mir_body, from_bb, to_bb); + graphviz_data.set_edge_counter(from_bcb, new_bb, &counter_kind); + debug!( + "Edge {:?} (last {:?}) -> {:?} (leader {:?}) requires a new MIR \ + BasicBlock {:?}, for unclaimed edge counter {}", + edge_from_bcb, + from_bb, + target_bcb, + to_bb, + new_bb, + self.format_counter(&counter_kind), + ); + new_bb + } else { + let target_bb = self.bcb_last_bb(target_bcb); + graphviz_data.add_bcb_dependency_counter(target_bcb, &counter_kind); + debug!( + "{:?} ({:?}) gets a new Coverage statement for unclaimed counter {}", + target_bcb, + target_bb, + self.format_counter(&counter_kind), + ); + target_bb + }; + + inject_statement(self.mir_body, counter_kind, inject_to_bb, None); + } + CoverageKind::Expression { .. } => { + inject_intermediate_expression(self.mir_body, counter_kind) + } + _ => bug!("CoverageKind should be a counter"), + } + } + } + + #[inline] + fn bcb_leader_bb(&self, bcb: BasicCoverageBlock) -> BasicBlock { + self.bcb_data(bcb).leader_bb() + } + + #[inline] + fn bcb_last_bb(&self, bcb: BasicCoverageBlock) -> BasicBlock { + self.bcb_data(bcb).last_bb() + } + + #[inline] + fn bcb_data(&self, bcb: BasicCoverageBlock) -> &BasicCoverageBlockData { + &self.basic_coverage_blocks[bcb] + } + + #[inline] + fn bcb_data_mut(&mut self, bcb: BasicCoverageBlock) -> &mut BasicCoverageBlockData { + &mut self.basic_coverage_blocks[bcb] + } + + #[inline] + fn format_counter(&self, counter_kind: &CoverageKind) -> String { + self.coverage_counters.debug_counters.format_counter(counter_kind) + } +} + +fn inject_edge_counter_basic_block( + mir_body: &mut mir::Body<'_>, + from_bb: BasicBlock, + to_bb: BasicBlock, +) -> BasicBlock { + let span = mir_body[from_bb].terminator().source_info.span.shrink_to_hi(); + let new_bb = mir_body.basic_blocks_mut().push(BasicBlockData { + statements: vec![], // counter will be injected here + terminator: Some(Terminator { + source_info: SourceInfo::outermost(span), + kind: TerminatorKind::Goto { target: to_bb }, + }), + is_cleanup: false, + }); + let edge_ref = mir_body[from_bb] + .terminator_mut() + .successors_mut() + .find(|successor| **successor == to_bb) + .expect("from_bb should have a successor for to_bb"); + *edge_ref = new_bb; + new_bb +} + +fn inject_statement( + mir_body: &mut mir::Body<'_>, + counter_kind: CoverageKind, + bb: BasicBlock, + some_code_region: Option<CodeRegion>, +) { + debug!( + " injecting statement {:?} for {:?} at code region: {:?}", + counter_kind, bb, some_code_region + ); + let data = &mut mir_body[bb]; + let source_info = data.terminator().source_info; + let statement = Statement { + source_info, + kind: StatementKind::Coverage(Box::new(Coverage { + kind: counter_kind, + code_region: some_code_region, + })), + }; + data.statements.insert(0, statement); +} + +// Non-code expressions are injected into the coverage map, without generating executable code. +fn inject_intermediate_expression(mir_body: &mut mir::Body<'_>, expression: CoverageKind) { + debug_assert!(matches!(expression, CoverageKind::Expression { .. })); + debug!(" injecting non-code expression {:?}", expression); + let inject_in_bb = mir::START_BLOCK; + let data = &mut mir_body[inject_in_bb]; + let source_info = data.terminator().source_info; + let statement = Statement { + source_info, + kind: StatementKind::Coverage(Box::new(Coverage { kind: expression, code_region: None })), + }; + data.statements.push(statement); +} + +/// Convert the Span into its file name, start line and column, and end line and column +fn make_code_region( + source_map: &SourceMap, + file_name: Symbol, + source_file: &Lrc<SourceFile>, + span: Span, + body_span: Span, +) -> CodeRegion { + let (start_line, mut start_col) = source_file.lookup_file_pos(span.lo()); + let (end_line, end_col) = if span.hi() == span.lo() { + let (end_line, mut end_col) = (start_line, start_col); + // Extend an empty span by one character so the region will be counted. + let CharPos(char_pos) = start_col; + if span.hi() == body_span.hi() { + start_col = CharPos(char_pos - 1); + } else { + end_col = CharPos(char_pos + 1); + } + (end_line, end_col) + } else { + source_file.lookup_file_pos(span.hi()) + }; + let start_line = source_map.doctest_offset_line(&source_file.name, start_line); + let end_line = source_map.doctest_offset_line(&source_file.name, end_line); + CodeRegion { + file_name, + start_line: start_line as u32, + start_col: start_col.to_u32() + 1, + end_line: end_line as u32, + end_col: end_col.to_u32() + 1, + } +} + +fn fn_sig_and_body<'tcx>( + tcx: TyCtxt<'tcx>, + def_id: DefId, +) -> (Option<&'tcx rustc_hir::FnSig<'tcx>>, &'tcx rustc_hir::Body<'tcx>) { + // FIXME(#79625): Consider improving MIR to provide the information needed, to avoid going back + // to HIR for it. + let hir_node = tcx.hir().get_if_local(def_id).expect("expected DefId is local"); + let fn_body_id = hir::map::associated_body(hir_node).expect("HIR node is a function with body"); + (hir::map::fn_sig(hir_node), tcx.hir().body(fn_body_id)) +} + +fn get_body_span<'tcx>( + tcx: TyCtxt<'tcx>, + hir_body: &rustc_hir::Body<'tcx>, + mir_body: &mut mir::Body<'tcx>, +) -> Span { + let mut body_span = hir_body.value.span; + let def_id = mir_body.source.def_id(); + + if tcx.is_closure(def_id) { + // If the MIR function is a closure, and if the closure body span + // starts from a macro, but it's content is not in that macro, try + // to find a non-macro callsite, and instrument the spans there + // instead. + loop { + let expn_data = body_span.ctxt().outer_expn_data(); + if expn_data.is_root() { + break; + } + if let ExpnKind::Macro { .. } = expn_data.kind { + body_span = expn_data.call_site; + } else { + break; + } + } + } + + body_span +} + +fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx rustc_hir::Body<'tcx>) -> u64 { + // FIXME(cjgillot) Stop hashing HIR manually here. + let owner = hir_body.id().hir_id.owner; + tcx.hir_owner_nodes(owner).unwrap().hash_including_bodies.to_smaller_hash() +} diff --git a/compiler/rustc_mir_transform/src/coverage/query.rs b/compiler/rustc_mir_transform/src/coverage/query.rs new file mode 100644 index 000000000..9d02f58ae --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/query.rs @@ -0,0 +1,170 @@ +use super::*; + +use rustc_middle::mir::coverage::*; +use rustc_middle::mir::{self, Body, Coverage, CoverageInfo}; +use rustc_middle::ty::query::Providers; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_span::def_id::DefId; + +/// A `query` provider for retrieving coverage information injected into MIR. +pub(crate) fn provide(providers: &mut Providers) { + providers.coverageinfo = |tcx, def_id| coverageinfo(tcx, def_id); + providers.covered_code_regions = |tcx, def_id| covered_code_regions(tcx, def_id); +} + +/// The `num_counters` argument to `llvm.instrprof.increment` is the max counter_id + 1, or in +/// other words, the number of counter value references injected into the MIR (plus 1 for the +/// reserved `ZERO` counter, which uses counter ID `0` when included in an expression). Injected +/// counters have a counter ID from `1..num_counters-1`. +/// +/// `num_expressions` is the number of counter expressions added to the MIR body. +/// +/// Both `num_counters` and `num_expressions` are used to initialize new vectors, during backend +/// code generate, to lookup counters and expressions by simple u32 indexes. +/// +/// MIR optimization may split and duplicate some BasicBlock sequences, or optimize out some code +/// including injected counters. (It is OK if some counters are optimized out, but those counters +/// are still included in the total `num_counters` or `num_expressions`.) Simply counting the +/// calls may not work; but computing the number of counters or expressions by adding `1` to the +/// highest ID (for a given instrumented function) is valid. +/// +/// This visitor runs twice, first with `add_missing_operands` set to `false`, to find the maximum +/// counter ID and maximum expression ID based on their enum variant `id` fields; then, as a +/// safeguard, with `add_missing_operands` set to `true`, to find any other counter or expression +/// IDs referenced by expression operands, if not already seen. +/// +/// Ideally, each operand ID in a MIR `CoverageKind::Expression` will have a separate MIR `Coverage` +/// statement for the `Counter` or `Expression` with the referenced ID. but since current or future +/// MIR optimizations can theoretically optimize out segments of a MIR, it may not be possible to +/// guarantee this, so the second pass ensures the `CoverageInfo` counts include all referenced IDs. +struct CoverageVisitor { + info: CoverageInfo, + add_missing_operands: bool, +} + +impl CoverageVisitor { + /// Updates `num_counters` to the maximum encountered zero-based counter_id plus 1. Note the + /// final computed number of counters should be the number of all `CoverageKind::Counter` + /// statements in the MIR *plus one* for the implicit `ZERO` counter. + #[inline(always)] + fn update_num_counters(&mut self, counter_id: u32) { + self.info.num_counters = std::cmp::max(self.info.num_counters, counter_id + 1); + } + + /// Computes an expression index for each expression ID, and updates `num_expressions` to the + /// maximum encountered index plus 1. + #[inline(always)] + fn update_num_expressions(&mut self, expression_id: u32) { + let expression_index = u32::MAX - expression_id; + self.info.num_expressions = std::cmp::max(self.info.num_expressions, expression_index + 1); + } + + fn update_from_expression_operand(&mut self, operand_id: u32) { + if operand_id >= self.info.num_counters { + let operand_as_expression_index = u32::MAX - operand_id; + if operand_as_expression_index >= self.info.num_expressions { + // The operand ID is outside the known range of counter IDs and also outside the + // known range of expression IDs. In either case, the result of a missing operand + // (if and when used in an expression) will be zero, so from a computation + // perspective, it doesn't matter whether it is interpreted as a counter or an + // expression. + // + // However, the `num_counters` and `num_expressions` query results are used to + // allocate arrays when generating the coverage map (during codegen), so choose + // the type that grows either `num_counters` or `num_expressions` the least. + if operand_id - self.info.num_counters + < operand_as_expression_index - self.info.num_expressions + { + self.update_num_counters(operand_id) + } else { + self.update_num_expressions(operand_id) + } + } + } + } + + fn visit_body(&mut self, body: &Body<'_>) { + for bb_data in body.basic_blocks().iter() { + for statement in bb_data.statements.iter() { + if let StatementKind::Coverage(box ref coverage) = statement.kind { + if is_inlined(body, statement) { + continue; + } + self.visit_coverage(coverage); + } + } + } + } + + fn visit_coverage(&mut self, coverage: &Coverage) { + if self.add_missing_operands { + match coverage.kind { + CoverageKind::Expression { lhs, rhs, .. } => { + self.update_from_expression_operand(u32::from(lhs)); + self.update_from_expression_operand(u32::from(rhs)); + } + _ => {} + } + } else { + match coverage.kind { + CoverageKind::Counter { id, .. } => { + self.update_num_counters(u32::from(id)); + } + CoverageKind::Expression { id, .. } => { + self.update_num_expressions(u32::from(id)); + } + _ => {} + } + } + } +} + +fn coverageinfo<'tcx>(tcx: TyCtxt<'tcx>, instance_def: ty::InstanceDef<'tcx>) -> CoverageInfo { + let mir_body = tcx.instance_mir(instance_def); + + let mut coverage_visitor = CoverageVisitor { + // num_counters always has at least the `ZERO` counter. + info: CoverageInfo { num_counters: 1, num_expressions: 0 }, + add_missing_operands: false, + }; + + coverage_visitor.visit_body(mir_body); + + coverage_visitor.add_missing_operands = true; + coverage_visitor.visit_body(mir_body); + + coverage_visitor.info +} + +fn covered_code_regions<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> Vec<&'tcx CodeRegion> { + let body = mir_body(tcx, def_id); + body.basic_blocks() + .iter() + .flat_map(|data| { + data.statements.iter().filter_map(|statement| match statement.kind { + StatementKind::Coverage(box ref coverage) => { + if is_inlined(body, statement) { + None + } else { + coverage.code_region.as_ref() // may be None + } + } + _ => None, + }) + }) + .collect() +} + +fn is_inlined(body: &Body<'_>, statement: &Statement<'_>) -> bool { + let scope_data = &body.source_scopes[statement.source_info.scope]; + scope_data.inlined.is_some() || scope_data.inlined_parent_scope.is_some() +} + +/// This function ensures we obtain the correct MIR for the given item irrespective of +/// whether that means const mir or runtime mir. For `const fn` this opts for runtime +/// mir. +fn mir_body<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> &'tcx mir::Body<'tcx> { + let id = ty::WithOptConstParam::unknown(def_id); + let def = ty::InstanceDef::Item(id); + tcx.instance_mir(def) +} diff --git a/compiler/rustc_mir_transform/src/coverage/spans.rs b/compiler/rustc_mir_transform/src/coverage/spans.rs new file mode 100644 index 000000000..423e78317 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/spans.rs @@ -0,0 +1,892 @@ +use super::debug::term_type; +use super::graph::{BasicCoverageBlock, BasicCoverageBlockData, CoverageGraph, START_BCB}; + +use itertools::Itertools; +use rustc_data_structures::graph::WithNumNodes; +use rustc_middle::mir::spanview::source_range_no_file; +use rustc_middle::mir::{ + self, AggregateKind, BasicBlock, FakeReadCause, Rvalue, Statement, StatementKind, Terminator, + TerminatorKind, +}; +use rustc_middle::ty::TyCtxt; +use rustc_span::source_map::original_sp; +use rustc_span::{BytePos, ExpnKind, MacroKind, Span, Symbol}; + +use std::cell::RefCell; +use std::cmp::Ordering; + +#[derive(Debug, Copy, Clone)] +pub(super) enum CoverageStatement { + Statement(BasicBlock, Span, usize), + Terminator(BasicBlock, Span), +} + +impl CoverageStatement { + pub fn format<'tcx>(&self, tcx: TyCtxt<'tcx>, mir_body: &mir::Body<'tcx>) -> String { + match *self { + Self::Statement(bb, span, stmt_index) => { + let stmt = &mir_body[bb].statements[stmt_index]; + format!( + "{}: @{}[{}]: {:?}", + source_range_no_file(tcx, span), + bb.index(), + stmt_index, + stmt + ) + } + Self::Terminator(bb, span) => { + let term = mir_body[bb].terminator(); + format!( + "{}: @{}.{}: {:?}", + source_range_no_file(tcx, span), + bb.index(), + term_type(&term.kind), + term.kind + ) + } + } + } + + pub fn span(&self) -> Span { + match self { + Self::Statement(_, span, _) | Self::Terminator(_, span) => *span, + } + } +} + +/// A BCB is deconstructed into one or more `Span`s. Each `Span` maps to a `CoverageSpan` that +/// references the originating BCB and one or more MIR `Statement`s and/or `Terminator`s. +/// Initially, the `Span`s come from the `Statement`s and `Terminator`s, but subsequent +/// transforms can combine adjacent `Span`s and `CoverageSpan` from the same BCB, merging the +/// `CoverageStatement` vectors, and the `Span`s to cover the extent of the combined `Span`s. +/// +/// Note: A `CoverageStatement` merged into another CoverageSpan may come from a `BasicBlock` that +/// is not part of the `CoverageSpan` bcb if the statement was included because it's `Span` matches +/// or is subsumed by the `Span` associated with this `CoverageSpan`, and it's `BasicBlock` +/// `is_dominated_by()` the `BasicBlock`s in this `CoverageSpan`. +#[derive(Debug, Clone)] +pub(super) struct CoverageSpan { + pub span: Span, + pub expn_span: Span, + pub current_macro_or_none: RefCell<Option<Option<Symbol>>>, + pub bcb: BasicCoverageBlock, + pub coverage_statements: Vec<CoverageStatement>, + pub is_closure: bool, +} + +impl CoverageSpan { + pub fn for_fn_sig(fn_sig_span: Span) -> Self { + Self { + span: fn_sig_span, + expn_span: fn_sig_span, + current_macro_or_none: Default::default(), + bcb: START_BCB, + coverage_statements: vec![], + is_closure: false, + } + } + + pub fn for_statement( + statement: &Statement<'_>, + span: Span, + expn_span: Span, + bcb: BasicCoverageBlock, + bb: BasicBlock, + stmt_index: usize, + ) -> Self { + let is_closure = match statement.kind { + StatementKind::Assign(box (_, Rvalue::Aggregate(box ref kind, _))) => { + matches!(kind, AggregateKind::Closure(_, _) | AggregateKind::Generator(_, _, _)) + } + _ => false, + }; + + Self { + span, + expn_span, + current_macro_or_none: Default::default(), + bcb, + coverage_statements: vec![CoverageStatement::Statement(bb, span, stmt_index)], + is_closure, + } + } + + pub fn for_terminator( + span: Span, + expn_span: Span, + bcb: BasicCoverageBlock, + bb: BasicBlock, + ) -> Self { + Self { + span, + expn_span, + current_macro_or_none: Default::default(), + bcb, + coverage_statements: vec![CoverageStatement::Terminator(bb, span)], + is_closure: false, + } + } + + pub fn merge_from(&mut self, mut other: CoverageSpan) { + debug_assert!(self.is_mergeable(&other)); + self.span = self.span.to(other.span); + self.coverage_statements.append(&mut other.coverage_statements); + } + + pub fn cutoff_statements_at(&mut self, cutoff_pos: BytePos) { + self.coverage_statements.retain(|covstmt| covstmt.span().hi() <= cutoff_pos); + if let Some(highest_covstmt) = + self.coverage_statements.iter().max_by_key(|covstmt| covstmt.span().hi()) + { + self.span = self.span.with_hi(highest_covstmt.span().hi()); + } + } + + #[inline] + pub fn is_mergeable(&self, other: &Self) -> bool { + self.is_in_same_bcb(other) && !(self.is_closure || other.is_closure) + } + + #[inline] + pub fn is_in_same_bcb(&self, other: &Self) -> bool { + self.bcb == other.bcb + } + + pub fn format<'tcx>(&self, tcx: TyCtxt<'tcx>, mir_body: &mir::Body<'tcx>) -> String { + format!( + "{}\n {}", + source_range_no_file(tcx, self.span), + self.format_coverage_statements(tcx, mir_body).replace('\n', "\n "), + ) + } + + pub fn format_coverage_statements<'tcx>( + &self, + tcx: TyCtxt<'tcx>, + mir_body: &mir::Body<'tcx>, + ) -> String { + let mut sorted_coverage_statements = self.coverage_statements.clone(); + sorted_coverage_statements.sort_unstable_by_key(|covstmt| match *covstmt { + CoverageStatement::Statement(bb, _, index) => (bb, index), + CoverageStatement::Terminator(bb, _) => (bb, usize::MAX), + }); + sorted_coverage_statements.iter().map(|covstmt| covstmt.format(tcx, mir_body)).join("\n") + } + + /// If the span is part of a macro, returns the macro name symbol. + pub fn current_macro(&self) -> Option<Symbol> { + self.current_macro_or_none + .borrow_mut() + .get_or_insert_with(|| { + if let ExpnKind::Macro(MacroKind::Bang, current_macro) = + self.expn_span.ctxt().outer_expn_data().kind + { + return Some(current_macro); + } + None + }) + .map(|symbol| symbol) + } + + /// If the span is part of a macro, and the macro is visible (expands directly to the given + /// body_span), returns the macro name symbol. + pub fn visible_macro(&self, body_span: Span) -> Option<Symbol> { + if let Some(current_macro) = self.current_macro() && self + .expn_span + .parent_callsite() + .unwrap_or_else(|| bug!("macro must have a parent")) + .eq_ctxt(body_span) + { + return Some(current_macro); + } + None + } + + pub fn is_macro_expansion(&self) -> bool { + self.current_macro().is_some() + } +} + +/// Converts the initial set of `CoverageSpan`s (one per MIR `Statement` or `Terminator`) into a +/// minimal set of `CoverageSpan`s, using the BCB CFG to determine where it is safe and useful to: +/// +/// * Remove duplicate source code coverage regions +/// * Merge spans that represent continuous (both in source code and control flow), non-branching +/// execution +/// * Carve out (leave uncovered) any span that will be counted by another MIR (notably, closures) +pub struct CoverageSpans<'a, 'tcx> { + /// The MIR, used to look up `BasicBlockData`. + mir_body: &'a mir::Body<'tcx>, + + /// A `Span` covering the signature of function for the MIR. + fn_sig_span: Span, + + /// A `Span` covering the function body of the MIR (typically from left curly brace to right + /// curly brace). + body_span: Span, + + /// The BasicCoverageBlock Control Flow Graph (BCB CFG). + basic_coverage_blocks: &'a CoverageGraph, + + /// The initial set of `CoverageSpan`s, sorted by `Span` (`lo` and `hi`) and by relative + /// dominance between the `BasicCoverageBlock`s of equal `Span`s. + sorted_spans_iter: Option<std::vec::IntoIter<CoverageSpan>>, + + /// The current `CoverageSpan` to compare to its `prev`, to possibly merge, discard, force the + /// discard of the `prev` (and or `pending_dups`), or keep both (with `prev` moved to + /// `pending_dups`). If `curr` is not discarded or merged, it becomes `prev` for the next + /// iteration. + some_curr: Option<CoverageSpan>, + + /// The original `span` for `curr`, in case `curr.span()` is modified. The `curr_original_span` + /// **must not be mutated** (except when advancing to the next `curr`), even if `curr.span()` + /// is mutated. + curr_original_span: Span, + + /// The CoverageSpan from a prior iteration; typically assigned from that iteration's `curr`. + /// If that `curr` was discarded, `prev` retains its value from the previous iteration. + some_prev: Option<CoverageSpan>, + + /// Assigned from `curr_original_span` from the previous iteration. The `prev_original_span` + /// **must not be mutated** (except when advancing to the next `prev`), even if `prev.span()` + /// is mutated. + prev_original_span: Span, + + /// A copy of the expn_span from the prior iteration. + prev_expn_span: Option<Span>, + + /// One or more `CoverageSpan`s with the same `Span` but different `BasicCoverageBlock`s, and + /// no `BasicCoverageBlock` in this list dominates another `BasicCoverageBlock` in the list. + /// If a new `curr` span also fits this criteria (compared to an existing list of + /// `pending_dups`), that `curr` `CoverageSpan` moves to `prev` before possibly being added to + /// the `pending_dups` list, on the next iteration. As a result, if `prev` and `pending_dups` + /// have the same `Span`, the criteria for `pending_dups` holds for `prev` as well: a `prev` + /// with a matching `Span` does not dominate any `pending_dup` and no `pending_dup` dominates a + /// `prev` with a matching `Span`) + pending_dups: Vec<CoverageSpan>, + + /// The final `CoverageSpan`s to add to the coverage map. A `Counter` or `Expression` + /// will also be injected into the MIR for each `CoverageSpan`. + refined_spans: Vec<CoverageSpan>, +} + +impl<'a, 'tcx> CoverageSpans<'a, 'tcx> { + /// Generate a minimal set of `CoverageSpan`s, each representing a contiguous code region to be + /// counted. + /// + /// The basic steps are: + /// + /// 1. Extract an initial set of spans from the `Statement`s and `Terminator`s of each + /// `BasicCoverageBlockData`. + /// 2. Sort the spans by span.lo() (starting position). Spans that start at the same position + /// are sorted with longer spans before shorter spans; and equal spans are sorted + /// (deterministically) based on "dominator" relationship (if any). + /// 3. Traverse the spans in sorted order to identify spans that can be dropped (for instance, + /// if another span or spans are already counting the same code region), or should be merged + /// into a broader combined span (because it represents a contiguous, non-branching, and + /// uninterrupted region of source code). + /// + /// Closures are exposed in their enclosing functions as `Assign` `Rvalue`s, and since + /// closures have their own MIR, their `Span` in their enclosing function should be left + /// "uncovered". + /// + /// Note the resulting vector of `CoverageSpan`s may not be fully sorted (and does not need + /// to be). + pub(super) fn generate_coverage_spans( + mir_body: &'a mir::Body<'tcx>, + fn_sig_span: Span, // Ensured to be same SourceFile and SyntaxContext as `body_span` + body_span: Span, + basic_coverage_blocks: &'a CoverageGraph, + ) -> Vec<CoverageSpan> { + let mut coverage_spans = CoverageSpans { + mir_body, + fn_sig_span, + body_span, + basic_coverage_blocks, + sorted_spans_iter: None, + refined_spans: Vec::with_capacity(basic_coverage_blocks.num_nodes() * 2), + some_curr: None, + curr_original_span: Span::with_root_ctxt(BytePos(0), BytePos(0)), + some_prev: None, + prev_original_span: Span::with_root_ctxt(BytePos(0), BytePos(0)), + prev_expn_span: None, + pending_dups: Vec::new(), + }; + + let sorted_spans = coverage_spans.mir_to_initial_sorted_coverage_spans(); + + coverage_spans.sorted_spans_iter = Some(sorted_spans.into_iter()); + + coverage_spans.to_refined_spans() + } + + fn mir_to_initial_sorted_coverage_spans(&self) -> Vec<CoverageSpan> { + let mut initial_spans = + Vec::<CoverageSpan>::with_capacity(self.mir_body.basic_blocks.len() * 2); + for (bcb, bcb_data) in self.basic_coverage_blocks.iter_enumerated() { + initial_spans.extend(self.bcb_to_initial_coverage_spans(bcb, bcb_data)); + } + + if initial_spans.is_empty() { + // This can happen if, for example, the function is unreachable (contains only a + // `BasicBlock`(s) with an `Unreachable` terminator). + return initial_spans; + } + + initial_spans.push(CoverageSpan::for_fn_sig(self.fn_sig_span)); + + initial_spans.sort_unstable_by(|a, b| { + if a.span.lo() == b.span.lo() { + if a.span.hi() == b.span.hi() { + if a.is_in_same_bcb(b) { + Some(Ordering::Equal) + } else { + // Sort equal spans by dominator relationship, in reverse order (so + // dominators always come after the dominated equal spans). When later + // comparing two spans in order, the first will either dominate the second, + // or they will have no dominator relationship. + self.basic_coverage_blocks.dominators().rank_partial_cmp(b.bcb, a.bcb) + } + } else { + // Sort hi() in reverse order so shorter spans are attempted after longer spans. + // This guarantees that, if a `prev` span overlaps, and is not equal to, a + // `curr` span, the prev span either extends further left of the curr span, or + // they start at the same position and the prev span extends further right of + // the end of the curr span. + b.span.hi().partial_cmp(&a.span.hi()) + } + } else { + a.span.lo().partial_cmp(&b.span.lo()) + } + .unwrap() + }); + + initial_spans + } + + /// Iterate through the sorted `CoverageSpan`s, and return the refined list of merged and + /// de-duplicated `CoverageSpan`s. + fn to_refined_spans(mut self) -> Vec<CoverageSpan> { + while self.next_coverage_span() { + if self.some_prev.is_none() { + debug!(" initial span"); + self.check_invoked_macro_name_span(); + } else if self.curr().is_mergeable(self.prev()) { + debug!(" same bcb (and neither is a closure), merge with prev={:?}", self.prev()); + let prev = self.take_prev(); + self.curr_mut().merge_from(prev); + self.check_invoked_macro_name_span(); + // Note that curr.span may now differ from curr_original_span + } else if self.prev_ends_before_curr() { + debug!( + " different bcbs and disjoint spans, so keep curr for next iter, and add \ + prev={:?}", + self.prev() + ); + let prev = self.take_prev(); + self.push_refined_span(prev); + self.check_invoked_macro_name_span(); + } else if self.prev().is_closure { + // drop any equal or overlapping span (`curr`) and keep `prev` to test again in the + // next iter + debug!( + " curr overlaps a closure (prev). Drop curr and keep prev for next iter. \ + prev={:?}", + self.prev() + ); + self.take_curr(); + } else if self.curr().is_closure { + self.carve_out_span_for_closure(); + } else if self.prev_original_span == self.curr().span { + // Note that this compares the new (`curr`) span to `prev_original_span`. + // In this branch, the actual span byte range of `prev_original_span` is not + // important. What is important is knowing whether the new `curr` span was + // **originally** the same as the original span of `prev()`. The original spans + // reflect their original sort order, and for equal spans, conveys a partial + // ordering based on CFG dominator priority. + if self.prev().is_macro_expansion() && self.curr().is_macro_expansion() { + // Macros that expand to include branching (such as + // `assert_eq!()`, `assert_ne!()`, `info!()`, `debug!()`, or + // `trace!()) typically generate callee spans with identical + // ranges (typically the full span of the macro) for all + // `BasicBlocks`. This makes it impossible to distinguish + // the condition (`if val1 != val2`) from the optional + // branched statements (such as the call to `panic!()` on + // assert failure). In this case it is better (or less + // worse) to drop the optional branch bcbs and keep the + // non-conditional statements, to count when reached. + debug!( + " curr and prev are part of a macro expansion, and curr has the same span \ + as prev, but is in a different bcb. Drop curr and keep prev for next iter. \ + prev={:?}", + self.prev() + ); + self.take_curr(); + } else { + self.hold_pending_dups_unless_dominated(); + } + } else { + self.cutoff_prev_at_overlapping_curr(); + self.check_invoked_macro_name_span(); + } + } + + debug!(" AT END, adding last prev={:?}", self.prev()); + let prev = self.take_prev(); + let pending_dups = self.pending_dups.split_off(0); + for dup in pending_dups { + debug!(" ...adding at least one pending dup={:?}", dup); + self.push_refined_span(dup); + } + + // Async functions wrap a closure that implements the body to be executed. The enclosing + // function is called and returns an `impl Future` without initially executing any of the + // body. To avoid showing the return from the enclosing function as a "covered" return from + // the closure, the enclosing function's `TerminatorKind::Return`s `CoverageSpan` is + // excluded. The closure's `Return` is the only one that will be counted. This provides + // adequate coverage, and more intuitive counts. (Avoids double-counting the closing brace + // of the function body.) + let body_ends_with_closure = if let Some(last_covspan) = self.refined_spans.last() { + last_covspan.is_closure && last_covspan.span.hi() == self.body_span.hi() + } else { + false + }; + + if !body_ends_with_closure { + self.push_refined_span(prev); + } + + // Remove `CoverageSpan`s derived from closures, originally added to ensure the coverage + // regions for the current function leave room for the closure's own coverage regions + // (injected separately, from the closure's own MIR). + self.refined_spans.retain(|covspan| !covspan.is_closure); + self.refined_spans + } + + fn push_refined_span(&mut self, covspan: CoverageSpan) { + let len = self.refined_spans.len(); + if len > 0 { + let last = &mut self.refined_spans[len - 1]; + if last.is_mergeable(&covspan) { + debug!( + "merging new refined span with last refined span, last={:?}, covspan={:?}", + last, covspan + ); + last.merge_from(covspan); + return; + } + } + self.refined_spans.push(covspan) + } + + fn check_invoked_macro_name_span(&mut self) { + if let Some(visible_macro) = self.curr().visible_macro(self.body_span) { + if self.prev_expn_span.map_or(true, |prev_expn_span| { + self.curr().expn_span.ctxt() != prev_expn_span.ctxt() + }) { + let merged_prefix_len = self.curr_original_span.lo() - self.curr().span.lo(); + let after_macro_bang = + merged_prefix_len + BytePos(visible_macro.as_str().len() as u32 + 1); + let mut macro_name_cov = self.curr().clone(); + self.curr_mut().span = + self.curr().span.with_lo(self.curr().span.lo() + after_macro_bang); + macro_name_cov.span = + macro_name_cov.span.with_hi(macro_name_cov.span.lo() + after_macro_bang); + debug!( + " and curr starts a new macro expansion, so add a new span just for \ + the macro `{}!`, new span={:?}", + visible_macro, macro_name_cov + ); + self.push_refined_span(macro_name_cov); + } + } + } + + // Generate a set of `CoverageSpan`s from the filtered set of `Statement`s and `Terminator`s of + // the `BasicBlock`(s) in the given `BasicCoverageBlockData`. One `CoverageSpan` is generated + // for each `Statement` and `Terminator`. (Note that subsequent stages of coverage analysis will + // merge some `CoverageSpan`s, at which point a `CoverageSpan` may represent multiple + // `Statement`s and/or `Terminator`s.) + fn bcb_to_initial_coverage_spans( + &self, + bcb: BasicCoverageBlock, + bcb_data: &'a BasicCoverageBlockData, + ) -> Vec<CoverageSpan> { + bcb_data + .basic_blocks + .iter() + .flat_map(|&bb| { + let data = &self.mir_body[bb]; + data.statements + .iter() + .enumerate() + .filter_map(move |(index, statement)| { + filtered_statement_span(statement).map(|span| { + CoverageSpan::for_statement( + statement, + function_source_span(span, self.body_span), + span, + bcb, + bb, + index, + ) + }) + }) + .chain(filtered_terminator_span(data.terminator()).map(|span| { + CoverageSpan::for_terminator( + function_source_span(span, self.body_span), + span, + bcb, + bb, + ) + })) + }) + .collect() + } + + fn curr(&self) -> &CoverageSpan { + self.some_curr + .as_ref() + .unwrap_or_else(|| bug!("invalid attempt to unwrap a None some_curr")) + } + + fn curr_mut(&mut self) -> &mut CoverageSpan { + self.some_curr + .as_mut() + .unwrap_or_else(|| bug!("invalid attempt to unwrap a None some_curr")) + } + + fn prev(&self) -> &CoverageSpan { + self.some_prev + .as_ref() + .unwrap_or_else(|| bug!("invalid attempt to unwrap a None some_prev")) + } + + fn prev_mut(&mut self) -> &mut CoverageSpan { + self.some_prev + .as_mut() + .unwrap_or_else(|| bug!("invalid attempt to unwrap a None some_prev")) + } + + fn take_prev(&mut self) -> CoverageSpan { + self.some_prev.take().unwrap_or_else(|| bug!("invalid attempt to unwrap a None some_prev")) + } + + /// If there are `pending_dups` but `prev` is not a matching dup (`prev.span` doesn't match the + /// `pending_dups` spans), then one of the following two things happened during the previous + /// iteration: + /// * the previous `curr` span (which is now `prev`) was not a duplicate of the pending_dups + /// (in which case there should be at least two spans in `pending_dups`); or + /// * the `span` of `prev` was modified by `curr_mut().merge_from(prev)` (in which case + /// `pending_dups` could have as few as one span) + /// In either case, no more spans will match the span of `pending_dups`, so + /// add the `pending_dups` if they don't overlap `curr`, and clear the list. + fn check_pending_dups(&mut self) { + if let Some(dup) = self.pending_dups.last() && dup.span != self.prev().span { + debug!( + " SAME spans, but pending_dups are NOT THE SAME, so BCBs matched on \ + previous iteration, or prev started a new disjoint span" + ); + if dup.span.hi() <= self.curr().span.lo() { + let pending_dups = self.pending_dups.split_off(0); + for dup in pending_dups.into_iter() { + debug!(" ...adding at least one pending={:?}", dup); + self.push_refined_span(dup); + } + } else { + self.pending_dups.clear(); + } + } + } + + /// Advance `prev` to `curr` (if any), and `curr` to the next `CoverageSpan` in sorted order. + fn next_coverage_span(&mut self) -> bool { + if let Some(curr) = self.some_curr.take() { + self.prev_expn_span = Some(curr.expn_span); + self.some_prev = Some(curr); + self.prev_original_span = self.curr_original_span; + } + while let Some(curr) = self.sorted_spans_iter.as_mut().unwrap().next() { + debug!("FOR curr={:?}", curr); + if self.some_prev.is_some() && self.prev_starts_after_next(&curr) { + debug!( + " prev.span starts after curr.span, so curr will be dropped (skipping past \ + closure?); prev={:?}", + self.prev() + ); + } else { + // Save a copy of the original span for `curr` in case the `CoverageSpan` is changed + // by `self.curr_mut().merge_from(prev)`. + self.curr_original_span = curr.span; + self.some_curr.replace(curr); + self.check_pending_dups(); + return true; + } + } + false + } + + /// If called, then the next call to `next_coverage_span()` will *not* update `prev` with the + /// `curr` coverage span. + fn take_curr(&mut self) -> CoverageSpan { + self.some_curr.take().unwrap_or_else(|| bug!("invalid attempt to unwrap a None some_curr")) + } + + /// Returns true if the curr span should be skipped because prev has already advanced beyond the + /// end of curr. This can only happen if a prior iteration updated `prev` to skip past a region + /// of code, such as skipping past a closure. + fn prev_starts_after_next(&self, next_curr: &CoverageSpan) -> bool { + self.prev().span.lo() > next_curr.span.lo() + } + + /// Returns true if the curr span starts past the end of the prev span, which means they don't + /// overlap, so we now know the prev can be added to the refined coverage spans. + fn prev_ends_before_curr(&self) -> bool { + self.prev().span.hi() <= self.curr().span.lo() + } + + /// If `prev`s span extends left of the closure (`curr`), carve out the closure's span from + /// `prev`'s span. (The closure's coverage counters will be injected when processing the + /// closure's own MIR.) Add the portion of the span to the left of the closure; and if the span + /// extends to the right of the closure, update `prev` to that portion of the span. For any + /// `pending_dups`, repeat the same process. + fn carve_out_span_for_closure(&mut self) { + let curr_span = self.curr().span; + let left_cutoff = curr_span.lo(); + let right_cutoff = curr_span.hi(); + let has_pre_closure_span = self.prev().span.lo() < right_cutoff; + let has_post_closure_span = self.prev().span.hi() > right_cutoff; + let mut pending_dups = self.pending_dups.split_off(0); + if has_pre_closure_span { + let mut pre_closure = self.prev().clone(); + pre_closure.span = pre_closure.span.with_hi(left_cutoff); + debug!(" prev overlaps a closure. Adding span for pre_closure={:?}", pre_closure); + if !pending_dups.is_empty() { + for mut dup in pending_dups.iter().cloned() { + dup.span = dup.span.with_hi(left_cutoff); + debug!(" ...and at least one pre_closure dup={:?}", dup); + self.push_refined_span(dup); + } + } + self.push_refined_span(pre_closure); + } + if has_post_closure_span { + // Mutate `prev.span()` to start after the closure (and discard curr). + // (**NEVER** update `prev_original_span` because it affects the assumptions + // about how the `CoverageSpan`s are ordered.) + self.prev_mut().span = self.prev().span.with_lo(right_cutoff); + debug!(" Mutated prev.span to start after the closure. prev={:?}", self.prev()); + for dup in pending_dups.iter_mut() { + debug!(" ...and at least one overlapping dup={:?}", dup); + dup.span = dup.span.with_lo(right_cutoff); + } + self.pending_dups.append(&mut pending_dups); + let closure_covspan = self.take_curr(); + self.push_refined_span(closure_covspan); // since self.prev() was already updated + } else { + pending_dups.clear(); + } + } + + /// Called if `curr.span` equals `prev_original_span` (and potentially equal to all + /// `pending_dups` spans, if any). Keep in mind, `prev.span()` may have been changed. + /// If prev.span() was merged into other spans (with matching BCB, for instance), + /// `prev.span.hi()` will be greater than (further right of) `prev_original_span.hi()`. + /// If prev.span() was split off to the right of a closure, prev.span().lo() will be + /// greater than prev_original_span.lo(). The actual span of `prev_original_span` is + /// not as important as knowing that `prev()` **used to have the same span** as `curr(), + /// which means their sort order is still meaningful for determining the dominator + /// relationship. + /// + /// When two `CoverageSpan`s have the same `Span`, dominated spans can be discarded; but if + /// neither `CoverageSpan` dominates the other, both (or possibly more than two) are held, + /// until their disposition is determined. In this latter case, the `prev` dup is moved into + /// `pending_dups` so the new `curr` dup can be moved to `prev` for the next iteration. + fn hold_pending_dups_unless_dominated(&mut self) { + // Equal coverage spans are ordered by dominators before dominated (if any), so it should be + // impossible for `curr` to dominate any previous `CoverageSpan`. + debug_assert!(!self.span_bcb_is_dominated_by(self.prev(), self.curr())); + + let initial_pending_count = self.pending_dups.len(); + if initial_pending_count > 0 { + let mut pending_dups = self.pending_dups.split_off(0); + pending_dups.retain(|dup| !self.span_bcb_is_dominated_by(self.curr(), dup)); + self.pending_dups.append(&mut pending_dups); + if self.pending_dups.len() < initial_pending_count { + debug!( + " discarded {} of {} pending_dups that dominated curr", + initial_pending_count - self.pending_dups.len(), + initial_pending_count + ); + } + } + + if self.span_bcb_is_dominated_by(self.curr(), self.prev()) { + debug!( + " different bcbs but SAME spans, and prev dominates curr. Discard prev={:?}", + self.prev() + ); + self.cutoff_prev_at_overlapping_curr(); + // If one span dominates the other, associate the span with the code from the dominated + // block only (`curr`), and discard the overlapping portion of the `prev` span. (Note + // that if `prev.span` is wider than `prev_original_span`, a `CoverageSpan` will still + // be created for `prev`s block, for the non-overlapping portion, left of `curr.span`.) + // + // For example: + // match somenum { + // x if x < 1 => { ... } + // }... + // + // The span for the first `x` is referenced by both the pattern block (every time it is + // evaluated) and the arm code (only when matched). The counter will be applied only to + // the dominated block. This allows coverage to track and highlight things like the + // assignment of `x` above, if the branch is matched, making `x` available to the arm + // code; and to track and highlight the question mark `?` "try" operator at the end of + // a function call returning a `Result`, so the `?` is covered when the function returns + // an `Err`, and not counted as covered if the function always returns `Ok`. + } else { + // Save `prev` in `pending_dups`. (`curr` will become `prev` in the next iteration.) + // If the `curr` CoverageSpan is later discarded, `pending_dups` can be discarded as + // well; but if `curr` is added to refined_spans, the `pending_dups` will also be added. + debug!( + " different bcbs but SAME spans, and neither dominates, so keep curr for \ + next iter, and, pending upcoming spans (unless overlapping) add prev={:?}", + self.prev() + ); + let prev = self.take_prev(); + self.pending_dups.push(prev); + } + } + + /// `curr` overlaps `prev`. If `prev`s span extends left of `curr`s span, keep _only_ + /// statements that end before `curr.lo()` (if any), and add the portion of the + /// combined span for those statements. Any other statements have overlapping spans + /// that can be ignored because `curr` and/or other upcoming statements/spans inside + /// the overlap area will produce their own counters. This disambiguation process + /// avoids injecting multiple counters for overlapping spans, and the potential for + /// double-counting. + fn cutoff_prev_at_overlapping_curr(&mut self) { + debug!( + " different bcbs, overlapping spans, so ignore/drop pending and only add prev \ + if it has statements that end before curr; prev={:?}", + self.prev() + ); + if self.pending_dups.is_empty() { + let curr_span = self.curr().span; + self.prev_mut().cutoff_statements_at(curr_span.lo()); + if self.prev().coverage_statements.is_empty() { + debug!(" ... no non-overlapping statements to add"); + } else { + debug!(" ... adding modified prev={:?}", self.prev()); + let prev = self.take_prev(); + self.push_refined_span(prev); + } + } else { + // with `pending_dups`, `prev` cannot have any statements that don't overlap + self.pending_dups.clear(); + } + } + + fn span_bcb_is_dominated_by(&self, covspan: &CoverageSpan, dom_covspan: &CoverageSpan) -> bool { + self.basic_coverage_blocks.is_dominated_by(covspan.bcb, dom_covspan.bcb) + } +} + +/// If the MIR `Statement` has a span contributive to computing coverage spans, +/// return it; otherwise return `None`. +pub(super) fn filtered_statement_span(statement: &Statement<'_>) -> Option<Span> { + match statement.kind { + // These statements have spans that are often outside the scope of the executed source code + // for their parent `BasicBlock`. + StatementKind::StorageLive(_) + | StatementKind::StorageDead(_) + // Coverage should not be encountered, but don't inject coverage coverage + | StatementKind::Coverage(_) + // Ignore `Nop`s + | StatementKind::Nop => None, + + // FIXME(#78546): MIR InstrumentCoverage - Can the source_info.span for `FakeRead` + // statements be more consistent? + // + // FakeReadCause::ForGuardBinding, in this example: + // match somenum { + // x if x < 1 => { ... } + // }... + // The BasicBlock within the match arm code included one of these statements, but the span + // for it covered the `1` in this source. The actual statements have nothing to do with that + // source span: + // FakeRead(ForGuardBinding, _4); + // where `_4` is: + // _4 = &_1; (at the span for the first `x`) + // and `_1` is the `Place` for `somenum`. + // + // If and when the Issue is resolved, remove this special case match pattern: + StatementKind::FakeRead(box (cause, _)) if cause == FakeReadCause::ForGuardBinding => None, + + // Retain spans from all other statements + StatementKind::FakeRead(box (_, _)) // Not including `ForGuardBinding` + | StatementKind::CopyNonOverlapping(..) + | StatementKind::Assign(_) + | StatementKind::SetDiscriminant { .. } + | StatementKind::Deinit(..) + | StatementKind::Retag(_, _) + | StatementKind::AscribeUserType(_, _) => { + Some(statement.source_info.span) + } + } +} + +/// If the MIR `Terminator` has a span contributive to computing coverage spans, +/// return it; otherwise return `None`. +pub(super) fn filtered_terminator_span(terminator: &Terminator<'_>) -> Option<Span> { + match terminator.kind { + // These terminators have spans that don't positively contribute to computing a reasonable + // span of actually executed source code. (For example, SwitchInt terminators extracted from + // an `if condition { block }` has a span that includes the executed block, if true, + // but for coverage, the code region executed, up to *and* through the SwitchInt, + // actually stops before the if's block.) + TerminatorKind::Unreachable // Unreachable blocks are not connected to the MIR CFG + | TerminatorKind::Assert { .. } + | TerminatorKind::Drop { .. } + | TerminatorKind::DropAndReplace { .. } + | TerminatorKind::SwitchInt { .. } + // For `FalseEdge`, only the `real` branch is taken, so it is similar to a `Goto`. + | TerminatorKind::FalseEdge { .. } + | TerminatorKind::Goto { .. } => None, + + // Call `func` operand can have a more specific span when part of a chain of calls + | TerminatorKind::Call { ref func, .. } => { + let mut span = terminator.source_info.span; + if let mir::Operand::Constant(box constant) = func { + if constant.span.lo() > span.lo() { + span = span.with_lo(constant.span.lo()); + } + } + Some(span) + } + + // Retain spans from all other terminators + TerminatorKind::Resume + | TerminatorKind::Abort + | TerminatorKind::Return + | TerminatorKind::Yield { .. } + | TerminatorKind::GeneratorDrop + | TerminatorKind::FalseUnwind { .. } + | TerminatorKind::InlineAsm { .. } => { + Some(terminator.source_info.span) + } + } +} + +/// Returns an extrapolated span (pre-expansion[^1]) corresponding to a range +/// within the function's body source. This span is guaranteed to be contained +/// within, or equal to, the `body_span`. If the extrapolated span is not +/// contained within the `body_span`, the `body_span` is returned. +/// +/// [^1]Expansions result from Rust syntax including macros, syntactic sugar, +/// etc.). +#[inline] +pub(super) fn function_source_span(span: Span, body_span: Span) -> Span { + let original_span = original_sp(span, body_span).with_ctxt(body_span.ctxt()); + if body_span.contains(original_span) { original_span } else { body_span } +} diff --git a/compiler/rustc_mir_transform/src/coverage/test_macros/Cargo.toml b/compiler/rustc_mir_transform/src/coverage/test_macros/Cargo.toml new file mode 100644 index 000000000..f5e8b6565 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/test_macros/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "coverage_test_macros" +version = "0.0.0" +edition = "2021" + +[lib] +proc-macro = true +doctest = false diff --git a/compiler/rustc_mir_transform/src/coverage/test_macros/src/lib.rs b/compiler/rustc_mir_transform/src/coverage/test_macros/src/lib.rs new file mode 100644 index 000000000..3d6095d27 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/test_macros/src/lib.rs @@ -0,0 +1,6 @@ +use proc_macro::TokenStream; + +#[proc_macro] +pub fn let_bcb(item: TokenStream) -> TokenStream { + format!("let bcb{} = graph::BasicCoverageBlock::from_usize({});", item, item).parse().unwrap() +} diff --git a/compiler/rustc_mir_transform/src/coverage/tests.rs b/compiler/rustc_mir_transform/src/coverage/tests.rs new file mode 100644 index 000000000..6380f0352 --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/tests.rs @@ -0,0 +1,710 @@ +//! This crate hosts a selection of "unit tests" for components of the `InstrumentCoverage` MIR +//! pass. +//! +//! ```shell +//! ./x.py test --keep-stage 1 compiler/rustc_mir --test-args '--show-output coverage' +//! ``` +//! +//! The tests construct a few "mock" objects, as needed, to support the `InstrumentCoverage` +//! functions and algorithms. Mocked objects include instances of `mir::Body`; including +//! `Terminator`s of various `kind`s, and `Span` objects. Some functions used by or used on +//! real, runtime versions of these mocked-up objects have constraints (such as cross-thread +//! limitations) and deep dependencies on other elements of the full Rust compiler (which is +//! *not* constructed or mocked for these tests). +//! +//! Of particular note, attempting to simply print elements of the `mir::Body` with default +//! `Debug` formatting can fail because some `Debug` format implementations require the +//! `TyCtxt`, obtained via a static global variable that is *not* set for these tests. +//! Initializing the global type context is prohibitively complex for the scope and scale of these +//! tests (essentially requiring initializing the entire compiler). +//! +//! Also note, some basic features of `Span` also rely on the `Span`s own "session globals", which +//! are unrelated to the `TyCtxt` global. Without initializing the `Span` session globals, some +//! basic, coverage-specific features would be impossible to test, but thankfully initializing these +//! globals is comparatively simpler. The easiest way is to wrap the test in a closure argument +//! to: `rustc_span::create_default_session_globals_then(|| { test_here(); })`. + +use super::counters; +use super::debug; +use super::graph; +use super::spans; + +use coverage_test_macros::let_bcb; + +use itertools::Itertools; +use rustc_data_structures::graph::WithNumNodes; +use rustc_data_structures::graph::WithSuccessors; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::mir::coverage::CoverageKind; +use rustc_middle::mir::*; +use rustc_middle::ty::{self, Ty, TyCtxt}; +use rustc_span::{self, BytePos, Pos, Span, DUMMY_SP}; + +// All `TEMP_BLOCK` targets should be replaced before calling `to_body() -> mir::Body`. +const TEMP_BLOCK: BasicBlock = BasicBlock::MAX; + +struct MockBlocks<'tcx> { + blocks: IndexVec<BasicBlock, BasicBlockData<'tcx>>, + dummy_place: Place<'tcx>, + next_local: usize, + bool_ty: Ty<'tcx>, +} + +impl<'tcx> MockBlocks<'tcx> { + fn new() -> Self { + Self { + blocks: IndexVec::new(), + dummy_place: Place { local: RETURN_PLACE, projection: ty::List::empty() }, + next_local: 0, + bool_ty: TyCtxt::BOOL_TY_FOR_UNIT_TESTING, + } + } + + fn new_temp(&mut self) -> Local { + let index = self.next_local; + self.next_local += 1; + Local::new(index) + } + + fn push(&mut self, kind: TerminatorKind<'tcx>) -> BasicBlock { + let next_lo = if let Some(last) = self.blocks.last() { + self.blocks[last].terminator().source_info.span.hi() + } else { + BytePos(1) + }; + let next_hi = next_lo + BytePos(1); + self.blocks.push(BasicBlockData { + statements: vec![], + terminator: Some(Terminator { + source_info: SourceInfo::outermost(Span::with_root_ctxt(next_lo, next_hi)), + kind, + }), + is_cleanup: false, + }) + } + + fn link(&mut self, from_block: BasicBlock, to_block: BasicBlock) { + match self.blocks[from_block].terminator_mut().kind { + TerminatorKind::Assert { ref mut target, .. } + | TerminatorKind::Call { target: Some(ref mut target), .. } + | TerminatorKind::Drop { ref mut target, .. } + | TerminatorKind::DropAndReplace { ref mut target, .. } + | TerminatorKind::FalseEdge { real_target: ref mut target, .. } + | TerminatorKind::FalseUnwind { real_target: ref mut target, .. } + | TerminatorKind::Goto { ref mut target } + | TerminatorKind::InlineAsm { destination: Some(ref mut target), .. } + | TerminatorKind::Yield { resume: ref mut target, .. } => *target = to_block, + ref invalid => bug!("Invalid from_block: {:?}", invalid), + } + } + + fn add_block_from( + &mut self, + some_from_block: Option<BasicBlock>, + to_kind: TerminatorKind<'tcx>, + ) -> BasicBlock { + let new_block = self.push(to_kind); + if let Some(from_block) = some_from_block { + self.link(from_block, new_block); + } + new_block + } + + fn set_branch(&mut self, switchint: BasicBlock, branch_index: usize, to_block: BasicBlock) { + match self.blocks[switchint].terminator_mut().kind { + TerminatorKind::SwitchInt { ref mut targets, .. } => { + let mut branches = targets.iter().collect::<Vec<_>>(); + let otherwise = if branch_index == branches.len() { + to_block + } else { + let old_otherwise = targets.otherwise(); + if branch_index > branches.len() { + branches.push((branches.len() as u128, old_otherwise)); + while branches.len() < branch_index { + branches.push((branches.len() as u128, TEMP_BLOCK)); + } + to_block + } else { + branches[branch_index] = (branch_index as u128, to_block); + old_otherwise + } + }; + *targets = SwitchTargets::new(branches.into_iter(), otherwise); + } + ref invalid => bug!("Invalid BasicBlock kind or no to_block: {:?}", invalid), + } + } + + fn call(&mut self, some_from_block: Option<BasicBlock>) -> BasicBlock { + self.add_block_from( + some_from_block, + TerminatorKind::Call { + func: Operand::Copy(self.dummy_place.clone()), + args: vec![], + destination: self.dummy_place.clone(), + target: Some(TEMP_BLOCK), + cleanup: None, + from_hir_call: false, + fn_span: DUMMY_SP, + }, + ) + } + + fn goto(&mut self, some_from_block: Option<BasicBlock>) -> BasicBlock { + self.add_block_from(some_from_block, TerminatorKind::Goto { target: TEMP_BLOCK }) + } + + fn switchint(&mut self, some_from_block: Option<BasicBlock>) -> BasicBlock { + let switchint_kind = TerminatorKind::SwitchInt { + discr: Operand::Move(Place::from(self.new_temp())), + switch_ty: self.bool_ty, // just a dummy value + targets: SwitchTargets::static_if(0, TEMP_BLOCK, TEMP_BLOCK), + }; + self.add_block_from(some_from_block, switchint_kind) + } + + fn return_(&mut self, some_from_block: Option<BasicBlock>) -> BasicBlock { + self.add_block_from(some_from_block, TerminatorKind::Return) + } + + fn to_body(self) -> Body<'tcx> { + Body::new_cfg_only(self.blocks) + } +} + +fn debug_basic_blocks<'tcx>(mir_body: &Body<'tcx>) -> String { + format!( + "{:?}", + mir_body + .basic_blocks() + .iter_enumerated() + .map(|(bb, data)| { + let term = &data.terminator(); + let kind = &term.kind; + let span = term.source_info.span; + let sp = format!("(span:{},{})", span.lo().to_u32(), span.hi().to_u32()); + match kind { + TerminatorKind::Assert { target, .. } + | TerminatorKind::Call { target: Some(target), .. } + | TerminatorKind::Drop { target, .. } + | TerminatorKind::DropAndReplace { target, .. } + | TerminatorKind::FalseEdge { real_target: target, .. } + | TerminatorKind::FalseUnwind { real_target: target, .. } + | TerminatorKind::Goto { target } + | TerminatorKind::InlineAsm { destination: Some(target), .. } + | TerminatorKind::Yield { resume: target, .. } => { + format!("{}{:?}:{} -> {:?}", sp, bb, debug::term_type(kind), target) + } + TerminatorKind::SwitchInt { targets, .. } => { + format!("{}{:?}:{} -> {:?}", sp, bb, debug::term_type(kind), targets) + } + _ => format!("{}{:?}:{}", sp, bb, debug::term_type(kind)), + } + }) + .collect::<Vec<_>>() + ) +} + +static PRINT_GRAPHS: bool = false; + +fn print_mir_graphviz(name: &str, mir_body: &Body<'_>) { + if PRINT_GRAPHS { + println!( + "digraph {} {{\n{}\n}}", + name, + mir_body + .basic_blocks() + .iter_enumerated() + .map(|(bb, data)| { + format!( + " {:?} [label=\"{:?}: {}\"];\n{}", + bb, + bb, + debug::term_type(&data.terminator().kind), + mir_body + .basic_blocks + .successors(bb) + .map(|successor| { format!(" {:?} -> {:?};", bb, successor) }) + .join("\n") + ) + }) + .join("\n") + ); + } +} + +fn print_coverage_graphviz( + name: &str, + mir_body: &Body<'_>, + basic_coverage_blocks: &graph::CoverageGraph, +) { + if PRINT_GRAPHS { + println!( + "digraph {} {{\n{}\n}}", + name, + basic_coverage_blocks + .iter_enumerated() + .map(|(bcb, bcb_data)| { + format!( + " {:?} [label=\"{:?}: {}\"];\n{}", + bcb, + bcb, + debug::term_type(&bcb_data.terminator(mir_body).kind), + basic_coverage_blocks + .successors(bcb) + .map(|successor| { format!(" {:?} -> {:?};", bcb, successor) }) + .join("\n") + ) + }) + .join("\n") + ); + } +} + +/// Create a mock `Body` with a simple flow. +fn goto_switchint<'a>() -> Body<'a> { + let mut blocks = MockBlocks::new(); + let start = blocks.call(None); + let goto = blocks.goto(Some(start)); + let switchint = blocks.switchint(Some(goto)); + let then_call = blocks.call(None); + let else_call = blocks.call(None); + blocks.set_branch(switchint, 0, then_call); + blocks.set_branch(switchint, 1, else_call); + blocks.return_(Some(then_call)); + blocks.return_(Some(else_call)); + + let mir_body = blocks.to_body(); + print_mir_graphviz("mir_goto_switchint", &mir_body); + /* Graphviz character plots created using: `graph-easy --as=boxart`: + ┌────────────────┐ + │ bb0: Call │ + └────────────────┘ + │ + │ + ▼ + ┌────────────────┐ + │ bb1: Goto │ + └────────────────┘ + │ + │ + ▼ + ┌─────────────┐ ┌────────────────┐ + │ bb4: Call │ ◀── │ bb2: SwitchInt │ + └─────────────┘ └────────────────┘ + │ │ + │ │ + ▼ ▼ + ┌─────────────┐ ┌────────────────┐ + │ bb6: Return │ │ bb3: Call │ + └─────────────┘ └────────────────┘ + │ + │ + ▼ + ┌────────────────┐ + │ bb5: Return │ + └────────────────┘ + */ + mir_body +} + +macro_rules! assert_successors { + ($basic_coverage_blocks:ident, $i:ident, [$($successor:ident),*]) => { + let mut successors = $basic_coverage_blocks.successors[$i].clone(); + successors.sort_unstable(); + assert_eq!(successors, vec![$($successor),*]); + } +} + +#[test] +fn test_covgraph_goto_switchint() { + let mir_body = goto_switchint(); + if false { + eprintln!("basic_blocks = {}", debug_basic_blocks(&mir_body)); + } + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + print_coverage_graphviz("covgraph_goto_switchint ", &mir_body, &basic_coverage_blocks); + /* + ┌──────────────┐ ┌─────────────────┐ + │ bcb2: Return │ ◀── │ bcb0: SwitchInt │ + └──────────────┘ └─────────────────┘ + │ + │ + ▼ + ┌─────────────────┐ + │ bcb1: Return │ + └─────────────────┘ + */ + assert_eq!( + basic_coverage_blocks.num_nodes(), + 3, + "basic_coverage_blocks: {:?}", + basic_coverage_blocks.iter_enumerated().collect::<Vec<_>>() + ); + + let_bcb!(0); + let_bcb!(1); + let_bcb!(2); + + assert_successors!(basic_coverage_blocks, bcb0, [bcb1, bcb2]); + assert_successors!(basic_coverage_blocks, bcb1, []); + assert_successors!(basic_coverage_blocks, bcb2, []); +} + +/// Create a mock `Body` with a loop. +fn switchint_then_loop_else_return<'a>() -> Body<'a> { + let mut blocks = MockBlocks::new(); + let start = blocks.call(None); + let switchint = blocks.switchint(Some(start)); + let then_call = blocks.call(None); + blocks.set_branch(switchint, 0, then_call); + let backedge_goto = blocks.goto(Some(then_call)); + blocks.link(backedge_goto, switchint); + let else_return = blocks.return_(None); + blocks.set_branch(switchint, 1, else_return); + + let mir_body = blocks.to_body(); + print_mir_graphviz("mir_switchint_then_loop_else_return", &mir_body); + /* + ┌────────────────┐ + │ bb0: Call │ + └────────────────┘ + │ + │ + ▼ + ┌─────────────┐ ┌────────────────┐ + │ bb4: Return │ ◀── │ bb1: SwitchInt │ ◀┐ + └─────────────┘ └────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌────────────────┐ │ + │ bb2: Call │ │ + └────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌────────────────┐ │ + │ bb3: Goto │ ─┘ + └────────────────┘ + */ + mir_body +} + +#[test] +fn test_covgraph_switchint_then_loop_else_return() { + let mir_body = switchint_then_loop_else_return(); + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + print_coverage_graphviz( + "covgraph_switchint_then_loop_else_return", + &mir_body, + &basic_coverage_blocks, + ); + /* + ┌─────────────────┐ + │ bcb0: Call │ + └─────────────────┘ + │ + │ + ▼ + ┌────────────┐ ┌─────────────────┐ + │ bcb3: Goto │ ◀── │ bcb1: SwitchInt │ ◀┐ + └────────────┘ └─────────────────┘ │ + │ │ │ + │ │ │ + │ ▼ │ + │ ┌─────────────────┐ │ + │ │ bcb2: Return │ │ + │ └─────────────────┘ │ + │ │ + └─────────────────────────────────────┘ + */ + assert_eq!( + basic_coverage_blocks.num_nodes(), + 4, + "basic_coverage_blocks: {:?}", + basic_coverage_blocks.iter_enumerated().collect::<Vec<_>>() + ); + + let_bcb!(0); + let_bcb!(1); + let_bcb!(2); + let_bcb!(3); + + assert_successors!(basic_coverage_blocks, bcb0, [bcb1]); + assert_successors!(basic_coverage_blocks, bcb1, [bcb2, bcb3]); + assert_successors!(basic_coverage_blocks, bcb2, []); + assert_successors!(basic_coverage_blocks, bcb3, [bcb1]); +} + +/// Create a mock `Body` with nested loops. +fn switchint_loop_then_inner_loop_else_break<'a>() -> Body<'a> { + let mut blocks = MockBlocks::new(); + let start = blocks.call(None); + let switchint = blocks.switchint(Some(start)); + let then_call = blocks.call(None); + blocks.set_branch(switchint, 0, then_call); + let else_return = blocks.return_(None); + blocks.set_branch(switchint, 1, else_return); + + let inner_start = blocks.call(Some(then_call)); + let inner_switchint = blocks.switchint(Some(inner_start)); + let inner_then_call = blocks.call(None); + blocks.set_branch(inner_switchint, 0, inner_then_call); + let inner_backedge_goto = blocks.goto(Some(inner_then_call)); + blocks.link(inner_backedge_goto, inner_switchint); + let inner_else_break_goto = blocks.goto(None); + blocks.set_branch(inner_switchint, 1, inner_else_break_goto); + + let backedge_goto = blocks.goto(Some(inner_else_break_goto)); + blocks.link(backedge_goto, switchint); + + let mir_body = blocks.to_body(); + print_mir_graphviz("mir_switchint_loop_then_inner_loop_else_break", &mir_body); + /* + ┌────────────────┐ + │ bb0: Call │ + └────────────────┘ + │ + │ + ▼ + ┌─────────────┐ ┌────────────────┐ + │ bb3: Return │ ◀── │ bb1: SwitchInt │ ◀─────┐ + └─────────────┘ └────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌────────────────┐ │ + │ bb2: Call │ │ + └────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌────────────────┐ │ + │ bb4: Call │ │ + └────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌─────────────┐ ┌────────────────┐ │ + │ bb8: Goto │ ◀── │ bb5: SwitchInt │ ◀┐ │ + └─────────────┘ └────────────────┘ │ │ + │ │ │ │ + │ │ │ │ + ▼ ▼ │ │ + ┌─────────────┐ ┌────────────────┐ │ │ + │ bb9: Goto │ ─┐ │ bb6: Call │ │ │ + └─────────────┘ │ └────────────────┘ │ │ + │ │ │ │ + │ │ │ │ + │ ▼ │ │ + │ ┌────────────────┐ │ │ + │ │ bb7: Goto │ ─┘ │ + │ └────────────────┘ │ + │ │ + └───────────────────────────┘ + */ + mir_body +} + +#[test] +fn test_covgraph_switchint_loop_then_inner_loop_else_break() { + let mir_body = switchint_loop_then_inner_loop_else_break(); + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + print_coverage_graphviz( + "covgraph_switchint_loop_then_inner_loop_else_break", + &mir_body, + &basic_coverage_blocks, + ); + /* + ┌─────────────────┐ + │ bcb0: Call │ + └─────────────────┘ + │ + │ + ▼ + ┌──────────────┐ ┌─────────────────┐ + │ bcb2: Return │ ◀── │ bcb1: SwitchInt │ ◀┐ + └──────────────┘ └─────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌─────────────────┐ │ + │ bcb3: Call │ │ + └─────────────────┘ │ + │ │ + │ │ + ▼ │ + ┌──────────────┐ ┌─────────────────┐ │ + │ bcb6: Goto │ ◀── │ bcb4: SwitchInt │ ◀┼────┐ + └──────────────┘ └─────────────────┘ │ │ + │ │ │ │ + │ │ │ │ + │ ▼ │ │ + │ ┌─────────────────┐ │ │ + │ │ bcb5: Goto │ ─┘ │ + │ └─────────────────┘ │ + │ │ + └────────────────────────────────────────────┘ + */ + assert_eq!( + basic_coverage_blocks.num_nodes(), + 7, + "basic_coverage_blocks: {:?}", + basic_coverage_blocks.iter_enumerated().collect::<Vec<_>>() + ); + + let_bcb!(0); + let_bcb!(1); + let_bcb!(2); + let_bcb!(3); + let_bcb!(4); + let_bcb!(5); + let_bcb!(6); + + assert_successors!(basic_coverage_blocks, bcb0, [bcb1]); + assert_successors!(basic_coverage_blocks, bcb1, [bcb2, bcb3]); + assert_successors!(basic_coverage_blocks, bcb2, []); + assert_successors!(basic_coverage_blocks, bcb3, [bcb4]); + assert_successors!(basic_coverage_blocks, bcb4, [bcb5, bcb6]); + assert_successors!(basic_coverage_blocks, bcb5, [bcb1]); + assert_successors!(basic_coverage_blocks, bcb6, [bcb4]); +} + +#[test] +fn test_find_loop_backedges_none() { + let mir_body = goto_switchint(); + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + if false { + eprintln!( + "basic_coverage_blocks = {:?}", + basic_coverage_blocks.iter_enumerated().collect::<Vec<_>>() + ); + eprintln!("successors = {:?}", basic_coverage_blocks.successors); + } + let backedges = graph::find_loop_backedges(&basic_coverage_blocks); + assert_eq!( + backedges.iter_enumerated().map(|(_bcb, backedges)| backedges.len()).sum::<usize>(), + 0, + "backedges: {:?}", + backedges + ); +} + +#[test] +fn test_find_loop_backedges_one() { + let mir_body = switchint_then_loop_else_return(); + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + let backedges = graph::find_loop_backedges(&basic_coverage_blocks); + assert_eq!( + backedges.iter_enumerated().map(|(_bcb, backedges)| backedges.len()).sum::<usize>(), + 1, + "backedges: {:?}", + backedges + ); + + let_bcb!(1); + let_bcb!(3); + + assert_eq!(backedges[bcb1], vec![bcb3]); +} + +#[test] +fn test_find_loop_backedges_two() { + let mir_body = switchint_loop_then_inner_loop_else_break(); + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + let backedges = graph::find_loop_backedges(&basic_coverage_blocks); + assert_eq!( + backedges.iter_enumerated().map(|(_bcb, backedges)| backedges.len()).sum::<usize>(), + 2, + "backedges: {:?}", + backedges + ); + + let_bcb!(1); + let_bcb!(4); + let_bcb!(5); + let_bcb!(6); + + assert_eq!(backedges[bcb1], vec![bcb5]); + assert_eq!(backedges[bcb4], vec![bcb6]); +} + +#[test] +fn test_traverse_coverage_with_loops() { + let mir_body = switchint_loop_then_inner_loop_else_break(); + let basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + let mut traversed_in_order = Vec::new(); + let mut traversal = graph::TraverseCoverageGraphWithLoops::new(&basic_coverage_blocks); + while let Some(bcb) = traversal.next(&basic_coverage_blocks) { + traversed_in_order.push(bcb); + } + + let_bcb!(6); + + // bcb0 is visited first. Then bcb1 starts the first loop, and all remaining nodes, *except* + // bcb6 are inside the first loop. + assert_eq!( + *traversed_in_order.last().expect("should have elements"), + bcb6, + "bcb6 should not be visited until all nodes inside the first loop have been visited" + ); +} + +fn synthesize_body_span_from_terminators(mir_body: &Body<'_>) -> Span { + let mut some_span: Option<Span> = None; + for (_, data) in mir_body.basic_blocks().iter_enumerated() { + let term_span = data.terminator().source_info.span; + if let Some(span) = some_span.as_mut() { + *span = span.to(term_span); + } else { + some_span = Some(term_span) + } + } + some_span.expect("body must have at least one BasicBlock") +} + +#[test] +fn test_make_bcb_counters() { + rustc_span::create_default_session_globals_then(|| { + let mir_body = goto_switchint(); + let body_span = synthesize_body_span_from_terminators(&mir_body); + let mut basic_coverage_blocks = graph::CoverageGraph::from_mir(&mir_body); + let mut coverage_spans = Vec::new(); + for (bcb, data) in basic_coverage_blocks.iter_enumerated() { + if let Some(span) = spans::filtered_terminator_span(data.terminator(&mir_body)) { + coverage_spans.push(spans::CoverageSpan::for_terminator( + spans::function_source_span(span, body_span), + span, + bcb, + data.last_bb(), + )); + } + } + let mut coverage_counters = counters::CoverageCounters::new(0); + let intermediate_expressions = coverage_counters + .make_bcb_counters(&mut basic_coverage_blocks, &coverage_spans) + .expect("should be Ok"); + assert_eq!(intermediate_expressions.len(), 0); + + let_bcb!(1); + assert_eq!( + 1, // coincidentally, bcb1 has a `Counter` with id = 1 + match basic_coverage_blocks[bcb1].counter().expect("should have a counter") { + CoverageKind::Counter { id, .. } => id, + _ => panic!("expected a Counter"), + } + .as_u32() + ); + + let_bcb!(2); + assert_eq!( + 2, // coincidentally, bcb2 has a `Counter` with id = 2 + match basic_coverage_blocks[bcb2].counter().expect("should have a counter") { + CoverageKind::Counter { id, .. } => id, + _ => panic!("expected a Counter"), + } + .as_u32() + ); + }); +} |