From ef24de24a82fe681581cc130f342363c47c0969a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 7 Jun 2024 07:48:48 +0200 Subject: Merging upstream version 1.75.0+dfsg1. Signed-off-by: Daniel Baumann --- .../rustc_codegen_llvm/src/coverageinfo/ffi.rs | 23 +- .../src/coverageinfo/map_data.rs | 385 ++++++++++----------- .../rustc_codegen_llvm/src/coverageinfo/mapgen.rs | 287 ++++++++++----- .../rustc_codegen_llvm/src/coverageinfo/mod.rs | 195 +++-------- 4 files changed, 445 insertions(+), 445 deletions(-) (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo') diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs index 763186a58..7ad2d03a5 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs @@ -1,4 +1,4 @@ -use rustc_middle::mir::coverage::{CounterId, ExpressionId, Operand}; +use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId}; /// Must match the layout of `LLVMRustCounterKind`. #[derive(Copy, Clone, Debug)] @@ -43,11 +43,11 @@ impl Counter { Self { kind: CounterKind::Expression, id: expression_id.as_u32() } } - pub(crate) fn from_operand(operand: Operand) -> Self { - match operand { - Operand::Zero => Self::ZERO, - Operand::Counter(id) => Self::counter_value_reference(id), - Operand::Expression(id) => Self::expression(id), + pub(crate) fn from_term(term: CovTerm) -> Self { + match term { + CovTerm::Zero => Self::ZERO, + CovTerm::Counter(id) => Self::counter_value_reference(id), + CovTerm::Expression(id) => Self::expression(id), } } } @@ -73,17 +73,6 @@ pub struct CounterExpression { pub rhs: Counter, } -impl CounterExpression { - /// The dummy expression `(0 - 0)` has a representation of all zeroes, - /// making it marginally more efficient to initialize than `(0 + 0)`. - pub(crate) const DUMMY: Self = - Self { lhs: Counter::ZERO, kind: ExprKind::Subtract, rhs: Counter::ZERO }; - - pub fn new(lhs: Counter, kind: ExprKind, rhs: Counter) -> Self { - Self { kind, lhs, rhs } - } -} - /// Corresponds to enum `llvm::coverage::CounterMappingRegion::RegionKind`. /// /// Must match the layout of `LLVMRustCounterMappingRegionKind`. diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs index e83110dca..cd67fafb8 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -1,271 +1,270 @@ use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind}; +use rustc_data_structures::captures::Captures; use rustc_data_structures::fx::FxIndexSet; -use rustc_index::IndexVec; -use rustc_middle::mir::coverage::{CodeRegion, CounterId, ExpressionId, Op, Operand}; +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::coverage::{ + CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op, +}; use rustc_middle::ty::Instance; -use rustc_middle::ty::TyCtxt; - -#[derive(Clone, Debug, PartialEq)] -pub struct Expression { - lhs: Operand, - op: Op, - rhs: Operand, - region: Option, -} +use rustc_span::Symbol; -/// Collects all of the coverage regions associated with (a) injected counters, (b) counter -/// expressions (additions or subtraction), and (c) unreachable regions (always counted as zero), -/// for a given Function. This struct also stores the `function_source_hash`, -/// computed during instrumentation, and forwarded with counters. -/// -/// Note, it may be important to understand LLVM's definitions of `unreachable` regions versus "gap -/// regions" (or "gap areas"). A gap region is a code region within a counted region (either counter -/// or expression), but the line or lines in the gap region are not executable (such as lines with -/// only whitespace or comments). According to LLVM Code Coverage Mapping documentation, "A count -/// for a gap area is only used as the line execution count if there are no other regions on a -/// line." +/// Holds all of the coverage mapping data associated with a function instance, +/// collected during traversal of `Coverage` statements in the function's MIR. #[derive(Debug)] -pub struct FunctionCoverage<'tcx> { - instance: Instance<'tcx>, - source_hash: u64, +pub struct FunctionCoverageCollector<'tcx> { + /// Coverage info that was attached to this function by the instrumentor. + function_coverage_info: &'tcx FunctionCoverageInfo, is_used: bool, - counters: IndexVec>, - expressions: IndexVec>, - unreachable_regions: Vec, + + /// Tracks which counters have been seen, so that we can identify mappings + /// to counters that were optimized out, and set them to zero. + counters_seen: BitSet, + /// Contains all expression IDs that have been seen in an `ExpressionUsed` + /// coverage statement, plus all expression IDs that aren't directly used + /// by any mappings (and therefore do not have expression-used statements). + /// After MIR traversal is finished, we can conclude that any IDs missing + /// from this set must have had their statements deleted by MIR opts. + expressions_seen: BitSet, } -impl<'tcx> FunctionCoverage<'tcx> { +impl<'tcx> FunctionCoverageCollector<'tcx> { /// Creates a new set of coverage data for a used (called) function. - pub fn new(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Self { - Self::create(tcx, instance, true) + pub fn new( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + ) -> Self { + Self::create(instance, function_coverage_info, true) } /// Creates a new set of coverage data for an unused (never called) function. - pub fn unused(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Self { - Self::create(tcx, instance, false) + pub fn unused( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + ) -> Self { + Self::create(instance, function_coverage_info, false) } - fn create(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>, is_used: bool) -> Self { - let coverageinfo = tcx.coverageinfo(instance.def); + fn create( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + ) -> Self { + let num_counters = function_coverage_info.num_counters; + let num_expressions = function_coverage_info.expressions.len(); debug!( - "FunctionCoverage::create(instance={:?}) has coverageinfo={:?}. is_used={}", - instance, coverageinfo, is_used + "FunctionCoverage::create(instance={instance:?}) has \ + num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}" ); - Self { - instance, - source_hash: 0, // will be set with the first `add_counter()` - is_used, - counters: IndexVec::from_elem_n(None, coverageinfo.num_counters as usize), - expressions: IndexVec::from_elem_n(None, coverageinfo.num_expressions as usize), - unreachable_regions: Vec::new(), - } - } - - /// Returns true for a used (called) function, and false for an unused function. - pub fn is_used(&self) -> bool { - self.is_used - } - /// Sets the function source hash value. If called multiple times for the same function, all - /// calls should have the same hash value. - pub fn set_function_source_hash(&mut self, source_hash: u64) { - if self.source_hash == 0 { - self.source_hash = source_hash; - } else { - debug_assert_eq!(source_hash, self.source_hash); + // Create a filled set of expression IDs, so that expressions not + // directly used by mappings will be treated as "seen". + // (If they end up being unused, LLVM will delete them for us.) + let mut expressions_seen = BitSet::new_filled(num_expressions); + // For each expression ID that is directly used by one or more mappings, + // mark it as not-yet-seen. This indicates that we expect to see a + // corresponding `ExpressionUsed` statement during MIR traversal. + for Mapping { term, .. } in &function_coverage_info.mappings { + if let &CovTerm::Expression(id) = term { + expressions_seen.remove(id); + } } - } - /// Adds a code region to be counted by an injected counter intrinsic. - pub fn add_counter(&mut self, id: CounterId, region: CodeRegion) { - if let Some(previous_region) = self.counters[id].replace(region.clone()) { - assert_eq!(previous_region, region, "add_counter: code region for id changed"); + Self { + function_coverage_info, + is_used, + counters_seen: BitSet::new_empty(num_counters), + expressions_seen, } } - /// Both counters and "counter expressions" (or simply, "expressions") can be operands in other - /// expressions. These are tracked as separate variants of `Operand`, so there is no ambiguity - /// between operands that are counter IDs and operands that are expression IDs. - pub fn add_counter_expression( - &mut self, - expression_id: ExpressionId, - lhs: Operand, - op: Op, - rhs: Operand, - region: Option, - ) { - debug!( - "add_counter_expression({:?}, lhs={:?}, op={:?}, rhs={:?} at {:?}", - expression_id, lhs, op, rhs, region - ); - debug_assert!( - expression_id.as_usize() < self.expressions.len(), - "expression_id {} is out of range for expressions.len() = {} - for {:?}", - expression_id.as_usize(), - self.expressions.len(), - self, - ); - if let Some(previous_expression) = self.expressions[expression_id].replace(Expression { - lhs, - op, - rhs, - region: region.clone(), - }) { - assert_eq!( - previous_expression, - Expression { lhs, op, rhs, region }, - "add_counter_expression: expression for id changed" - ); - } + /// Marks a counter ID as having been seen in a counter-increment statement. + #[instrument(level = "debug", skip(self))] + pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) { + self.counters_seen.insert(id); } - /// Add a region that will be marked as "unreachable", with a constant "zero counter". - pub fn add_unreachable_region(&mut self, region: CodeRegion) { - self.unreachable_regions.push(region) + /// Marks an expression ID as having been seen in an expression-used statement. + #[instrument(level = "debug", skip(self))] + pub(crate) fn mark_expression_id_seen(&mut self, id: ExpressionId) { + self.expressions_seen.insert(id); } - /// Perform some simplifications to make the final coverage mappings - /// slightly smaller. + /// Identify expressions that will always have a value of zero, and note + /// their IDs in [`ZeroExpressions`]. Mappings that refer to a zero expression + /// can instead become mappings to a constant zero value. /// /// This method mainly exists to preserve the simplifications that were /// already being performed by the Rust-side expression renumbering, so that /// the resulting coverage mappings don't get worse. - pub(crate) fn simplify_expressions(&mut self) { + fn identify_zero_expressions(&self) -> ZeroExpressions { // The set of expressions that either were optimized out entirely, or // have zero as both of their operands, and will therefore always have // a value of zero. Other expressions that refer to these as operands - // can have those operands replaced with `Operand::Zero`. - let mut zero_expressions = FxIndexSet::default(); + // can have those operands replaced with `CovTerm::Zero`. + let mut zero_expressions = ZeroExpressions::default(); - // For each expression, perform simplifications based on lower-numbered - // expressions, and then update the set of always-zero expressions if - // necessary. + // Simplify a copy of each expression based on lower-numbered expressions, + // and then update the set of always-zero expressions if necessary. // (By construction, expressions can only refer to other expressions - // that have lower IDs, so one simplification pass is sufficient.) - for (id, maybe_expression) in self.expressions.iter_enumerated_mut() { - let Some(expression) = maybe_expression else { - // If an expression is missing, it must have been optimized away, + // that have lower IDs, so one pass is sufficient.) + for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() { + if !self.expressions_seen.contains(id) { + // If an expression was not seen, it must have been optimized away, // so any operand that refers to it can be replaced with zero. zero_expressions.insert(id); continue; + } + + // We don't need to simplify the actual expression data in the + // expressions list; we can just simplify a temporary copy and then + // use that to update the set of always-zero expressions. + let Expression { mut lhs, op, mut rhs } = *expression; + + // If an expression has an operand that is also an expression, the + // operand's ID must be strictly lower. This is what lets us find + // all zero expressions in one pass. + let assert_operand_expression_is_lower = |operand_id: ExpressionId| { + assert!( + operand_id < id, + "Operand {operand_id:?} should be less than {id:?} in {expression:?}", + ) }; - // If an operand refers to an expression that is always zero, then - // that operand can be replaced with `Operand::Zero`. - let maybe_set_operand_to_zero = |operand: &mut Operand| match &*operand { - Operand::Expression(id) if zero_expressions.contains(id) => { - *operand = Operand::Zero; + // If an operand refers to a counter or expression that is always + // zero, then that operand can be replaced with `CovTerm::Zero`. + let maybe_set_operand_to_zero = |operand: &mut CovTerm| { + if let CovTerm::Expression(id) = *operand { + assert_operand_expression_is_lower(id); + } + + if is_zero_term(&self.counters_seen, &zero_expressions, *operand) { + *operand = CovTerm::Zero; } - _ => (), }; - maybe_set_operand_to_zero(&mut expression.lhs); - maybe_set_operand_to_zero(&mut expression.rhs); + maybe_set_operand_to_zero(&mut lhs); + maybe_set_operand_to_zero(&mut rhs); // Coverage counter values cannot be negative, so if an expression // involves subtraction from zero, assume that its RHS must also be zero. // (Do this after simplifications that could set the LHS to zero.) - if let Expression { lhs: Operand::Zero, op: Op::Subtract, .. } = expression { - expression.rhs = Operand::Zero; + if lhs == CovTerm::Zero && op == Op::Subtract { + rhs = CovTerm::Zero; } // After the above simplifications, if both operands are zero, then // we know that this expression is always zero too. - if let Expression { lhs: Operand::Zero, rhs: Operand::Zero, .. } = expression { + if lhs == CovTerm::Zero && rhs == CovTerm::Zero { zero_expressions.insert(id); } } + + zero_expressions } - /// Return the source hash, generated from the HIR node structure, and used to indicate whether - /// or not the source code structure changed between different compilations. - pub fn source_hash(&self) -> u64 { - self.source_hash + pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> { + let zero_expressions = self.identify_zero_expressions(); + let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self; + + FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions } } +} - /// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their - /// associated `Regions` (from which the LLVM-specific `CoverageMapGenerator` will create - /// `CounterMappingRegion`s. - pub fn get_expressions_and_counter_regions( - &self, - ) -> (Vec, impl Iterator) { - assert!( - self.source_hash != 0 || !self.is_used, - "No counters provided the source_hash for used function: {:?}", - self.instance - ); +pub(crate) struct FunctionCoverage<'tcx> { + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, - let counter_expressions = self.counter_expressions(); - // Expression IDs are indices into `self.expressions`, and on the LLVM - // side they will be treated as indices into `counter_expressions`, so - // the two vectors should correspond 1:1. - assert_eq!(self.expressions.len(), counter_expressions.len()); + counters_seen: BitSet, + zero_expressions: ZeroExpressions, +} - let counter_regions = self.counter_regions(); - let expression_regions = self.expression_regions(); - let unreachable_regions = self.unreachable_regions(); +impl<'tcx> FunctionCoverage<'tcx> { + /// Returns true for a used (called) function, and false for an unused function. + pub(crate) fn is_used(&self) -> bool { + self.is_used + } - let counter_regions = - counter_regions.chain(expression_regions.into_iter().chain(unreachable_regions)); - (counter_expressions, counter_regions) + /// Return the source hash, generated from the HIR node structure, and used to indicate whether + /// or not the source code structure changed between different compilations. + pub fn source_hash(&self) -> u64 { + if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } } - fn counter_regions(&self) -> impl Iterator { - self.counters.iter_enumerated().filter_map(|(index, entry)| { - // Option::map() will return None to filter out missing counters. This may happen - // if, for example, a MIR-instrumented counter is removed during an optimization. - entry.as_ref().map(|region| (Counter::counter_value_reference(index), region)) - }) + /// Returns an iterator over all filenames used by this function's mappings. + pub(crate) fn all_file_names(&self) -> impl Iterator + Captures<'_> { + self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name) } /// Convert this function's coverage expression data into a form that can be /// passed through FFI to LLVM. - fn counter_expressions(&self) -> Vec { + pub(crate) fn counter_expressions( + &self, + ) -> impl Iterator + ExactSizeIterator + Captures<'_> { // We know that LLVM will optimize out any unused expressions before // producing the final coverage map, so there's no need to do the same // thing on the Rust side unless we're confident we can do much better. // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) - self.expressions - .iter() - .map(|expression| match expression { - None => { - // This expression ID was allocated, but we never saw the - // actual expression, so it must have been optimized out. - // Replace it with a dummy expression, and let LLVM take - // care of omitting it from the expression list. - CounterExpression::DUMMY - } - &Some(Expression { lhs, op, rhs, .. }) => { - // Convert the operands and operator as normal. - CounterExpression::new( - Counter::from_operand(lhs), - match op { - Op::Add => ExprKind::Add, - Op::Subtract => ExprKind::Subtract, - }, - Counter::from_operand(rhs), - ) - } - }) - .collect::>() + self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| { + CounterExpression { + lhs: self.counter_for_term(lhs), + kind: match op { + Op::Add => ExprKind::Add, + Op::Subtract => ExprKind::Subtract, + }, + rhs: self.counter_for_term(rhs), + } + }) + } + + /// Converts this function's coverage mappings into an intermediate form + /// that will be used by `mapgen` when preparing for FFI. + pub(crate) fn counter_regions( + &self, + ) -> impl Iterator + ExactSizeIterator { + self.function_coverage_info.mappings.iter().map(move |mapping| { + let &Mapping { term, ref code_region } = mapping; + let counter = self.counter_for_term(term); + (counter, code_region) + }) } - fn expression_regions(&self) -> Vec<(Counter, &CodeRegion)> { - // Find all of the expression IDs that weren't optimized out AND have - // an attached code region, and return the corresponding mapping as a - // counter/region pair. - self.expressions - .iter_enumerated() - .filter_map(|(id, expression)| { - let code_region = expression.as_ref()?.region.as_ref()?; - Some((Counter::expression(id), code_region)) - }) - .collect::>() + fn counter_for_term(&self, term: CovTerm) -> Counter { + if is_zero_term(&self.counters_seen, &self.zero_expressions, term) { + Counter::ZERO + } else { + Counter::from_term(term) + } } +} + +/// Set of expression IDs that are known to always evaluate to zero. +/// Any mapping or expression operand that refers to these expressions can have +/// that reference replaced with a constant zero value. +#[derive(Default)] +struct ZeroExpressions(FxIndexSet); + +impl ZeroExpressions { + fn insert(&mut self, id: ExpressionId) { + self.0.insert(id); + } + + fn contains(&self, id: ExpressionId) -> bool { + self.0.contains(&id) + } +} - fn unreachable_regions(&self) -> impl Iterator { - self.unreachable_regions.iter().map(|region| (Counter::ZERO, region)) +/// Returns `true` if the given term is known to have a value of zero, taking +/// into account knowledge of which counters are unused and which expressions +/// are always zero. +fn is_zero_term( + counters_seen: &BitSet, + zero_expressions: &ZeroExpressions, + term: CovTerm, +) -> bool { + match term { + CovTerm::Zero => true, + CovTerm::Counter(id) => !counters_seen.contains(id), + CovTerm::Expression(id) => zero_expressions.contains(id), } } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index d4e775256..274e0aeaa 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -1,18 +1,20 @@ use crate::common::CodegenCx; use crate::coverageinfo; use crate::coverageinfo::ffi::CounterMappingRegion; -use crate::coverageinfo::map_data::FunctionCoverage; +use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector}; use crate::llvm; -use rustc_codegen_ssa::traits::ConstMethods; -use rustc_data_structures::fx::FxIndexSet; +use itertools::Itertools as _; +use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods}; +use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; use rustc_hir::def::DefKind; use rustc_hir::def_id::DefId; use rustc_index::IndexVec; use rustc_middle::bug; -use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; +use rustc_middle::mir; use rustc_middle::mir::coverage::CodeRegion; -use rustc_middle::ty::TyCtxt; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_span::def_id::DefIdSet; use rustc_span::Symbol; /// Generates and exports the Coverage Map. @@ -56,21 +58,40 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { return; } - let mut global_file_table = GlobalFileTable::new(tcx); + let function_coverage_entries = function_coverage_map + .into_iter() + .map(|(instance, function_coverage)| (instance, function_coverage.into_finished())) + .collect::>(); + + let all_file_names = + function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names()); + let global_file_table = GlobalFileTable::new(all_file_names); + + // Encode all filenames referenced by coverage mappings in this CGU. + let filenames_buffer = global_file_table.make_filenames_buffer(tcx); + + let filenames_size = filenames_buffer.len(); + let filenames_val = cx.const_bytes(&filenames_buffer); + let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer); + + // Generate the coverage map header, which contains the filenames used by + // this CGU's coverage mappings, and store it in a well-known global. + let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val); + coverageinfo::save_cov_data_to_mod(cx, cov_data_val); + + let mut unused_function_names = Vec::new(); + let covfun_section_name = coverageinfo::covfun_section_name(cx); // Encode coverage mappings and generate function records - let mut function_data = Vec::new(); - for (instance, mut function_coverage) in function_coverage_map { + for (instance, function_coverage) in function_coverage_entries { debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); - function_coverage.simplify_expressions(); - let function_coverage = function_coverage; let mangled_function_name = tcx.symbol_name(instance).name; let source_hash = function_coverage.source_hash(); let is_used = function_coverage.is_used(); let coverage_mapping_buffer = - encode_mappings_for_function(&mut global_file_table, &function_coverage); + encode_mappings_for_function(&global_file_table, &function_coverage); if coverage_mapping_buffer.is_empty() { if function_coverage.is_used() { @@ -84,21 +105,10 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { } } - function_data.push((mangled_function_name, source_hash, is_used, coverage_mapping_buffer)); - } - - // Encode all filenames referenced by counters/expressions in this module - let filenames_buffer = global_file_table.into_filenames_buffer(); - - let filenames_size = filenames_buffer.len(); - let filenames_val = cx.const_bytes(&filenames_buffer); - let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer); - - // Generate the LLVM IR representation of the coverage map and store it in a well-known global - let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val); + if !is_used { + unused_function_names.push(mangled_function_name); + } - let covfun_section_name = coverageinfo::covfun_section_name(cx); - for (mangled_function_name, source_hash, is_used, coverage_mapping_buffer) in function_data { save_function_record( cx, &covfun_section_name, @@ -110,90 +120,143 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { ); } - // Save the coverage data value to LLVM IR - coverageinfo::save_cov_data_to_mod(cx, cov_data_val); + // For unused functions, we need to take their mangled names and store them + // in a specially-named global array. LLVM's `InstrProfiling` pass will + // detect this global and include those names in its `__llvm_prf_names` + // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.) + if !unused_function_names.is_empty() { + assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); + + let name_globals = unused_function_names + .into_iter() + .map(|mangled_function_name| cx.const_str(mangled_function_name).0) + .collect::>(); + let initializer = cx.const_array(cx.type_ptr(), &name_globals); + + let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), "__llvm_coverage_names"); + llvm::set_global_constant(array, true); + llvm::set_linkage(array, llvm::Linkage::InternalLinkage); + llvm::set_initializer(array, initializer); + } } +/// Maps "global" (per-CGU) file ID numbers to their underlying filenames. struct GlobalFileTable { - global_file_table: FxIndexSet, + /// This "raw" table doesn't include the working dir, so a filename's + /// global ID is its index in this set **plus one**. + raw_file_table: FxIndexSet, } impl GlobalFileTable { - fn new(tcx: TyCtxt<'_>) -> Self { - let mut global_file_table = FxIndexSet::default(); + fn new(all_file_names: impl IntoIterator) -> Self { + // Collect all of the filenames into a set. Filenames usually come in + // contiguous runs, so we can dedup adjacent ones to save work. + let mut raw_file_table = all_file_names.into_iter().dedup().collect::>(); + + // Sort the file table by its actual string values, not the arbitrary + // ordering of its symbols. + raw_file_table.sort_unstable_by(|a, b| a.as_str().cmp(b.as_str())); + + Self { raw_file_table } + } + + fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 { + let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| { + bug!("file name not found in prepared global file table: {file_name}"); + }); + // The raw file table doesn't include an entry for the working dir + // (which has ID 0), so add 1 to get the correct ID. + (raw_id + 1) as u32 + } + + fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec { // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) // requires setting the first filename to the compilation directory. // Since rustc generates coverage maps with relative paths, the // compilation directory can be combined with the relative paths // to get absolute paths, if needed. - let working_dir = Symbol::intern( - &tcx.sess.opts.working_dir.remapped_path_if_available().to_string_lossy(), - ); - global_file_table.insert(working_dir); - Self { global_file_table } - } - - fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 { - let (global_file_id, _) = self.global_file_table.insert_full(file_name); - global_file_id as u32 - } - - fn into_filenames_buffer(self) -> Vec { - // This method takes `self` so that the caller can't accidentally - // modify the original file table after encoding it into a buffer. + use rustc_session::RemapFileNameExt; + let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy(); llvm::build_byte_buffer(|buffer| { coverageinfo::write_filenames_section_to_buffer( - self.global_file_table.iter().map(Symbol::as_str), + // Insert the working dir at index 0, before the other filenames. + std::iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str)), buffer, ); }) } } +rustc_index::newtype_index! { + // Tell the newtype macro to not generate `Encode`/`Decode` impls. + #[custom_encodable] + struct LocalFileId {} +} + +/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) +/// file IDs. +#[derive(Default)] +struct VirtualFileMapping { + local_to_global: IndexVec, + global_to_local: FxIndexMap, +} + +impl VirtualFileMapping { + fn local_id_for_global(&mut self, global_file_id: u32) -> LocalFileId { + *self + .global_to_local + .entry(global_file_id) + .or_insert_with(|| self.local_to_global.push(global_file_id)) + } + + fn into_vec(self) -> Vec { + self.local_to_global.raw + } +} + /// Using the expressions and counter regions collected for a single function, /// generate the variable-sized payload of its corresponding `__llvm_covfun` /// entry. The payload is returned as a vector of bytes. /// /// Newly-encountered filenames will be added to the global file table. fn encode_mappings_for_function( - global_file_table: &mut GlobalFileTable, + global_file_table: &GlobalFileTable, function_coverage: &FunctionCoverage<'_>, ) -> Vec { - let (expressions, counter_regions) = function_coverage.get_expressions_and_counter_regions(); - - let mut counter_regions = counter_regions.collect::>(); + let counter_regions = function_coverage.counter_regions(); if counter_regions.is_empty() { return Vec::new(); } - let mut virtual_file_mapping = IndexVec::::new(); + let expressions = function_coverage.counter_expressions().collect::>(); + + let mut virtual_file_mapping = VirtualFileMapping::default(); let mut mapping_regions = Vec::with_capacity(counter_regions.len()); - // Sort the list of (counter, region) mapping pairs by region, so that they - // can be grouped by filename. Prepare file IDs for each filename, and - // prepare the mapping data so that we can pass it through FFI to LLVM. - counter_regions.sort_by_key(|(_counter, region)| *region); - for counter_regions_for_file in - counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name) + // Group mappings into runs with the same filename, preserving the order + // yielded by `FunctionCoverage`. + // Prepare file IDs for each filename, and prepare the mapping data so that + // we can pass it through FFI to LLVM. + for (file_name, counter_regions_for_file) in + &counter_regions.group_by(|(_counter, region)| region.file_name) { - // Look up (or allocate) the global file ID for this filename. - let file_name = counter_regions_for_file[0].1.file_name; + // Look up the global file ID for this filename. let global_file_id = global_file_table.global_file_id_for_file_name(file_name); // Associate that global file ID with a local file ID for this function. - let local_file_id: u32 = virtual_file_mapping.push(global_file_id); - debug!(" file id: local {local_file_id} => global {global_file_id} = '{file_name:?}'"); + let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id); + debug!(" file id: {local_file_id:?} => global {global_file_id} = '{file_name:?}'"); // For each counter/region pair in this function+file, convert it to a // form suitable for FFI. - for &(counter, region) in counter_regions_for_file { + for (counter, region) in counter_regions_for_file { let CodeRegion { file_name: _, start_line, start_col, end_line, end_col } = *region; debug!("Adding counter {counter:?} to map for {region:?}"); mapping_regions.push(CounterMappingRegion::code_region( counter, - local_file_id, + local_file_id.as_u32(), start_line, start_col, end_line, @@ -205,7 +268,7 @@ fn encode_mappings_for_function( // Encode the function's coverage mappings into a buffer. llvm::build_byte_buffer(|buffer| { coverageinfo::write_mapping_to_buffer( - virtual_file_mapping.raw, + virtual_file_mapping.into_vec(), expressions, mapping_regions, buffer, @@ -289,13 +352,12 @@ fn save_function_record( /// `-Clink-dead-code` will not generate code for unused generic functions.) /// /// We can find the unused functions (including generic functions) by the set difference of all MIR -/// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`tcx` query -/// `codegened_and_inlined_items`). +/// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`codegenned_and_inlined_items`). /// -/// These unused functions are then codegen'd in one of the CGUs which is marked as the -/// "code coverage dead code cgu" during the partitioning process. This prevents us from generating -/// code regions for the same function more than once which can lead to linker errors regarding -/// duplicate symbols. +/// These unused functions don't need to be codegenned, but we do need to add them to the function +/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them. +/// We also end up adding their symbol names to a special global array that LLVM will include in +/// its embedded coverage data. fn add_unused_functions(cx: &CodegenCx<'_, '_>) { assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); @@ -315,7 +377,7 @@ fn add_unused_functions(cx: &CodegenCx<'_, '_>) { // generic functions from consideration as well. if !matches!( kind, - DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Generator + DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Coroutine ) { return None; } @@ -326,21 +388,80 @@ fn add_unused_functions(cx: &CodegenCx<'_, '_>) { }) .collect(); - let codegenned_def_ids = tcx.codegened_and_inlined_items(()); + let codegenned_def_ids = codegenned_and_inlined_items(tcx); - for non_codegenned_def_id in - eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id)) - { - let codegen_fn_attrs = tcx.codegen_fn_attrs(non_codegenned_def_id); - - // If a function is marked `#[coverage(off)]`, then skip generating a - // dead code stub for it. - if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_COVERAGE) { - debug!("skipping unused fn marked #[coverage(off)]: {:?}", non_codegenned_def_id); + // For each `DefId` that should have coverage instrumentation but wasn't + // codegenned, add it to the function coverage map as an unused function. + for def_id in eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id)) { + // Skip any function that didn't have coverage data added to it by the + // coverage instrumentor. + let body = tcx.instance_mir(ty::InstanceDef::Item(def_id)); + let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { continue; + }; + + debug!("generating unused fn: {def_id:?}"); + let instance = declare_unused_fn(tcx, def_id); + add_unused_function_coverage(cx, instance, function_coverage_info); + } +} + +/// All items participating in code generation together with (instrumented) +/// items inlined into them. +fn codegenned_and_inlined_items(tcx: TyCtxt<'_>) -> DefIdSet { + let (items, cgus) = tcx.collect_and_partition_mono_items(()); + let mut visited = DefIdSet::default(); + let mut result = items.clone(); + + for cgu in cgus { + for item in cgu.items().keys() { + if let mir::mono::MonoItem::Fn(ref instance) = item { + let did = instance.def_id(); + if !visited.insert(did) { + continue; + } + let body = tcx.instance_mir(instance.def); + for block in body.basic_blocks.iter() { + for statement in &block.statements { + let mir::StatementKind::Coverage(_) = statement.kind else { continue }; + let scope = statement.source_info.scope; + if let Some(inlined) = scope.inlined_instance(&body.source_scopes) { + result.insert(inlined.def_id()); + } + } + } + } } + } - debug!("generating unused fn: {:?}", non_codegenned_def_id); - cx.define_unused_fn(non_codegenned_def_id); + result +} + +fn declare_unused_fn<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> ty::Instance<'tcx> { + ty::Instance::new( + def_id, + ty::GenericArgs::for_item(tcx, def_id, |param, _| { + if let ty::GenericParamDefKind::Lifetime = param.kind { + tcx.lifetimes.re_erased.into() + } else { + tcx.mk_param_from_def(param) + } + }), + ) +} + +fn add_unused_function_coverage<'tcx>( + cx: &CodegenCx<'_, 'tcx>, + instance: ty::Instance<'tcx>, + function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo, +) { + // An unused function's mappings will automatically be rewritten to map to + // zero, because none of its counters/expressions are marked as seen. + let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info); + + if let Some(coverage_context) = cx.coverage_context() { + coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage); + } else { + bug!("Could not get the `coverage_context`"); } } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index c70cb670e..7d6975618 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -1,10 +1,9 @@ use crate::llvm; -use crate::abi::Abi; use crate::builder::Builder; use crate::common::CodegenCx; use crate::coverageinfo::ffi::{CounterExpression, CounterMappingRegion}; -use crate::coverageinfo::map_data::FunctionCoverage; +use crate::coverageinfo::map_data::FunctionCoverageCollector; use libc::c_uint; use rustc_codegen_ssa::traits::{ @@ -12,17 +11,12 @@ use rustc_codegen_ssa::traits::{ StaticMethods, }; use rustc_data_structures::fx::FxHashMap; -use rustc_hir as hir; -use rustc_hir::def_id::DefId; use rustc_llvm::RustString; use rustc_middle::bug; -use rustc_middle::mir::coverage::{CounterId, CoverageKind}; +use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::mir::Coverage; -use rustc_middle::ty; -use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt}; -use rustc_middle::ty::GenericArgs; +use rustc_middle::ty::layout::HasTyCtxt; use rustc_middle::ty::Instance; -use rustc_middle::ty::Ty; use std::cell::RefCell; @@ -30,14 +24,13 @@ pub(crate) mod ffi; pub(crate) mod map_data; pub mod mapgen; -const UNUSED_FUNCTION_COUNTER_ID: CounterId = CounterId::START; - const VAR_ALIGN_BYTES: usize = 8; /// A context object for maintaining all state needed by the coverageinfo module. pub struct CrateCoverageContext<'ll, 'tcx> { /// Coverage data for each instrumented function identified by DefId. - pub(crate) function_coverage_map: RefCell, FunctionCoverage<'tcx>>>, + pub(crate) function_coverage_map: + RefCell, FunctionCoverageCollector<'tcx>>>, pub(crate) pgo_func_name_var_map: RefCell, &'ll llvm::Value>>, } @@ -49,7 +42,9 @@ impl<'ll, 'tcx> CrateCoverageContext<'ll, 'tcx> { } } - pub fn take_function_coverage_map(&self) -> FxHashMap, FunctionCoverage<'tcx>> { + pub fn take_function_coverage_map( + &self, + ) -> FxHashMap, FunctionCoverageCollector<'tcx>> { self.function_coverage_map.replace(FxHashMap::default()) } } @@ -76,68 +71,56 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { bug!("Could not get the `coverage_context`"); } } - - /// Functions with MIR-based coverage are normally codegenned _only_ if - /// called. LLVM coverage tools typically expect every function to be - /// defined (even if unused), with at least one call to LLVM intrinsic - /// `instrprof.increment`. - /// - /// Codegen a small function that will never be called, with one counter - /// that will never be incremented. - /// - /// For used/called functions, the coverageinfo was already added to the - /// `function_coverage_map` (keyed by function `Instance`) during codegen. - /// But in this case, since the unused function was _not_ previously - /// codegenned, collect the coverage `CodeRegion`s from the MIR and add - /// them. The first `CodeRegion` is used to add a single counter, with the - /// same counter ID used in the injected `instrprof.increment` intrinsic - /// call. Since the function is never called, all other `CodeRegion`s can be - /// added as `unreachable_region`s. - fn define_unused_fn(&self, def_id: DefId) { - let instance = declare_unused_fn(self, def_id); - codegen_unused_fn_and_counter(self, instance); - add_unused_function_coverage(self, instance, def_id); - } } impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { + #[instrument(level = "debug", skip(self))] fn add_coverage(&mut self, instance: Instance<'tcx>, coverage: &Coverage) { + // Our caller should have already taken care of inlining subtleties, + // so we can assume that counter/expression IDs in this coverage + // statement are meaningful for the given instance. + // + // (Either the statement was not inlined and directly belongs to this + // instance, or it was inlined *from* this instance.) + let bx = self; + let Some(function_coverage_info) = + bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref() + else { + debug!("function has a coverage statement but no coverage info"); + return; + }; + let Some(coverage_context) = bx.coverage_context() else { return }; let mut coverage_map = coverage_context.function_coverage_map.borrow_mut(); let func_coverage = coverage_map .entry(instance) - .or_insert_with(|| FunctionCoverage::new(bx.tcx(), instance)); + .or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info)); - let Coverage { kind, code_region } = coverage.clone(); - match kind { - CoverageKind::Counter { function_source_hash, id } => { - debug!( - "ensuring function source hash is set for instance={:?}; function_source_hash={}", - instance, function_source_hash, - ); - func_coverage.set_function_source_hash(function_source_hash); - - if let Some(code_region) = code_region { - // Note: Some counters do not have code regions, but may still be referenced - // from expressions. In that case, don't add the counter to the coverage map, - // but do inject the counter intrinsic. - debug!( - "adding counter to coverage_map: instance={:?}, id={:?}, region={:?}", - instance, id, code_region, - ); - func_coverage.add_counter(id, code_region); - } + let Coverage { kind } = coverage; + match *kind { + CoverageKind::CounterIncrement { id } => { + func_coverage.mark_counter_id_seen(id); // We need to explicitly drop the `RefMut` before calling into `instrprof_increment`, // as that needs an exclusive borrow. drop(coverage_map); - let coverageinfo = bx.tcx().coverageinfo(instance.def); + // The number of counters passed to `llvm.instrprof.increment` might + // be smaller than the number originally inserted by the instrumentor, + // if some high-numbered counters were removed by MIR optimizations. + // If so, LLVM's profiler runtime will use fewer physical counters. + let num_counters = + bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1; + assert!( + num_counters as usize <= function_coverage_info.num_counters, + "num_counters disagreement: query says {num_counters} but function info only has {}", + function_coverage_info.num_counters + ); let fn_name = bx.get_pgo_func_name_var(instance); - let hash = bx.const_u64(function_source_hash); - let num_counters = bx.const_u32(coverageinfo.num_counters); + let hash = bx.const_u64(function_coverage_info.function_source_hash); + let num_counters = bx.const_u32(num_counters); let index = bx.const_u32(id.as_u32()); debug!( "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})", @@ -145,105 +128,13 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { ); bx.instrprof_increment(fn_name, hash, num_counters, index); } - CoverageKind::Expression { id, lhs, op, rhs } => { - debug!( - "adding counter expression to coverage_map: instance={:?}, id={:?}, {:?} {:?} {:?}; region: {:?}", - instance, id, lhs, op, rhs, code_region, - ); - func_coverage.add_counter_expression(id, lhs, op, rhs, code_region); - } - CoverageKind::Unreachable => { - let code_region = - code_region.expect("unreachable regions always have code regions"); - debug!( - "adding unreachable code to coverage_map: instance={:?}, at {:?}", - instance, code_region, - ); - func_coverage.add_unreachable_region(code_region); + CoverageKind::ExpressionUsed { id } => { + func_coverage.mark_expression_id_seen(id); } } } } -fn declare_unused_fn<'tcx>(cx: &CodegenCx<'_, 'tcx>, def_id: DefId) -> Instance<'tcx> { - let tcx = cx.tcx; - - let instance = Instance::new( - def_id, - GenericArgs::for_item(tcx, def_id, |param, _| { - if let ty::GenericParamDefKind::Lifetime = param.kind { - tcx.lifetimes.re_erased.into() - } else { - tcx.mk_param_from_def(param) - } - }), - ); - - let llfn = cx.declare_fn( - tcx.symbol_name(instance).name, - cx.fn_abi_of_fn_ptr( - ty::Binder::dummy(tcx.mk_fn_sig( - [Ty::new_unit(tcx)], - Ty::new_unit(tcx), - false, - hir::Unsafety::Unsafe, - Abi::Rust, - )), - ty::List::empty(), - ), - None, - ); - - llvm::set_linkage(llfn, llvm::Linkage::PrivateLinkage); - llvm::set_visibility(llfn, llvm::Visibility::Default); - - assert!(cx.instances.borrow_mut().insert(instance, llfn).is_none()); - - instance -} - -fn codegen_unused_fn_and_counter<'tcx>(cx: &CodegenCx<'_, 'tcx>, instance: Instance<'tcx>) { - let llfn = cx.get_fn(instance); - let llbb = Builder::append_block(cx, llfn, "unused_function"); - let mut bx = Builder::build(cx, llbb); - let fn_name = bx.get_pgo_func_name_var(instance); - let hash = bx.const_u64(0); - let num_counters = bx.const_u32(1); - let index = bx.const_u32(u32::from(UNUSED_FUNCTION_COUNTER_ID)); - debug!( - "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, - index={:?}) for unused function: {:?}", - fn_name, hash, num_counters, index, instance - ); - bx.instrprof_increment(fn_name, hash, num_counters, index); - bx.ret_void(); -} - -fn add_unused_function_coverage<'tcx>( - cx: &CodegenCx<'_, 'tcx>, - instance: Instance<'tcx>, - def_id: DefId, -) { - let tcx = cx.tcx; - - let mut function_coverage = FunctionCoverage::unused(tcx, instance); - for (index, &code_region) in tcx.covered_code_regions(def_id).iter().enumerate() { - if index == 0 { - // Insert at least one real counter so the LLVM CoverageMappingReader will find expected - // definitions. - function_coverage.add_counter(UNUSED_FUNCTION_COUNTER_ID, code_region.clone()); - } else { - function_coverage.add_unreachable_region(code_region.clone()); - } - } - - if let Some(coverage_context) = cx.coverage_context() { - coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage); - } else { - bug!("Could not get the `coverage_context`"); - } -} - /// Calls llvm::createPGOFuncNameVar() with the given function instance's /// mangled function name. The LLVM API returns an llvm::GlobalVariable /// containing the function name, with the specific variable name and linkage -- cgit v1.2.3