From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 14:02:58 +0200
Subject: Adding upstream version 1.64.0+dfsg1.

Signed-off-by: Daniel Baumann
---
 compiler/rustc_middle/src/mir/basic_blocks.rs      |  147 +
 compiler/rustc_middle/src/mir/coverage.rs          |  186 ++
 compiler/rustc_middle/src/mir/generic_graph.rs     |   69 +
 compiler/rustc_middle/src/mir/generic_graphviz.rs  |  173 ++
 .../rustc_middle/src/mir/graph_cyclic_cache.rs     |   63 +
 compiler/rustc_middle/src/mir/graphviz.rs          |  134 +
 .../rustc_middle/src/mir/interpret/allocation.rs   | 1300 +++++++
 compiler/rustc_middle/src/mir/interpret/error.rs   |  551 ++++
 compiler/rustc_middle/src/mir/interpret/mod.rs     |  633 +++++
 compiler/rustc_middle/src/mir/interpret/pointer.rs |  307 +++
 compiler/rustc_middle/src/mir/interpret/queries.rs |  217 ++
 compiler/rustc_middle/src/mir/interpret/value.rs   |  651 +++++
 compiler/rustc_middle/src/mir/mod.rs               | 2900 ++++++++
 compiler/rustc_middle/src/mir/mono.rs              |  527 ++++
 compiler/rustc_middle/src/mir/patch.rs             |  196 ++
 compiler/rustc_middle/src/mir/predecessors.rs      |   78 +
 compiler/rustc_middle/src/mir/pretty.rs            | 1067 +++++++
 compiler/rustc_middle/src/mir/query.rs             |  476 ++++
 compiler/rustc_middle/src/mir/spanview.rs          |  691 +++++
 compiler/rustc_middle/src/mir/switch_sources.rs    |   78 +
 compiler/rustc_middle/src/mir/syntax.rs            | 1168 ++++++++
 compiler/rustc_middle/src/mir/tcx.rs               |  307 +++
 compiler/rustc_middle/src/mir/terminator.rs        |  448 +++
 compiler/rustc_middle/src/mir/traversal.rs         |  388 +++
 compiler/rustc_middle/src/mir/type_foldable.rs     |  240 ++
 compiler/rustc_middle/src/mir/type_visitable.rs    |  190 ++
 compiler/rustc_middle/src/mir/visit.rs             | 1330 +++++++++
 27 files changed, 14515 insertions(+)
 create mode 100644 compiler/rustc_middle/src/mir/basic_blocks.rs
 create mode 100644 compiler/rustc_middle/src/mir/coverage.rs
 create mode 100644 compiler/rustc_middle/src/mir/generic_graph.rs
 create mode 100644 compiler/rustc_middle/src/mir/generic_graphviz.rs
 create mode 100644 compiler/rustc_middle/src/mir/graph_cyclic_cache.rs
 create mode 100644 compiler/rustc_middle/src/mir/graphviz.rs
 create mode 100644 compiler/rustc_middle/src/mir/interpret/allocation.rs
 create mode 100644 compiler/rustc_middle/src/mir/interpret/error.rs
 create mode 100644 compiler/rustc_middle/src/mir/interpret/mod.rs
 create mode 100644 compiler/rustc_middle/src/mir/interpret/pointer.rs
 create mode 100644 compiler/rustc_middle/src/mir/interpret/queries.rs
 create mode 100644 compiler/rustc_middle/src/mir/interpret/value.rs
 create mode 100644 compiler/rustc_middle/src/mir/mod.rs
 create mode 100644 compiler/rustc_middle/src/mir/mono.rs
 create mode 100644 compiler/rustc_middle/src/mir/patch.rs
 create mode 100644 compiler/rustc_middle/src/mir/predecessors.rs
 create mode 100644 compiler/rustc_middle/src/mir/pretty.rs
 create mode 100644 compiler/rustc_middle/src/mir/query.rs
 create mode 100644 compiler/rustc_middle/src/mir/spanview.rs
 create mode 100644 compiler/rustc_middle/src/mir/switch_sources.rs
 create mode 100644 compiler/rustc_middle/src/mir/syntax.rs
 create mode 100644 compiler/rustc_middle/src/mir/tcx.rs
 create mode 100644 compiler/rustc_middle/src/mir/terminator.rs
 create mode 100644 compiler/rustc_middle/src/mir/traversal.rs
 create mode 100644 compiler/rustc_middle/src/mir/type_foldable.rs
 create mode 100644 compiler/rustc_middle/src/mir/type_visitable.rs
 create mode 100644 compiler/rustc_middle/src/mir/visit.rs

(limited to 'compiler/rustc_middle/src/mir')
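The first file below, basic_blocks.rs, wraps the block vector in a `BasicBlocks` type that caches predecessors, switch sources, cyclicity, and postorder. A minimal sketch of its usage contract (editor's illustration, assuming a rustc_private context; `blocks` is a hypothetical `&mut BasicBlocks<'tcx>` handle inside a MIR pass):

```rust
use rustc_middle::mir::BasicBlocks;

fn example<'tcx>(blocks: &mut BasicBlocks<'tcx>) {
    // Read-only queries populate their caches on first use.
    let _preds = blocks.predecessors();
    let _post = blocks.postorder();

    // Structural edits: `as_mut` invalidates all four caches up front,
    // so stale predecessor/postorder data can never be observed.
    let _bbs = blocks.as_mut();
    // ... insert blocks or retarget terminators here ...

    // Payload-only edits: the caller promises the CFG shape is unchanged,
    // so the caches survive and later queries stay cheap.
    let _bbs = blocks.as_mut_preserves_cfg();
    // ... rewrite statements in place, leaving terminators alone ...

    // Escape hatch if that promise could not be kept after all:
    blocks.invalidate_cfg_cache();
}
```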
diff --git a/compiler/rustc_middle/src/mir/basic_blocks.rs b/compiler/rustc_middle/src/mir/basic_blocks.rs
new file mode 100644
index 000000000..78080fcd5
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/basic_blocks.rs
@@ -0,0 +1,147 @@
+use crate::mir::graph_cyclic_cache::GraphIsCyclicCache;
+use crate::mir::predecessors::{PredecessorCache, Predecessors};
+use crate::mir::switch_sources::{SwitchSourceCache, SwitchSources};
+use crate::mir::traversal::PostorderCache;
+use crate::mir::{BasicBlock, BasicBlockData, Successors, START_BLOCK};
+
+use rustc_data_structures::graph;
+use rustc_data_structures::graph::dominators::{dominators, Dominators};
+use rustc_index::vec::IndexVec;
+
+#[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable, TypeFoldable, TypeVisitable)]
+pub struct BasicBlocks<'tcx> {
+    basic_blocks: IndexVec<BasicBlock, BasicBlockData<'tcx>>,
+    predecessor_cache: PredecessorCache,
+    switch_source_cache: SwitchSourceCache,
+    is_cyclic: GraphIsCyclicCache,
+    postorder_cache: PostorderCache,
+}
+
+impl<'tcx> BasicBlocks<'tcx> {
+    #[inline]
+    pub fn new(basic_blocks: IndexVec<BasicBlock, BasicBlockData<'tcx>>) -> Self {
+        BasicBlocks {
+            basic_blocks,
+            predecessor_cache: PredecessorCache::new(),
+            switch_source_cache: SwitchSourceCache::new(),
+            is_cyclic: GraphIsCyclicCache::new(),
+            postorder_cache: PostorderCache::new(),
+        }
+    }
+
+    /// Returns `true` if the control-flow graph contains a cycle reachable from `START_BLOCK`.
+    #[inline]
+    pub fn is_cfg_cyclic(&self) -> bool {
+        self.is_cyclic.is_cyclic(self)
+    }
+
+    #[inline]
+    pub fn dominators(&self) -> Dominators<BasicBlock> {
+        dominators(&self)
+    }
+
+    /// Returns predecessors for each basic block.
+    #[inline]
+    pub fn predecessors(&self) -> &Predecessors {
+        self.predecessor_cache.compute(&self.basic_blocks)
+    }
+
+    /// Returns the basic blocks in postorder.
+    #[inline]
+    pub fn postorder(&self) -> &[BasicBlock] {
+        self.postorder_cache.compute(&self.basic_blocks)
+    }
+
+    /// `switch_sources()[&(target, switch)]` returns a list of switch
+    /// values that lead to a `target` block from a `switch` block.
+    #[inline]
+    pub fn switch_sources(&self) -> &SwitchSources {
+        self.switch_source_cache.compute(&self.basic_blocks)
+    }
+
+    /// Returns a mutable reference to the basic blocks. Invalidates the CFG cache.
+    #[inline]
+    pub fn as_mut(&mut self) -> &mut IndexVec<BasicBlock, BasicBlockData<'tcx>> {
+        self.invalidate_cfg_cache();
+        &mut self.basic_blocks
+    }
+
+    /// Get mutable access to basic blocks without invalidating the CFG cache.
+    ///
+    /// By calling this method instead of e.g. [`BasicBlocks::as_mut`] you promise not to change
+    /// the CFG. This means that
+    ///
+    /// 1) The number of basic blocks remains unchanged
+    /// 2) The set of successors of each terminator remains unchanged.
+    /// 3) For each `TerminatorKind::SwitchInt`, the `targets` remains the same and the terminator
+    ///    kind is not changed.
+    ///
+    /// If any of these conditions cannot be upheld, you should call [`BasicBlocks::invalidate_cfg_cache`].
+    #[inline]
+    pub fn as_mut_preserves_cfg(&mut self) -> &mut IndexVec<BasicBlock, BasicBlockData<'tcx>> {
+        &mut self.basic_blocks
+    }
+
+    /// Invalidates cached information about the CFG.
+    ///
+    /// You will only ever need this if you have also called [`BasicBlocks::as_mut_preserves_cfg`].
+    /// All other methods that allow you to mutate the basic blocks also call this method
+    /// themselves, thereby avoiding any risk of accidental cache invalidation.
+    pub fn invalidate_cfg_cache(&mut self) {
+        self.predecessor_cache.invalidate();
+        self.switch_source_cache.invalidate();
+        self.is_cyclic.invalidate();
+        self.postorder_cache.invalidate();
+    }
+}
+
+impl<'tcx> std::ops::Deref for BasicBlocks<'tcx> {
+    type Target = IndexVec<BasicBlock, BasicBlockData<'tcx>>;
+
+    #[inline]
+    fn deref(&self) -> &IndexVec<BasicBlock, BasicBlockData<'tcx>> {
+        &self.basic_blocks
+    }
+}
+
+impl<'tcx> graph::DirectedGraph for BasicBlocks<'tcx> {
+    type Node = BasicBlock;
+}
+
+impl<'tcx> graph::WithNumNodes for BasicBlocks<'tcx> {
+    #[inline]
+    fn num_nodes(&self) -> usize {
+        self.basic_blocks.len()
+    }
+}
+
+impl<'tcx> graph::WithStartNode for BasicBlocks<'tcx> {
+    #[inline]
+    fn start_node(&self) -> Self::Node {
+        START_BLOCK
+    }
+}
+
+impl<'tcx> graph::WithSuccessors for BasicBlocks<'tcx> {
+    #[inline]
+    fn successors(&self, node: Self::Node) -> <Self as graph::GraphSuccessors<'_>>::Iter {
+        self.basic_blocks[node].terminator().successors()
+    }
+}
+
+impl<'a, 'b> graph::GraphSuccessors<'b> for BasicBlocks<'a> {
+    type Item = BasicBlock;
+    type Iter = Successors<'b>;
+}
+
+impl<'tcx, 'graph> graph::GraphPredecessors<'graph> for BasicBlocks<'tcx> {
+    type Item = BasicBlock;
+    type Iter = std::iter::Copied<std::slice::Iter<'graph, BasicBlock>>;
+}
+
+impl<'tcx> graph::WithPredecessors for BasicBlocks<'tcx> {
+    #[inline]
+    fn predecessors(&self, node: Self::Node) -> <Self as graph::GraphPredecessors<'_>>::Iter {
+        self.predecessors()[node].iter().copied()
+    }
+}
diff --git a/compiler/rustc_middle/src/mir/coverage.rs b/compiler/rustc_middle/src/mir/coverage.rs
new file mode 100644
index 000000000..efa946452
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/coverage.rs
@@ -0,0 +1,186 @@
+//! Metadata from source code coverage analysis and instrumentation.
+
+use rustc_macros::HashStable;
+use rustc_span::Symbol;
+
+use std::cmp::Ord;
+use std::fmt::{self, Debug, Formatter};
+
+rustc_index::newtype_index! {
+    /// An ExpressionOperandId value is assigned directly from either a
+    /// CounterValueReference.as_u32() (which ascend from 1) or an ExpressionOperandId.as_u32()
+    /// (which _*descend*_ from u32::MAX). Id value `0` (zero) represents a virtual counter with a
+    /// constant value of `0`.
+    pub struct ExpressionOperandId {
+        derive [HashStable]
+        DEBUG_FORMAT = "ExpressionOperandId({})",
+        MAX = 0xFFFF_FFFF,
+    }
+}
+
+impl ExpressionOperandId {
+    /// An expression operand for a "zero counter", as described in the following references:
+    ///
+    /// *
+    /// *
+    /// *
+    ///
+    /// This operand can be used to count two or more separate code regions with a single counter,
+    /// if they run sequentially with no branches, by injecting the `Counter` in a `BasicBlock` for
+    /// one of the code regions, and inserting `CounterExpression`s ("add ZERO to the counter") in
+    /// the coverage map for the other code regions.
+    pub const ZERO: Self = Self::from_u32(0);
+}
+
+rustc_index::newtype_index! {
+    pub struct CounterValueReference {
+        derive [HashStable]
+        DEBUG_FORMAT = "CounterValueReference({})",
+        MAX = 0xFFFF_FFFF,
+    }
+}
+
+impl CounterValueReference {
+    /// Counters start at 1 to reserve 0 for ExpressionOperandId::ZERO.
+    pub const START: Self = Self::from_u32(1);
+
+    /// Returns the explicitly-requested zero-based version of the counter id, used
+    /// during codegen. LLVM expects zero-based indexes.
+    pub fn zero_based_index(self) -> u32 {
+        let one_based_index = self.as_u32();
+        debug_assert!(one_based_index > 0);
+        one_based_index - 1
+    }
+}
+
+rustc_index::newtype_index! {
+    /// InjectedExpressionId.as_u32() converts to ExpressionOperandId.as_u32()
+    ///
+    /// Values descend from u32::MAX.
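+    ///
+    /// For illustration (editor's sketch, not upstream text): the first injected
+    /// expression gets id `u32::MAX`, the next `u32::MAX - 1`, and so on, while
+    /// counter ids ascend from 1 and id 0 is the virtual zero counter, so the
+    /// two id spaces grow toward each other and cannot collide in practice.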
+    pub struct InjectedExpressionId {
+        derive [HashStable]
+        DEBUG_FORMAT = "InjectedExpressionId({})",
+        MAX = 0xFFFF_FFFF,
+    }
+}
+
+rustc_index::newtype_index! {
+    /// InjectedExpressionIndex.as_u32() translates to u32::MAX - ExpressionOperandId.as_u32()
+    ///
+    /// Values ascend from 0.
+    pub struct InjectedExpressionIndex {
+        derive [HashStable]
+        DEBUG_FORMAT = "InjectedExpressionIndex({})",
+        MAX = 0xFFFF_FFFF,
+    }
+}
+
+rustc_index::newtype_index! {
+    /// MappedExpressionIndex values ascend from zero, and are recalculated indexes based on their
+    /// array position in the LLVM coverage map "Expressions" array, which is assembled during the
+    /// "mapgen" process. They cannot be computed algorithmically from the other `newtype_index`s.
+    pub struct MappedExpressionIndex {
+        derive [HashStable]
+        DEBUG_FORMAT = "MappedExpressionIndex({})",
+        MAX = 0xFFFF_FFFF,
+    }
+}
+
+impl From<CounterValueReference> for ExpressionOperandId {
+    #[inline]
+    fn from(v: CounterValueReference) -> ExpressionOperandId {
+        ExpressionOperandId::from(v.as_u32())
+    }
+}
+
+impl From<InjectedExpressionId> for ExpressionOperandId {
+    #[inline]
+    fn from(v: InjectedExpressionId) -> ExpressionOperandId {
+        ExpressionOperandId::from(v.as_u32())
+    }
+}
+
+#[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
+pub enum CoverageKind {
+    Counter {
+        function_source_hash: u64,
+        id: CounterValueReference,
+    },
+    Expression {
+        id: InjectedExpressionId,
+        lhs: ExpressionOperandId,
+        op: Op,
+        rhs: ExpressionOperandId,
+    },
+    Unreachable,
+}
+
+impl CoverageKind {
+    pub fn as_operand_id(&self) -> ExpressionOperandId {
+        use CoverageKind::*;
+        match *self {
+            Counter { id, .. } => ExpressionOperandId::from(id),
+            Expression { id, .. } => ExpressionOperandId::from(id),
+            Unreachable => bug!("Unreachable coverage cannot be part of an expression"),
+        }
+    }
+
+    pub fn is_expression(&self) -> bool {
+        matches!(self, Self::Expression { .. })
+    }
+}
+
+impl Debug for CoverageKind {
+    fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
+        use CoverageKind::*;
+        match self {
+            Counter { id, .. } => write!(fmt, "Counter({:?})", id.index()),
+            Expression { id, lhs, op, rhs } => write!(
+                fmt,
+                "Expression({:?}) = {} {} {}",
+                id.index(),
+                lhs.index(),
+                if *op == Op::Add { "+" } else { "-" },
+                rhs.index(),
+            ),
+            Unreachable => write!(fmt, "Unreachable"),
+        }
+    }
+}
+
+#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(TypeFoldable, TypeVisitable)]
+pub struct CodeRegion {
+    pub file_name: Symbol,
+    pub start_line: u32,
+    pub start_col: u32,
+    pub end_line: u32,
+    pub end_col: u32,
+}
+
+impl Debug for CodeRegion {
+    fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
+        write!(
+            fmt,
+            "{}:{}:{} - {}:{}",
+            self.file_name, self.start_line, self.start_col, self.end_line, self.end_col
+        )
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
+#[derive(TypeFoldable, TypeVisitable)]
+pub enum Op {
+    Subtract,
+    Add,
+}
+
+impl Op {
+    pub fn is_add(&self) -> bool {
+        matches!(self, Self::Add)
+    }
+
+    pub fn is_subtract(&self) -> bool {
+        matches!(self, Self::Subtract)
+    }
+}
diff --git a/compiler/rustc_middle/src/mir/generic_graph.rs b/compiler/rustc_middle/src/mir/generic_graph.rs
new file mode 100644
index 000000000..f3621cd99
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/generic_graph.rs
@@ -0,0 +1,69 @@
+use gsgdt::{Edge, Graph, Node, NodeStyle};
+use rustc_hir::def_id::DefId;
+use rustc_middle::mir::*;
+use rustc_middle::ty::TyCtxt;
+
+/// Convert an MIR function into a gsgdt Graph.
+pub fn mir_fn_to_generic_graph<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'_>) -> Graph {
+    let def_id = body.source.def_id();
+    let def_name = graphviz_safe_def_name(def_id);
+    let graph_name = format!("Mir_{}", def_name);
+    let dark_mode = tcx.sess.opts.unstable_opts.graphviz_dark_mode;
+
+    // Nodes
+    let nodes: Vec<Node> = body
+        .basic_blocks()
+        .iter_enumerated()
+        .map(|(block, _)| bb_to_graph_node(block, body, dark_mode))
+        .collect();
+
+    // Edges
+    let mut edges = Vec::new();
+    for (source, _) in body.basic_blocks().iter_enumerated() {
+        let def_id = body.source.def_id();
+        let terminator = body[source].terminator();
+        let labels = terminator.kind.fmt_successor_labels();
+
+        for (target, label) in terminator.successors().zip(labels) {
+            let src = node(def_id, source);
+            let trg = node(def_id, target);
+            edges.push(Edge::new(src, trg, label.to_string()));
+        }
+    }
+
+    Graph::new(graph_name, nodes, edges)
+}
+
+fn bb_to_graph_node(block: BasicBlock, body: &Body<'_>, dark_mode: bool) -> Node {
+    let def_id = body.source.def_id();
+    let data = &body[block];
+    let label = node(def_id, block);
+
+    let (title, bgcolor) = if data.is_cleanup {
+        let color = if dark_mode { "royalblue" } else { "lightblue" };
+        (format!("{} (cleanup)", block.index()), color)
+    } else {
+        let color = if dark_mode { "dimgray" } else { "gray" };
+        (format!("{}", block.index()), color)
+    };
+
+    let style = NodeStyle { title_bg: Some(bgcolor.to_owned()), ..Default::default() };
+    let mut stmts: Vec<String> = data.statements.iter().map(|x| format!("{:?}", x)).collect();
+
+    // add the terminator to the stmts, gsgdt can print it out separately
+    let mut terminator_head = String::new();
+    data.terminator().kind.fmt_head(&mut terminator_head).unwrap();
+    stmts.push(terminator_head);
+
+    Node::new(stmts, label, title, style)
+}
+
+// Must match `[0-9A-Za-z_]*`. This does not appear in the rendered graph, so
+// it does not have to be user friendly.
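+// For illustration (editor's sketch): a DefId with crate index 0 and item index
+// 42 yields "0_42" from `graphviz_safe_def_name`, and block bb3 of that item is
+// named "bb3__0_42" by `node` below; both stay within `[0-9A-Za-z_]*`.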
+pub fn graphviz_safe_def_name(def_id: DefId) -> String { + format!("{}_{}", def_id.krate.index(), def_id.index.index(),) +} + +fn node(def_id: DefId, block: BasicBlock) -> String { + format!("bb{}__{}", block.index(), graphviz_safe_def_name(def_id)) +} diff --git a/compiler/rustc_middle/src/mir/generic_graphviz.rs b/compiler/rustc_middle/src/mir/generic_graphviz.rs new file mode 100644 index 000000000..11ac45943 --- /dev/null +++ b/compiler/rustc_middle/src/mir/generic_graphviz.rs @@ -0,0 +1,173 @@ +use rustc_data_structures::graph::{self, iterate}; +use rustc_graphviz as dot; +use rustc_middle::ty::TyCtxt; +use std::io::{self, Write}; + +pub struct GraphvizWriter< + 'a, + G: graph::DirectedGraph + graph::WithSuccessors + graph::WithStartNode + graph::WithNumNodes, + NodeContentFn: Fn(::Node) -> Vec, + EdgeLabelsFn: Fn(::Node) -> Vec, +> { + graph: &'a G, + is_subgraph: bool, + graphviz_name: String, + graph_label: Option, + node_content_fn: NodeContentFn, + edge_labels_fn: EdgeLabelsFn, +} + +impl< + 'a, + G: graph::DirectedGraph + graph::WithSuccessors + graph::WithStartNode + graph::WithNumNodes, + NodeContentFn: Fn(::Node) -> Vec, + EdgeLabelsFn: Fn(::Node) -> Vec, +> GraphvizWriter<'a, G, NodeContentFn, EdgeLabelsFn> +{ + pub fn new( + graph: &'a G, + graphviz_name: &str, + node_content_fn: NodeContentFn, + edge_labels_fn: EdgeLabelsFn, + ) -> Self { + Self { + graph, + is_subgraph: false, + graphviz_name: graphviz_name.to_owned(), + graph_label: None, + node_content_fn, + edge_labels_fn, + } + } + + pub fn set_graph_label(&mut self, graph_label: &str) { + self.graph_label = Some(graph_label.to_owned()); + } + + /// Write a graphviz DOT of the graph + pub fn write_graphviz<'tcx, W>(&self, tcx: TyCtxt<'tcx>, w: &mut W) -> io::Result<()> + where + W: Write, + { + let kind = if self.is_subgraph { "subgraph" } else { "digraph" }; + let cluster = if self.is_subgraph { "cluster_" } else { "" }; // Print border around graph + // FIXME(richkadel): If/when migrating the MIR graphviz to this generic implementation, + // prepend "Mir_" to the graphviz_safe_def_name(def_id) + writeln!(w, "{} {}{} {{", kind, cluster, self.graphviz_name)?; + + // Global graph properties + let font = format!(r#"fontname="{}""#, tcx.sess.opts.unstable_opts.graphviz_font); + let mut graph_attrs = vec![&font[..]]; + let mut content_attrs = vec![&font[..]]; + + let dark_mode = tcx.sess.opts.unstable_opts.graphviz_dark_mode; + if dark_mode { + graph_attrs.push(r#"bgcolor="black""#); + graph_attrs.push(r#"fontcolor="white""#); + content_attrs.push(r#"color="white""#); + content_attrs.push(r#"fontcolor="white""#); + } + + writeln!(w, r#" graph [{}];"#, graph_attrs.join(" "))?; + let content_attrs_str = content_attrs.join(" "); + writeln!(w, r#" node [{}];"#, content_attrs_str)?; + writeln!(w, r#" edge [{}];"#, content_attrs_str)?; + + // Graph label + if let Some(graph_label) = &self.graph_label { + self.write_graph_label(graph_label, w)?; + } + + // Nodes + for node in iterate::post_order_from(self.graph, self.graph.start_node()) { + self.write_node(node, dark_mode, w)?; + } + + // Edges + for source in iterate::post_order_from(self.graph, self.graph.start_node()) { + self.write_edges(source, w)?; + } + writeln!(w, "}}") + } + + /// Write a graphviz DOT node for the given node. + pub fn write_node(&self, node: G::Node, dark_mode: bool, w: &mut W) -> io::Result<()> + where + W: Write, + { + // Start a new node with the label to follow, in one of DOT's pseudo-HTML tables. 
+ write!(w, r#" {} [shape="none", label=<"#, self.node(node))?; + + write!(w, r#""#)?; + + // FIXME(richkadel): If/when migrating the MIR graphviz to this generic implementation, + // we need generic way to know if node header should have a different color. For example, + // for MIR: + // + // let (blk, bgcolor) = if data.is_cleanup { + // let color = if dark_mode { "royalblue" } else { "lightblue" }; + // (format!("{:?} (cleanup)", node), color) + // } else { + // let color = if dark_mode { "dimgray" } else { "gray" }; + // (format!("{:?}", node), color) + // }; + let color = if dark_mode { "dimgray" } else { "gray" }; + let (blk, bgcolor) = (format!("{:?}", node), color); + write!( + w, + r#""#, + attrs = r#"align="center""#, + colspan = 1, + blk = blk, + bgcolor = bgcolor + )?; + + for section in (self.node_content_fn)(node) { + write!( + w, + r#""#, + dot::escape_html(§ion).replace('\n', "
") + )?; + } + + // Close the table + write!(w, "
{blk}
{}
")?; + + // Close the node label and the node itself. + writeln!(w, ">];") + } + + /// Write graphviz DOT edges with labels between the given node and all of its successors. + fn write_edges(&self, source: G::Node, w: &mut W) -> io::Result<()> + where + W: Write, + { + let edge_labels = (self.edge_labels_fn)(source); + for (index, target) in self.graph.successors(source).enumerate() { + let src = self.node(source); + let trg = self.node(target); + let escaped_edge_label = if let Some(edge_label) = edge_labels.get(index) { + dot::escape_html(edge_label).replace('\n', r#"
"#) + } else { + "".to_owned() + }; + writeln!(w, r#" {} -> {} [label=<{}>];"#, src, trg, escaped_edge_label)?; + } + Ok(()) + } + + /// Write the graphviz DOT label for the overall graph. This is essentially a block of text that + /// will appear below the graph. + fn write_graph_label(&self, label: &str, w: &mut W) -> io::Result<()> + where + W: Write, + { + let lines = label.split('\n').map(|s| dot::escape_html(s)).collect::>(); + let escaped_label = lines.join(r#"
"#); + writeln!(w, r#" label=<

{}



>;"#, escaped_label) + } + + fn node(&self, node: G::Node) -> String { + format!("{:?}__{}", node, self.graphviz_name) + } +} diff --git a/compiler/rustc_middle/src/mir/graph_cyclic_cache.rs b/compiler/rustc_middle/src/mir/graph_cyclic_cache.rs new file mode 100644 index 000000000..f97bf2883 --- /dev/null +++ b/compiler/rustc_middle/src/mir/graph_cyclic_cache.rs @@ -0,0 +1,63 @@ +use rustc_data_structures::graph::{ + self, DirectedGraph, WithNumNodes, WithStartNode, WithSuccessors, +}; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::OnceCell; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +/// Helper type to cache the result of `graph::is_cyclic`. +#[derive(Clone, Debug)] +pub(super) struct GraphIsCyclicCache { + cache: OnceCell, +} + +impl GraphIsCyclicCache { + #[inline] + pub(super) fn new() -> Self { + GraphIsCyclicCache { cache: OnceCell::new() } + } + + pub(super) fn is_cyclic(&self, graph: &G) -> bool + where + G: ?Sized + DirectedGraph + WithStartNode + WithSuccessors + WithNumNodes, + { + *self.cache.get_or_init(|| graph::is_cyclic(graph)) + } + + /// Invalidates the cache. + #[inline] + pub(super) fn invalidate(&mut self) { + // Invalidating the cache requires mutating the MIR, which in turn requires a unique + // reference (`&mut`) to the `mir::Body`. Because of this, we can assume that all + // callers of `invalidate` have a unique reference to the MIR and thus to the + // cache. This means we never need to do synchronization when `invalidate` is called, + // we can simply reinitialize the `OnceCell`. + self.cache = OnceCell::new(); + } +} + +impl Encodable for GraphIsCyclicCache { + #[inline] + fn encode(&self, s: &mut S) { + Encodable::encode(&(), s); + } +} + +impl Decodable for GraphIsCyclicCache { + #[inline] + fn decode(d: &mut D) -> Self { + let () = Decodable::decode(d); + Self::new() + } +} + +impl HashStable for GraphIsCyclicCache { + #[inline] + fn hash_stable(&self, _: &mut CTX, _: &mut StableHasher) { + // do nothing + } +} + +TrivialTypeTraversalAndLiftImpls! { + GraphIsCyclicCache, +} diff --git a/compiler/rustc_middle/src/mir/graphviz.rs b/compiler/rustc_middle/src/mir/graphviz.rs new file mode 100644 index 000000000..5de56dad0 --- /dev/null +++ b/compiler/rustc_middle/src/mir/graphviz.rs @@ -0,0 +1,134 @@ +use gsgdt::GraphvizSettings; +use rustc_graphviz as dot; +use rustc_hir::def_id::DefId; +use rustc_middle::mir::*; +use rustc_middle::ty::{self, TyCtxt}; +use std::fmt::Debug; +use std::io::{self, Write}; + +use super::generic_graph::mir_fn_to_generic_graph; +use super::pretty::dump_mir_def_ids; + +/// Write a graphviz DOT graph of a list of MIRs. +pub fn write_mir_graphviz(tcx: TyCtxt<'_>, single: Option, w: &mut W) -> io::Result<()> +where + W: Write, +{ + let def_ids = dump_mir_def_ids(tcx, single); + + let mirs = + def_ids + .iter() + .flat_map(|def_id| { + if tcx.is_const_fn_raw(*def_id) { + vec![tcx.optimized_mir(*def_id), tcx.mir_for_ctfe(*def_id)] + } else { + vec![tcx.instance_mir(ty::InstanceDef::Item(ty::WithOptConstParam::unknown( + *def_id, + )))] + } + }) + .collect::>(); + + let use_subgraphs = mirs.len() > 1; + if use_subgraphs { + writeln!(w, "digraph __crate__ {{")?; + } + + for mir in mirs { + write_mir_fn_graphviz(tcx, mir, use_subgraphs, w)?; + } + + if use_subgraphs { + writeln!(w, "}}")?; + } + + Ok(()) +} + +/// Write a graphviz DOT graph of the MIR. 
+pub fn write_mir_fn_graphviz<'tcx, W>(
+    tcx: TyCtxt<'tcx>,
+    body: &Body<'_>,
+    subgraph: bool,
+    w: &mut W,
+) -> io::Result<()>
+where
+    W: Write,
+{
+    // Global graph properties
+    let font = format!(r#"fontname="{}""#, tcx.sess.opts.unstable_opts.graphviz_font);
+    let mut graph_attrs = vec![&font[..]];
+    let mut content_attrs = vec![&font[..]];
+
+    let dark_mode = tcx.sess.opts.unstable_opts.graphviz_dark_mode;
+    if dark_mode {
+        graph_attrs.push(r#"bgcolor="black""#);
+        graph_attrs.push(r#"fontcolor="white""#);
+        content_attrs.push(r#"color="white""#);
+        content_attrs.push(r#"fontcolor="white""#);
+    }
+
+    // Graph label
+    let mut label = String::from("");
+    // FIXME: remove this unwrap
+    write_graph_label(tcx, body, &mut label).unwrap();
+    let g = mir_fn_to_generic_graph(tcx, body);
+    let settings = GraphvizSettings {
+        graph_attrs: Some(graph_attrs.join(" ")),
+        node_attrs: Some(content_attrs.join(" ")),
+        edge_attrs: Some(content_attrs.join(" ")),
+        graph_label: Some(label),
+    };
+    g.to_dot(w, &settings, subgraph)
+}
+
+/// Write the graphviz DOT label for the overall graph. This is essentially a block of text that
+/// will appear below the graph, showing the type of the `fn` this MIR represents and the types of
+/// all the variables and temporaries.
+fn write_graph_label<'tcx, W: std::fmt::Write>(
+    tcx: TyCtxt<'tcx>,
+    body: &Body<'_>,
+    w: &mut W,
+) -> std::fmt::Result {
+    let def_id = body.source.def_id();
+
+    write!(w, "fn {}(", dot::escape_html(&tcx.def_path_str(def_id)))?;
+
+    // fn argument types.
+    for (i, arg) in body.args_iter().enumerate() {
+        if i > 0 {
+            write!(w, ", ")?;
+        }
+        write!(w, "{:?}: {}", Place::from(arg), escape(&body.local_decls[arg].ty))?;
+    }
+
+    write!(w, ") -> {}", escape(&body.return_ty()))?;
+    write!(w, r#"<br align="left"/>"#)?;
+
+    for local in body.vars_and_temps_iter() {
+        let decl = &body.local_decls[local];
+
+        write!(w, "let ")?;
+        if decl.mutability == Mutability::Mut {
+            write!(w, "mut ")?;
+        }
+
+        write!(w, r#"{:?}: {};<br align="left"/>"#, Place::from(local), escape(&decl.ty))?;
+    }
+
+    for var_debug_info in &body.var_debug_info {
+        write!(
+            w,
+            r#"debug {} => {};<br align="left"/>
"#, + var_debug_info.name, + escape(&var_debug_info.value), + )?; + } + + Ok(()) +} + +fn escape(t: &T) -> String { + dot::escape_html(&format!("{:?}", t)) +} diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs new file mode 100644 index 000000000..db7e0fb8a --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs @@ -0,0 +1,1300 @@ +//! The virtual memory representation of the MIR interpreter. + +use std::borrow::Cow; +use std::convert::{TryFrom, TryInto}; +use std::fmt; +use std::hash; +use std::iter; +use std::ops::{Deref, Range}; +use std::ptr; + +use rustc_ast::Mutability; +use rustc_data_structures::intern::Interned; +use rustc_data_structures::sorted_map::SortedMap; +use rustc_span::DUMMY_SP; +use rustc_target::abi::{Align, HasDataLayout, Size}; + +use super::{ + read_target_uint, write_target_uint, AllocId, InterpError, InterpResult, Pointer, Provenance, + ResourceExhaustionInfo, Scalar, ScalarMaybeUninit, ScalarSizeMismatch, UndefinedBehaviorInfo, + UninitBytesAccess, UnsupportedOpInfo, +}; +use crate::ty; + +/// This type represents an Allocation in the Miri/CTFE core engine. +/// +/// Its public API is rather low-level, working directly with allocation offsets and a custom error +/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory` +/// module provides higher-level access. +// Note: for performance reasons when interning, some of the `Allocation` fields can be partially +// hashed. (see the `Hash` impl below for more details), so the impl is not derived. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)] +#[derive(HashStable)] +pub struct Allocation { + /// The actual bytes of the allocation. + /// Note that the bytes of a pointer represent the offset of the pointer. + bytes: Box<[u8]>, + /// Maps from byte addresses to extra data for each pointer. + /// Only the first byte of a pointer is inserted into the map; i.e., + /// every entry in this map applies to `pointer_size` consecutive bytes starting + /// at the given offset. + relocations: Relocations, + /// Denotes which part of this allocation is initialized. + init_mask: InitMask, + /// The alignment of the allocation to detect unaligned reads. + /// (`Align` guarantees that this is a power of two.) + pub align: Align, + /// `true` if the allocation is mutable. + /// Also used by codegen to determine if a static should be put into mutable memory, + /// which happens for `static mut` and `static` with interior mutability. + pub mutability: Mutability, + /// Extra state for the machine. + pub extra: Extra, +} + +/// This is the maximum size we will hash at a time, when interning an `Allocation` and its +/// `InitMask`. Note, we hash that amount of bytes twice: at the start, and at the end of a buffer. +/// Used when these two structures are large: we only partially hash the larger fields in that +/// situation. See the comment at the top of their respective `Hash` impl for more details. +const MAX_BYTES_TO_HASH: usize = 64; + +/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning. +/// Otherwise, it will be partially hashed in 2 slices, requiring at least 2 `MAX_BYTES_TO_HASH` +/// bytes. +const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH; + +// Const allocations are only hashed for interning. 
However, they can be large, making the hashing +// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially +// big buffers like the actual bytes of allocation. We can partially hash some fields when they're +// large. +impl hash::Hash for Allocation { + fn hash(&self, state: &mut H) { + // Partially hash the `bytes` buffer when it is large. To limit collisions with common + // prefixes and suffixes, we hash the length and some slices of the buffer. + let byte_count = self.bytes.len(); + if byte_count > MAX_HASHED_BUFFER_LEN { + // Hash the buffer's length. + byte_count.hash(state); + + // And its head and tail. + self.bytes[..MAX_BYTES_TO_HASH].hash(state); + self.bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state); + } else { + self.bytes.hash(state); + } + + // Hash the other fields as usual. + self.relocations.hash(state); + self.init_mask.hash(state); + self.align.hash(state); + self.mutability.hash(state); + self.extra.hash(state); + } +} + +/// Interned types generally have an `Outer` type and an `Inner` type, where +/// `Outer` is a newtype around `Interned`, and all the operations are +/// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an +/// outer type and `TyS` is its inner type. +/// +/// Here things are different because only const allocations are interned. This +/// means that both the inner type (`Allocation`) and the outer type +/// (`ConstAllocation`) are used quite a bit. +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, HashStable)] +#[rustc_pass_by_value] +pub struct ConstAllocation<'tcx, Prov = AllocId, Extra = ()>( + pub Interned<'tcx, Allocation>, +); + +impl<'tcx> fmt::Debug for ConstAllocation<'tcx> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // This matches how `Allocation` is printed. We print it like this to + // avoid having to update expected output in a lot of tests. + write!(f, "{:?}", self.inner()) + } +} + +impl<'tcx, Prov, Extra> ConstAllocation<'tcx, Prov, Extra> { + pub fn inner(self) -> &'tcx Allocation { + self.0.0 + } +} + +/// We have our own error type that does not know about the `AllocId`; that information +/// is added when converting to `InterpError`. +#[derive(Debug)] +pub enum AllocError { + /// A scalar had the wrong size. + ScalarSizeMismatch(ScalarSizeMismatch), + /// Encountered a pointer where we needed raw bytes. + ReadPointerAsBytes, + /// Partially overwriting a pointer. + PartialPointerOverwrite(Size), + /// Using uninitialized data where it is not allowed. + InvalidUninitBytes(Option), +} +pub type AllocResult = Result; + +impl From for AllocError { + fn from(s: ScalarSizeMismatch) -> Self { + AllocError::ScalarSizeMismatch(s) + } +} + +impl AllocError { + pub fn to_interp_error<'tcx>(self, alloc_id: AllocId) -> InterpError<'tcx> { + use AllocError::*; + match self { + ScalarSizeMismatch(s) => { + InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ScalarSizeMismatch(s)) + } + ReadPointerAsBytes => InterpError::Unsupported(UnsupportedOpInfo::ReadPointerAsBytes), + PartialPointerOverwrite(offset) => InterpError::Unsupported( + UnsupportedOpInfo::PartialPointerOverwrite(Pointer::new(alloc_id, offset)), + ), + InvalidUninitBytes(info) => InterpError::UndefinedBehavior( + UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))), + ), + } + } +} + +/// The information that makes up a memory access: offset and size. 
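+///
+/// For illustration (editor's sketch): `alloc_range(Size::from_bytes(8),
+/// Size::from_bytes(4))` covers bytes `8..12`; `end()` returns offset 12 with
+/// overflow checking, and `.subrange(alloc_range(Size::from_bytes(1),
+/// Size::from_bytes(2)))` resolves to the absolute range `9..11`.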
+#[derive(Copy, Clone)] +pub struct AllocRange { + pub start: Size, + pub size: Size, +} + +impl fmt::Debug for AllocRange { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[{:#x}..{:#x}]", self.start.bytes(), self.end().bytes()) + } +} + +/// Free-starting constructor for less syntactic overhead. +#[inline(always)] +pub fn alloc_range(start: Size, size: Size) -> AllocRange { + AllocRange { start, size } +} + +impl AllocRange { + #[inline] + pub fn from(r: Range) -> Self { + alloc_range(r.start, r.end - r.start) // `Size` subtraction (overflow-checked) + } + + #[inline(always)] + pub fn end(self) -> Size { + self.start + self.size // This does overflow checking. + } + + /// Returns the `subrange` within this range; panics if it is not a subrange. + #[inline] + pub fn subrange(self, subrange: AllocRange) -> AllocRange { + let sub_start = self.start + subrange.start; + let range = alloc_range(sub_start, subrange.size); + assert!(range.end() <= self.end(), "access outside the bounds for given AllocRange"); + range + } +} + +// The constructors are all without extra; the extra gets added by a machine hook later. +impl Allocation { + /// Creates an allocation initialized by the given bytes + pub fn from_bytes<'a>( + slice: impl Into>, + align: Align, + mutability: Mutability, + ) -> Self { + let bytes = Box::<[u8]>::from(slice.into()); + let size = Size::from_bytes(bytes.len()); + Self { + bytes, + relocations: Relocations::new(), + init_mask: InitMask::new(size, true), + align, + mutability, + extra: (), + } + } + + pub fn from_bytes_byte_aligned_immutable<'a>(slice: impl Into>) -> Self { + Allocation::from_bytes(slice, Align::ONE, Mutability::Not) + } + + /// Try to create an Allocation of `size` bytes, failing if there is not enough memory + /// available to the compiler to do so. + /// + /// If `panic_on_fail` is true, this will never return `Err`. + pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> { + let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| { + // This results in an error that can happen non-deterministically, since the memory + // available to the compiler can change between runs. Normally queries are always + // deterministic. However, we can be non-deterministic here because all uses of const + // evaluation (including ConstProp!) will make compilation fail (via hard error + // or ICE) upon encountering a `MemoryExhausted` error. + if panic_on_fail { + panic!("Allocation::uninit called with panic_on_fail had allocation failure") + } + ty::tls::with(|tcx| { + tcx.sess.delay_span_bug(DUMMY_SP, "exhausted memory during interpretation") + }); + InterpError::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted) + })?; + // SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]> + let bytes = unsafe { bytes.assume_init() }; + Ok(Allocation { + bytes, + relocations: Relocations::new(), + init_mask: InitMask::new(size, false), + align, + mutability: Mutability::Mut, + extra: (), + }) + } +} + +impl Allocation { + /// Adjust allocation from the ones in tcx to a custom Machine instance + /// with a different Provenance and Extra type. + pub fn adjust_from_tcx( + self, + cx: &impl HasDataLayout, + extra: Extra, + mut adjust_ptr: impl FnMut(Pointer) -> Result, Err>, + ) -> Result, Err> { + // Compute new pointer provenance, which also adjusts the bytes. 
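+        // (Editor's gloss on the loop below.) Each relocation entry names the
+        // offset of a stored pointer: the code re-reads that pointer's offset
+        // bytes with `read_target_uint`, maps the `(AllocId, offset)` pair
+        // through `adjust_ptr` into the machine's provenance type, and writes
+        // the possibly-adjusted offset back with `write_target_uint`.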
+ let mut bytes = self.bytes; + let mut new_relocations = Vec::with_capacity(self.relocations.0.len()); + let ptr_size = cx.data_layout().pointer_size.bytes_usize(); + let endian = cx.data_layout().endian; + for &(offset, alloc_id) in self.relocations.iter() { + let idx = offset.bytes_usize(); + let ptr_bytes = &mut bytes[idx..idx + ptr_size]; + let bits = read_target_uint(endian, ptr_bytes).unwrap(); + let (ptr_prov, ptr_offset) = + adjust_ptr(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_parts(); + write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap(); + new_relocations.push((offset, ptr_prov)); + } + // Create allocation. + Ok(Allocation { + bytes, + relocations: Relocations::from_presorted(new_relocations), + init_mask: self.init_mask, + align: self.align, + mutability: self.mutability, + extra, + }) + } +} + +/// Raw accessors. Provide access to otherwise private bytes. +impl Allocation { + pub fn len(&self) -> usize { + self.bytes.len() + } + + pub fn size(&self) -> Size { + Size::from_bytes(self.len()) + } + + /// Looks at a slice which may describe uninitialized bytes or describe a relocation. This differs + /// from `get_bytes_with_uninit_and_ptr` in that it does no relocation checks (even on the + /// edges) at all. + /// This must not be used for reads affecting the interpreter execution. + pub fn inspect_with_uninit_and_ptr_outside_interpreter(&self, range: Range) -> &[u8] { + &self.bytes[range] + } + + /// Returns the mask indicating which bytes are initialized. + pub fn init_mask(&self) -> &InitMask { + &self.init_mask + } + + /// Returns the relocation list. + pub fn relocations(&self) -> &Relocations { + &self.relocations + } +} + +/// Byte accessors. +impl Allocation { + /// This is the entirely abstraction-violating way to just grab the raw bytes without + /// caring about relocations. It just deduplicates some code between `read_scalar` + /// and `get_bytes_internal`. + fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] { + &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()] + } + + /// The last argument controls whether we error out when there are uninitialized or pointer + /// bytes. However, we *always* error when there are relocations overlapping the edges of the + /// range. + /// + /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead, + /// + /// This function also guarantees that the resulting pointer will remain stable + /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies + /// on that. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + fn get_bytes_internal( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + check_init_and_ptr: bool, + ) -> AllocResult<&[u8]> { + if check_init_and_ptr { + self.check_init(range)?; + self.check_relocations(cx, range)?; + } else { + // We still don't want relocations on the *edges*. + self.check_relocation_edges(cx, range)?; + } + + Ok(self.get_bytes_even_more_internal(range)) + } + + /// Checks that these bytes are initialized and not pointer bytes, and then return them + /// as a slice. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods + /// on `InterpCx` instead. 
+ #[inline] + pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> { + self.get_bytes_internal(cx, range, true) + } + + /// It is the caller's responsibility to handle uninitialized and pointer bytes. + /// However, this still checks that there are no relocations on the *edges*. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + #[inline] + pub fn get_bytes_with_uninit_and_ptr( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + ) -> AllocResult<&[u8]> { + self.get_bytes_internal(cx, range, false) + } + + /// Just calling this already marks everything as defined and removes relocations, + /// so be sure to actually put data there! + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods + /// on `InterpCx` instead. + pub fn get_bytes_mut( + &mut self, + cx: &impl HasDataLayout, + range: AllocRange, + ) -> AllocResult<&mut [u8]> { + self.mark_init(range, true); + self.clear_relocations(cx, range)?; + + Ok(&mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]) + } + + /// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory. + pub fn get_bytes_mut_ptr( + &mut self, + cx: &impl HasDataLayout, + range: AllocRange, + ) -> AllocResult<*mut [u8]> { + self.mark_init(range, true); + self.clear_relocations(cx, range)?; + + assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check + let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize()); + let len = range.end().bytes_usize() - range.start.bytes_usize(); + Ok(ptr::slice_from_raw_parts_mut(begin_ptr, len)) + } +} + +/// Reading and writing. +impl Allocation { + /// Validates that `ptr.offset` and `ptr.offset + size` do not point to the middle of a + /// relocation. If `allow_uninit`/`allow_ptr` is `false`, also enforces that the memory in the + /// given range contains no uninitialized bytes/relocations. + pub fn check_bytes( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + allow_uninit: bool, + allow_ptr: bool, + ) -> AllocResult { + // Check bounds and relocations on the edges. + self.get_bytes_with_uninit_and_ptr(cx, range)?; + // Check uninit and ptr. + if !allow_uninit { + self.check_init(range)?; + } + if !allow_ptr { + self.check_relocations(cx, range)?; + } + Ok(()) + } + + /// Reads a *non-ZST* scalar. + /// + /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine + /// supports that) provenance is entirely ignored. + /// + /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check + /// for ZSTness anyway due to integer pointers being valid for ZSTs. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to call `InterpCx::read_scalar` instead of this method. + pub fn read_scalar( + &self, + cx: &impl HasDataLayout, + range: AllocRange, + read_provenance: bool, + ) -> AllocResult> { + if read_provenance { + assert_eq!(range.size, cx.data_layout().pointer_size); + } + + // First and foremost, if anything is uninit, bail. + if self.is_init(range).is_err() { + // This inflates uninitialized bytes to the entire scalar, even if only a few + // bytes are uninitialized. 
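+            // (Editor's gloss.) For example, a 4-byte read in which only one
+            // byte is uninitialized still reads back as `Uninit` as a whole;
+            // callers needing byte-granular state must consult the init mask.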
+ return Ok(ScalarMaybeUninit::Uninit); + } + + // If we are doing a pointer read, and there is a relocation exactly where we + // are reading, then we can put data and relocation back together and return that. + if read_provenance && let Some(&prov) = self.relocations.get(&range.start) { + // We already checked init and relocations, so we can use this function. + let bytes = self.get_bytes_even_more_internal(range); + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + let ptr = Pointer::new(prov, Size::from_bytes(bits)); + return Ok(ScalarMaybeUninit::from_pointer(ptr, cx)); + } + + // If we are *not* reading a pointer, and we can just ignore relocations, + // then do exactly that. + if !read_provenance && Prov::OFFSET_IS_ADDR { + // We just strip provenance. + let bytes = self.get_bytes_even_more_internal(range); + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size))); + } + + // It's complicated. Better make sure there is no provenance anywhere. + // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then + // `read_pointer` is true and we ideally would distinguish the following two cases: + // - The entire `range` is covered by 2 relocations for the same provenance. + // Then we should return a pointer with that provenance. + // - The range has inhomogeneous provenance. Then we should return just the + // underlying bits. + let bytes = self.get_bytes(cx, range)?; + let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap(); + Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size))) + } + + /// Writes a *non-ZST* scalar. + /// + /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check + /// for ZSTness anyway due to integer pointers being valid for ZSTs. + /// + /// It is the caller's responsibility to check bounds and alignment beforehand. + /// Most likely, you want to call `InterpCx::write_scalar` instead of this method. + #[instrument(skip(self, cx), level = "debug")] + pub fn write_scalar( + &mut self, + cx: &impl HasDataLayout, + range: AllocRange, + val: ScalarMaybeUninit, + ) -> AllocResult { + assert!(self.mutability == Mutability::Mut); + + let val = match val { + ScalarMaybeUninit::Scalar(scalar) => scalar, + ScalarMaybeUninit::Uninit => { + return self.write_uninit(cx, range); + } + }; + + // `to_bits_or_ptr_internal` is the right method because we just want to store this data + // as-is into memory. + let (bytes, provenance) = match val.to_bits_or_ptr_internal(range.size)? { + Err(val) => { + let (provenance, offset) = val.into_parts(); + (u128::from(offset.bytes()), Some(provenance)) + } + Ok(data) => (data, None), + }; + + let endian = cx.data_layout().endian; + let dst = self.get_bytes_mut(cx, range)?; + write_target_uint(endian, dst, bytes).unwrap(); + + // See if we have to also write a relocation. + if let Some(provenance) = provenance { + self.relocations.0.insert(range.start, provenance); + } + + Ok(()) + } + + /// Write "uninit" to the given memory range. + pub fn write_uninit(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + self.mark_init(range, false); + self.clear_relocations(cx, range)?; + return Ok(()); + } +} + +/// Relocations. +impl Allocation { + /// Returns all relocations overlapping with the given pointer-offset pair. 
+ fn get_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> &[(Size, Prov)] { + // We have to go back `pointer_size - 1` bytes, as that one would still overlap with + // the beginning of this range. + let start = range.start.bytes().saturating_sub(cx.data_layout().pointer_size.bytes() - 1); + self.relocations.range(Size::from_bytes(start)..range.end()) + } + + /// Returns whether this allocation has relocations overlapping with the given range. + /// + /// Note: this function exists to allow `get_relocations` to be private, in order to somewhat + /// limit access to relocations outside of the `Allocation` abstraction. + /// + pub fn has_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> bool { + !self.get_relocations(cx, range).is_empty() + } + + /// Checks that there are no relocations overlapping with the given range. + #[inline(always)] + fn check_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + if self.has_relocations(cx, range) { Err(AllocError::ReadPointerAsBytes) } else { Ok(()) } + } + + /// Removes all relocations inside the given range. + /// If there are relocations overlapping with the edges, they + /// are removed as well *and* the bytes they cover are marked as + /// uninitialized. This is a somewhat odd "spooky action at a distance", + /// but it allows strictly more code to run than if we would just error + /// immediately in that case. + fn clear_relocations(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult + where + Prov: Provenance, + { + // Find the start and end of the given range and its outermost relocations. + let (first, last) = { + // Find all relocations overlapping the given range. + let relocations = self.get_relocations(cx, range); + if relocations.is_empty() { + return Ok(()); + } + + ( + relocations.first().unwrap().0, + relocations.last().unwrap().0 + cx.data_layout().pointer_size, + ) + }; + let start = range.start; + let end = range.end(); + + // We need to handle clearing the relocations from parts of a pointer. + // FIXME: Miri should preserve partial relocations; see + // https://github.com/rust-lang/miri/issues/2181. + if first < start { + if Prov::ERR_ON_PARTIAL_PTR_OVERWRITE { + return Err(AllocError::PartialPointerOverwrite(first)); + } + warn!( + "Partial pointer overwrite! De-initializing memory at offsets {first:?}..{start:?}." + ); + self.init_mask.set_range(first, start, false); + } + if last > end { + if Prov::ERR_ON_PARTIAL_PTR_OVERWRITE { + return Err(AllocError::PartialPointerOverwrite( + last - cx.data_layout().pointer_size, + )); + } + warn!( + "Partial pointer overwrite! De-initializing memory at offsets {end:?}..{last:?}." + ); + self.init_mask.set_range(end, last, false); + } + + // Forget all the relocations. + // Since relocations do not overlap, we know that removing until `last` (exclusive) is fine, + // i.e., this will not remove any other relocations just after the ones we care about. + self.relocations.0.remove_range(first..last); + + Ok(()) + } + + /// Errors if there are relocations overlapping with the edges of the + /// given memory range. + #[inline] + fn check_relocation_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult { + self.check_relocations(cx, alloc_range(range.start, Size::ZERO))?; + self.check_relocations(cx, alloc_range(range.end(), Size::ZERO))?; + Ok(()) + } +} + +/// "Relocations" stores the provenance information of pointers stored in memory. 
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, TyEncodable, TyDecodable)] +pub struct Relocations(SortedMap); + +impl Relocations { + pub fn new() -> Self { + Relocations(SortedMap::new()) + } + + // The caller must guarantee that the given relocations are already sorted + // by address and contain no duplicates. + pub fn from_presorted(r: Vec<(Size, Prov)>) -> Self { + Relocations(SortedMap::from_presorted_elements(r)) + } +} + +impl Deref for Relocations { + type Target = SortedMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +/// A partial, owned list of relocations to transfer into another allocation. +/// +/// Offsets are already adjusted to the destination allocation. +pub struct AllocationRelocations { + dest_relocations: Vec<(Size, Prov)>, +} + +impl Allocation { + pub fn prepare_relocation_copy( + &self, + cx: &impl HasDataLayout, + src: AllocRange, + dest: Size, + count: u64, + ) -> AllocationRelocations { + let relocations = self.get_relocations(cx, src); + if relocations.is_empty() { + return AllocationRelocations { dest_relocations: Vec::new() }; + } + + let size = src.size; + let mut new_relocations = Vec::with_capacity(relocations.len() * (count as usize)); + + // If `count` is large, this is rather wasteful -- we are allocating a big array here, which + // is mostly filled with redundant information since it's just N copies of the same `Prov`s + // at slightly adjusted offsets. The reason we do this is so that in `mark_relocation_range` + // we can use `insert_presorted`. That wouldn't work with an `Iterator` that just produces + // the right sequence of relocations for all N copies. + for i in 0..count { + new_relocations.extend(relocations.iter().map(|&(offset, reloc)| { + // compute offset for current repetition + let dest_offset = dest + size * i; // `Size` operations + ( + // shift offsets from source allocation to destination allocation + (offset + dest_offset) - src.start, // `Size` operations + reloc, + ) + })); + } + + AllocationRelocations { dest_relocations: new_relocations } + } + + /// Applies a relocation copy. + /// The affected range, as defined in the parameters to `prepare_relocation_copy` is expected + /// to be clear of relocations. + /// + /// This is dangerous to use as it can violate internal `Allocation` invariants! + /// It only exists to support an efficient implementation of `mem_copy_repeatedly`. + pub fn mark_relocation_range(&mut self, relocations: AllocationRelocations) { + self.relocations.0.insert_presorted(relocations.dest_relocations); + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Uninitialized byte tracking +//////////////////////////////////////////////////////////////////////////////// + +type Block = u64; + +/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte +/// is initialized. If it is `false` the byte is uninitialized. +// Note: for performance reasons when interning, some of the `InitMask` fields can be partially +// hashed. (see the `Hash` impl below for more details), so the impl is not derived. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)] +#[derive(HashStable)] +pub struct InitMask { + blocks: Vec, + len: Size, +} + +// Const allocations are only hashed for interning. However, they can be large, making the hashing +// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially +// big buffers like the allocation's init mask. 
We can partially hash some fields when they're +// large. +impl hash::Hash for InitMask { + fn hash(&self, state: &mut H) { + const MAX_BLOCKS_TO_HASH: usize = MAX_BYTES_TO_HASH / std::mem::size_of::(); + const MAX_BLOCKS_LEN: usize = MAX_HASHED_BUFFER_LEN / std::mem::size_of::(); + + // Partially hash the `blocks` buffer when it is large. To limit collisions with common + // prefixes and suffixes, we hash the length and some slices of the buffer. + let block_count = self.blocks.len(); + if block_count > MAX_BLOCKS_LEN { + // Hash the buffer's length. + block_count.hash(state); + + // And its head and tail. + self.blocks[..MAX_BLOCKS_TO_HASH].hash(state); + self.blocks[block_count - MAX_BLOCKS_TO_HASH..].hash(state); + } else { + self.blocks.hash(state); + } + + // Hash the other fields as usual. + self.len.hash(state); + } +} + +impl InitMask { + pub const BLOCK_SIZE: u64 = 64; + + #[inline] + fn bit_index(bits: Size) -> (usize, usize) { + // BLOCK_SIZE is the number of bits that can fit in a `Block`. + // Each bit in a `Block` represents the initialization state of one byte of an allocation, + // so we use `.bytes()` here. + let bits = bits.bytes(); + let a = bits / InitMask::BLOCK_SIZE; + let b = bits % InitMask::BLOCK_SIZE; + (usize::try_from(a).unwrap(), usize::try_from(b).unwrap()) + } + + #[inline] + fn size_from_bit_index(block: impl TryInto, bit: impl TryInto) -> Size { + let block = block.try_into().ok().unwrap(); + let bit = bit.try_into().ok().unwrap(); + Size::from_bytes(block * InitMask::BLOCK_SIZE + bit) + } + + pub fn new(size: Size, state: bool) -> Self { + let mut m = InitMask { blocks: vec![], len: Size::ZERO }; + m.grow(size, state); + m + } + + pub fn set_range(&mut self, start: Size, end: Size, new_state: bool) { + let len = self.len; + if end > len { + self.grow(end - len, new_state); + } + self.set_range_inbounds(start, end, new_state); + } + + pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) { + let (blocka, bita) = Self::bit_index(start); + let (blockb, bitb) = Self::bit_index(end); + if blocka == blockb { + // First set all bits except the first `bita`, + // then unset the last `64 - bitb` bits. + let range = if bitb == 0 { + u64::MAX << bita + } else { + (u64::MAX << bita) & (u64::MAX >> (64 - bitb)) + }; + if new_state { + self.blocks[blocka] |= range; + } else { + self.blocks[blocka] &= !range; + } + return; + } + // across block boundaries + if new_state { + // Set `bita..64` to `1`. + self.blocks[blocka] |= u64::MAX << bita; + // Set `0..bitb` to `1`. + if bitb != 0 { + self.blocks[blockb] |= u64::MAX >> (64 - bitb); + } + // Fill in all the other blocks (much faster than one bit at a time). + for block in (blocka + 1)..blockb { + self.blocks[block] = u64::MAX; + } + } else { + // Set `bita..64` to `0`. + self.blocks[blocka] &= !(u64::MAX << bita); + // Set `0..bitb` to `0`. + if bitb != 0 { + self.blocks[blockb] &= !(u64::MAX >> (64 - bitb)); + } + // Fill in all the other blocks (much faster than one bit at a time). 
+ for block in (blocka + 1)..blockb { + self.blocks[block] = 0; + } + } + } + + #[inline] + pub fn get(&self, i: Size) -> bool { + let (block, bit) = Self::bit_index(i); + (self.blocks[block] & (1 << bit)) != 0 + } + + #[inline] + pub fn set(&mut self, i: Size, new_state: bool) { + let (block, bit) = Self::bit_index(i); + self.set_bit(block, bit, new_state); + } + + #[inline] + fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) { + if new_state { + self.blocks[block] |= 1 << bit; + } else { + self.blocks[block] &= !(1 << bit); + } + } + + pub fn grow(&mut self, amount: Size, new_state: bool) { + if amount.bytes() == 0 { + return; + } + let unused_trailing_bits = + u64::try_from(self.blocks.len()).unwrap() * Self::BLOCK_SIZE - self.len.bytes(); + if amount.bytes() > unused_trailing_bits { + let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1; + self.blocks.extend( + // FIXME(oli-obk): optimize this by repeating `new_state as Block`. + iter::repeat(0).take(usize::try_from(additional_blocks).unwrap()), + ); + } + let start = self.len; + self.len += amount; + self.set_range_inbounds(start, start + amount, new_state); // `Size` operation + } + + /// Returns the index of the first bit in `start..end` (end-exclusive) that is equal to is_init. + fn find_bit(&self, start: Size, end: Size, is_init: bool) -> Option { + /// A fast implementation of `find_bit`, + /// which skips over an entire block at a time if it's all 0s (resp. 1s), + /// and finds the first 1 (resp. 0) bit inside a block using `trailing_zeros` instead of a loop. + /// + /// Note that all examples below are written with 8 (instead of 64) bit blocks for simplicity, + /// and with the least significant bit (and lowest block) first: + /// ```text + /// 00000000|00000000 + /// ^ ^ ^ ^ + /// index: 0 7 8 15 + /// ``` + /// Also, if not stated, assume that `is_init = true`, that is, we are searching for the first 1 bit. + fn find_bit_fast( + init_mask: &InitMask, + start: Size, + end: Size, + is_init: bool, + ) -> Option { + /// Search one block, returning the index of the first bit equal to `is_init`. + fn search_block( + bits: Block, + block: usize, + start_bit: usize, + is_init: bool, + ) -> Option { + // For the following examples, assume this function was called with: + // bits = 0b00111011 + // start_bit = 3 + // is_init = false + // Note that, for the examples in this function, the most significant bit is written first, + // which is backwards compared to the comments in `find_bit`/`find_bit_fast`. + + // Invert bits so we're always looking for the first set bit. + // ! 0b00111011 + // bits = 0b11000100 + let bits = if is_init { bits } else { !bits }; + // Mask off unused start bits. + // 0b11000100 + // & 0b11111000 + // bits = 0b11000000 + let bits = bits & (!0 << start_bit); + // Find set bit, if any. + // bit = trailing_zeros(0b11000000) + // bit = 6 + if bits == 0 { + None + } else { + let bit = bits.trailing_zeros(); + Some(InitMask::size_from_bit_index(block, bit)) + } + } + + if start >= end { + return None; + } + + // Convert `start` and `end` to block indexes and bit indexes within each block. + // We must convert `end` to an inclusive bound to handle block boundaries correctly. + // + // For example: + // + // (a) 00000000|00000000 (b) 00000000| + // ^~~~~~~~~~~^ ^~~~~~~~~^ + // start end start end + // + // In both cases, the block index of `end` is 1. + // But we do want to search block 1 in (a), and we don't in (b). 
+ // + // We subtract 1 from both end positions to make them inclusive: + // + // (a) 00000000|00000000 (b) 00000000| + // ^~~~~~~~~~^ ^~~~~~~^ + // start end_inclusive start end_inclusive + // + // For (a), the block index of `end_inclusive` is 1, and for (b), it's 0. + // This provides the desired behavior of searching blocks 0 and 1 for (a), + // and searching only block 0 for (b). + // There is no concern of overflows since we checked for `start >= end` above. + let (start_block, start_bit) = InitMask::bit_index(start); + let end_inclusive = Size::from_bytes(end.bytes() - 1); + let (end_block_inclusive, _) = InitMask::bit_index(end_inclusive); + + // Handle first block: need to skip `start_bit` bits. + // + // We need to handle the first block separately, + // because there may be bits earlier in the block that should be ignored, + // such as the bit marked (1) in this example: + // + // (1) + // -|------ + // (c) 01000000|00000000|00000001 + // ^~~~~~~~~~~~~~~~~~^ + // start end + if let Some(i) = + search_block(init_mask.blocks[start_block], start_block, start_bit, is_init) + { + // If the range is less than a block, we may find a matching bit after `end`. + // + // For example, we shouldn't successfully find bit (2), because it's after `end`: + // + // (2) + // -------| + // (d) 00000001|00000000|00000001 + // ^~~~~^ + // start end + // + // An alternative would be to mask off end bits in the same way as we do for start bits, + // but performing this check afterwards is faster and simpler to implement. + if i < end { + return Some(i); + } else { + return None; + } + } + + // Handle remaining blocks. + // + // We can skip over an entire block at once if it's all 0s (resp. 1s). + // The block marked (3) in this example is the first block that will be handled by this loop, + // and it will be skipped for that reason: + // + // (3) + // -------- + // (e) 01000000|00000000|00000001 + // ^~~~~~~~~~~~~~~~~~^ + // start end + if start_block < end_block_inclusive { + // This loop is written in a specific way for performance. + // Notably: `..end_block_inclusive + 1` is used for an inclusive range instead of `..=end_block_inclusive`, + // and `.zip(start_block + 1..)` is used to track the index instead of `.enumerate().skip().take()`, + // because both alternatives result in significantly worse codegen. + // `end_block_inclusive + 1` is guaranteed not to wrap, because `end_block_inclusive <= end / BLOCK_SIZE`, + // and `BLOCK_SIZE` (the number of bits per block) will always be at least 8 (1 byte). + for (&bits, block) in init_mask.blocks[start_block + 1..end_block_inclusive + 1] + .iter() + .zip(start_block + 1..) + { + if let Some(i) = search_block(bits, block, 0, is_init) { + // If this is the last block, we may find a matching bit after `end`. + // + // For example, we shouldn't successfully find bit (4), because it's after `end`: + // + // (4) + // -------| + // (f) 00000001|00000000|00000001 + // ^~~~~~~~~~~~~~~~~~^ + // start end + // + // As above with example (d), we could handle the end block separately and mask off end bits, + // but unconditionally searching an entire block at once and performing this check afterwards + // is faster and much simpler to implement. 
+ if i < end { + return Some(i); + } else { + return None; + } + } + } + } + + None + } + + #[cfg_attr(not(debug_assertions), allow(dead_code))] + fn find_bit_slow( + init_mask: &InitMask, + start: Size, + end: Size, + is_init: bool, + ) -> Option { + (start..end).find(|&i| init_mask.get(i) == is_init) + } + + let result = find_bit_fast(self, start, end, is_init); + + debug_assert_eq!( + result, + find_bit_slow(self, start, end, is_init), + "optimized implementation of find_bit is wrong for start={:?} end={:?} is_init={} init_mask={:#?}", + start, + end, + is_init, + self + ); + + result + } +} + +/// A contiguous chunk of initialized or uninitialized memory. +pub enum InitChunk { + Init(Range), + Uninit(Range), +} + +impl InitChunk { + #[inline] + pub fn is_init(&self) -> bool { + match self { + Self::Init(_) => true, + Self::Uninit(_) => false, + } + } + + #[inline] + pub fn range(&self) -> Range { + match self { + Self::Init(r) => r.clone(), + Self::Uninit(r) => r.clone(), + } + } +} + +impl InitMask { + /// Checks whether the range `start..end` (end-exclusive) is entirely initialized. + /// + /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte + /// indexes for the first contiguous span of the uninitialized access. + #[inline] + pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), AllocRange> { + if end > self.len { + return Err(AllocRange::from(self.len..end)); + } + + let uninit_start = self.find_bit(start, end, false); + + match uninit_start { + Some(uninit_start) => { + let uninit_end = self.find_bit(uninit_start, end, true).unwrap_or(end); + Err(AllocRange::from(uninit_start..uninit_end)) + } + None => Ok(()), + } + } + + /// Returns an iterator, yielding a range of byte indexes for each contiguous region + /// of initialized or uninitialized bytes inside the range `start..end` (end-exclusive). + /// + /// The iterator guarantees the following: + /// - Chunks are nonempty. + /// - Chunks are adjacent (each range's start is equal to the previous range's end). + /// - Chunks span exactly `start..end` (the first starts at `start`, the last ends at `end`). + /// - Chunks alternate between [`InitChunk::Init`] and [`InitChunk::Uninit`]. + #[inline] + pub fn range_as_init_chunks(&self, start: Size, end: Size) -> InitChunkIter<'_> { + assert!(end <= self.len); + + let is_init = if start < end { + self.get(start) + } else { + // `start..end` is empty: there are no chunks, so use some arbitrary value + false + }; + + InitChunkIter { init_mask: self, is_init, start, end } + } +} + +/// Yields [`InitChunk`]s. See [`InitMask::range_as_init_chunks`]. +#[derive(Clone)] +pub struct InitChunkIter<'a> { + init_mask: &'a InitMask, + /// Whether the next chunk we will return is initialized. + /// If there are no more chunks, contains some arbitrary value. + is_init: bool, + /// The current byte index into `init_mask`. + start: Size, + /// The end byte index into `init_mask`. + end: Size, +} + +impl<'a> Iterator for InitChunkIter<'a> { + type Item = InitChunk; + + #[inline] + fn next(&mut self) -> Option { + if self.start >= self.end { + return None; + } + + let end_of_chunk = + self.init_mask.find_bit(self.start, self.end, !self.is_init).unwrap_or(self.end); + let range = self.start..end_of_chunk; + + let ret = + Some(if self.is_init { InitChunk::Init(range) } else { InitChunk::Uninit(range) }); + + self.is_init = !self.is_init; + self.start = end_of_chunk; + + ret + } +} + +/// Uninitialized bytes. 
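+///
+/// (A hypothetical illustration of the methods below, not part of the original patch:
+/// for an 8-byte allocation whose bytes `4..8` were never written, `is_init` over the
+/// full range would return `Err` with the uninitialized range `4..8`, and `check_init`
+/// would turn that into an `InvalidUninitBytes` error.)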
+impl Allocation { + /// Checks whether the given range is entirely initialized. + /// + /// Returns `Ok(())` if it's initialized. Otherwise returns the range of byte + /// indexes of the first contiguous uninitialized access. + fn is_init(&self, range: AllocRange) -> Result<(), AllocRange> { + self.init_mask.is_range_initialized(range.start, range.end()) // `Size` addition + } + + /// Checks that a range of bytes is initialized. If not, returns the `InvalidUninitBytes` + /// error which will report the first range of bytes which is uninitialized. + fn check_init(&self, range: AllocRange) -> AllocResult { + self.is_init(range).map_err(|uninit_range| { + AllocError::InvalidUninitBytes(Some(UninitBytesAccess { + access: range, + uninit: uninit_range, + })) + }) + } + + fn mark_init(&mut self, range: AllocRange, is_init: bool) { + if range.size.bytes() == 0 { + return; + } + assert!(self.mutability == Mutability::Mut); + self.init_mask.set_range(range.start, range.end(), is_init); + } +} + +/// Run-length encoding of the uninit mask. +/// Used to copy parts of a mask multiple times to another allocation. +pub struct InitMaskCompressed { + /// Whether the first range is initialized. + initial: bool, + /// The lengths of ranges that are run-length encoded. + /// The initialization state of the ranges alternate starting with `initial`. + ranges: smallvec::SmallVec<[u64; 1]>, +} + +impl InitMaskCompressed { + pub fn no_bytes_init(&self) -> bool { + // The `ranges` are run-length encoded and of alternating initialization state. + // So if `ranges.len() > 1` then the second block is an initialized range. + !self.initial && self.ranges.len() == 1 + } +} + +/// Transferring the initialization mask to other allocations. +impl Allocation { + /// Creates a run-length encoding of the initialization mask; panics if range is empty. + /// + /// This is essentially a more space-efficient version of + /// `InitMask::range_as_init_chunks(...).collect::>()`. + pub fn compress_uninit_range(&self, range: AllocRange) -> InitMaskCompressed { + // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`), + // a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from + // the source and write it to the destination. Even if we optimized the memory accesses, + // we'd be doing all of this `repeat` times. + // Therefore we precompute a compressed version of the initialization mask of the source value and + // then write it back `repeat` times without computing any more information from the source. + + // A precomputed cache for ranges of initialized / uninitialized bits + // 0000010010001110 will become + // `[5, 1, 2, 1, 3, 3, 1]`, + // where each element toggles the state. + + let mut ranges = smallvec::SmallVec::<[u64; 1]>::new(); + + let mut chunks = self.init_mask.range_as_init_chunks(range.start, range.end()).peekable(); + + let initial = chunks.peek().expect("range should be nonempty").is_init(); + + // Here we rely on `range_as_init_chunks` to yield alternating init/uninit chunks. + for chunk in chunks { + let len = chunk.range().end.bytes() - chunk.range().start.bytes(); + ranges.push(len); + } + + InitMaskCompressed { ranges, initial } + } + + /// Applies multiple instances of the run-length encoding to the initialization mask. + /// + /// This is dangerous to use as it can violate internal `Allocation` invariants! + /// It only exists to support an efficient implementation of `mem_copy_repeatedly`. 
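+    ///
+    /// A sketch of the intended pairing with `compress_uninit_range` (hypothetical caller
+    /// code, for illustration only):
+    /// ```ignore
+    /// // Compress the init mask of the source range once...
+    /// let compressed = src_alloc.compress_uninit_range(src_range);
+    /// // ...then stamp it `repeat` times into the destination.
+    /// dest_alloc.mark_compressed_init_range(&compressed, dest_range, repeat);
+    /// ```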
+ pub fn mark_compressed_init_range( + &mut self, + defined: &InitMaskCompressed, + range: AllocRange, + repeat: u64, + ) { + // An optimization where we can just overwrite an entire range of initialization + // bits if they are going to be uniformly `1` or `0`. + if defined.ranges.len() <= 1 { + self.init_mask.set_range_inbounds( + range.start, + range.start + range.size * repeat, // `Size` operations + defined.initial, + ); + return; + } + + for mut j in 0..repeat { + j *= range.size.bytes(); + j += range.start.bytes(); + let mut cur = defined.initial; + for range in &defined.ranges { + let old_j = j; + j += range; + self.init_mask.set_range_inbounds( + Size::from_bytes(old_j), + Size::from_bytes(j), + cur, + ); + cur = !cur; + } + } + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/error.rs b/compiler/rustc_middle/src/mir/interpret/error.rs new file mode 100644 index 000000000..cecb55578 --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/error.rs @@ -0,0 +1,551 @@ +use super::{AllocId, AllocRange, ConstAlloc, Pointer, Scalar}; + +use crate::mir::interpret::ConstValue; +use crate::ty::{layout, query::TyCtxtAt, tls, Ty, ValTree}; + +use rustc_data_structures::sync::Lock; +use rustc_errors::{pluralize, struct_span_err, DiagnosticBuilder, ErrorGuaranteed}; +use rustc_macros::HashStable; +use rustc_session::CtfeBacktrace; +use rustc_span::def_id::DefId; +use rustc_target::abi::{call, Align, Size}; +use std::{any::Any, backtrace::Backtrace, fmt}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, HashStable, TyEncodable, TyDecodable)] +pub enum ErrorHandled { + /// Already reported an error for this evaluation, and the compilation is + /// *guaranteed* to fail. Warnings/lints *must not* produce `Reported`. + Reported(ErrorGuaranteed), + /// Already emitted a lint for this evaluation. + Linted, + /// Don't emit an error, the evaluation failed because the MIR was generic + /// and the substs didn't fully monomorphize it. + TooGeneric, +} + +impl From for ErrorHandled { + fn from(err: ErrorGuaranteed) -> ErrorHandled { + ErrorHandled::Reported(err) + } +} + +TrivialTypeTraversalAndLiftImpls! { + ErrorHandled, +} + +pub type EvalToAllocationRawResult<'tcx> = Result, ErrorHandled>; +pub type EvalToConstValueResult<'tcx> = Result, ErrorHandled>; +pub type EvalToValTreeResult<'tcx> = Result>, ErrorHandled>; + +pub fn struct_error<'tcx>( + tcx: TyCtxtAt<'tcx>, + msg: &str, +) -> DiagnosticBuilder<'tcx, ErrorGuaranteed> { + struct_span_err!(tcx.sess, tcx.span, E0080, "{}", msg) +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(InterpErrorInfo<'_>, 8); + +/// Packages the kind of error we got from the const code interpreter +/// up with a Rust-level backtrace of where the error occurred. +/// These should always be constructed by calling `.into()` on +/// an `InterpError`. In `rustc_mir::interpret`, we have `throw_err_*` +/// macros for this. +#[derive(Debug)] +pub struct InterpErrorInfo<'tcx>(Box>); + +#[derive(Debug)] +struct InterpErrorInfoInner<'tcx> { + kind: InterpError<'tcx>, + backtrace: Option>, +} + +impl fmt::Display for InterpErrorInfo<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0.kind) + } +} + +impl<'tcx> InterpErrorInfo<'tcx> { + pub fn print_backtrace(&self) { + if let Some(backtrace) = self.0.backtrace.as_ref() { + print_backtrace(backtrace); + } + } + + pub fn into_kind(self) -> InterpError<'tcx> { + let InterpErrorInfo(box InterpErrorInfoInner { kind, .. 
}) = self;
+        kind
+    }
+
+    #[inline]
+    pub fn kind(&self) -> &InterpError<'tcx> {
+        &self.0.kind
+    }
+}
+
+fn print_backtrace(backtrace: &Backtrace) {
+    eprintln!("\n\nAn error occurred in miri:\n{}", backtrace);
+}
+
+impl From<ErrorHandled> for InterpErrorInfo<'_> {
+    fn from(err: ErrorHandled) -> Self {
+        match err {
+            ErrorHandled::Reported(ErrorGuaranteed { .. }) | ErrorHandled::Linted => {
+                err_inval!(ReferencedConstant)
+            }
+            ErrorHandled::TooGeneric => err_inval!(TooGeneric),
+        }
+        .into()
+    }
+}
+
+impl From<ErrorGuaranteed> for InterpErrorInfo<'_> {
+    fn from(err: ErrorGuaranteed) -> Self {
+        InterpError::InvalidProgram(InvalidProgramInfo::AlreadyReported(err)).into()
+    }
+}
+
+impl<'tcx> From<InterpError<'tcx>> for InterpErrorInfo<'tcx> {
+    fn from(kind: InterpError<'tcx>) -> Self {
+        let capture_backtrace = tls::with_opt(|tcx| {
+            if let Some(tcx) = tcx {
+                *Lock::borrow(&tcx.sess.ctfe_backtrace)
+            } else {
+                CtfeBacktrace::Disabled
+            }
+        });
+
+        let backtrace = match capture_backtrace {
+            CtfeBacktrace::Disabled => None,
+            CtfeBacktrace::Capture => Some(Box::new(Backtrace::force_capture())),
+            CtfeBacktrace::Immediate => {
+                // Print it now.
+                let backtrace = Backtrace::force_capture();
+                print_backtrace(&backtrace);
+                None
+            }
+        };
+
+        InterpErrorInfo(Box::new(InterpErrorInfoInner { kind, backtrace }))
+    }
+}
+
+/// Error information for when the program we executed turned out not to actually be a valid
+/// program. This cannot happen in stand-alone Miri, but it can happen during CTFE/ConstProp
+/// where we work on generic code or execution does not have all information available.
+pub enum InvalidProgramInfo<'tcx> {
+    /// Resolution can fail if we are in a too generic context.
+    TooGeneric,
+    /// Cannot compute this constant because it depends on another one
+    /// which already produced an error.
+    ReferencedConstant,
+    /// Abort in case errors are already reported.
+    AlreadyReported(ErrorGuaranteed),
+    /// An error occurred during layout computation.
+    Layout(layout::LayoutError<'tcx>),
+    /// An error occurred during FnAbi computation: the passed --target lacks FFI support
+    /// (which unfortunately typeck does not reject).
+    /// Not using `FnAbiError` as that contains a nested `LayoutError`.
+    FnAbiAdjustForForeignAbi(call::AdjustForForeignAbiError),
+    /// SizeOf of unsized type was requested.
+    SizeOfUnsizedType(Ty<'tcx>),
+}
+
+impl fmt::Display for InvalidProgramInfo<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use InvalidProgramInfo::*;
+        match self {
+            TooGeneric => write!(f, "encountered overly generic constant"),
+            ReferencedConstant => write!(f, "referenced constant has errors"),
+            AlreadyReported(ErrorGuaranteed { .. }) => {
+                write!(f, "encountered constants with type errors, stopping evaluation")
+            }
+            Layout(ref err) => write!(f, "{err}"),
+            FnAbiAdjustForForeignAbi(ref err) => write!(f, "{err}"),
+            SizeOfUnsizedType(ty) => write!(f, "size_of called on unsized type `{ty}`"),
+        }
+    }
+}
+
+/// Details of why a pointer had to be in-bounds.
+#[derive(Debug, Copy, Clone, TyEncodable, TyDecodable, HashStable)]
+pub enum CheckInAllocMsg {
+    /// We are dereferencing a pointer (i.e., creating a place).
+    DerefTest,
+    /// We are accessing memory.
+    MemoryAccessTest,
+    /// We are doing pointer arithmetic.
+    PointerArithmeticTest,
+    /// We are doing pointer offset_from.
+    OffsetFromTest,
+    /// None of the above -- generic/unspecific inbounds test.
+ InboundsTest, +} + +impl fmt::Display for CheckInAllocMsg { + /// When this is printed as an error the context looks like this: + /// "{msg}{pointer} is a dangling pointer". + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match *self { + CheckInAllocMsg::DerefTest => "dereferencing pointer failed: ", + CheckInAllocMsg::MemoryAccessTest => "memory access failed: ", + CheckInAllocMsg::PointerArithmeticTest => "out-of-bounds pointer arithmetic: ", + CheckInAllocMsg::OffsetFromTest => "out-of-bounds offset_from: ", + CheckInAllocMsg::InboundsTest => "out-of-bounds pointer use: ", + } + ) + } +} + +/// Details of an access to uninitialized bytes where it is not allowed. +#[derive(Debug)] +pub struct UninitBytesAccess { + /// Range of the original memory access. + pub access: AllocRange, + /// Range of the uninit memory that was encountered. (Might not be maximal.) + pub uninit: AllocRange, +} + +/// Information about a size mismatch. +#[derive(Debug)] +pub struct ScalarSizeMismatch { + pub target_size: u64, + pub data_size: u64, +} + +/// Error information for when the program caused Undefined Behavior. +pub enum UndefinedBehaviorInfo { + /// Free-form case. Only for errors that are never caught! + Ub(String), + /// Unreachable code was executed. + Unreachable, + /// A slice/array index projection went out-of-bounds. + BoundsCheckFailed { + len: u64, + index: u64, + }, + /// Something was divided by 0 (x / 0). + DivisionByZero, + /// Something was "remainded" by 0 (x % 0). + RemainderByZero, + /// Signed division overflowed (INT_MIN / -1). + DivisionOverflow, + /// Signed remainder overflowed (INT_MIN % -1). + RemainderOverflow, + /// Overflowing inbounds pointer arithmetic. + PointerArithOverflow, + /// Invalid metadata in a wide pointer (using `str` to avoid allocations). + InvalidMeta(&'static str), + /// Reading a C string that does not end within its allocation. + UnterminatedCString(Pointer), + /// Dereferencing a dangling pointer after it got freed. + PointerUseAfterFree(AllocId), + /// Used a pointer outside the bounds it is valid for. + /// (If `ptr_size > 0`, determines the size of the memory range that was expected to be in-bounds.) + PointerOutOfBounds { + alloc_id: AllocId, + alloc_size: Size, + ptr_offset: i64, + ptr_size: Size, + msg: CheckInAllocMsg, + }, + /// Using an integer as a pointer in the wrong way. + DanglingIntPointer(u64, CheckInAllocMsg), + /// Used a pointer with bad alignment. + AlignmentCheckFailed { + required: Align, + has: Align, + }, + /// Writing to read-only memory. + WriteToReadOnly(AllocId), + // Trying to access the data behind a function pointer. + DerefFunctionPointer(AllocId), + // Trying to access the data behind a vtable pointer. + DerefVTablePointer(AllocId), + /// The value validity check found a problem. + /// Should only be thrown by `validity.rs` and always point out which part of the value + /// is the problem. + ValidationFailure { + /// The "path" to the value in question, e.g. `.0[5].field` for a struct + /// field in the 6th element of an array that is the first element of a tuple. + path: Option, + msg: String, + }, + /// Using a non-boolean `u8` as bool. + InvalidBool(u8), + /// Using a non-character `u32` as character. + InvalidChar(u32), + /// The tag of an enum does not encode an actual discriminant. + InvalidTag(Scalar), + /// Using a pointer-not-to-a-function as function pointer. + InvalidFunctionPointer(Pointer), + /// Using a pointer-not-to-a-vtable as vtable pointer. 
+ InvalidVTablePointer(Pointer), + /// Using a string that is not valid UTF-8, + InvalidStr(std::str::Utf8Error), + /// Using uninitialized data where it is not allowed. + InvalidUninitBytes(Option<(AllocId, UninitBytesAccess)>), + /// Working with a local that is not currently live. + DeadLocal, + /// Data size is not equal to target size. + ScalarSizeMismatch(ScalarSizeMismatch), + /// A discriminant of an uninhabited enum variant is written. + UninhabitedEnumVariantWritten, +} + +impl fmt::Display for UndefinedBehaviorInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use UndefinedBehaviorInfo::*; + match self { + Ub(msg) => write!(f, "{msg}"), + Unreachable => write!(f, "entering unreachable code"), + BoundsCheckFailed { ref len, ref index } => { + write!(f, "indexing out of bounds: the len is {len} but the index is {index}") + } + DivisionByZero => write!(f, "dividing by zero"), + RemainderByZero => write!(f, "calculating the remainder with a divisor of zero"), + DivisionOverflow => write!(f, "overflow in signed division (dividing MIN by -1)"), + RemainderOverflow => write!(f, "overflow in signed remainder (dividing MIN by -1)"), + PointerArithOverflow => write!(f, "overflowing in-bounds pointer arithmetic"), + InvalidMeta(msg) => write!(f, "invalid metadata in wide pointer: {msg}"), + UnterminatedCString(p) => write!( + f, + "reading a null-terminated string starting at {p:?} with no null found before end of allocation", + ), + PointerUseAfterFree(a) => { + write!(f, "pointer to {a:?} was dereferenced after this allocation got freed") + } + PointerOutOfBounds { alloc_id, alloc_size, ptr_offset, ptr_size: Size::ZERO, msg } => { + write!( + f, + "{msg}{alloc_id:?} has size {alloc_size}, so pointer at offset {ptr_offset} is out-of-bounds", + alloc_size = alloc_size.bytes(), + ) + } + PointerOutOfBounds { alloc_id, alloc_size, ptr_offset, ptr_size, msg } => write!( + f, + "{msg}{alloc_id:?} has size {alloc_size}, so pointer to {ptr_size} byte{ptr_size_p} starting at offset {ptr_offset} is out-of-bounds", + alloc_size = alloc_size.bytes(), + ptr_size = ptr_size.bytes(), + ptr_size_p = pluralize!(ptr_size.bytes()), + ), + DanglingIntPointer(i, msg) => { + write!( + f, + "{msg}{pointer} is a dangling pointer (it has no provenance)", + pointer = Pointer::>::from_addr(*i), + ) + } + AlignmentCheckFailed { required, has } => write!( + f, + "accessing memory with alignment {has}, but alignment {required} is required", + has = has.bytes(), + required = required.bytes() + ), + WriteToReadOnly(a) => write!(f, "writing to {a:?} which is read-only"), + DerefFunctionPointer(a) => write!(f, "accessing {a:?} which contains a function"), + DerefVTablePointer(a) => write!(f, "accessing {a:?} which contains a vtable"), + ValidationFailure { path: None, msg } => { + write!(f, "constructing invalid value: {msg}") + } + ValidationFailure { path: Some(path), msg } => { + write!(f, "constructing invalid value at {path}: {msg}") + } + InvalidBool(b) => { + write!(f, "interpreting an invalid 8-bit value as a bool: 0x{b:02x}") + } + InvalidChar(c) => { + write!(f, "interpreting an invalid 32-bit value as a char: 0x{c:08x}") + } + InvalidTag(val) => write!(f, "enum value has invalid tag: {val:x}"), + InvalidFunctionPointer(p) => { + write!(f, "using {p:?} as function pointer but it does not point to a function") + } + InvalidVTablePointer(p) => { + write!(f, "using {p:?} as vtable pointer but it does not point to a vtable") + } + InvalidStr(err) => write!(f, "this string is not valid UTF-8: 
{err}"), + InvalidUninitBytes(Some((alloc, info))) => write!( + f, + "reading memory at {alloc:?}{access:?}, \ + but memory is uninitialized at {uninit:?}, \ + and this operation requires initialized memory", + access = info.access, + uninit = info.uninit, + ), + InvalidUninitBytes(None) => write!( + f, + "using uninitialized data, but this operation requires initialized memory" + ), + DeadLocal => write!(f, "accessing a dead local variable"), + ScalarSizeMismatch(self::ScalarSizeMismatch { target_size, data_size }) => write!( + f, + "scalar size mismatch: expected {target_size} bytes but got {data_size} bytes instead", + ), + UninhabitedEnumVariantWritten => { + write!(f, "writing discriminant of an uninhabited enum") + } + } + } +} + +/// Error information for when the program did something that might (or might not) be correct +/// to do according to the Rust spec, but due to limitations in the interpreter, the +/// operation could not be carried out. These limitations can differ between CTFE and the +/// Miri engine, e.g., CTFE does not support dereferencing pointers at integral addresses. +pub enum UnsupportedOpInfo { + /// Free-form case. Only for errors that are never caught! + Unsupported(String), + /// Encountered a pointer where we needed raw bytes. + ReadPointerAsBytes, + /// Overwriting parts of a pointer; the resulting state cannot be represented in our + /// `Allocation` data structure. See . + PartialPointerOverwrite(Pointer), + // + // The variants below are only reachable from CTFE/const prop, miri will never emit them. + // + /// Accessing thread local statics + ThreadLocalStatic(DefId), + /// Accessing an unsupported extern static. + ReadExternStatic(DefId), +} + +impl fmt::Display for UnsupportedOpInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use UnsupportedOpInfo::*; + match self { + Unsupported(ref msg) => write!(f, "{msg}"), + ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes"), + PartialPointerOverwrite(ptr) => { + write!(f, "unable to overwrite parts of a pointer in memory at {ptr:?}") + } + ThreadLocalStatic(did) => write!(f, "cannot access thread local static ({did:?})"), + ReadExternStatic(did) => write!(f, "cannot read from extern static ({did:?})"), + } + } +} + +/// Error information for when the program exhausted the resources granted to it +/// by the interpreter. +pub enum ResourceExhaustionInfo { + /// The stack grew too big. + StackFrameLimitReached, + /// The program ran for too long. + /// + /// The exact limit is set by the `const_eval_limit` attribute. + StepLimitReached, + /// There is not enough memory to perform an allocation. + MemoryExhausted, +} + +impl fmt::Display for ResourceExhaustionInfo { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ResourceExhaustionInfo::*; + match self { + StackFrameLimitReached => { + write!(f, "reached the configured maximum number of stack frames") + } + StepLimitReached => { + write!(f, "exceeded interpreter step limit (see `#[const_eval_limit]`)") + } + MemoryExhausted => { + write!(f, "tried to allocate more memory than available to compiler") + } + } + } +} + +/// A trait to work around not having trait object upcasting. +pub trait AsAny: Any { + fn as_any(&self) -> &dyn Any; +} +impl AsAny for T { + #[inline(always)] + fn as_any(&self) -> &dyn Any { + self + } +} + +/// A trait for machine-specific errors (or other "machine stop" conditions). 
+pub trait MachineStopType: AsAny + fmt::Display + Send { + /// If `true`, emit a hard error instead of going through the `CONST_ERR` lint + fn is_hard_err(&self) -> bool { + false + } +} + +impl dyn MachineStopType { + #[inline(always)] + pub fn downcast_ref(&self) -> Option<&T> { + self.as_any().downcast_ref() + } +} + +pub enum InterpError<'tcx> { + /// The program caused undefined behavior. + UndefinedBehavior(UndefinedBehaviorInfo), + /// The program did something the interpreter does not support (some of these *might* be UB + /// but the interpreter is not sure). + Unsupported(UnsupportedOpInfo), + /// The program was invalid (ill-typed, bad MIR, not sufficiently monomorphized, ...). + InvalidProgram(InvalidProgramInfo<'tcx>), + /// The program exhausted the interpreter's resources (stack/heap too big, + /// execution takes too long, ...). + ResourceExhaustion(ResourceExhaustionInfo), + /// Stop execution for a machine-controlled reason. This is never raised by + /// the core engine itself. + MachineStop(Box), +} + +pub type InterpResult<'tcx, T = ()> = Result>; + +impl fmt::Display for InterpError<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use InterpError::*; + match *self { + Unsupported(ref msg) => write!(f, "{msg}"), + InvalidProgram(ref msg) => write!(f, "{msg}"), + UndefinedBehavior(ref msg) => write!(f, "{msg}"), + ResourceExhaustion(ref msg) => write!(f, "{msg}"), + MachineStop(ref msg) => write!(f, "{msg}"), + } + } +} + +// Forward `Debug` to `Display`, so it does not look awful. +impl fmt::Debug for InterpError<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } +} + +impl InterpError<'_> { + /// Some errors do string formatting even if the error is never printed. + /// To avoid performance issues, there are places where we want to be sure to never raise these formatting errors, + /// so this method lets us detect them and `bug!` on unexpected errors. + pub fn formatted_string(&self) -> bool { + matches!( + self, + InterpError::Unsupported(UnsupportedOpInfo::Unsupported(_)) + | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::ValidationFailure { .. }) + | InterpError::UndefinedBehavior(UndefinedBehaviorInfo::Ub(_)) + ) + } + + /// Should this error be reported as a hard error, preventing compilation, or a soft error, + /// causing a deny-by-default lint? + pub fn is_hard_err(&self) -> bool { + use InterpError::*; + match *self { + MachineStop(ref err) => err.is_hard_err(), + UndefinedBehavior(_) => true, + ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted) => true, + _ => false, + } + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs new file mode 100644 index 000000000..967f8ece1 --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/mod.rs @@ -0,0 +1,633 @@ +//! An interpreter for MIR used in CTFE and by miri. + +#[macro_export] +macro_rules! err_unsup { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::Unsupported( + $crate::mir::interpret::UnsupportedOpInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_unsup_format { + ($($tt:tt)*) => { err_unsup!(Unsupported(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! err_inval { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::InvalidProgram( + $crate::mir::interpret::InvalidProgramInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! 
err_ub { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::UndefinedBehavior( + $crate::mir::interpret::UndefinedBehaviorInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_ub_format { + ($($tt:tt)*) => { err_ub!(Ub(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! err_exhaust { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::ResourceExhaustion( + $crate::mir::interpret::ResourceExhaustionInfo::$($tt)* + ) + }; +} + +#[macro_export] +macro_rules! err_machine_stop { + ($($tt:tt)*) => { + $crate::mir::interpret::InterpError::MachineStop(Box::new($($tt)*)) + }; +} + +// In the `throw_*` macros, avoid `return` to make them work with `try {}`. +#[macro_export] +macro_rules! throw_unsup { + ($($tt:tt)*) => { do yeet err_unsup!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_unsup_format { + ($($tt:tt)*) => { throw_unsup!(Unsupported(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! throw_inval { + ($($tt:tt)*) => { do yeet err_inval!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_ub { + ($($tt:tt)*) => { do yeet err_ub!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_ub_format { + ($($tt:tt)*) => { throw_ub!(Ub(format!($($tt)*))) }; +} + +#[macro_export] +macro_rules! throw_exhaust { + ($($tt:tt)*) => { do yeet err_exhaust!($($tt)*) }; +} + +#[macro_export] +macro_rules! throw_machine_stop { + ($($tt:tt)*) => { do yeet err_machine_stop!($($tt)*) }; +} + +mod allocation; +mod error; +mod pointer; +mod queries; +mod value; + +use std::convert::TryFrom; +use std::fmt; +use std::io; +use std::io::{Read, Write}; +use std::num::{NonZeroU32, NonZeroU64}; +use std::sync::atomic::{AtomicU32, Ordering}; + +use rustc_ast::LitKind; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::{HashMapExt, Lock}; +use rustc_data_structures::tiny_list::TinyList; +use rustc_hir::def_id::DefId; +use rustc_macros::HashStable; +use rustc_middle::ty::print::with_no_trimmed_paths; +use rustc_serialize::{Decodable, Encodable}; +use rustc_target::abi::Endian; + +use crate::mir; +use crate::ty::codec::{TyDecoder, TyEncoder}; +use crate::ty::subst::GenericArgKind; +use crate::ty::{self, Instance, Ty, TyCtxt}; + +pub use self::error::{ + struct_error, CheckInAllocMsg, ErrorHandled, EvalToAllocationRawResult, EvalToConstValueResult, + EvalToValTreeResult, InterpError, InterpErrorInfo, InterpResult, InvalidProgramInfo, + MachineStopType, ResourceExhaustionInfo, ScalarSizeMismatch, UndefinedBehaviorInfo, + UninitBytesAccess, UnsupportedOpInfo, +}; + +pub use self::value::{get_slice_bytes, ConstAlloc, ConstValue, Scalar, ScalarMaybeUninit}; + +pub use self::allocation::{ + alloc_range, AllocRange, Allocation, ConstAllocation, InitChunk, InitChunkIter, InitMask, + Relocations, +}; + +pub use self::pointer::{Pointer, PointerArithmetic, Provenance}; + +/// Uniquely identifies one of the following: +/// - A constant +/// - A static +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, TyEncodable, TyDecodable)] +#[derive(HashStable, Lift)] +pub struct GlobalId<'tcx> { + /// For a constant or static, the `Instance` of the item itself. + /// For a promoted global, the `Instance` of the function they belong to. + pub instance: ty::Instance<'tcx>, + + /// The index for promoted globals within their function's `mir::Body`. 
+ pub promoted: Option, +} + +impl<'tcx> GlobalId<'tcx> { + pub fn display(self, tcx: TyCtxt<'tcx>) -> String { + let instance_name = with_no_trimmed_paths!(tcx.def_path_str(self.instance.def.def_id())); + if let Some(promoted) = self.promoted { + format!("{}::{:?}", instance_name, promoted) + } else { + instance_name + } + } +} + +/// Input argument for `tcx.lit_to_const`. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, HashStable)] +pub struct LitToConstInput<'tcx> { + /// The absolute value of the resultant constant. + pub lit: &'tcx LitKind, + /// The type of the constant. + pub ty: Ty<'tcx>, + /// If the constant is negative. + pub neg: bool, +} + +/// Error type for `tcx.lit_to_const`. +#[derive(Copy, Clone, Debug, Eq, PartialEq, HashStable)] +pub enum LitToConstError { + /// The literal's inferred type did not match the expected `ty` in the input. + /// This is used for graceful error handling (`delay_span_bug`) in + /// type checking (`Const::from_anon_const`). + TypeError, + Reported, +} + +#[derive(Copy, Clone, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct AllocId(pub NonZeroU64); + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types. +impl fmt::Debug for AllocId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if f.alternate() { write!(f, "a{}", self.0) } else { write!(f, "alloc{}", self.0) } + } +} + +// No "Display" since AllocIds are not usually user-visible. + +#[derive(TyDecodable, TyEncodable)] +enum AllocDiscriminant { + Alloc, + Fn, + VTable, + Static, +} + +pub fn specialized_encode_alloc_id<'tcx, E: TyEncoder>>( + encoder: &mut E, + tcx: TyCtxt<'tcx>, + alloc_id: AllocId, +) { + match tcx.global_alloc(alloc_id) { + GlobalAlloc::Memory(alloc) => { + trace!("encoding {:?} with {:#?}", alloc_id, alloc); + AllocDiscriminant::Alloc.encode(encoder); + alloc.encode(encoder); + } + GlobalAlloc::Function(fn_instance) => { + trace!("encoding {:?} with {:#?}", alloc_id, fn_instance); + AllocDiscriminant::Fn.encode(encoder); + fn_instance.encode(encoder); + } + GlobalAlloc::VTable(ty, poly_trait_ref) => { + trace!("encoding {:?} with {ty:#?}, {poly_trait_ref:#?}", alloc_id); + AllocDiscriminant::VTable.encode(encoder); + ty.encode(encoder); + poly_trait_ref.encode(encoder); + } + GlobalAlloc::Static(did) => { + assert!(!tcx.is_thread_local_static(did)); + // References to statics doesn't need to know about their allocations, + // just about its `DefId`. + AllocDiscriminant::Static.encode(encoder); + did.encode(encoder); + } + } +} + +// Used to avoid infinite recursion when decoding cyclic allocations. +type DecodingSessionId = NonZeroU32; + +#[derive(Clone)] +enum State { + Empty, + InProgressNonAlloc(TinyList), + InProgress(TinyList, AllocId), + Done(AllocId), +} + +pub struct AllocDecodingState { + // For each `AllocId`, we keep track of which decoding state it's currently in. + decoding_state: Vec>, + // The offsets of each allocation in the data stream. + data_offsets: Vec, +} + +impl AllocDecodingState { + #[inline] + pub fn new_decoding_session(&self) -> AllocDecodingSession<'_> { + static DECODER_SESSION_ID: AtomicU32 = AtomicU32::new(0); + let counter = DECODER_SESSION_ID.fetch_add(1, Ordering::SeqCst); + + // Make sure this is never zero. 
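+        // (Illustration: masking with `0x7FFF_FFFF` bounds the counter by `i32::MAX`, so
+        // the `+ 1` below cannot overflow a `u32` and the result is at least 1, i.e. nonzero.)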
+ let session_id = DecodingSessionId::new((counter & 0x7FFFFFFF) + 1).unwrap(); + + AllocDecodingSession { state: self, session_id } + } + + pub fn new(data_offsets: Vec) -> Self { + let decoding_state = vec![Lock::new(State::Empty); data_offsets.len()]; + + Self { decoding_state, data_offsets } + } +} + +#[derive(Copy, Clone)] +pub struct AllocDecodingSession<'s> { + state: &'s AllocDecodingState, + session_id: DecodingSessionId, +} + +impl<'s> AllocDecodingSession<'s> { + /// Decodes an `AllocId` in a thread-safe way. + pub fn decode_alloc_id<'tcx, D>(&self, decoder: &mut D) -> AllocId + where + D: TyDecoder>, + { + // Read the index of the allocation. + let idx = usize::try_from(decoder.read_u32()).unwrap(); + let pos = usize::try_from(self.state.data_offsets[idx]).unwrap(); + + // Decode the `AllocDiscriminant` now so that we know if we have to reserve an + // `AllocId`. + let (alloc_kind, pos) = decoder.with_position(pos, |decoder| { + let alloc_kind = AllocDiscriminant::decode(decoder); + (alloc_kind, decoder.position()) + }); + + // Check the decoding state to see if it's already decoded or if we should + // decode it here. + let alloc_id = { + let mut entry = self.state.decoding_state[idx].lock(); + + match *entry { + State::Done(alloc_id) => { + return alloc_id; + } + ref mut entry @ State::Empty => { + // We are allowed to decode. + match alloc_kind { + AllocDiscriminant::Alloc => { + // If this is an allocation, we need to reserve an + // `AllocId` so we can decode cyclic graphs. + let alloc_id = decoder.interner().reserve_alloc_id(); + *entry = + State::InProgress(TinyList::new_single(self.session_id), alloc_id); + Some(alloc_id) + } + AllocDiscriminant::Fn + | AllocDiscriminant::Static + | AllocDiscriminant::VTable => { + // Fns and statics cannot be cyclic, and their `AllocId` + // is determined later by interning. + *entry = + State::InProgressNonAlloc(TinyList::new_single(self.session_id)); + None + } + } + } + State::InProgressNonAlloc(ref mut sessions) => { + if sessions.contains(&self.session_id) { + bug!("this should be unreachable"); + } else { + // Start decoding concurrently. + sessions.insert(self.session_id); + None + } + } + State::InProgress(ref mut sessions, alloc_id) => { + if sessions.contains(&self.session_id) { + // Don't recurse. + return alloc_id; + } else { + // Start decoding concurrently. + sessions.insert(self.session_id); + Some(alloc_id) + } + } + } + }; + + // Now decode the actual data. + let alloc_id = decoder.with_position(pos, |decoder| { + match alloc_kind { + AllocDiscriminant::Alloc => { + let alloc = as Decodable<_>>::decode(decoder); + // We already have a reserved `AllocId`. 
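+                    // (The id was reserved earlier -- either in the `State::Empty` arm
+                    // above or by a previous in-progress decoding -- so this `unwrap`
+                    // cannot fail.)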
+ let alloc_id = alloc_id.unwrap(); + trace!("decoded alloc {:?}: {:#?}", alloc_id, alloc); + decoder.interner().set_alloc_id_same_memory(alloc_id, alloc); + alloc_id + } + AllocDiscriminant::Fn => { + assert!(alloc_id.is_none()); + trace!("creating fn alloc ID"); + let instance = ty::Instance::decode(decoder); + trace!("decoded fn alloc instance: {:?}", instance); + let alloc_id = decoder.interner().create_fn_alloc(instance); + alloc_id + } + AllocDiscriminant::VTable => { + assert!(alloc_id.is_none()); + trace!("creating vtable alloc ID"); + let ty = as Decodable>::decode(decoder); + let poly_trait_ref = + > as Decodable>::decode(decoder); + trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}"); + let alloc_id = decoder.interner().create_vtable_alloc(ty, poly_trait_ref); + alloc_id + } + AllocDiscriminant::Static => { + assert!(alloc_id.is_none()); + trace!("creating extern static alloc ID"); + let did = >::decode(decoder); + trace!("decoded static def-ID: {:?}", did); + let alloc_id = decoder.interner().create_static_alloc(did); + alloc_id + } + } + }); + + self.state.decoding_state[idx].with_lock(|entry| { + *entry = State::Done(alloc_id); + }); + + alloc_id + } +} + +/// An allocation in the global (tcx-managed) memory can be either a function pointer, +/// a static, or a "real" allocation with some data in it. +#[derive(Debug, Clone, Eq, PartialEq, Hash, TyDecodable, TyEncodable, HashStable)] +pub enum GlobalAlloc<'tcx> { + /// The alloc ID is used as a function pointer. + Function(Instance<'tcx>), + /// This alloc ID points to a symbolic (not-reified) vtable. + VTable(Ty<'tcx>, Option>), + /// The alloc ID points to a "lazy" static variable that did not get computed (yet). + /// This is also used to break the cycle in recursive statics. + Static(DefId), + /// The alloc ID points to memory. + Memory(ConstAllocation<'tcx>), +} + +impl<'tcx> GlobalAlloc<'tcx> { + /// Panics if the `GlobalAlloc` does not refer to an `GlobalAlloc::Memory` + #[track_caller] + #[inline] + pub fn unwrap_memory(&self) -> ConstAllocation<'tcx> { + match *self { + GlobalAlloc::Memory(mem) => mem, + _ => bug!("expected memory, got {:?}", self), + } + } + + /// Panics if the `GlobalAlloc` is not `GlobalAlloc::Function` + #[track_caller] + #[inline] + pub fn unwrap_fn(&self) -> Instance<'tcx> { + match *self { + GlobalAlloc::Function(instance) => instance, + _ => bug!("expected function, got {:?}", self), + } + } + + /// Panics if the `GlobalAlloc` is not `GlobalAlloc::VTable` + #[track_caller] + #[inline] + pub fn unwrap_vtable(&self) -> (Ty<'tcx>, Option>) { + match *self { + GlobalAlloc::VTable(ty, poly_trait_ref) => (ty, poly_trait_ref), + _ => bug!("expected vtable, got {:?}", self), + } + } +} + +pub(crate) struct AllocMap<'tcx> { + /// Maps `AllocId`s to their corresponding allocations. + alloc_map: FxHashMap>, + + /// Used to ensure that statics and functions only get one associated `AllocId`. + /// Should never contain a `GlobalAlloc::Memory`! + // + // FIXME: Should we just have two separate dedup maps for statics and functions each? + dedup: FxHashMap, AllocId>, + + /// The `AllocId` to assign to the next requested ID. + /// Always incremented; never gets smaller. 
+ next_id: AllocId, +} + +impl<'tcx> AllocMap<'tcx> { + pub(crate) fn new() -> Self { + AllocMap { + alloc_map: Default::default(), + dedup: Default::default(), + next_id: AllocId(NonZeroU64::new(1).unwrap()), + } + } + fn reserve(&mut self) -> AllocId { + let next = self.next_id; + self.next_id.0 = self.next_id.0.checked_add(1).expect( + "You overflowed a u64 by incrementing by 1... \ + You've just earned yourself a free drink if we ever meet. \ + Seriously, how did you do that?!", + ); + next + } +} + +impl<'tcx> TyCtxt<'tcx> { + /// Obtains a new allocation ID that can be referenced but does not + /// yet have an allocation backing it. + /// + /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such + /// an `AllocId` from a query. + pub fn reserve_alloc_id(self) -> AllocId { + self.alloc_map.lock().reserve() + } + + /// Reserves a new ID *if* this allocation has not been dedup-reserved before. + /// Should only be used for "symbolic" allocations (function pointers, vtables, statics), we + /// don't want to dedup IDs for "real" memory! + fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>) -> AllocId { + let mut alloc_map = self.alloc_map.lock(); + match alloc { + GlobalAlloc::Function(..) | GlobalAlloc::Static(..) | GlobalAlloc::VTable(..) => {} + GlobalAlloc::Memory(..) => bug!("Trying to dedup-reserve memory with real data!"), + } + if let Some(&alloc_id) = alloc_map.dedup.get(&alloc) { + return alloc_id; + } + let id = alloc_map.reserve(); + debug!("creating alloc {alloc:?} with id {id:?}"); + alloc_map.alloc_map.insert(id, alloc.clone()); + alloc_map.dedup.insert(alloc, id); + id + } + + /// Generates an `AllocId` for a static or return a cached one in case this function has been + /// called on the same static before. + pub fn create_static_alloc(self, static_id: DefId) -> AllocId { + self.reserve_and_set_dedup(GlobalAlloc::Static(static_id)) + } + + /// Generates an `AllocId` for a function. Depending on the function type, + /// this might get deduplicated or assigned a new ID each time. + pub fn create_fn_alloc(self, instance: Instance<'tcx>) -> AllocId { + // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated + // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be + // duplicated across crates. + // We thus generate a new `AllocId` for every mention of a function. This means that + // `main as fn() == main as fn()` is false, while `let x = main as fn(); x == x` is true. + // However, formatting code relies on function identity (see #58320), so we only do + // this for generic functions. Lifetime parameters are ignored. + let is_generic = instance + .substs + .into_iter() + .any(|kind| !matches!(kind.unpack(), GenericArgKind::Lifetime(_))); + if is_generic { + // Get a fresh ID. + let mut alloc_map = self.alloc_map.lock(); + let id = alloc_map.reserve(); + alloc_map.alloc_map.insert(id, GlobalAlloc::Function(instance)); + id + } else { + // Deduplicate. + self.reserve_and_set_dedup(GlobalAlloc::Function(instance)) + } + } + + /// Generates an `AllocId` for a (symbolic, not-reified) vtable. Will get deduplicated. + pub fn create_vtable_alloc( + self, + ty: Ty<'tcx>, + poly_trait_ref: Option>, + ) -> AllocId { + self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref)) + } + + /// Interns the `Allocation` and return a new `AllocId`, even if there's already an identical + /// `Allocation` with a different `AllocId`. 
+    /// Statics with identical content will still point to the same `Allocation`, i.e.,
+    /// their data will be deduplicated through `Allocation` interning -- but they
+    /// are different places in memory and as such need different IDs.
+    pub fn create_memory_alloc(self, mem: ConstAllocation<'tcx>) -> AllocId {
+        let id = self.reserve_alloc_id();
+        self.set_alloc_id_memory(id, mem);
+        id
+    }
+
+    /// Returns `None` in case the `AllocId` is dangling. An `InterpretCx` can still have a
+    /// local `Allocation` for that `AllocId`, but having such an `AllocId` in a constant is
+    /// illegal and will likely ICE.
+    /// This function exists to allow const eval to detect the difference between evaluation-
+    /// local dangling pointers and allocations in constants/statics.
+    #[inline]
+    pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
+        self.alloc_map.lock().alloc_map.get(&id).cloned()
+    }
+
+    #[inline]
+    #[track_caller]
+    /// Panics in case the `AllocId` is dangling. Since that is impossible for `AllocId`s in
+    /// constants (as all constants must pass interning and validation that check for dangling
+    /// ids), this function is frequently used throughout rustc, but should not be used within
+    /// the miri engine.
+    pub fn global_alloc(self, id: AllocId) -> GlobalAlloc<'tcx> {
+        match self.try_get_global_alloc(id) {
+            Some(alloc) => alloc,
+            None => bug!("could not find allocation for {id:?}"),
+        }
+    }
+
+    /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
+    /// call this function twice, even with the same `Allocation` will ICE the compiler.
+    pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
+        if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
+            bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
+        }
+    }
+
+    /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. May be called
+    /// twice for the same `(AllocId, Allocation)` pair.
+    fn set_alloc_id_same_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
+        self.alloc_map.lock().alloc_map.insert_same(id, GlobalAlloc::Memory(mem));
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Methods to access integers in the target endianness
+////////////////////////////////////////////////////////////////////////////////
+
+#[inline]
+pub fn write_target_uint(
+    endianness: Endian,
+    mut target: &mut [u8],
+    data: u128,
+) -> Result<(), io::Error> {
+    // This u128 holds an "any-size uint" (since smaller uints can fit in it)
+    // So we do not write all bytes of the u128, just the "payload".
+    match endianness {
+        Endian::Little => target.write(&data.to_le_bytes())?,
+        Endian::Big => target.write(&data.to_be_bytes()[16 - target.len()..])?,
+    };
+    debug_assert!(target.len() == 0); // We should have filled the target buffer.
+    Ok(())
+}
+
+#[inline]
+pub fn read_target_uint(endianness: Endian, mut source: &[u8]) -> Result<u128, io::Error> {
+    // This u128 holds an "any-size uint" (since smaller uints can fit in it)
+    let mut buf = [0u8; std::mem::size_of::<u128>()];
+    // So we do not read exactly 16 bytes into the u128, just the "payload".
+    let uint = match endianness {
+        Endian::Little => {
+            source.read(&mut buf)?;
+            Ok(u128::from_le_bytes(buf))
+        }
+        Endian::Big => {
+            source.read(&mut buf[16 - source.len()..])?;
+            Ok(u128::from_be_bytes(buf))
+        }
+    };
+    debug_assert!(source.len() == 0); // We should have consumed the source buffer.
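+    // (For intuition, a hypothetical 2-byte read: little-endian `[0x34, 0x12]` decodes to
+    // `0x1234`, while the same bytes read as big-endian decode to `0x3412`.)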
+    uint
+}
diff --git a/compiler/rustc_middle/src/mir/interpret/pointer.rs b/compiler/rustc_middle/src/mir/interpret/pointer.rs
new file mode 100644
index 000000000..384954cbb
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/interpret/pointer.rs
@@ -0,0 +1,307 @@
+use super::{AllocId, InterpResult};
+
+use rustc_macros::HashStable;
+use rustc_target::abi::{HasDataLayout, Size};
+
+use std::convert::{TryFrom, TryInto};
+use std::fmt;
+
+////////////////////////////////////////////////////////////////////////////////
+// Pointer arithmetic
+////////////////////////////////////////////////////////////////////////////////
+
+pub trait PointerArithmetic: HasDataLayout {
+    // These are not supposed to be overridden.
+
+    #[inline(always)]
+    fn pointer_size(&self) -> Size {
+        self.data_layout().pointer_size
+    }
+
+    #[inline(always)]
+    fn max_size_of_val(&self) -> Size {
+        Size::from_bytes(self.machine_isize_max())
+    }
+
+    #[inline]
+    fn machine_usize_max(&self) -> u64 {
+        self.pointer_size().unsigned_int_max().try_into().unwrap()
+    }
+
+    #[inline]
+    fn machine_isize_min(&self) -> i64 {
+        self.pointer_size().signed_int_min().try_into().unwrap()
+    }
+
+    #[inline]
+    fn machine_isize_max(&self) -> i64 {
+        self.pointer_size().signed_int_max().try_into().unwrap()
+    }
+
+    #[inline]
+    fn machine_usize_to_isize(&self, val: u64) -> i64 {
+        let val = val as i64;
+        // Now wrap-around into the machine_isize range.
+        if val > self.machine_isize_max() {
+            // This can only happen if the ptr size is < 64, so we know max_usize_plus_1 fits into
+            // i64.
+            debug_assert!(self.pointer_size().bits() < 64);
+            let max_usize_plus_1 = 1u128 << self.pointer_size().bits();
+            val - i64::try_from(max_usize_plus_1).unwrap()
+        } else {
+            val
+        }
+    }
+
+    /// Helper function: truncate given value-"overflowed flag" pair to pointer size and
+    /// update "overflowed flag" if there was an overflow.
+    /// This should be called by all the other methods before returning!
+    #[inline]
+    fn truncate_to_ptr(&self, (val, over): (u64, bool)) -> (u64, bool) {
+        let val = u128::from(val);
+        let max_ptr_plus_1 = 1u128 << self.pointer_size().bits();
+        (u64::try_from(val % max_ptr_plus_1).unwrap(), over || val >= max_ptr_plus_1)
+    }
+
+    #[inline]
+    fn overflowing_offset(&self, val: u64, i: u64) -> (u64, bool) {
+        // We do not need to check if i fits in a machine usize. If it doesn't,
+        // either the wrapping_add will wrap or res will not fit in a pointer.
+        let res = val.overflowing_add(i);
+        self.truncate_to_ptr(res)
+    }
+
+    #[inline]
+    fn overflowing_signed_offset(&self, val: u64, i: i64) -> (u64, bool) {
+        // We need to make sure that i fits in a machine isize.
+        let n = i.unsigned_abs();
+        if i >= 0 {
+            let (val, over) = self.overflowing_offset(val, n);
+            (val, over || i > self.machine_isize_max())
+        } else {
+            let res = val.overflowing_sub(n);
+            let (val, over) = self.truncate_to_ptr(res);
+            (val, over || i < self.machine_isize_min())
+        }
+    }
+
+    #[inline]
+    fn offset<'tcx>(&self, val: u64, i: u64) -> InterpResult<'tcx, u64> {
+        let (res, over) = self.overflowing_offset(val, i);
+        if over { throw_ub!(PointerArithOverflow) } else { Ok(res) }
+    }
+
+    #[inline]
+    fn signed_offset<'tcx>(&self, val: u64, i: i64) -> InterpResult<'tcx, u64> {
+        let (res, over) = self.overflowing_signed_offset(val, i);
+        if over { throw_ub!(PointerArithOverflow) } else { Ok(res) }
+    }
+}
+
+impl<T: HasDataLayout> PointerArithmetic for T {}
+
+/// This trait abstracts over the kind of provenance that is associated with a `Pointer`. It is
It is +/// mostly opaque; the `Machine` trait extends it with some more operations that also have access to +/// some global state. +/// We don't actually care about this `Debug` bound (we use `Provenance::fmt` to format the entire +/// pointer), but `derive` adds some unnecessary bounds. +pub trait Provenance: Copy + fmt::Debug { + /// Says whether the `offset` field of `Pointer`s with this provenance is the actual physical address. + /// If `true`, ptr-to-int casts work by simply discarding the provenance. + /// If `false`, ptr-to-int casts are not supported. The offset *must* be relative in that case. + const OFFSET_IS_ADDR: bool; + + /// We also use this trait to control whether to abort execution when a pointer is being partially overwritten + /// (this avoids a separate trait in `allocation.rs` just for this purpose). + const ERR_ON_PARTIAL_PTR_OVERWRITE: bool; + + /// Determines how a pointer should be printed. + fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result + where + Self: Sized; + + /// If `OFFSET_IS_ADDR == false`, provenance must always be able to + /// identify the allocation this ptr points to (i.e., this must return `Some`). + /// Otherwise this function is best-effort (but must agree with `Machine::ptr_get_alloc`). + /// (Identifying the offset in that allocation, however, is harder -- use `Memory::ptr_get_alloc` for that.) + fn get_alloc_id(self) -> Option<AllocId>; +} + +impl Provenance for AllocId { + // With the `AllocId` as provenance, the `offset` is interpreted *relative to the allocation*, + // so ptr-to-int casts are not possible (since we do not know the global physical offset). + const OFFSET_IS_ADDR: bool = false; + + // For now, do not allow this, so that we keep our options open. + const ERR_ON_PARTIAL_PTR_OVERWRITE: bool = true; + + fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Forward `alternate` flag to `alloc_id` printing. + if f.alternate() { + write!(f, "{:#?}", ptr.provenance)?; + } else { + write!(f, "{:?}", ptr.provenance)?; + } + // Print offset only if it is non-zero. + if ptr.offset.bytes() > 0 { + write!(f, "+{:#x}", ptr.offset.bytes())?; + } + Ok(()) + } + + fn get_alloc_id(self) -> Option<AllocId> { + Some(self) + } +} + +/// Represents a pointer in the Miri engine. +/// +/// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, TyEncodable, TyDecodable, Hash)] +#[derive(HashStable)] +pub struct Pointer<Prov = AllocId> { + pub(super) offset: Size, // kept private to avoid accidental misinterpretation (meaning depends on `Prov` type) + pub provenance: Prov, +} + +static_assert_size!(Pointer, 16); +// `Option<Prov>` pointers are also passed around quite a bit +// (but not stored in permanent machine state). +static_assert_size!(Pointer<Option<AllocId>>, 16); + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types.
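+// To illustrate the trait contract, a minimal sketch of an absolute-address provenance (a hypothetical type, in contrast to the relative `AllocId` impl above; it goes through the public `into_parts` since `offset` is private outside this module): +// +// #[derive(Copy, Clone, Debug)] +// struct AbsProv; +// +// impl Provenance for AbsProv { +//     const OFFSET_IS_ADDR: bool = true; // `offset` is the physical address +//     const ERR_ON_PARTIAL_PTR_OVERWRITE: bool = false; +//     fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result { +//         let (_prov, offset) = ptr.into_parts(); +//         write!(f, "{:#x}", offset.bytes()) +//     } +//     fn get_alloc_id(self) -> Option<AllocId> { +//         None // best-effort: this provenance does not track an allocation +//     } +// }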
+impl<Prov: Provenance> fmt::Debug for Pointer<Prov> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Provenance::fmt(self, f) + } +} + +impl<Prov: Provenance> fmt::Debug for Pointer<Option<Prov>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.provenance { + Some(prov) => Provenance::fmt(&Pointer::new(prov, self.offset), f), + None => write!(f, "{:#x}[noalloc]", self.offset.bytes()), + } + } +} + +impl<Prov: Provenance> fmt::Display for Pointer<Option<Prov>> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.provenance.is_none() && self.offset.bytes() == 0 { + write!(f, "null pointer") + } else { + fmt::Debug::fmt(self, f) + } + } +} + +/// Produces a `Pointer` that points to the beginning of the `Allocation`. +impl From<AllocId> for Pointer { + #[inline(always)] + fn from(alloc_id: AllocId) -> Self { + Pointer::new(alloc_id, Size::ZERO) + } +} + +impl<Prov> From<Pointer<Prov>> for Pointer<Option<Prov>> { + #[inline(always)] + fn from(ptr: Pointer<Prov>) -> Self { + let (prov, offset) = ptr.into_parts(); + Pointer::new(Some(prov), offset) + } +} + +impl<Prov> Pointer<Option<Prov>> { + /// Convert this pointer that *might* have a provenance into a pointer that *definitely* has a + /// provenance, or an absolute address. + /// + /// This is rarely what you want; call `ptr_try_get_alloc_id` instead. + pub fn into_pointer_or_addr(self) -> Result<Pointer<Prov>, Size> { + match self.provenance { + Some(prov) => Ok(Pointer::new(prov, self.offset)), + None => Err(self.offset), + } + } + + /// Returns the absolute address the pointer points to. + /// Only works if Prov::OFFSET_IS_ADDR is true! + pub fn addr(self) -> Size + where + Prov: Provenance, + { + assert!(Prov::OFFSET_IS_ADDR); + self.offset + } +} + +impl<Prov> Pointer<Option<Prov>> { + #[inline(always)] + pub fn from_addr(addr: u64) -> Self { + Pointer { provenance: None, offset: Size::from_bytes(addr) } + } + + #[inline(always)] + pub fn null() -> Self { + Pointer::from_addr(0) + } +} + +impl<'tcx, Prov> Pointer<Prov> { + #[inline(always)] + pub fn new(provenance: Prov, offset: Size) -> Self { + Pointer { provenance, offset } + } + + /// Obtain the constituents of this pointer. Note that the meaning of the offset depends on the type `Prov`! + /// This function must only be used in the implementation of `Machine::ptr_get_alloc`, + /// and when a `Pointer` is taken apart to be stored efficiently in an `Allocation`.
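+ // A quick sketch of how the constructors above compose (assumes some `alloc_id: AllocId` is in scope): + // + // let untagged: Pointer<Option<AllocId>> = Pointer::from_addr(0x1000); + // assert!(untagged.into_pointer_or_addr().is_err()); // just an address + // let tagged: Pointer<AllocId> = Pointer::new(alloc_id, Size::from_bytes(8)); + // let opt: Pointer<Option<AllocId>> = tagged.into(); // provenance preserved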
+ #[inline(always)] + pub fn into_parts(self) -> (Prov, Size) { + (self.provenance, self.offset) + } + + pub fn map_provenance(self, f: impl FnOnce(Prov) -> Prov) -> Self { + Pointer { provenance: f(self.provenance), ..self } + } + + #[inline] + pub fn offset(self, i: Size, cx: &impl HasDataLayout) -> InterpResult<'tcx, Self> { + Ok(Pointer { + offset: Size::from_bytes(cx.data_layout().offset(self.offset.bytes(), i.bytes())?), + ..self + }) + } + + #[inline] + pub fn overflowing_offset(self, i: Size, cx: &impl HasDataLayout) -> (Self, bool) { + let (res, over) = cx.data_layout().overflowing_offset(self.offset.bytes(), i.bytes()); + let ptr = Pointer { offset: Size::from_bytes(res), ..self }; + (ptr, over) + } + + #[inline(always)] + pub fn wrapping_offset(self, i: Size, cx: &impl HasDataLayout) -> Self { + self.overflowing_offset(i, cx).0 + } + + #[inline] + pub fn signed_offset(self, i: i64, cx: &impl HasDataLayout) -> InterpResult<'tcx, Self> { + Ok(Pointer { + offset: Size::from_bytes(cx.data_layout().signed_offset(self.offset.bytes(), i)?), + ..self + }) + } + + #[inline] + pub fn overflowing_signed_offset(self, i: i64, cx: &impl HasDataLayout) -> (Self, bool) { + let (res, over) = cx.data_layout().overflowing_signed_offset(self.offset.bytes(), i); + let ptr = Pointer { offset: Size::from_bytes(res), ..self }; + (ptr, over) + } + + #[inline(always)] + pub fn wrapping_signed_offset(self, i: i64, cx: &impl HasDataLayout) -> Self { + self.overflowing_signed_offset(i, cx).0 + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/queries.rs b/compiler/rustc_middle/src/mir/interpret/queries.rs new file mode 100644 index 000000000..786927e2d --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/queries.rs @@ -0,0 +1,217 @@ +use super::{ErrorHandled, EvalToConstValueResult, EvalToValTreeResult, GlobalId}; + +use crate::mir; +use crate::ty::subst::InternalSubsts; +use crate::ty::visit::TypeVisitable; +use crate::ty::{self, query::TyCtxtAt, query::TyCtxtEnsure, TyCtxt}; +use rustc_hir::def_id::DefId; +use rustc_span::{Span, DUMMY_SP}; + +impl<'tcx> TyCtxt<'tcx> { + /// Evaluates a constant without providing any substitutions. This is useful to evaluate consts + /// that can't take any generic arguments like statics, const items or enum discriminants. If a + /// generic parameter is used within the constant, `ErrorHandled::TooGeneric` will be returned. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_poly(self, def_id: DefId) -> EvalToConstValueResult<'tcx> { + // In some situations def_id will have substitutions within scope, but they aren't allowed + // to be used. So we can't use `Instance::mono`, instead we feed unresolved substitutions + // into `const_eval` which will return `ErrorHandled::TooGeneric` if any of them are + // encountered. + let substs = InternalSubsts::identity_for_item(self, def_id); + let instance = ty::Instance::new(def_id, substs); + let cid = GlobalId { instance, promoted: None }; + let param_env = self.param_env(def_id).with_reveal_all_normalized(self); + self.const_eval_global_id(param_env, cid, None) + } + /// Resolves and evaluates a constant. + /// + /// The constant can be located on a trait like `<T as Trait>::C`, in which case the given + /// substitutions and environment are used to resolve the constant. Alternatively, if the + /// constant has generic parameters in scope, the substitutions are used to evaluate the value of + /// the constant.
For example, in `fn foo<T>() { let _ = [0; bar::<T>()]; }` the repeat count + /// constant `bar::<T>()` requires a substitution for `T`. If the substitution for `T` is still + /// too generic for the constant to be evaluated then `Err(ErrorHandled::TooGeneric)` is + /// returned. + #[instrument(level = "debug", skip(self))] + pub fn const_eval_resolve( + self, + param_env: ty::ParamEnv<'tcx>, + ct: ty::Unevaluated<'tcx>, + span: Option<Span>, + ) -> EvalToConstValueResult<'tcx> { + // Cannot resolve `Unevaluated` constants that contain inference + // variables. We reject those here since `resolve_opt_const_arg` + // would fail otherwise. + // + // When trying to evaluate constants containing inference variables, + // use `InferCtxt::const_eval_resolve` instead. + if ct.substs.has_infer_types_or_consts() { + bug!("did not expect inference variables here"); + } + + match ty::Instance::resolve_opt_const_arg(self, param_env, ct.def, ct.substs) { + Ok(Some(instance)) => { + let cid = GlobalId { instance, promoted: ct.promoted }; + self.const_eval_global_id(param_env, cid, span) + } + Ok(None) => Err(ErrorHandled::TooGeneric), + Err(error_reported) => Err(ErrorHandled::Reported(error_reported)), + } + } + + #[instrument(level = "debug", skip(self))] + pub fn const_eval_resolve_for_typeck( + self, + param_env: ty::ParamEnv<'tcx>, + ct: ty::Unevaluated<'tcx>, + span: Option<Span>, + ) -> EvalToValTreeResult<'tcx> { + // Cannot resolve `Unevaluated` constants that contain inference + // variables. We reject those here since `resolve_opt_const_arg` + // would fail otherwise. + // + // When trying to evaluate constants containing inference variables, + // use `InferCtxt::const_eval_resolve` instead. + if ct.substs.has_infer_types_or_consts() { + bug!("did not expect inference variables here"); + } + + match ty::Instance::resolve_opt_const_arg(self, param_env, ct.def, ct.substs) { + Ok(Some(instance)) => { + let cid = GlobalId { instance, promoted: ct.promoted }; + self.const_eval_global_id_for_typeck(param_env, cid, span) + } + Ok(None) => Err(ErrorHandled::TooGeneric), + Err(error_reported) => Err(ErrorHandled::Reported(error_reported)), + } + } + + pub fn const_eval_instance( + self, + param_env: ty::ParamEnv<'tcx>, + instance: ty::Instance<'tcx>, + span: Option<Span>, + ) -> EvalToConstValueResult<'tcx> { + self.const_eval_global_id(param_env, GlobalId { instance, promoted: None }, span) + } + + /// Evaluate a constant to a `ConstValue`. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_global_id( + self, + param_env: ty::ParamEnv<'tcx>, + cid: GlobalId<'tcx>, + span: Option<Span>, + ) -> EvalToConstValueResult<'tcx> { + let param_env = param_env.with_const(); + // Const-eval shouldn't depend on lifetimes at all, so we can erase them, which should + // improve caching of queries. + let inputs = self.erase_regions(param_env.and(cid)); + if let Some(span) = span { + self.at(span).eval_to_const_value_raw(inputs) + } else { + self.eval_to_const_value_raw(inputs) + } + } + + /// Evaluate a constant to a type-level constant. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_global_id_for_typeck( + self, + param_env: ty::ParamEnv<'tcx>, + cid: GlobalId<'tcx>, + span: Option<Span>, + ) -> EvalToValTreeResult<'tcx> { + let param_env = param_env.with_const(); + debug!(?param_env); + // Const-eval shouldn't depend on lifetimes at all, so we can erase them, which should + // improve caching of queries.
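+ // Caller-side shape of the `const_eval_resolve*` entry points above (a sketch; assumes `tcx`, a `param_env`, and a fully-substituted `ct: ty::Unevaluated<'tcx>` are in scope): + // + // match tcx.const_eval_resolve(param_env, ct, None) { + //     Ok(val) => { /* use the `ConstValue` */ } + //     Err(ErrorHandled::TooGeneric) => { /* substs not concrete enough yet */ } + //     Err(_) => { /* an error has already been reported */ } + // }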
+ let inputs = self.erase_regions(param_env.and(cid)); + debug!(?inputs); + if let Some(span) = span { + self.at(span).eval_to_valtree(inputs) + } else { + self.eval_to_valtree(inputs) + } + } + + /// Evaluate a static's initializer, returning the allocation of the initializer's memory. + #[inline(always)] + pub fn eval_static_initializer( + self, + def_id: DefId, + ) -> Result<ConstAllocation<'tcx>, ErrorHandled> { + self.at(DUMMY_SP).eval_static_initializer(def_id) + } +} + +impl<'tcx> TyCtxtAt<'tcx> { + /// Evaluate a static's initializer, returning the allocation of the initializer's memory. + pub fn eval_static_initializer( + self, + def_id: DefId, + ) -> Result<ConstAllocation<'tcx>, ErrorHandled> { + trace!("eval_static_initializer: Need to compute {:?}", def_id); + assert!(self.is_static(def_id)); + let instance = ty::Instance::mono(*self, def_id); + let gid = GlobalId { instance, promoted: None }; + self.eval_to_allocation(gid, ty::ParamEnv::reveal_all()) + } + + /// Evaluate anything constant-like, returning the allocation of the final memory. + fn eval_to_allocation( + self, + gid: GlobalId<'tcx>, + param_env: ty::ParamEnv<'tcx>, + ) -> Result<ConstAllocation<'tcx>, ErrorHandled> { + let param_env = param_env.with_const(); + trace!("eval_to_allocation: Need to compute {:?}", gid); + let raw_const = self.eval_to_allocation_raw(param_env.and(gid))?; + Ok(self.global_alloc(raw_const.alloc_id).unwrap_memory()) + } +} + +impl<'tcx> TyCtxtEnsure<'tcx> { + /// Evaluates a constant without providing any substitutions. This is useful to evaluate consts + /// that can't take any generic arguments like statics, const items or enum discriminants. If a + /// generic parameter is used within the constant, `ErrorHandled::TooGeneric` will be returned. + #[instrument(skip(self), level = "debug")] + pub fn const_eval_poly(self, def_id: DefId) { + // In some situations def_id will have substitutions within scope, but they aren't allowed + // to be used. So we can't use `Instance::mono`, instead we feed unresolved substitutions + // into `const_eval` which will return `ErrorHandled::TooGeneric` if any of them are + // encountered. + let substs = InternalSubsts::identity_for_item(self.tcx, def_id); + let instance = ty::Instance::new(def_id, substs); + let cid = GlobalId { instance, promoted: None }; + let param_env = + self.tcx.param_env(def_id).with_reveal_all_normalized(self.tcx).with_const(); + // Const-eval shouldn't depend on lifetimes at all, so we can erase them, which should + // improve caching of queries. + let inputs = self.tcx.erase_regions(param_env.and(cid)); + self.eval_to_const_value_raw(inputs) + } + + /// Evaluate a static's initializer, returning the allocation of the initializer's memory. + pub fn eval_static_initializer(self, def_id: DefId) { + trace!("eval_static_initializer: Need to compute {:?}", def_id); + assert!(self.tcx.is_static(def_id)); + let instance = ty::Instance::mono(self.tcx, def_id); + let gid = GlobalId { instance, promoted: None }; + let param_env = ty::ParamEnv::reveal_all().with_const(); + trace!("eval_to_allocation: Need to compute {:?}", gid); + self.eval_to_allocation_raw(param_env.and(gid)) + } +} + +impl<'tcx> TyCtxt<'tcx> { + /// Destructure a MIR constant ADT or array into its variant index and its field values. + /// Panics if the destructuring fails; use `try_destructure_mir_constant` for the fallible version.
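+ // For example (a sketch; assumes `tcx`, a `param_env`, and a constant `c` of a two-field tuple type are in scope): + // + // let destructured = tcx.destructure_mir_constant(param_env, c); + // assert_eq!(destructured.fields.len(), 2);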
+ pub fn destructure_mir_constant( + self, + param_env: ty::ParamEnv<'tcx>, + constant: mir::ConstantKind<'tcx>, + ) -> mir::DestructuredMirConstant<'tcx> { + self.try_destructure_mir_constant(param_env.and(constant)).unwrap() + } +} diff --git a/compiler/rustc_middle/src/mir/interpret/value.rs b/compiler/rustc_middle/src/mir/interpret/value.rs new file mode 100644 index 000000000..834c114ee --- /dev/null +++ b/compiler/rustc_middle/src/mir/interpret/value.rs @@ -0,0 +1,651 @@ +use std::convert::{TryFrom, TryInto}; +use std::fmt; + +use rustc_apfloat::{ + ieee::{Double, Single}, + Float, +}; +use rustc_macros::HashStable; +use rustc_target::abi::{HasDataLayout, Size}; + +use crate::ty::{Lift, ParamEnv, ScalarInt, Ty, TyCtxt}; + +use super::{ + AllocId, AllocRange, ConstAllocation, InterpResult, Pointer, PointerArithmetic, Provenance, + ScalarSizeMismatch, +}; + +/// Represents the result of const evaluation via the `eval_to_allocation` query. +#[derive(Copy, Clone, HashStable, TyEncodable, TyDecodable, Debug, Hash, Eq, PartialEq)] +pub struct ConstAlloc<'tcx> { + // the value lives here, at offset 0, and that allocation definitely is an `AllocKind::Memory` + // (so you can use `AllocMap::unwrap_memory`). + pub alloc_id: AllocId, + pub ty: Ty<'tcx>, +} + +/// Represents a constant value in Rust. `Scalar` and `Slice` are optimizations for +/// array length computations, enum discriminants and the pattern matching logic. +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable, Hash)] +#[derive(HashStable)] +pub enum ConstValue<'tcx> { + /// Used only for types with `layout::abi::Scalar` ABI. + /// + /// Not using the enum `Value` to encode that this must not be `Uninit`. + Scalar(Scalar), + + /// Only used for ZSTs. + ZeroSized, + + /// Used only for `&[u8]` and `&str` + Slice { data: ConstAllocation<'tcx>, start: usize, end: usize }, + + /// A value not represented/representable by `Scalar` or `Slice` + ByRef { + /// The backing memory of the value, may contain more memory than needed for just the value + /// in order to share `ConstAllocation`s between values + alloc: ConstAllocation<'tcx>, + /// Offset into `alloc` + offset: Size, + }, +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(ConstValue<'_>, 32); + +impl<'a, 'tcx> Lift<'tcx> for ConstValue<'a> { + type Lifted = ConstValue<'tcx>; + fn lift_to_tcx(self, tcx: TyCtxt<'tcx>) -> Option> { + Some(match self { + ConstValue::Scalar(s) => ConstValue::Scalar(s), + ConstValue::ZeroSized => ConstValue::ZeroSized, + ConstValue::Slice { data, start, end } => { + ConstValue::Slice { data: tcx.lift(data)?, start, end } + } + ConstValue::ByRef { alloc, offset } => { + ConstValue::ByRef { alloc: tcx.lift(alloc)?, offset } + } + }) + } +} + +impl<'tcx> ConstValue<'tcx> { + #[inline] + pub fn try_to_scalar(&self) -> Option> { + match *self { + ConstValue::ByRef { .. } | ConstValue::Slice { .. 
} | ConstValue::ZeroSized => None, + ConstValue::Scalar(val) => Some(val), + } + } + + pub fn try_to_scalar_int(&self) -> Option { + Some(self.try_to_scalar()?.assert_int()) + } + + pub fn try_to_bits(&self, size: Size) -> Option { + self.try_to_scalar_int()?.to_bits(size).ok() + } + + pub fn try_to_bool(&self) -> Option { + self.try_to_scalar_int()?.try_into().ok() + } + + pub fn try_to_machine_usize(&self, tcx: TyCtxt<'tcx>) -> Option { + self.try_to_scalar_int()?.try_to_machine_usize(tcx).ok() + } + + pub fn try_to_bits_for_ty( + &self, + tcx: TyCtxt<'tcx>, + param_env: ParamEnv<'tcx>, + ty: Ty<'tcx>, + ) -> Option { + let size = tcx.layout_of(param_env.with_reveal_all_normalized(tcx).and(ty)).ok()?.size; + self.try_to_bits(size) + } + + pub fn from_bool(b: bool) -> Self { + ConstValue::Scalar(Scalar::from_bool(b)) + } + + pub fn from_u64(i: u64) -> Self { + ConstValue::Scalar(Scalar::from_u64(i)) + } + + pub fn from_machine_usize(i: u64, cx: &impl HasDataLayout) -> Self { + ConstValue::Scalar(Scalar::from_machine_usize(i, cx)) + } +} + +/// A `Scalar` represents an immediate, primitive value existing outside of a +/// `memory::Allocation`. It is in many ways like a small chunk of an `Allocation`, up to 16 bytes in +/// size. Like a range of bytes in an `Allocation`, a `Scalar` can either represent the raw bytes +/// of a simple value or a pointer into another `Allocation` +/// +/// These variants would be private if there was a convenient way to achieve that in Rust. +/// Do *not* match on a `Scalar`! Use the various `to_*` methods instead. +#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, TyEncodable, TyDecodable, Hash)] +#[derive(HashStable)] +pub enum Scalar { + /// The raw bytes of a simple value. + Int(ScalarInt), + + /// A pointer into an `Allocation`. An `Allocation` in the `memory` module has a list of + /// relocations, but a `Scalar` is only large enough to contain one, so we just represent the + /// relocation and its associated offset together as a `Pointer` here. + /// + /// We also store the size of the pointer, such that a `Scalar` always knows how big it is. + /// The size is always the pointer size of the current target, but this is not information + /// that we always have readily available. + Ptr(Pointer, u8), +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(Scalar, 24); + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types. 
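+// A few concrete data points for the conversions above (a sketch; `try_to_machine_usize` additionally needs a `tcx`, and the last line assumes a 64-bit target): +// +// let v = ConstValue::from_bool(true); +// assert_eq!(v.try_to_bits(Size::from_bytes(1)), Some(1)); +// assert_eq!(v.try_to_bool(), Some(true)); +// assert_eq!(ConstValue::from_u64(7).try_to_machine_usize(tcx), Some(7));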
+impl fmt::Debug for Scalar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Scalar::Ptr(ptr, _size) => write!(f, "{:?}", ptr), + Scalar::Int(int) => write!(f, "{:?}", int), + } + } +} + +impl fmt::Display for Scalar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Scalar::Ptr(ptr, _size) => write!(f, "pointer to {:?}", ptr), + Scalar::Int(int) => write!(f, "{}", int), + } + } +} + +impl fmt::LowerHex for Scalar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Scalar::Ptr(ptr, _size) => write!(f, "pointer to {:?}", ptr), + Scalar::Int(int) => write!(f, "{:#x}", int), + } + } +} + +impl From for Scalar { + #[inline(always)] + fn from(f: Single) -> Self { + Scalar::from_f32(f) + } +} + +impl From for Scalar { + #[inline(always)] + fn from(f: Double) -> Self { + Scalar::from_f64(f) + } +} + +impl From for Scalar { + #[inline(always)] + fn from(ptr: ScalarInt) -> Self { + Scalar::Int(ptr) + } +} + +impl Scalar { + #[inline(always)] + pub fn from_pointer(ptr: Pointer, cx: &impl HasDataLayout) -> Self { + Scalar::Ptr(ptr, u8::try_from(cx.pointer_size().bytes()).unwrap()) + } + + /// Create a Scalar from a pointer with an `Option<_>` provenance (where `None` represents a + /// plain integer / "invalid" pointer). + pub fn from_maybe_pointer(ptr: Pointer>, cx: &impl HasDataLayout) -> Self { + match ptr.into_parts() { + (Some(prov), offset) => Scalar::from_pointer(Pointer::new(prov, offset), cx), + (None, offset) => { + Scalar::Int(ScalarInt::try_from_uint(offset.bytes(), cx.pointer_size()).unwrap()) + } + } + } + + #[inline] + pub fn null_ptr(cx: &impl HasDataLayout) -> Self { + Scalar::Int(ScalarInt::null(cx.pointer_size())) + } + + #[inline] + pub fn from_bool(b: bool) -> Self { + Scalar::Int(b.into()) + } + + #[inline] + pub fn from_char(c: char) -> Self { + Scalar::Int(c.into()) + } + + #[inline] + pub fn try_from_uint(i: impl Into, size: Size) -> Option { + ScalarInt::try_from_uint(i, size).map(Scalar::Int) + } + + #[inline] + pub fn from_uint(i: impl Into, size: Size) -> Self { + let i = i.into(); + Self::try_from_uint(i, size) + .unwrap_or_else(|| bug!("Unsigned value {:#x} does not fit in {} bits", i, size.bits())) + } + + #[inline] + pub fn from_u8(i: u8) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_u16(i: u16) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_u32(i: u32) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_u64(i: u64) -> Self { + Scalar::Int(i.into()) + } + + #[inline] + pub fn from_machine_usize(i: u64, cx: &impl HasDataLayout) -> Self { + Self::from_uint(i, cx.data_layout().pointer_size) + } + + #[inline] + pub fn try_from_int(i: impl Into, size: Size) -> Option { + ScalarInt::try_from_int(i, size).map(Scalar::Int) + } + + #[inline] + pub fn from_int(i: impl Into, size: Size) -> Self { + let i = i.into(); + Self::try_from_int(i, size) + .unwrap_or_else(|| bug!("Signed value {:#x} does not fit in {} bits", i, size.bits())) + } + + #[inline] + pub fn from_i32(i: i32) -> Self { + Self::from_int(i, Size::from_bits(32)) + } + + #[inline] + pub fn from_i64(i: i64) -> Self { + Self::from_int(i, Size::from_bits(64)) + } + + #[inline] + pub fn from_machine_isize(i: i64, cx: &impl HasDataLayout) -> Self { + Self::from_int(i, cx.data_layout().pointer_size) + } + + #[inline] + pub fn from_f32(f: Single) -> Self { + Scalar::Int(f.into()) + } + + #[inline] + pub fn from_f64(f: Double) -> Self { + Scalar::Int(f.into()) + } + + /// This is 
almost certainly not the method you want! You should dispatch on the type + /// and use `to_{u8,u16,...}`/`scalar_to_ptr` to perform ptr-to-int / int-to-ptr casts as needed. + /// + /// This method only exists for the benefit of low-level operations that truly need to treat the + /// scalar in whatever form it is. + /// + /// This throws UB (instead of ICEing) on a size mismatch since size mismatches can arise in + /// Miri when someone declares a function that we shim (such as `malloc`) with a wrong type. + #[inline] + pub fn to_bits_or_ptr_internal( + self, + target_size: Size, + ) -> Result>, ScalarSizeMismatch> { + assert_ne!(target_size.bytes(), 0, "you should never look at the bits of a ZST"); + Ok(match self { + Scalar::Int(int) => Ok(int.to_bits(target_size).map_err(|size| { + ScalarSizeMismatch { target_size: target_size.bytes(), data_size: size.bytes() } + })?), + Scalar::Ptr(ptr, sz) => { + if target_size.bytes() != u64::from(sz) { + return Err(ScalarSizeMismatch { + target_size: target_size.bytes(), + data_size: sz.into(), + }); + } + Err(ptr) + } + }) + } +} + +impl<'tcx, Prov: Provenance> Scalar { + pub fn to_pointer(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, Pointer>> { + match self + .to_bits_or_ptr_internal(cx.pointer_size()) + .map_err(|s| err_ub!(ScalarSizeMismatch(s)))? + { + Err(ptr) => Ok(ptr.into()), + Ok(bits) => { + let addr = u64::try_from(bits).unwrap(); + Ok(Pointer::from_addr(addr)) + } + } + } + + /// Fundamental scalar-to-int (cast) operation. Many convenience wrappers exist below, that you + /// likely want to use instead. + /// + /// Will perform ptr-to-int casts if needed and possible. + /// If that fails, we know the offset is relative, so we return an "erased" Scalar + /// (which is useful for error messages but not much else). + #[inline] + pub fn try_to_int(self) -> Result> { + match self { + Scalar::Int(int) => Ok(int), + Scalar::Ptr(ptr, sz) => { + if Prov::OFFSET_IS_ADDR { + Ok(ScalarInt::try_from_uint(ptr.offset.bytes(), Size::from_bytes(sz)).unwrap()) + } else { + // We know `offset` is relative, since `OFFSET_IS_ADDR == false`. + let (prov, offset) = ptr.into_parts(); + // Because `OFFSET_IS_ADDR == false`, this unwrap can never fail. + Err(Scalar::Ptr(Pointer::new(prov.get_alloc_id().unwrap(), offset), sz)) + } + } + } + } + + #[inline(always)] + pub fn assert_int(self) -> ScalarInt { + self.try_to_int().unwrap() + } + + /// This throws UB (instead of ICEing) on a size mismatch since size mismatches can arise in + /// Miri when someone declares a function that we shim (such as `malloc`) with a wrong type. 
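+ // Concretely (a sketch): reading a scalar back at the right size succeeds, while a wrong size is reported as UB rather than an ICE: + // + // let s: Scalar = Scalar::from_u32(7); + // assert!(matches!(s.to_bits(Size::from_bits(32)), Ok(7))); + // assert!(s.to_bits(Size::from_bits(64)).is_err()); // size mismatch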
+ #[inline] + pub fn to_bits(self, target_size: Size) -> InterpResult<'tcx, u128> { + assert_ne!(target_size.bytes(), 0, "you should never look at the bits of a ZST"); + self.try_to_int().map_err(|_| err_unsup!(ReadPointerAsBytes))?.to_bits(target_size).map_err( + |size| { + err_ub!(ScalarSizeMismatch(ScalarSizeMismatch { + target_size: target_size.bytes(), + data_size: size.bytes(), + })) + .into() + }, + ) + } + + #[inline(always)] + pub fn assert_bits(self, target_size: Size) -> u128 { + self.to_bits(target_size).unwrap() + } + + pub fn to_bool(self) -> InterpResult<'tcx, bool> { + let val = self.to_u8()?; + match val { + 0 => Ok(false), + 1 => Ok(true), + _ => throw_ub!(InvalidBool(val)), + } + } + + pub fn to_char(self) -> InterpResult<'tcx, char> { + let val = self.to_u32()?; + match std::char::from_u32(val) { + Some(c) => Ok(c), + None => throw_ub!(InvalidChar(val)), + } + } + + /// Converts the scalar to produce an unsigned integer of the given size. + /// Fails if the scalar is a pointer. + #[inline] + pub fn to_uint(self, size: Size) -> InterpResult<'tcx, u128> { + self.to_bits(size) + } + + /// Converts the scalar to produce a `u8`. Fails if the scalar is a pointer. + pub fn to_u8(self) -> InterpResult<'tcx, u8> { + self.to_uint(Size::from_bits(8)).map(|v| u8::try_from(v).unwrap()) + } + + /// Converts the scalar to produce a `u16`. Fails if the scalar is a pointer. + pub fn to_u16(self) -> InterpResult<'tcx, u16> { + self.to_uint(Size::from_bits(16)).map(|v| u16::try_from(v).unwrap()) + } + + /// Converts the scalar to produce a `u32`. Fails if the scalar is a pointer. + pub fn to_u32(self) -> InterpResult<'tcx, u32> { + self.to_uint(Size::from_bits(32)).map(|v| u32::try_from(v).unwrap()) + } + + /// Converts the scalar to produce a `u64`. Fails if the scalar is a pointer. + pub fn to_u64(self) -> InterpResult<'tcx, u64> { + self.to_uint(Size::from_bits(64)).map(|v| u64::try_from(v).unwrap()) + } + + /// Converts the scalar to produce a `u128`. Fails if the scalar is a pointer. + pub fn to_u128(self) -> InterpResult<'tcx, u128> { + self.to_uint(Size::from_bits(128)) + } + + /// Converts the scalar to produce a machine-pointer-sized unsigned integer. + /// Fails if the scalar is a pointer. + pub fn to_machine_usize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, u64> { + let b = self.to_uint(cx.data_layout().pointer_size)?; + Ok(u64::try_from(b).unwrap()) + } + + /// Converts the scalar to produce a signed integer of the given size. + /// Fails if the scalar is a pointer. + #[inline] + pub fn to_int(self, size: Size) -> InterpResult<'tcx, i128> { + let b = self.to_bits(size)?; + Ok(size.sign_extend(b) as i128) + } + + /// Converts the scalar to produce an `i8`. Fails if the scalar is a pointer. + pub fn to_i8(self) -> InterpResult<'tcx, i8> { + self.to_int(Size::from_bits(8)).map(|v| i8::try_from(v).unwrap()) + } + + /// Converts the scalar to produce an `i16`. Fails if the scalar is a pointer. + pub fn to_i16(self) -> InterpResult<'tcx, i16> { + self.to_int(Size::from_bits(16)).map(|v| i16::try_from(v).unwrap()) + } + + /// Converts the scalar to produce an `i32`. Fails if the scalar is a pointer. + pub fn to_i32(self) -> InterpResult<'tcx, i32> { + self.to_int(Size::from_bits(32)).map(|v| i32::try_from(v).unwrap()) + } + + /// Converts the scalar to produce an `i64`. Fails if the scalar is a pointer. + pub fn to_i64(self) -> InterpResult<'tcx, i64> { + self.to_int(Size::from_bits(64)).map(|v| i64::try_from(v).unwrap()) + } + + /// Converts the scalar to produce an `i128`. 
Fails if the scalar is a pointer. + pub fn to_i128(self) -> InterpResult<'tcx, i128> { + self.to_int(Size::from_bits(128)) + } + + /// Converts the scalar to produce a machine-pointer-sized signed integer. + /// Fails if the scalar is a pointer. + pub fn to_machine_isize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, i64> { + let b = self.to_int(cx.data_layout().pointer_size)?; + Ok(i64::try_from(b).unwrap()) + } + + #[inline] + pub fn to_f32(self) -> InterpResult<'tcx, Single> { + // Going through `u32` to check size and truncation. + Ok(Single::from_bits(self.to_u32()?.into())) + } + + #[inline] + pub fn to_f64(self) -> InterpResult<'tcx, Double> { + // Going through `u64` to check size and truncation. + Ok(Double::from_bits(self.to_u64()?.into())) + } +} + +#[derive(Clone, Copy, Eq, PartialEq, TyEncodable, TyDecodable, HashStable, Hash)] +pub enum ScalarMaybeUninit { + Scalar(Scalar), + Uninit, +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(ScalarMaybeUninit, 24); + +impl From> for ScalarMaybeUninit { + #[inline(always)] + fn from(s: Scalar) -> Self { + ScalarMaybeUninit::Scalar(s) + } +} + +// We want the `Debug` output to be readable as it is used by `derive(Debug)` for +// all the Miri types. +impl fmt::Debug for ScalarMaybeUninit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ScalarMaybeUninit::Uninit => write!(f, ""), + ScalarMaybeUninit::Scalar(s) => write!(f, "{:?}", s), + } + } +} + +impl fmt::LowerHex for ScalarMaybeUninit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ScalarMaybeUninit::Uninit => write!(f, "uninitialized bytes"), + ScalarMaybeUninit::Scalar(s) => write!(f, "{:x}", s), + } + } +} + +impl ScalarMaybeUninit { + #[inline] + pub fn from_pointer(ptr: Pointer, cx: &impl HasDataLayout) -> Self { + ScalarMaybeUninit::Scalar(Scalar::from_pointer(ptr, cx)) + } + + #[inline] + pub fn from_maybe_pointer(ptr: Pointer>, cx: &impl HasDataLayout) -> Self { + ScalarMaybeUninit::Scalar(Scalar::from_maybe_pointer(ptr, cx)) + } + + #[inline] + pub fn check_init<'tcx>(self) -> InterpResult<'tcx, Scalar> { + match self { + ScalarMaybeUninit::Scalar(scalar) => Ok(scalar), + ScalarMaybeUninit::Uninit => throw_ub!(InvalidUninitBytes(None)), + } + } +} + +impl<'tcx, Prov: Provenance> ScalarMaybeUninit { + #[inline(always)] + pub fn to_pointer(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, Pointer>> { + self.check_init()?.to_pointer(cx) + } + + #[inline(always)] + pub fn to_bool(self) -> InterpResult<'tcx, bool> { + self.check_init()?.to_bool() + } + + #[inline(always)] + pub fn to_char(self) -> InterpResult<'tcx, char> { + self.check_init()?.to_char() + } + + #[inline(always)] + pub fn to_f32(self) -> InterpResult<'tcx, Single> { + self.check_init()?.to_f32() + } + + #[inline(always)] + pub fn to_f64(self) -> InterpResult<'tcx, Double> { + self.check_init()?.to_f64() + } + + #[inline(always)] + pub fn to_u8(self) -> InterpResult<'tcx, u8> { + self.check_init()?.to_u8() + } + + #[inline(always)] + pub fn to_u16(self) -> InterpResult<'tcx, u16> { + self.check_init()?.to_u16() + } + + #[inline(always)] + pub fn to_u32(self) -> InterpResult<'tcx, u32> { + self.check_init()?.to_u32() + } + + #[inline(always)] + pub fn to_u64(self) -> InterpResult<'tcx, u64> { + self.check_init()?.to_u64() + } + + #[inline(always)] + pub fn to_machine_usize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, u64> { + self.check_init()?.to_machine_usize(cx) + } + + #[inline(always)] + pub 
fn to_i8(self) -> InterpResult<'tcx, i8> { + self.check_init()?.to_i8() + } + + #[inline(always)] + pub fn to_i16(self) -> InterpResult<'tcx, i16> { + self.check_init()?.to_i16() + } + + #[inline(always)] + pub fn to_i32(self) -> InterpResult<'tcx, i32> { + self.check_init()?.to_i32() + } + + #[inline(always)] + pub fn to_i64(self) -> InterpResult<'tcx, i64> { + self.check_init()?.to_i64() + } + + #[inline(always)] + pub fn to_machine_isize(self, cx: &impl HasDataLayout) -> InterpResult<'tcx, i64> { + self.check_init()?.to_machine_isize(cx) + } +} + +/// Gets the bytes of a constant slice value. +pub fn get_slice_bytes<'tcx>(cx: &impl HasDataLayout, val: ConstValue<'tcx>) -> &'tcx [u8] { + if let ConstValue::Slice { data, start, end } = val { + let len = end - start; + data.inner() + .get_bytes( + cx, + AllocRange { start: Size::from_bytes(start), size: Size::from_bytes(len) }, + ) + .unwrap_or_else(|err| bug!("const slice is invalid: {:?}", err)) + } else { + bug!("expected const slice, but found another const value"); + } +} diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs new file mode 100644 index 000000000..7ab71f900 --- /dev/null +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -0,0 +1,2900 @@ +//! MIR datatypes and passes. See the [rustc dev guide] for more info. +//! +//! [rustc dev guide]: https://rustc-dev-guide.rust-lang.org/mir/index.html + +use crate::mir::interpret::{ + AllocRange, ConstAllocation, ConstValue, GlobalAlloc, LitToConstInput, Scalar, +}; +use crate::mir::visit::MirVisitable; +use crate::ty::codec::{TyDecoder, TyEncoder}; +use crate::ty::fold::{FallibleTypeFolder, TypeFoldable, TypeSuperFoldable}; +use crate::ty::print::{FmtPrinter, Printer}; +use crate::ty::subst::{GenericArg, InternalSubsts, Subst, SubstsRef}; +use crate::ty::visit::{TypeSuperVisitable, TypeVisitable, TypeVisitor}; +use crate::ty::{self, List, Ty, TyCtxt}; +use crate::ty::{AdtDef, InstanceDef, ScalarInt, UserTypeAnnotationIndex}; + +use rustc_data_structures::captures::Captures; +use rustc_errors::ErrorGuaranteed; +use rustc_hir::def::{CtorKind, Namespace}; +use rustc_hir::def_id::{DefId, LocalDefId, CRATE_DEF_ID}; +use rustc_hir::{self, GeneratorKind}; +use rustc_hir::{self as hir, HirId}; +use rustc_session::Session; +use rustc_target::abi::{Size, VariantIdx}; + +use polonius_engine::Atom; +pub use rustc_ast::Mutability; +use rustc_data_structures::fx::FxHashSet; +use rustc_data_structures::graph::dominators::Dominators; +use rustc_index::bit_set::BitMatrix; +use rustc_index::vec::{Idx, IndexVec}; +use rustc_serialize::{Decodable, Encodable}; +use rustc_span::symbol::Symbol; +use rustc_span::{Span, DUMMY_SP}; + +use either::Either; + +use std::borrow::Cow; +use std::convert::TryInto; +use std::fmt::{self, Debug, Display, Formatter, Write}; +use std::ops::{ControlFlow, Index, IndexMut}; +use std::{iter, mem}; + +pub use self::query::*; +pub use basic_blocks::BasicBlocks; + +mod basic_blocks; +pub mod coverage; +mod generic_graph; +pub mod generic_graphviz; +mod graph_cyclic_cache; +pub mod graphviz; +pub mod interpret; +pub mod mono; +pub mod patch; +mod predecessors; +pub mod pretty; +mod query; +pub mod spanview; +mod syntax; +pub use syntax::*; +mod switch_sources; +pub mod tcx; +pub mod terminator; +pub use terminator::*; + +pub mod traversal; +mod type_foldable; +mod type_visitable; +pub mod visit; + +pub use self::generic_graph::graphviz_safe_def_name; +pub use self::graphviz::write_mir_graphviz; +pub use self::pretty::{ + create_dump_file, 
display_allocation, dump_enabled, dump_mir, write_mir_pretty, PassWhere, +}; + +/// Types for locals +pub type LocalDecls<'tcx> = IndexVec>; + +pub trait HasLocalDecls<'tcx> { + fn local_decls(&self) -> &LocalDecls<'tcx>; +} + +impl<'tcx> HasLocalDecls<'tcx> for LocalDecls<'tcx> { + #[inline] + fn local_decls(&self) -> &LocalDecls<'tcx> { + self + } +} + +impl<'tcx> HasLocalDecls<'tcx> for Body<'tcx> { + #[inline] + fn local_decls(&self) -> &LocalDecls<'tcx> { + &self.local_decls + } +} + +/// A streamlined trait that you can implement to create a pass; the +/// pass will be named after the type, and it will consist of a main +/// loop that goes over each available MIR and applies `run_pass`. +pub trait MirPass<'tcx> { + fn name(&self) -> Cow<'_, str> { + let name = std::any::type_name::(); + if let Some(tail) = name.rfind(':') { + Cow::from(&name[tail + 1..]) + } else { + Cow::from(name) + } + } + + /// Returns `true` if this pass is enabled with the current combination of compiler flags. + fn is_enabled(&self, _sess: &Session) -> bool { + true + } + + fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>); + + /// If this pass causes the MIR to enter a new phase, return that phase. + fn phase_change(&self) -> Option { + None + } + + fn is_mir_dump_enabled(&self) -> bool { + true + } +} + +impl MirPhase { + /// Gets the index of the current MirPhase within the set of all `MirPhase`s. + pub fn phase_index(&self) -> usize { + *self as usize + } +} + +/// Where a specific `mir::Body` comes from. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(HashStable, TyEncodable, TyDecodable, TypeFoldable, TypeVisitable)] +pub struct MirSource<'tcx> { + pub instance: InstanceDef<'tcx>, + + /// If `Some`, this is a promoted rvalue within the parent function. + pub promoted: Option, +} + +impl<'tcx> MirSource<'tcx> { + pub fn item(def_id: DefId) -> Self { + MirSource { + instance: InstanceDef::Item(ty::WithOptConstParam::unknown(def_id)), + promoted: None, + } + } + + pub fn from_instance(instance: InstanceDef<'tcx>) -> Self { + MirSource { instance, promoted: None } + } + + pub fn with_opt_param(self) -> ty::WithOptConstParam { + self.instance.with_opt_param() + } + + #[inline] + pub fn def_id(&self) -> DefId { + self.instance.def_id() + } +} + +#[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable, TypeFoldable, TypeVisitable)] +pub struct GeneratorInfo<'tcx> { + /// The yield type of the function, if it is a generator. + pub yield_ty: Option>, + + /// Generator drop glue. + pub generator_drop: Option>, + + /// The layout of a generator. Produced by the state transformation. + pub generator_layout: Option>, + + /// If this is a generator then record the type of source expression that caused this generator + /// to be created. + pub generator_kind: GeneratorKind, +} + +/// The lowered representation of a single function. +#[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable, TypeFoldable, TypeVisitable)] +pub struct Body<'tcx> { + /// A list of basic blocks. References to basic block use a newtyped index type [`BasicBlock`] + /// that indexes into this vector. + pub basic_blocks: BasicBlocks<'tcx>, + + /// Records how far through the "desugaring and optimization" process this particular + /// MIR has traversed. This is particularly useful when inlining, since in that context + /// we instantiate the promoted constants and add them to our promoted vector -- but those + /// promoted items have already been optimized, whereas ours have not. 
This field allows + /// us to see the difference and forego optimization on the inlined promoted items. + pub phase: MirPhase, + + pub source: MirSource<'tcx>, + + /// A list of source scopes; these are referenced by statements + /// and used for debuginfo. Indexed by a `SourceScope`. + pub source_scopes: IndexVec<SourceScope, SourceScopeData<'tcx>>, + + pub generator: Option<Box<GeneratorInfo<'tcx>>>, + + /// Declarations of locals. + /// + /// The first local is the return value pointer, followed by `arg_count` + /// locals for the function arguments, followed by any user-declared + /// variables and temporaries. + pub local_decls: LocalDecls<'tcx>, + + /// User type annotations. + pub user_type_annotations: ty::CanonicalUserTypeAnnotations<'tcx>, + + /// The number of arguments this function takes. + /// + /// Starting at local 1, `arg_count` locals will be provided by the caller + /// and can be assumed to be initialized. + /// + /// If this MIR was built for a constant, this will be 0. + pub arg_count: usize, + + /// Mark an argument local (which must be a tuple) as getting passed as + /// its individual components at the LLVM level. + /// + /// This is used for the "rust-call" ABI. + pub spread_arg: Option<Local>, + + /// Debug information pertaining to user variables, including captures. + pub var_debug_info: Vec<VarDebugInfo<'tcx>>, + + /// A span representing this MIR, for error reporting. + pub span: Span, + + /// Constants that are required to evaluate successfully for this MIR to be well-formed. + /// We hold in this field all the constants we are not able to evaluate yet. + pub required_consts: Vec<Constant<'tcx>>, + + /// Does this body use generic parameters? This is used for the `ConstEvaluatable` check. + /// + /// Note that this does not actually mean that this body is not computable right now. + /// The repeat count in the following example is polymorphic, but can still be evaluated + /// without knowing anything about the type parameter `T`. + /// + /// ```rust + /// fn test<T>() { + /// let _ = [0; std::mem::size_of::<*mut T>()]; + /// } + /// ``` + /// + /// **WARNING**: Do not change this flag after the MIR was originally created, even if an optimization + /// removed the last mention of all generic params. We do not want to rely on optimizations and + /// potentially allow things like `[u8; std::mem::size_of::<T>() * 0]` due to this. + pub is_polymorphic: bool, + + pub tainted_by_errors: Option<ErrorGuaranteed>, +} + +impl<'tcx> Body<'tcx> { + pub fn new( + source: MirSource<'tcx>, + basic_blocks: IndexVec<BasicBlock, BasicBlockData<'tcx>>, + source_scopes: IndexVec<SourceScope, SourceScopeData<'tcx>>, + local_decls: LocalDecls<'tcx>, + user_type_annotations: ty::CanonicalUserTypeAnnotations<'tcx>, + arg_count: usize, + var_debug_info: Vec<VarDebugInfo<'tcx>>, + span: Span, + generator_kind: Option<GeneratorKind>, + tainted_by_errors: Option<ErrorGuaranteed>, + ) -> Self { + // We need `arg_count` locals, and one for the return place. + assert!( + local_decls.len() > arg_count, + "expected at least {} locals, got {}", + arg_count + 1, + local_decls.len() + ); + + let mut body = Body { + phase: MirPhase::Built, + source, + basic_blocks: BasicBlocks::new(basic_blocks), + source_scopes, + generator: generator_kind.map(|generator_kind| { + Box::new(GeneratorInfo { + yield_ty: None, + generator_drop: None, + generator_layout: None, + generator_kind, + }) + }), + local_decls, + user_type_annotations, + arg_count, + spread_arg: None, + var_debug_info, + span, + required_consts: Vec::new(), + is_polymorphic: false, + tainted_by_errors, + }; + body.is_polymorphic = body.has_param_types_or_consts(); + body + } + + /// Returns a partially initialized MIR body containing only a list of basic blocks.
+ /// + /// The returned MIR contains no `LocalDecl`s (even for the return place) or source scopes. It + /// is only useful for testing but cannot be `#[cfg(test)]` because it is used in a different + /// crate. + pub fn new_cfg_only(basic_blocks: IndexVec<BasicBlock, BasicBlockData<'tcx>>) -> Self { + let mut body = Body { + phase: MirPhase::Built, + source: MirSource::item(CRATE_DEF_ID.to_def_id()), + basic_blocks: BasicBlocks::new(basic_blocks), + source_scopes: IndexVec::new(), + generator: None, + local_decls: IndexVec::new(), + user_type_annotations: IndexVec::new(), + arg_count: 0, + spread_arg: None, + span: DUMMY_SP, + required_consts: Vec::new(), + var_debug_info: Vec::new(), + is_polymorphic: false, + tainted_by_errors: None, + }; + body.is_polymorphic = body.has_param_types_or_consts(); + body + } + + #[inline] + pub fn basic_blocks(&self) -> &IndexVec<BasicBlock, BasicBlockData<'tcx>> { + &self.basic_blocks + } + + #[inline] + pub fn basic_blocks_mut(&mut self) -> &mut IndexVec<BasicBlock, BasicBlockData<'tcx>> { + self.basic_blocks.as_mut() + } + + #[inline] + pub fn local_kind(&self, local: Local) -> LocalKind { + let index = local.as_usize(); + if index == 0 { + debug_assert!( + self.local_decls[local].mutability == Mutability::Mut, + "return place should be mutable" + ); + + LocalKind::ReturnPointer + } else if index < self.arg_count + 1 { + LocalKind::Arg + } else if self.local_decls[local].is_user_variable() { + LocalKind::Var + } else { + LocalKind::Temp + } + } + + /// Returns an iterator over all user-declared mutable locals. + #[inline] + pub fn mut_vars_iter<'a>(&'a self) -> impl Iterator<Item = Local> + Captures<'tcx> + 'a { + (self.arg_count + 1..self.local_decls.len()).filter_map(move |index| { + let local = Local::new(index); + let decl = &self.local_decls[local]; + if decl.is_user_variable() && decl.mutability == Mutability::Mut { + Some(local) + } else { + None + } + }) + } + + /// Returns an iterator over all user-declared mutable arguments and locals. + #[inline] + pub fn mut_vars_and_args_iter<'a>( + &'a self, + ) -> impl Iterator<Item = Local> + Captures<'tcx> + 'a { + (1..self.local_decls.len()).filter_map(move |index| { + let local = Local::new(index); + let decl = &self.local_decls[local]; + if (decl.is_user_variable() || index < self.arg_count + 1) + && decl.mutability == Mutability::Mut + { + Some(local) + } else { + None + } + }) + } + + /// Returns an iterator over all function arguments. + #[inline] + pub fn args_iter(&self) -> impl Iterator<Item = Local> + ExactSizeIterator { + (1..self.arg_count + 1).map(Local::new) + } + + /// Returns an iterator over all user-defined variables and compiler-generated temporaries (all + /// locals that are neither arguments nor the return place). + #[inline] + pub fn vars_and_temps_iter( + &self, + ) -> impl DoubleEndedIterator<Item = Local> + ExactSizeIterator { + (self.arg_count + 1..self.local_decls.len()).map(Local::new) + } + + #[inline] + pub fn drain_vars_and_temps<'a>(&'a mut self) -> impl Iterator<Item = LocalDecl<'tcx>> + 'a { + self.local_decls.drain(self.arg_count + 1..) + } + + /// Returns the source info associated with `location`. + pub fn source_info(&self, location: Location) -> &SourceInfo { + let block = &self[location.block]; + let stmts = &block.statements; + let idx = location.statement_index; + if idx < stmts.len() { + &stmts[idx].source_info + } else { + assert_eq!(idx, stmts.len()); + &block.terminator().source_info + } + } + + /// Returns the return type; it always returns the first element of the `local_decls` array.
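+ // The index-based layout of `local_decls`, for intuition (a sketch; assumes a `body: &Body<'tcx>`): + // + // assert_eq!(body.local_kind(RETURN_PLACE), LocalKind::ReturnPointer); + // for arg in body.args_iter() { + //     assert_eq!(body.local_kind(arg), LocalKind::Arg); // locals 1..=arg_count + // } + // for local in body.vars_and_temps_iter() { + //     assert!(matches!(body.local_kind(local), LocalKind::Var | LocalKind::Temp)); + // }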
+ #[inline] + pub fn return_ty(&self) -> Ty<'tcx> { + self.local_decls[RETURN_PLACE].ty + } + + /// Returns the return type; it always returns the first element of the `local_decls` array. + #[inline] + pub fn bound_return_ty(&self) -> ty::EarlyBinder<Ty<'tcx>> { + ty::EarlyBinder(self.local_decls[RETURN_PLACE].ty) + } + + /// Gets the location of the terminator for the given block. + #[inline] + pub fn terminator_loc(&self, bb: BasicBlock) -> Location { + Location { block: bb, statement_index: self[bb].statements.len() } + } + + pub fn stmt_at(&self, location: Location) -> Either<&Statement<'tcx>, &Terminator<'tcx>> { + let Location { block, statement_index } = location; + let block_data = &self.basic_blocks[block]; + block_data + .statements + .get(statement_index) + .map(Either::Left) + .unwrap_or_else(|| Either::Right(block_data.terminator())) + } + + #[inline] + pub fn yield_ty(&self) -> Option<Ty<'tcx>> { + self.generator.as_ref().and_then(|generator| generator.yield_ty) + } + + #[inline] + pub fn generator_layout(&self) -> Option<&GeneratorLayout<'tcx>> { + self.generator.as_ref().and_then(|generator| generator.generator_layout.as_ref()) + } + + #[inline] + pub fn generator_drop(&self) -> Option<&Body<'tcx>> { + self.generator.as_ref().and_then(|generator| generator.generator_drop.as_ref()) + } + + #[inline] + pub fn generator_kind(&self) -> Option<GeneratorKind> { + self.generator.as_ref().map(|generator| generator.generator_kind) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug, TyEncodable, TyDecodable, HashStable)] +pub enum Safety { + Safe, + /// Unsafe because of compiler-generated unsafe code, like `await` desugaring + BuiltinUnsafe, + /// Unsafe because of an unsafe fn + FnUnsafe, + /// Unsafe because of an `unsafe` block + ExplicitUnsafe(hir::HirId), +} + +impl<'tcx> Index<BasicBlock> for Body<'tcx> { + type Output = BasicBlockData<'tcx>; + + #[inline] + fn index(&self, index: BasicBlock) -> &BasicBlockData<'tcx> { + &self.basic_blocks()[index] + } +} + +impl<'tcx> IndexMut<BasicBlock> for Body<'tcx> { + #[inline] + fn index_mut(&mut self, index: BasicBlock) -> &mut BasicBlockData<'tcx> { + &mut self.basic_blocks.as_mut()[index] + } +} + +#[derive(Copy, Clone, Debug, HashStable, TypeFoldable, TypeVisitable)] +pub enum ClearCrossCrate<T> { + Clear, + Set(T), +} + +impl<T> ClearCrossCrate<T> { + pub fn as_ref(&self) -> ClearCrossCrate<&T> { + match self { + ClearCrossCrate::Clear => ClearCrossCrate::Clear, + ClearCrossCrate::Set(v) => ClearCrossCrate::Set(v), + } + } + + pub fn assert_crate_local(self) -> T { + match self { + ClearCrossCrate::Clear => bug!("unwrapping cross-crate data"), + ClearCrossCrate::Set(v) => v, + } + } +} + +const TAG_CLEAR_CROSS_CRATE_CLEAR: u8 = 0; +const TAG_CLEAR_CROSS_CRATE_SET: u8 = 1; + +impl<E: TyEncoder, T: Encodable<E>> Encodable<E> for ClearCrossCrate<T> { + #[inline] + fn encode(&self, e: &mut E) { + if E::CLEAR_CROSS_CRATE { + return; + } + + match *self { + ClearCrossCrate::Clear => TAG_CLEAR_CROSS_CRATE_CLEAR.encode(e), + ClearCrossCrate::Set(ref val) => { + TAG_CLEAR_CROSS_CRATE_SET.encode(e); + val.encode(e); + } + } + } +} +impl<D: TyDecoder, T: Decodable<D>> Decodable<D> for ClearCrossCrate<T> { + #[inline] + fn decode(d: &mut D) -> ClearCrossCrate<T> { + if D::CLEAR_CROSS_CRATE { + return ClearCrossCrate::Clear; + } + + let discr = u8::decode(d); + + match discr { + TAG_CLEAR_CROSS_CRATE_CLEAR => ClearCrossCrate::Clear, + TAG_CLEAR_CROSS_CRATE_SET => { + let val = T::decode(d); + ClearCrossCrate::Set(val) + } + tag => panic!("Invalid tag for ClearCrossCrate: {:?}", tag), + } + } +} + +/// Grouped information about the source code origin of a MIR entity.
+/// Intended to be inspected by diagnostics and debuginfo. +/// Most passes can work with it as a whole, within a single function. +// The unofficial Cranelift backend, at least as of #65828, needs `SourceInfo` to implement `Eq` and +// `Hash`. Please ping @bjorn3 if removing them. +#[derive(Copy, Clone, Debug, Eq, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)] +pub struct SourceInfo { + /// The source span for the AST pertaining to this MIR entity. + pub span: Span, + + /// The source scope, keeping track of which bindings can be + /// seen by debuginfo, active lint levels, `unsafe {...}`, etc. + pub scope: SourceScope, +} + +impl SourceInfo { + #[inline] + pub fn outermost(span: Span) -> Self { + SourceInfo { span, scope: OUTERMOST_SOURCE_SCOPE } + } +} + +/////////////////////////////////////////////////////////////////////////// +// Variables and temps + +rustc_index::newtype_index! { + pub struct Local { + derive [HashStable] + DEBUG_FORMAT = "_{}", + const RETURN_PLACE = 0, + } +} + +impl Atom for Local { + fn index(self) -> usize { + Idx::index(self) + } +} + +/// Classifies locals into categories. See `Body::local_kind`. +#[derive(Clone, Copy, PartialEq, Eq, Debug, HashStable)] +pub enum LocalKind { + /// User-declared variable binding. + Var, + /// Compiler-introduced temporary. + Temp, + /// Function argument. + Arg, + /// Location of function's return value. + ReturnPointer, +} + +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub struct VarBindingForm<'tcx> { + /// Is variable bound via `x`, `mut x`, `ref x`, or `ref mut x`? + pub binding_mode: ty::BindingMode, + /// If an explicit type was provided for this variable binding, + /// this holds the source Span of that type. + /// + /// NOTE: if you want to change this to a `HirId`, be wary that + /// doing so breaks incremental compilation (as of this writing), + /// while a `Span` does not cause our tests to fail. + pub opt_ty_info: Option, + /// Place of the RHS of the =, or the subject of the `match` where this + /// variable is initialized. None in the case of `let PATTERN;`. + /// Some((None, ..)) in the case of and `let [mut] x = ...` because + /// (a) the right-hand side isn't evaluated as a place expression. + /// (b) it gives a way to separate this case from the remaining cases + /// for diagnostics. + pub opt_match_place: Option<(Option>, Span)>, + /// The span of the pattern in which this variable was bound. + pub pat_span: Span, +} + +#[derive(Clone, Debug, TyEncodable, TyDecodable)] +pub enum BindingForm<'tcx> { + /// This is a binding for a non-`self` binding, or a `self` that has an explicit type. + Var(VarBindingForm<'tcx>), + /// Binding for a `self`/`&self`/`&mut self` binding where the type is implicit. + ImplicitSelf(ImplicitSelfKind), + /// Reference used in a guard expression to ensure immutability. + RefForGuard, +} + +/// Represents what type of implicit self a function has, if any. +#[derive(Clone, Copy, PartialEq, Debug, TyEncodable, TyDecodable, HashStable)] +pub enum ImplicitSelfKind { + /// Represents a `fn x(self);`. + Imm, + /// Represents a `fn x(mut self);`. + Mut, + /// Represents a `fn x(&self);`. + ImmRef, + /// Represents a `fn x(&mut self);`. + MutRef, + /// Represents when a function does not have a self argument or + /// when a function has a `self: X` argument. + None, +} + +TrivialTypeTraversalAndLiftImpls! 
{ BindingForm<'tcx>, } + +mod binding_form_impl { + use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; + use rustc_query_system::ich::StableHashingContext; + + impl<'a, 'tcx> HashStable> for super::BindingForm<'tcx> { + fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) { + use super::BindingForm::*; + std::mem::discriminant(self).hash_stable(hcx, hasher); + + match self { + Var(binding) => binding.hash_stable(hcx, hasher), + ImplicitSelf(kind) => kind.hash_stable(hcx, hasher), + RefForGuard => (), + } + } + } +} + +/// `BlockTailInfo` is attached to the `LocalDecl` for temporaries +/// created during evaluation of expressions in a block tail +/// expression; that is, a block like `{ STMT_1; STMT_2; EXPR }`. +/// +/// It is used to improve diagnostics when such temporaries are +/// involved in borrow_check errors, e.g., explanations of where the +/// temporaries come from, when their destructors are run, and/or how +/// one might revise the code to satisfy the borrow checker's rules. +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub struct BlockTailInfo { + /// If `true`, then the value resulting from evaluating this tail + /// expression is ignored by the block's expression context. + /// + /// Examples include `{ ...; tail };` and `let _ = { ...; tail };` + /// but not e.g., `let _x = { ...; tail };` + pub tail_result_is_ignored: bool, + + /// `Span` of the tail expression. + pub span: Span, +} + +/// A MIR local. +/// +/// This can be a binding declared by the user, a temporary inserted by the compiler, a function +/// argument, or the return place. +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct LocalDecl<'tcx> { + /// Whether this is a mutable binding (i.e., `let x` or `let mut x`). + /// + /// Temporaries and the return place are always mutable. + pub mutability: Mutability, + + // FIXME(matthewjasper) Don't store in this in `Body` + pub local_info: Option>>, + + /// `true` if this is an internal local. + /// + /// These locals are not based on types in the source code and are only used + /// for a few desugarings at the moment. + /// + /// The generator transformation will sanity check the locals which are live + /// across a suspension point against the type components of the generator + /// which type checking knows are live across a suspension point. We need to + /// flag drop flags to avoid triggering this check as they are introduced + /// outside of type inference. + /// + /// This should be sound because the drop flags are fully algebraic, and + /// therefore don't affect the auto-trait or outlives properties of the + /// generator. + pub internal: bool, + + /// If this local is a temporary and `is_block_tail` is `Some`, + /// then it is a temporary created for evaluation of some + /// subexpression of some block's tail expression (with no + /// intervening statement context). + // FIXME(matthewjasper) Don't store in this in `Body` + pub is_block_tail: Option, + + /// The type of this local. + pub ty: Ty<'tcx>, + + /// If the user manually ascribed a type to this variable, + /// e.g., via `let x: T`, then we carry that type here. The MIR + /// borrow checker needs this information since it can affect + /// region inference. + // FIXME(matthewjasper) Don't store in this in `Body` + pub user_ty: Option>, + + /// The *syntactic* (i.e., not visibility) source scope the local is defined + /// in. 
If the local was defined in a let-statement, this
+    /// is *within* the let-statement, rather than outside
+    /// of it.
+    ///
+    /// This is needed because the visibility source scope of locals within
+    /// a let-statement is weird.
+    ///
+    /// The reason is that we want the local to be *within* the let-statement
+    /// for lint purposes, but we want the local to be *after* the let-statement
+    /// for names-in-scope purposes.
+    ///
+    /// That is, if we have a let-statement like the one in this
+    /// function:
+    ///
+    /// ```
+    /// fn foo(x: &str) {
+    ///     #[allow(unused_mut)]
+    ///     let mut x: u32 = { // <- one unused mut
+    ///         let mut y: u32 = x.parse().unwrap();
+    ///         y + 2
+    ///     };
+    ///     drop(x);
+    /// }
+    /// ```
+    ///
+    /// Then, from a lint point of view, the declarations of `x: u32`
+    /// (and `y: u32`) are within the `#[allow(unused_mut)]` scope - the
+    /// lint scopes are the same as the AST/HIR nesting.
+    ///
+    /// However, from a name lookup point of view, the scopes look as if
+    /// the let-statements were `match` expressions:
+    ///
+    /// ```
+    /// fn foo(x: &str) {
+    ///     match {
+    ///         match x.parse::<u32>().unwrap() {
+    ///             y => y + 2
+    ///         }
+    ///     } {
+    ///         x => drop(x)
+    ///     };
+    /// }
+    /// ```
+    ///
+    /// We care about the name-lookup scopes for debuginfo - if the
+    /// debuginfo instruction pointer is at the call to `x.parse()`, we
+    /// want `x` to refer to `x: &str`, but if it is at the call to
+    /// `drop(x)`, we want it to refer to `x: u32`.
+    ///
+    /// To allow both uses to work, we need to have more than a single scope
+    /// for a local. We have the `source_info.scope` represent the "syntactic"
+    /// lint scope (with a variable being under its let block) while the
+    /// `var_debug_info.source_info.scope` represents the "local variable"
+    /// scope (where the "rest" of a block is under all prior let-statements).
+    ///
+    /// The end result looks like this:
+    ///
+    /// ```text
+    /// ROOT SCOPE
+    ///  │{ argument x: &str }
+    ///  │
+    ///  │ │{ #[allow(unused_mut)] } // This is actually split into 2 scopes
+    ///  │ │                         // in practice because I'm lazy.
+    ///  │ │
+    ///  │ │← x.source_info.scope
+    ///  │ │← `x.parse().unwrap()`
+    ///  │ │
+    ///  │ │ │← y.source_info.scope
+    ///  │ │
+    ///  │ │ │{ let y: u32 }
+    ///  │ │ │
+    ///  │ │ │← y.var_debug_info.source_info.scope
+    ///  │ │ │← `y + 2`
+    ///  │
+    ///  │ │{ let x: u32 }
+    ///  │ │← x.var_debug_info.source_info.scope
+    ///  │ │← `drop(x)` // This accesses `x: u32`.
+    /// ```
+    pub source_info: SourceInfo,
+}
+
+// `LocalDecl` is used a lot. Make sure it doesn't unintentionally get bigger.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+static_assert_size!(LocalDecl<'_>, 56);
+
+/// Extra information about some locals that's used for diagnostics and for
+/// classifying variables into local variables, statics, etc., which is needed e.g.
+/// for unsafety checking.
+///
+/// Not used for non-StaticRef temporaries, the return place, or anonymous
+/// function parameters.
+#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)]
+pub enum LocalInfo<'tcx> {
+    /// A user-defined local variable or function parameter
+    ///
+    /// The `BindingForm` is solely used for local diagnostics when generating
+    /// warnings/errors when compiling the current crate, and therefore it need
+    /// not be visible across crates.
+    User(ClearCrossCrate<BindingForm<'tcx>>),
+    /// A temporary created that references the static with the given `DefId`.
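+    /// (For illustration: the compiler-introduced temporary that holds the
+    /// `&FOO` borrow in an expression like `let r = &FOO;`, where `FOO` is a
+    /// `static`.)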
+    StaticRef { def_id: DefId, is_thread_local: bool },
+    /// A temporary created that references the const with the given `DefId`.
+    ConstRef { def_id: DefId },
+    /// A temporary created during the creation of an aggregate
+    /// (e.g. a temporary for `foo` in `MyStruct { my_field: foo }`).
+    AggregateTemp,
+    /// A temporary created during the `Derefer` pass to avoid its retagging.
+    DerefTemp,
+}
+
+impl<'tcx> LocalDecl<'tcx> {
+    /// Returns `true` only if the local is a binding that can itself be
+    /// made mutable via the addition of the `mut` keyword, namely
+    /// something like the occurrences of `x` in:
+    /// - `fn foo(x: Type) { ... }`,
+    /// - `let x = ...`,
+    /// - or `match ... { C(x) => ... }`
+    pub fn can_be_made_mutable(&self) -> bool {
+        matches!(
+            self.local_info,
+            Some(box LocalInfo::User(ClearCrossCrate::Set(
+                BindingForm::Var(VarBindingForm {
+                    binding_mode: ty::BindingMode::BindByValue(_),
+                    opt_ty_info: _,
+                    opt_match_place: _,
+                    pat_span: _,
+                }) | BindingForm::ImplicitSelf(ImplicitSelfKind::Imm),
+            )))
+        )
+    }
+
+    /// Returns `true` if the local is definitely not a `ref ident` or
+    /// `ref mut ident` binding. (Such bindings cannot be made into
+    /// mutable bindings, but the inverse does not necessarily hold.)
+    pub fn is_nonref_binding(&self) -> bool {
+        matches!(
+            self.local_info,
+            Some(box LocalInfo::User(ClearCrossCrate::Set(
+                BindingForm::Var(VarBindingForm {
+                    binding_mode: ty::BindingMode::BindByValue(_),
+                    opt_ty_info: _,
+                    opt_match_place: _,
+                    pat_span: _,
+                }) | BindingForm::ImplicitSelf(_),
+            )))
+        )
+    }
+
+    /// Returns `true` if this variable is a named variable or function
+    /// parameter declared by the user.
+    #[inline]
+    pub fn is_user_variable(&self) -> bool {
+        matches!(self.local_info, Some(box LocalInfo::User(_)))
+    }
+
+    /// Returns `true` if this is a reference to a variable bound in a `match`
+    /// expression that is used to access said variable for the guard of the
+    /// match arm.
+    pub fn is_ref_for_guard(&self) -> bool {
+        matches!(
+            self.local_info,
+            Some(box LocalInfo::User(ClearCrossCrate::Set(BindingForm::RefForGuard)))
+        )
+    }
+
+    /// Returns `true` if this is a reference to a static item that is used to
+    /// access that static.
+    pub fn is_ref_to_static(&self) -> bool {
+        matches!(self.local_info, Some(box LocalInfo::StaticRef { .. }))
+    }
+
+    /// Returns `true` if this is a reference to a thread-local static item that is used to
+    /// access that static.
+    pub fn is_ref_to_thread_local(&self) -> bool {
+        match self.local_info {
+            Some(box LocalInfo::StaticRef { is_thread_local, .. }) => is_thread_local,
+            _ => false,
+        }
+    }
+
+    /// Returns `true` if this is a `DerefTemp`.
+    pub fn is_deref_temp(&self) -> bool {
+        matches!(self.local_info, Some(box LocalInfo::DerefTemp))
+    }
+
+    /// Returns `true` if the local is from a compiler desugaring, e.g.,
+    /// `__next` from a `for` loop.
+    #[inline]
+    pub fn from_compiler_desugaring(&self) -> bool {
+        self.source_info.span.desugaring_kind().is_some()
+    }
+
+    /// Creates a new `LocalDecl` for a temporary: mutable, non-internal.
+    #[inline]
+    pub fn new(ty: Ty<'tcx>, span: Span) -> Self {
+        Self::with_source_info(ty, SourceInfo::outermost(span))
+    }
+
+    /// Like `LocalDecl::new`, but takes a `SourceInfo` instead of a `Span`.
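+    ///
+    /// A sketch of typical use together with the builder-style helpers below
+    /// (`tcx` and `span` are assumed to be in scope):
+    /// ```ignore (illustrative)
+    /// // A fresh, mutable temporary of type `bool`:
+    /// let tmp = LocalDecl::new(tcx.types.bool, span);
+    /// // The same declaration, tagged as compiler-internal and immutable:
+    /// let flag = LocalDecl::new(tcx.types.bool, span).internal().immutable();
+    /// ```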
+ #[inline] + pub fn with_source_info(ty: Ty<'tcx>, source_info: SourceInfo) -> Self { + LocalDecl { + mutability: Mutability::Mut, + local_info: None, + internal: false, + is_block_tail: None, + ty, + user_ty: None, + source_info, + } + } + + /// Converts `self` into same `LocalDecl` except tagged as internal. + #[inline] + pub fn internal(mut self) -> Self { + self.internal = true; + self + } + + /// Converts `self` into same `LocalDecl` except tagged as immutable. + #[inline] + pub fn immutable(mut self) -> Self { + self.mutability = Mutability::Not; + self + } + + /// Converts `self` into same `LocalDecl` except tagged as internal temporary. + #[inline] + pub fn block_tail(mut self, info: BlockTailInfo) -> Self { + assert!(self.is_block_tail.is_none()); + self.is_block_tail = Some(info); + self + } +} + +#[derive(Clone, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub enum VarDebugInfoContents<'tcx> { + /// NOTE(eddyb) There's an unenforced invariant that this `Place` is + /// based on a `Local`, not a `Static`, and contains no indexing. + Place(Place<'tcx>), + Const(Constant<'tcx>), +} + +impl<'tcx> Debug for VarDebugInfoContents<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + match self { + VarDebugInfoContents::Const(c) => write!(fmt, "{}", c), + VarDebugInfoContents::Place(p) => write!(fmt, "{:?}", p), + } + } +} + +/// Debug information pertaining to a user variable. +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct VarDebugInfo<'tcx> { + pub name: Symbol, + + /// Source info of the user variable, including the scope + /// within which the variable is visible (to debuginfo) + /// (see `LocalDecl`'s `source_info` field for more details). + pub source_info: SourceInfo, + + /// Where the data for this user variable is to be found. + pub value: VarDebugInfoContents<'tcx>, +} + +/////////////////////////////////////////////////////////////////////////// +// BasicBlock + +rustc_index::newtype_index! { + /// A node in the MIR [control-flow graph][CFG]. + /// + /// There are no branches (e.g., `if`s, function calls, etc.) within a basic block, which makes + /// it easier to do [data-flow analyses] and optimizations. Instead, branches are represented + /// as an edge in a graph between basic blocks. + /// + /// Basic blocks consist of a series of [statements][Statement], ending with a + /// [terminator][Terminator]. Basic blocks can have multiple predecessors and successors, + /// however there is a MIR pass ([`CriticalCallEdges`]) that removes *critical edges*, which + /// are edges that go from a multi-successor node to a multi-predecessor node. This pass is + /// needed because some analyses require that there are no critical edges in the CFG. + /// + /// Note that this type is just an index into [`Body.basic_blocks`](Body::basic_blocks); + /// the actual data that a basic block holds is in [`BasicBlockData`]. + /// + /// Read more about basic blocks in the [rustc-dev-guide][guide-mir]. 
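+    ///
+    /// A sketch of how the index is used (`body` is assumed to be a `&Body<'tcx>`):
+    /// ```ignore (illustrative)
+    /// let bb: BasicBlock = START_BLOCK;
+    /// let data: &BasicBlockData<'_> = &body.basic_blocks()[bb];
+    /// let terminator = data.terminator();
+    /// ```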
+ /// + /// [CFG]: https://rustc-dev-guide.rust-lang.org/appendix/background.html#cfg + /// [data-flow analyses]: + /// https://rustc-dev-guide.rust-lang.org/appendix/background.html#what-is-a-dataflow-analysis + /// [`CriticalCallEdges`]: ../../rustc_const_eval/transform/add_call_guards/enum.AddCallGuards.html#variant.CriticalCallEdges + /// [guide-mir]: https://rustc-dev-guide.rust-lang.org/mir/ + pub struct BasicBlock { + derive [HashStable] + DEBUG_FORMAT = "bb{}", + const START_BLOCK = 0, + } +} + +impl BasicBlock { + pub fn start_location(self) -> Location { + Location { block: self, statement_index: 0 } + } +} + +/////////////////////////////////////////////////////////////////////////// +// BasicBlockData + +/// Data for a basic block, including a list of its statements. +/// +/// See [`BasicBlock`] for documentation on what basic blocks are at a high level. +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct BasicBlockData<'tcx> { + /// List of statements in this block. + pub statements: Vec>, + + /// Terminator for this block. + /// + /// N.B., this should generally ONLY be `None` during construction. + /// Therefore, you should generally access it via the + /// `terminator()` or `terminator_mut()` methods. The only + /// exception is that certain passes, such as `simplify_cfg`, swap + /// out the terminator temporarily with `None` while they continue + /// to recurse over the set of basic blocks. + pub terminator: Option>, + + /// If true, this block lies on an unwind path. This is used + /// during codegen where distinct kinds of basic blocks may be + /// generated (particularly for MSVC cleanup). Unwind blocks must + /// only branch to other unwind blocks. + pub is_cleanup: bool, +} + +impl<'tcx> BasicBlockData<'tcx> { + pub fn new(terminator: Option>) -> BasicBlockData<'tcx> { + BasicBlockData { statements: vec![], terminator, is_cleanup: false } + } + + /// Accessor for terminator. + /// + /// Terminator may not be None after construction of the basic block is complete. This accessor + /// provides a convenient way to reach the terminator. + #[inline] + pub fn terminator(&self) -> &Terminator<'tcx> { + self.terminator.as_ref().expect("invalid terminator state") + } + + #[inline] + pub fn terminator_mut(&mut self) -> &mut Terminator<'tcx> { + self.terminator.as_mut().expect("invalid terminator state") + } + + pub fn retain_statements(&mut self, mut f: F) + where + F: FnMut(&mut Statement<'_>) -> bool, + { + for s in &mut self.statements { + if !f(s) { + s.make_nop(); + } + } + } + + pub fn expand_statements(&mut self, mut f: F) + where + F: FnMut(&mut Statement<'tcx>) -> Option, + I: iter::TrustedLen>, + { + // Gather all the iterators we'll need to splice in, and their positions. + let mut splices: Vec<(usize, I)> = vec![]; + let mut extra_stmts = 0; + for (i, s) in self.statements.iter_mut().enumerate() { + if let Some(mut new_stmts) = f(s) { + if let Some(first) = new_stmts.next() { + // We can already store the first new statement. + *s = first; + + // Save the other statements for optimized splicing. + let remaining = new_stmts.size_hint().0; + if remaining > 0 { + splices.push((i + 1 + extra_stmts, new_stmts)); + extra_stmts += remaining; + } + } else { + s.make_nop(); + } + } + } + + // Splice in the new statements, from the end of the block. 
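+        // The splices are applied in reverse order: each recorded position
+        // already accounts for the statements spliced in before it, so by
+        // filling the vector from the back no index is invalidated and each
+        // original statement is moved at most once.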
+ // FIXME(eddyb) This could be more efficient with a "gap buffer" + // where a range of elements ("gap") is left uninitialized, with + // splicing adding new elements to the end of that gap and moving + // existing elements from before the gap to the end of the gap. + // For now, this is safe code, emulating a gap but initializing it. + let mut gap = self.statements.len()..self.statements.len() + extra_stmts; + self.statements.resize( + gap.end, + Statement { source_info: SourceInfo::outermost(DUMMY_SP), kind: StatementKind::Nop }, + ); + for (splice_start, new_stmts) in splices.into_iter().rev() { + let splice_end = splice_start + new_stmts.size_hint().0; + while gap.end > splice_end { + gap.start -= 1; + gap.end -= 1; + self.statements.swap(gap.start, gap.end); + } + self.statements.splice(splice_start..splice_end, new_stmts); + gap.end = splice_start; + } + } + + pub fn visitable(&self, index: usize) -> &dyn MirVisitable<'tcx> { + if index < self.statements.len() { &self.statements[index] } else { &self.terminator } + } +} + +impl AssertKind { + /// Getting a description does not require `O` to be printable, and does not + /// require allocation. + /// The caller is expected to handle `BoundsCheck` separately. + pub fn description(&self) -> &'static str { + use AssertKind::*; + match self { + Overflow(BinOp::Add, _, _) => "attempt to add with overflow", + Overflow(BinOp::Sub, _, _) => "attempt to subtract with overflow", + Overflow(BinOp::Mul, _, _) => "attempt to multiply with overflow", + Overflow(BinOp::Div, _, _) => "attempt to divide with overflow", + Overflow(BinOp::Rem, _, _) => "attempt to calculate the remainder with overflow", + OverflowNeg(_) => "attempt to negate with overflow", + Overflow(BinOp::Shr, _, _) => "attempt to shift right with overflow", + Overflow(BinOp::Shl, _, _) => "attempt to shift left with overflow", + Overflow(op, _, _) => bug!("{:?} cannot overflow", op), + DivisionByZero(_) => "attempt to divide by zero", + RemainderByZero(_) => "attempt to calculate the remainder with a divisor of zero", + ResumedAfterReturn(GeneratorKind::Gen) => "generator resumed after completion", + ResumedAfterReturn(GeneratorKind::Async(_)) => "`async fn` resumed after completion", + ResumedAfterPanic(GeneratorKind::Gen) => "generator resumed after panicking", + ResumedAfterPanic(GeneratorKind::Async(_)) => "`async fn` resumed after panicking", + BoundsCheck { .. } => bug!("Unexpected AssertKind"), + } + } + + /// Format the message arguments for the `assert(cond, msg..)` terminator in MIR printing. 
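+    ///
+    /// For example, a `BoundsCheck` assertion renders roughly as (illustrative;
+    /// `_3` and `_4` stand in for the length and index operands):
+    /// ```text
+    /// "index out of bounds: the length is {} but the index is {}", _3, _4
+    /// ```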
+ pub fn fmt_assert_args(&self, f: &mut W) -> fmt::Result + where + O: Debug, + { + use AssertKind::*; + match self { + BoundsCheck { ref len, ref index } => write!( + f, + "\"index out of bounds: the length is {{}} but the index is {{}}\", {:?}, {:?}", + len, index + ), + + OverflowNeg(op) => { + write!(f, "\"attempt to negate `{{}}`, which would overflow\", {:?}", op) + } + DivisionByZero(op) => write!(f, "\"attempt to divide `{{}}` by zero\", {:?}", op), + RemainderByZero(op) => write!( + f, + "\"attempt to calculate the remainder of `{{}}` with a divisor of zero\", {:?}", + op + ), + Overflow(BinOp::Add, l, r) => write!( + f, + "\"attempt to compute `{{}} + {{}}`, which would overflow\", {:?}, {:?}", + l, r + ), + Overflow(BinOp::Sub, l, r) => write!( + f, + "\"attempt to compute `{{}} - {{}}`, which would overflow\", {:?}, {:?}", + l, r + ), + Overflow(BinOp::Mul, l, r) => write!( + f, + "\"attempt to compute `{{}} * {{}}`, which would overflow\", {:?}, {:?}", + l, r + ), + Overflow(BinOp::Div, l, r) => write!( + f, + "\"attempt to compute `{{}} / {{}}`, which would overflow\", {:?}, {:?}", + l, r + ), + Overflow(BinOp::Rem, l, r) => write!( + f, + "\"attempt to compute the remainder of `{{}} % {{}}`, which would overflow\", {:?}, {:?}", + l, r + ), + Overflow(BinOp::Shr, _, r) => { + write!(f, "\"attempt to shift right by `{{}}`, which would overflow\", {:?}", r) + } + Overflow(BinOp::Shl, _, r) => { + write!(f, "\"attempt to shift left by `{{}}`, which would overflow\", {:?}", r) + } + _ => write!(f, "\"{}\"", self.description()), + } + } +} + +impl fmt::Debug for AssertKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use AssertKind::*; + match self { + BoundsCheck { ref len, ref index } => write!( + f, + "index out of bounds: the length is {:?} but the index is {:?}", + len, index + ), + OverflowNeg(op) => write!(f, "attempt to negate `{:#?}`, which would overflow", op), + DivisionByZero(op) => write!(f, "attempt to divide `{:#?}` by zero", op), + RemainderByZero(op) => write!( + f, + "attempt to calculate the remainder of `{:#?}` with a divisor of zero", + op + ), + Overflow(BinOp::Add, l, r) => { + write!(f, "attempt to compute `{:#?} + {:#?}`, which would overflow", l, r) + } + Overflow(BinOp::Sub, l, r) => { + write!(f, "attempt to compute `{:#?} - {:#?}`, which would overflow", l, r) + } + Overflow(BinOp::Mul, l, r) => { + write!(f, "attempt to compute `{:#?} * {:#?}`, which would overflow", l, r) + } + Overflow(BinOp::Div, l, r) => { + write!(f, "attempt to compute `{:#?} / {:#?}`, which would overflow", l, r) + } + Overflow(BinOp::Rem, l, r) => write!( + f, + "attempt to compute the remainder of `{:#?} % {:#?}`, which would overflow", + l, r + ), + Overflow(BinOp::Shr, _, r) => { + write!(f, "attempt to shift right by `{:#?}`, which would overflow", r) + } + Overflow(BinOp::Shl, _, r) => { + write!(f, "attempt to shift left by `{:#?}`, which would overflow", r) + } + _ => write!(f, "{}", self.description()), + } + } +} + +/////////////////////////////////////////////////////////////////////////// +// Statements + +/// A statement in a basic block, including information about its source code. +#[derive(Clone, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct Statement<'tcx> { + pub source_info: SourceInfo, + pub kind: StatementKind<'tcx>, +} + +// `Statement` is used a lot. Make sure it doesn't unintentionally get bigger. 
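+// (The boxed payloads in `StatementKind` - e.g. `Assign(Box<(Place, Rvalue)>)` -
+// are what keep this small: large variants live behind an allocation.)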
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(Statement<'_>, 32); + +impl Statement<'_> { + /// Changes a statement to a nop. This is both faster than deleting instructions and avoids + /// invalidating statement indices in `Location`s. + pub fn make_nop(&mut self) { + self.kind = StatementKind::Nop + } + + /// Changes a statement to a nop and returns the original statement. + #[must_use = "If you don't need the statement, use `make_nop` instead"] + pub fn replace_nop(&mut self) -> Self { + Statement { + source_info: self.source_info, + kind: mem::replace(&mut self.kind, StatementKind::Nop), + } + } +} + +impl Debug for Statement<'_> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + use self::StatementKind::*; + match self.kind { + Assign(box (ref place, ref rv)) => write!(fmt, "{:?} = {:?}", place, rv), + FakeRead(box (ref cause, ref place)) => { + write!(fmt, "FakeRead({:?}, {:?})", cause, place) + } + Retag(ref kind, ref place) => write!( + fmt, + "Retag({}{:?})", + match kind { + RetagKind::FnEntry => "[fn entry] ", + RetagKind::TwoPhase => "[2phase] ", + RetagKind::Raw => "[raw] ", + RetagKind::Default => "", + }, + place, + ), + StorageLive(ref place) => write!(fmt, "StorageLive({:?})", place), + StorageDead(ref place) => write!(fmt, "StorageDead({:?})", place), + SetDiscriminant { ref place, variant_index } => { + write!(fmt, "discriminant({:?}) = {:?}", place, variant_index) + } + Deinit(ref place) => write!(fmt, "Deinit({:?})", place), + AscribeUserType(box (ref place, ref c_ty), ref variance) => { + write!(fmt, "AscribeUserType({:?}, {:?}, {:?})", place, variance, c_ty) + } + Coverage(box self::Coverage { ref kind, code_region: Some(ref rgn) }) => { + write!(fmt, "Coverage::{:?} for {:?}", kind, rgn) + } + Coverage(box ref coverage) => write!(fmt, "Coverage::{:?}", coverage.kind), + CopyNonOverlapping(box crate::mir::CopyNonOverlapping { + ref src, + ref dst, + ref count, + }) => { + write!(fmt, "copy_nonoverlapping(src={:?}, dst={:?}, count={:?})", src, dst, count) + } + Nop => write!(fmt, "nop"), + } + } +} + +impl<'tcx> StatementKind<'tcx> { + pub fn as_assign_mut(&mut self) -> Option<&mut (Place<'tcx>, Rvalue<'tcx>)> { + match self { + StatementKind::Assign(x) => Some(x), + _ => None, + } + } + + pub fn as_assign(&self) -> Option<&(Place<'tcx>, Rvalue<'tcx>)> { + match self { + StatementKind::Assign(x) => Some(x), + _ => None, + } + } +} + +/////////////////////////////////////////////////////////////////////////// +// Places + +impl ProjectionElem { + /// Returns `true` if the target of this projection may refer to a different region of memory + /// than the base. + fn is_indirect(&self) -> bool { + match self { + Self::Deref => true, + + Self::Field(_, _) + | Self::Index(_) + | Self::ConstantIndex { .. } + | Self::Subslice { .. } + | Self::Downcast(_, _) => false, + } + } + + /// Returns `true` if this is a `Downcast` projection with the given `VariantIdx`. + pub fn is_downcast_to(&self, v: VariantIdx) -> bool { + matches!(*self, Self::Downcast(_, x) if x == v) + } + + /// Returns `true` if this is a `Field` projection with the given index. + pub fn is_field_to(&self, f: Field) -> bool { + matches!(*self, Self::Field(x, _) if x == f) + } +} + +/// Alias for projections as they appear in `UserTypeProjection`, where we +/// need neither the `V` parameter for `Index` nor the `T` for `Field`. +pub type ProjectionKind = ProjectionElem<(), ()>; + +rustc_index::newtype_index! 
{
+    /// A [newtype'd][wrapper] index type in the MIR [control-flow graph][CFG].
+    ///
+    /// A field (e.g., `f` in `_1.f`) is one variant of [`ProjectionElem`]. Conceptually,
+    /// rustc can identify that a field projection refers to either two different regions of memory
+    /// or the same one between the base and the 'projection element'.
+    /// Read more about projections in the [rustc-dev-guide][mir-datatypes].
+    ///
+    /// [wrapper]: https://rustc-dev-guide.rust-lang.org/appendix/glossary.html#newtype
+    /// [CFG]: https://rustc-dev-guide.rust-lang.org/appendix/background.html#cfg
+    /// [mir-datatypes]: https://rustc-dev-guide.rust-lang.org/mir/index.html#mir-data-types
+    pub struct Field {
+        derive [HashStable]
+        DEBUG_FORMAT = "field[{}]"
+    }
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct PlaceRef<'tcx> {
+    pub local: Local,
+    pub projection: &'tcx [PlaceElem<'tcx>],
+}
+
+// Once we stop implementing `Ord` for `DefId`,
+// this impl will be unnecessary. Until then, we'll
+// leave this impl in place to prevent re-adding a
+// dependency on the `Ord` impl for `DefId`.
+impl<'tcx> !PartialOrd for PlaceRef<'tcx> {}
+
+impl<'tcx> Place<'tcx> {
+    // FIXME change this to a const fn by also making List::empty a const fn.
+    pub fn return_place() -> Place<'tcx> {
+        Place { local: RETURN_PLACE, projection: List::empty() }
+    }
+
+    /// Returns `true` if this `Place` contains a `Deref` projection.
+    ///
+    /// If `Place::is_indirect` returns false, the caller knows that the `Place` refers to the
+    /// same region of memory as its base.
+    pub fn is_indirect(&self) -> bool {
+        self.projection.iter().any(|elem| elem.is_indirect())
+    }
+
+    /// If `MirPhase >= Derefered` and the projection contains a `Deref`,
+    /// it is guaranteed to be the first projection.
+    pub fn has_deref(&self) -> bool {
+        // Make sure this is not accidentally used in the wrong MIR phase.
+        debug_assert!(!self.projection[1..].contains(&PlaceElem::Deref));
+        self.projection.first() == Some(&PlaceElem::Deref)
+    }
+
+    /// Finds the innermost `Local` from this `Place`, *if* it is either a local itself or
+    /// a single deref of a local.
+    #[inline(always)]
+    pub fn local_or_deref_local(&self) -> Option<Local> {
+        self.as_ref().local_or_deref_local()
+    }
+
+    /// If this place represents a local variable like `_X` with no
+    /// projections, return `Some(_X)`.
+    #[inline(always)]
+    pub fn as_local(&self) -> Option<Local> {
+        self.as_ref().as_local()
+    }
+
+    #[inline]
+    pub fn as_ref(&self) -> PlaceRef<'tcx> {
+        PlaceRef { local: self.local, projection: &self.projection }
+    }
+
+    /// Iterate over the projections in evaluation order, i.e., the first element is the base with
+    /// its projection and then subsequently more projections are added.
+    /// As a concrete example, given the place `a.b.c`, this would yield:
+    /// - (`a`, `.b`)
+    /// - (`a.b`, `.c`)
+    ///
+    /// Given a place without projections, the iterator is empty.
+    #[inline]
+    pub fn iter_projections(
+        self,
+    ) -> impl Iterator<Item = (PlaceRef<'tcx>, PlaceElem<'tcx>)> + DoubleEndedIterator {
+        self.as_ref().iter_projections()
+    }
+
+    /// Generates a new place by appending `more_projections` to the existing ones
+    /// and interning the result.
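+    ///
+    /// A sketch (the field and type arguments are illustrative):
+    /// ```ignore (illustrative)
+    /// // Given `place` for `_1`, build the place for `(*_1).0`:
+    /// let deeper = place.project_deeper(&[PlaceElem::Deref, PlaceElem::Field(f0, ty)], tcx);
+    /// ```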
+ pub fn project_deeper(self, more_projections: &[PlaceElem<'tcx>], tcx: TyCtxt<'tcx>) -> Self { + if more_projections.is_empty() { + return self; + } + + let mut v: Vec>; + + let new_projections = if self.projection.is_empty() { + more_projections + } else { + v = Vec::with_capacity(self.projection.len() + more_projections.len()); + v.extend(self.projection); + v.extend(more_projections); + &v + }; + + Place { local: self.local, projection: tcx.intern_place_elems(new_projections) } + } +} + +impl From for Place<'_> { + fn from(local: Local) -> Self { + Place { local, projection: List::empty() } + } +} + +impl<'tcx> PlaceRef<'tcx> { + /// Finds the innermost `Local` from this `Place`, *if* it is either a local itself or + /// a single deref of a local. + pub fn local_or_deref_local(&self) -> Option { + match *self { + PlaceRef { local, projection: [] } + | PlaceRef { local, projection: [ProjectionElem::Deref] } => Some(local), + _ => None, + } + } + + /// If MirPhase >= Derefered and if projection contains Deref, + /// It's guaranteed to be in the first place + pub fn has_deref(&self) -> bool { + self.projection.first() == Some(&PlaceElem::Deref) + } + + /// If this place represents a local variable like `_X` with no + /// projections, return `Some(_X)`. + #[inline] + pub fn as_local(&self) -> Option { + match *self { + PlaceRef { local, projection: [] } => Some(local), + _ => None, + } + } + + #[inline] + pub fn last_projection(&self) -> Option<(PlaceRef<'tcx>, PlaceElem<'tcx>)> { + if let &[ref proj_base @ .., elem] = self.projection { + Some((PlaceRef { local: self.local, projection: proj_base }, elem)) + } else { + None + } + } + + /// Iterate over the projections in evaluation order, i.e., the first element is the base with + /// its projection and then subsequently more projections are added. + /// As a concrete example, given the place a.b.c, this would yield: + /// - (a, .b) + /// - (a.b, .c) + /// + /// Given a place without projections, the iterator is empty. + #[inline] + pub fn iter_projections( + self, + ) -> impl Iterator, PlaceElem<'tcx>)> + DoubleEndedIterator { + self.projection.iter().enumerate().map(move |(i, proj)| { + let base = PlaceRef { local: self.local, projection: &self.projection[..i] }; + (base, *proj) + }) + } +} + +impl Debug for Place<'_> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + for elem in self.projection.iter().rev() { + match elem { + ProjectionElem::Downcast(_, _) | ProjectionElem::Field(_, _) => { + write!(fmt, "(").unwrap(); + } + ProjectionElem::Deref => { + write!(fmt, "(*").unwrap(); + } + ProjectionElem::Index(_) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. 
} => {} + } + } + + write!(fmt, "{:?}", self.local)?; + + for elem in self.projection.iter() { + match elem { + ProjectionElem::Downcast(Some(name), _index) => { + write!(fmt, " as {})", name)?; + } + ProjectionElem::Downcast(None, index) => { + write!(fmt, " as variant#{:?})", index)?; + } + ProjectionElem::Deref => { + write!(fmt, ")")?; + } + ProjectionElem::Field(field, ty) => { + write!(fmt, ".{:?}: {:?})", field.index(), ty)?; + } + ProjectionElem::Index(ref index) => { + write!(fmt, "[{:?}]", index)?; + } + ProjectionElem::ConstantIndex { offset, min_length, from_end: false } => { + write!(fmt, "[{:?} of {:?}]", offset, min_length)?; + } + ProjectionElem::ConstantIndex { offset, min_length, from_end: true } => { + write!(fmt, "[-{:?} of {:?}]", offset, min_length)?; + } + ProjectionElem::Subslice { from, to, from_end: true } if to == 0 => { + write!(fmt, "[{:?}:]", from)?; + } + ProjectionElem::Subslice { from, to, from_end: true } if from == 0 => { + write!(fmt, "[:-{:?}]", to)?; + } + ProjectionElem::Subslice { from, to, from_end: true } => { + write!(fmt, "[{:?}:-{:?}]", from, to)?; + } + ProjectionElem::Subslice { from, to, from_end: false } => { + write!(fmt, "[{:?}..{:?}]", from, to)?; + } + } + } + + Ok(()) + } +} + +/////////////////////////////////////////////////////////////////////////// +// Scopes + +rustc_index::newtype_index! { + pub struct SourceScope { + derive [HashStable] + DEBUG_FORMAT = "scope[{}]", + const OUTERMOST_SOURCE_SCOPE = 0, + } +} + +impl SourceScope { + /// Finds the original HirId this MIR item came from. + /// This is necessary after MIR optimizations, as otherwise we get a HirId + /// from the function that was inlined instead of the function call site. + pub fn lint_root<'tcx>( + self, + source_scopes: &IndexVec>, + ) -> Option { + let mut data = &source_scopes[self]; + // FIXME(oli-obk): we should be able to just walk the `inlined_parent_scope`, but it + // does not work as I thought it would. Needs more investigation and documentation. + while data.inlined.is_some() { + trace!(?data); + data = &source_scopes[data.parent_scope.unwrap()]; + } + trace!(?data); + match &data.local_data { + ClearCrossCrate::Set(data) => Some(data.lint_root), + ClearCrossCrate::Clear => None, + } + } + + /// The instance this source scope was inlined from, if any. + #[inline] + pub fn inlined_instance<'tcx>( + self, + source_scopes: &IndexVec>, + ) -> Option> { + let scope_data = &source_scopes[self]; + if let Some((inlined_instance, _)) = scope_data.inlined { + Some(inlined_instance) + } else if let Some(inlined_scope) = scope_data.inlined_parent_scope { + Some(source_scopes[inlined_scope].inlined.unwrap().0) + } else { + None + } + } +} + +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct SourceScopeData<'tcx> { + pub span: Span, + pub parent_scope: Option, + + /// Whether this scope is the root of a scope tree of another body, + /// inlined into this body by the MIR inliner. + /// `ty::Instance` is the callee, and the `Span` is the call site. + pub inlined: Option<(ty::Instance<'tcx>, Span)>, + + /// Nearest (transitive) parent scope (if any) which is inlined. + /// This is an optimization over walking up `parent_scope` + /// until a scope with `inlined: Some(...)` is found. + pub inlined_parent_scope: Option, + + /// Crate-local information for this source scope, that can't (and + /// needn't) be tracked across crates. 
+ pub local_data: ClearCrossCrate, +} + +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub struct SourceScopeLocalData { + /// An `HirId` with lint levels equivalent to this scope's lint levels. + pub lint_root: hir::HirId, + /// The unsafe block that contains this node. + pub safety: Safety, +} + +/////////////////////////////////////////////////////////////////////////// +// Operands + +impl<'tcx> Debug for Operand<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + use self::Operand::*; + match *self { + Constant(ref a) => write!(fmt, "{:?}", a), + Copy(ref place) => write!(fmt, "{:?}", place), + Move(ref place) => write!(fmt, "move {:?}", place), + } + } +} + +impl<'tcx> Operand<'tcx> { + /// Convenience helper to make a constant that refers to the fn + /// with given `DefId` and substs. Since this is used to synthesize + /// MIR, assumes `user_ty` is None. + pub fn function_handle( + tcx: TyCtxt<'tcx>, + def_id: DefId, + substs: SubstsRef<'tcx>, + span: Span, + ) -> Self { + let ty = tcx.bound_type_of(def_id).subst(tcx, substs); + Operand::Constant(Box::new(Constant { + span, + user_ty: None, + literal: ConstantKind::Val(ConstValue::ZeroSized, ty), + })) + } + + pub fn is_move(&self) -> bool { + matches!(self, Operand::Move(..)) + } + + /// Convenience helper to make a literal-like constant from a given scalar value. + /// Since this is used to synthesize MIR, assumes `user_ty` is None. + pub fn const_from_scalar( + tcx: TyCtxt<'tcx>, + ty: Ty<'tcx>, + val: Scalar, + span: Span, + ) -> Operand<'tcx> { + debug_assert!({ + let param_env_and_ty = ty::ParamEnv::empty().and(ty); + let type_size = tcx + .layout_of(param_env_and_ty) + .unwrap_or_else(|e| panic!("could not compute layout for {:?}: {:?}", ty, e)) + .size; + let scalar_size = match val { + Scalar::Int(int) => int.size(), + _ => panic!("Invalid scalar type {:?}", val), + }; + scalar_size == type_size + }); + Operand::Constant(Box::new(Constant { + span, + user_ty: None, + literal: ConstantKind::Val(ConstValue::Scalar(val), ty), + })) + } + + pub fn to_copy(&self) -> Self { + match *self { + Operand::Copy(_) | Operand::Constant(_) => self.clone(), + Operand::Move(place) => Operand::Copy(place), + } + } + + /// Returns the `Place` that is the target of this `Operand`, or `None` if this `Operand` is a + /// constant. + pub fn place(&self) -> Option> { + match self { + Operand::Copy(place) | Operand::Move(place) => Some(*place), + Operand::Constant(_) => None, + } + } + + /// Returns the `Constant` that is the target of this `Operand`, or `None` if this `Operand` is a + /// place. + pub fn constant(&self) -> Option<&Constant<'tcx>> { + match self { + Operand::Constant(x) => Some(&**x), + Operand::Copy(_) | Operand::Move(_) => None, + } + } + + /// Gets the `ty::FnDef` from an operand if it's a constant function item. + /// + /// While this is unlikely in general, it's the normal case of what you'll + /// find as the `func` in a [`TerminatorKind::Call`]. + pub fn const_fn_def(&self) -> Option<(DefId, SubstsRef<'tcx>)> { + let const_ty = self.constant()?.literal.ty(); + if let ty::FnDef(def_id, substs) = *const_ty.kind() { Some((def_id, substs)) } else { None } + } +} + +/////////////////////////////////////////////////////////////////////////// +/// Rvalues + +impl<'tcx> Rvalue<'tcx> { + /// Returns true if rvalue can be safely removed when the result is unused. 
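+    ///
+    /// A sketch of the intended use in a dead-store pass (`is_unused` is a
+    /// hypothetical helper, not a real API):
+    /// ```ignore (illustrative)
+    /// if let StatementKind::Assign(box (place, rvalue)) = &statement.kind {
+    ///     if is_unused(place) && rvalue.is_safe_to_remove() {
+    ///         statement.make_nop();
+    ///     }
+    /// }
+    /// ```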
+ #[inline] + pub fn is_safe_to_remove(&self) -> bool { + match self { + // Pointer to int casts may be side-effects due to exposing the provenance. + // While the model is undecided, we should be conservative. See + // + Rvalue::Cast(CastKind::PointerExposeAddress, _, _) => false, + + Rvalue::Use(_) + | Rvalue::CopyForDeref(_) + | Rvalue::Repeat(_, _) + | Rvalue::Ref(_, _, _) + | Rvalue::ThreadLocalRef(_) + | Rvalue::AddressOf(_, _) + | Rvalue::Len(_) + | Rvalue::Cast( + CastKind::Misc | CastKind::Pointer(_) | CastKind::PointerFromExposedAddress, + _, + _, + ) + | Rvalue::BinaryOp(_, _) + | Rvalue::CheckedBinaryOp(_, _) + | Rvalue::NullaryOp(_, _) + | Rvalue::UnaryOp(_, _) + | Rvalue::Discriminant(_) + | Rvalue::Aggregate(_, _) + | Rvalue::ShallowInitBox(_, _) => true, + } + } +} + +impl BorrowKind { + pub fn allows_two_phase_borrow(&self) -> bool { + match *self { + BorrowKind::Shared | BorrowKind::Shallow | BorrowKind::Unique => false, + BorrowKind::Mut { allow_two_phase_borrow } => allow_two_phase_borrow, + } + } + + pub fn describe_mutability(&self) -> &str { + match *self { + BorrowKind::Shared | BorrowKind::Shallow | BorrowKind::Unique => "immutable", + BorrowKind::Mut { .. } => "mutable", + } + } +} + +impl BinOp { + pub fn is_checkable(self) -> bool { + use self::BinOp::*; + matches!(self, Add | Sub | Mul | Shl | Shr) + } +} + +impl<'tcx> Debug for Rvalue<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + use self::Rvalue::*; + + match *self { + Use(ref place) => write!(fmt, "{:?}", place), + Repeat(ref a, b) => { + write!(fmt, "[{:?}; ", a)?; + pretty_print_const(b, fmt, false)?; + write!(fmt, "]") + } + Len(ref a) => write!(fmt, "Len({:?})", a), + Cast(ref kind, ref place, ref ty) => { + write!(fmt, "{:?} as {:?} ({:?})", place, ty, kind) + } + BinaryOp(ref op, box (ref a, ref b)) => write!(fmt, "{:?}({:?}, {:?})", op, a, b), + CheckedBinaryOp(ref op, box (ref a, ref b)) => { + write!(fmt, "Checked{:?}({:?}, {:?})", op, a, b) + } + UnaryOp(ref op, ref a) => write!(fmt, "{:?}({:?})", op, a), + Discriminant(ref place) => write!(fmt, "discriminant({:?})", place), + NullaryOp(ref op, ref t) => write!(fmt, "{:?}({:?})", op, t), + ThreadLocalRef(did) => ty::tls::with(|tcx| { + let muta = tcx.static_mutability(did).unwrap().prefix_str(); + write!(fmt, "&/*tls*/ {}{}", muta, tcx.def_path_str(did)) + }), + Ref(region, borrow_kind, ref place) => { + let kind_str = match borrow_kind { + BorrowKind::Shared => "", + BorrowKind::Shallow => "shallow ", + BorrowKind::Mut { .. } | BorrowKind::Unique => "mut ", + }; + + // When printing regions, add trailing space if necessary. 
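+                // (With `-Z identify-regions` or `-Z verbose`, this prints e.g.
+                // `&'1 mut _1`; otherwise the region is omitted and the output
+                // is just `&mut _1`.)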
+ let print_region = ty::tls::with(|tcx| { + tcx.sess.verbose() || tcx.sess.opts.unstable_opts.identify_regions + }); + let region = if print_region { + let mut region = region.to_string(); + if !region.is_empty() { + region.push(' '); + } + region + } else { + // Do not even print 'static + String::new() + }; + write!(fmt, "&{}{}{:?}", region, kind_str, place) + } + + CopyForDeref(ref place) => write!(fmt, "deref_copy {:#?}", place), + + AddressOf(mutability, ref place) => { + let kind_str = match mutability { + Mutability::Mut => "mut", + Mutability::Not => "const", + }; + + write!(fmt, "&raw {} {:?}", kind_str, place) + } + + Aggregate(ref kind, ref places) => { + let fmt_tuple = |fmt: &mut Formatter<'_>, name: &str| { + let mut tuple_fmt = fmt.debug_tuple(name); + for place in places { + tuple_fmt.field(place); + } + tuple_fmt.finish() + }; + + match **kind { + AggregateKind::Array(_) => write!(fmt, "{:?}", places), + + AggregateKind::Tuple => { + if places.is_empty() { + write!(fmt, "()") + } else { + fmt_tuple(fmt, "") + } + } + + AggregateKind::Adt(adt_did, variant, substs, _user_ty, _) => { + ty::tls::with(|tcx| { + let variant_def = &tcx.adt_def(adt_did).variant(variant); + let substs = tcx.lift(substs).expect("could not lift for printing"); + let name = FmtPrinter::new(tcx, Namespace::ValueNS) + .print_def_path(variant_def.def_id, substs)? + .into_buffer(); + + match variant_def.ctor_kind { + CtorKind::Const => fmt.write_str(&name), + CtorKind::Fn => fmt_tuple(fmt, &name), + CtorKind::Fictive => { + let mut struct_fmt = fmt.debug_struct(&name); + for (field, place) in iter::zip(&variant_def.fields, places) { + struct_fmt.field(field.name.as_str(), place); + } + struct_fmt.finish() + } + } + }) + } + + AggregateKind::Closure(def_id, substs) => ty::tls::with(|tcx| { + let name = if tcx.sess.opts.unstable_opts.span_free_formats { + let substs = tcx.lift(substs).unwrap(); + format!( + "[closure@{}]", + tcx.def_path_str_with_substs(def_id.to_def_id(), substs), + ) + } else { + let span = tcx.def_span(def_id); + format!( + "[closure@{}]", + tcx.sess.source_map().span_to_diagnostic_string(span) + ) + }; + let mut struct_fmt = fmt.debug_struct(&name); + + // FIXME(project-rfc-2229#48): This should be a list of capture names/places + if let Some(upvars) = tcx.upvars_mentioned(def_id) { + for (&var_id, place) in iter::zip(upvars.keys(), places) { + let var_name = tcx.hir().name(var_id); + struct_fmt.field(var_name.as_str(), place); + } + } + + struct_fmt.finish() + }), + + AggregateKind::Generator(def_id, _, _) => ty::tls::with(|tcx| { + let name = format!("[generator@{:?}]", tcx.def_span(def_id)); + let mut struct_fmt = fmt.debug_struct(&name); + + // FIXME(project-rfc-2229#48): This should be a list of capture names/places + if let Some(upvars) = tcx.upvars_mentioned(def_id) { + for (&var_id, place) in iter::zip(upvars.keys(), places) { + let var_name = tcx.hir().name(var_id); + struct_fmt.field(var_name.as_str(), place); + } + } + + struct_fmt.finish() + }), + } + } + + ShallowInitBox(ref place, ref ty) => { + write!(fmt, "ShallowInitBox({:?}, {:?})", place, ty) + } + } + } +} + +/////////////////////////////////////////////////////////////////////////// +/// Constants +/// +/// Two constants are equal if they are the same constant. Note that +/// this does not necessarily mean that they are `==` in Rust. In +/// particular, one must be wary of `NaN`! 
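+///
+/// For instance, two constants carrying the bit pattern of `f64::NAN` are the
+/// same constant, and therefore equal here, even though `NAN == NAN` evaluates
+/// to `false` in Rust.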
+ +#[derive(Clone, Copy, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)] +pub struct Constant<'tcx> { + pub span: Span, + + /// Optional user-given type: for something like + /// `collect::>`, this would be present and would + /// indicate that `Vec<_>` was explicitly specified. + /// + /// Needed for NLL to impose user-given type constraints. + pub user_ty: Option, + + pub literal: ConstantKind<'tcx>, +} + +#[derive(Clone, Copy, PartialEq, Eq, TyEncodable, TyDecodable, Hash, HashStable, Debug)] +#[derive(Lift)] +pub enum ConstantKind<'tcx> { + /// This constant came from the type system + Ty(ty::Const<'tcx>), + /// This constant cannot go back into the type system, as it represents + /// something the type system cannot handle (e.g. pointers). + Val(interpret::ConstValue<'tcx>, Ty<'tcx>), +} + +impl<'tcx> Constant<'tcx> { + pub fn check_static_ptr(&self, tcx: TyCtxt<'_>) -> Option { + match self.literal.try_to_scalar() { + Some(Scalar::Ptr(ptr, _size)) => match tcx.global_alloc(ptr.provenance) { + GlobalAlloc::Static(def_id) => { + assert!(!tcx.is_thread_local_static(def_id)); + Some(def_id) + } + _ => None, + }, + _ => None, + } + } + #[inline] + pub fn ty(&self) -> Ty<'tcx> { + self.literal.ty() + } +} + +impl<'tcx> ConstantKind<'tcx> { + /// Returns `None` if the constant is not trivially safe for use in the type system. + #[inline] + pub fn const_for_ty(&self) -> Option> { + match self { + ConstantKind::Ty(c) => Some(*c), + ConstantKind::Val(..) => None, + } + } + + #[inline(always)] + pub fn ty(&self) -> Ty<'tcx> { + match self { + ConstantKind::Ty(c) => c.ty(), + ConstantKind::Val(_, ty) => *ty, + } + } + + #[inline] + pub fn try_to_value(self, tcx: TyCtxt<'tcx>) -> Option> { + match self { + ConstantKind::Ty(c) => match c.kind() { + ty::ConstKind::Value(valtree) => Some(tcx.valtree_to_const_val((c.ty(), valtree))), + _ => None, + }, + ConstantKind::Val(val, _) => Some(val), + } + } + + #[inline] + pub fn try_to_scalar(self) -> Option { + match self { + ConstantKind::Ty(c) => match c.kind() { + ty::ConstKind::Value(valtree) => match valtree { + ty::ValTree::Leaf(scalar_int) => Some(Scalar::Int(scalar_int)), + ty::ValTree::Branch(_) => None, + }, + _ => None, + }, + ConstantKind::Val(val, _) => val.try_to_scalar(), + } + } + + #[inline] + pub fn try_to_scalar_int(self) -> Option { + Some(self.try_to_scalar()?.assert_int()) + } + + #[inline] + pub fn try_to_bits(self, size: Size) -> Option { + self.try_to_scalar_int()?.to_bits(size).ok() + } + + #[inline] + pub fn try_to_bool(self) -> Option { + self.try_to_scalar_int()?.try_into().ok() + } + + #[inline] + pub fn eval(self, tcx: TyCtxt<'tcx>, param_env: ty::ParamEnv<'tcx>) -> Self { + match self { + Self::Ty(c) => { + if let Some(val) = c.kind().try_eval_for_mir(tcx, param_env) { + match val { + Ok(val) => Self::Val(val, c.ty()), + Err(_) => Self::Ty(tcx.const_error(self.ty())), + } + } else { + self + } + } + Self::Val(_, _) => self, + } + } + + /// Panics if the value cannot be evaluated or doesn't contain a valid integer of the given type. 
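+    ///
+    /// A sketch, e.g. for reading an array length as a `u128` (`len_ct` is
+    /// assumed to be a `ConstantKind<'tcx>` of type `usize`):
+    /// ```ignore (illustrative)
+    /// let n: u128 = len_ct.eval_bits(tcx, param_env, tcx.types.usize);
+    /// // Prefer `try_eval_bits` when failure should be recoverable:
+    /// let maybe_n = len_ct.try_eval_bits(tcx, param_env, tcx.types.usize);
+    /// ```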
+ #[inline] + pub fn eval_bits(self, tcx: TyCtxt<'tcx>, param_env: ty::ParamEnv<'tcx>, ty: Ty<'tcx>) -> u128 { + self.try_eval_bits(tcx, param_env, ty) + .unwrap_or_else(|| bug!("expected bits of {:#?}, got {:#?}", ty, self)) + } + + #[inline] + pub fn try_eval_bits( + &self, + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + ty: Ty<'tcx>, + ) -> Option { + match self { + Self::Ty(ct) => ct.try_eval_bits(tcx, param_env, ty), + Self::Val(val, t) => { + assert_eq!(*t, ty); + let size = + tcx.layout_of(param_env.with_reveal_all_normalized(tcx).and(ty)).ok()?.size; + val.try_to_bits(size) + } + } + } + + #[inline] + pub fn try_eval_bool(&self, tcx: TyCtxt<'tcx>, param_env: ty::ParamEnv<'tcx>) -> Option { + match self { + Self::Ty(ct) => ct.try_eval_bool(tcx, param_env), + Self::Val(val, _) => val.try_to_bool(), + } + } + + #[inline] + pub fn try_eval_usize(&self, tcx: TyCtxt<'tcx>, param_env: ty::ParamEnv<'tcx>) -> Option { + match self { + Self::Ty(ct) => ct.try_eval_usize(tcx, param_env), + Self::Val(val, _) => val.try_to_machine_usize(tcx), + } + } + + #[inline] + pub fn from_value(val: ConstValue<'tcx>, ty: Ty<'tcx>) -> Self { + Self::Val(val, ty) + } + + pub fn from_bits( + tcx: TyCtxt<'tcx>, + bits: u128, + param_env_ty: ty::ParamEnvAnd<'tcx, Ty<'tcx>>, + ) -> Self { + let size = tcx + .layout_of(param_env_ty) + .unwrap_or_else(|e| { + bug!("could not compute layout for {:?}: {:?}", param_env_ty.value, e) + }) + .size; + let cv = ConstValue::Scalar(Scalar::from_uint(bits, size)); + + Self::Val(cv, param_env_ty.value) + } + + #[inline] + pub fn from_bool(tcx: TyCtxt<'tcx>, v: bool) -> Self { + let cv = ConstValue::from_bool(v); + Self::Val(cv, tcx.types.bool) + } + + #[inline] + pub fn zero_sized(ty: Ty<'tcx>) -> Self { + let cv = ConstValue::ZeroSized; + Self::Val(cv, ty) + } + + pub fn from_usize(tcx: TyCtxt<'tcx>, n: u64) -> Self { + let ty = tcx.types.usize; + Self::from_bits(tcx, n as u128, ty::ParamEnv::empty().and(ty)) + } + + #[inline] + pub fn from_scalar(_tcx: TyCtxt<'tcx>, s: Scalar, ty: Ty<'tcx>) -> Self { + let val = ConstValue::Scalar(s); + Self::Val(val, ty) + } + + /// Literals are converted to `ConstantKindVal`, const generic parameters are eagerly + /// converted to a constant, everything else becomes `Unevaluated`. + pub fn from_anon_const( + tcx: TyCtxt<'tcx>, + def_id: LocalDefId, + param_env: ty::ParamEnv<'tcx>, + ) -> Self { + Self::from_opt_const_arg_anon_const(tcx, ty::WithOptConstParam::unknown(def_id), param_env) + } + + #[instrument(skip(tcx), level = "debug")] + pub fn from_inline_const(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> Self { + let hir_id = tcx.hir().local_def_id_to_hir_id(def_id); + let body_id = match tcx.hir().get(hir_id) { + hir::Node::AnonConst(ac) => ac.body, + _ => span_bug!( + tcx.def_span(def_id.to_def_id()), + "from_inline_const can only process anonymous constants" + ), + }; + let expr = &tcx.hir().body(body_id).value; + let ty = tcx.typeck(def_id).node_type(hir_id); + + let lit_input = match expr.kind { + hir::ExprKind::Lit(ref lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: false }), + hir::ExprKind::Unary(hir::UnOp::Neg, ref expr) => match expr.kind { + hir::ExprKind::Lit(ref lit) => { + Some(LitToConstInput { lit: &lit.node, ty, neg: true }) + } + _ => None, + }, + _ => None, + }; + if let Some(lit_input) = lit_input { + // If an error occurred, ignore that it's a literal and leave reporting the error up to + // mir. 
+ match tcx.at(expr.span).lit_to_mir_constant(lit_input) { + Ok(c) => return c, + Err(_) => {} + } + } + + let typeck_root_def_id = tcx.typeck_root_def_id(def_id.to_def_id()); + let parent_substs = + tcx.erase_regions(InternalSubsts::identity_for_item(tcx, typeck_root_def_id)); + let substs = + ty::InlineConstSubsts::new(tcx, ty::InlineConstSubstsParts { parent_substs, ty }) + .substs; + let uneval_const = tcx.mk_const(ty::ConstS { + kind: ty::ConstKind::Unevaluated(ty::Unevaluated { + def: ty::WithOptConstParam::unknown(def_id).to_global(), + substs, + promoted: None, + }), + ty, + }); + debug!(?uneval_const); + debug_assert!(!uneval_const.has_free_regions()); + + Self::Ty(uneval_const) + } + + #[instrument(skip(tcx), level = "debug")] + fn from_opt_const_arg_anon_const( + tcx: TyCtxt<'tcx>, + def: ty::WithOptConstParam, + param_env: ty::ParamEnv<'tcx>, + ) -> Self { + let body_id = match tcx.hir().get_by_def_id(def.did) { + hir::Node::AnonConst(ac) => ac.body, + _ => span_bug!( + tcx.def_span(def.did.to_def_id()), + "from_anon_const can only process anonymous constants" + ), + }; + + let expr = &tcx.hir().body(body_id).value; + debug!(?expr); + + // Unwrap a block, so that e.g. `{ P }` is recognised as a parameter. Const arguments + // currently have to be wrapped in curly brackets, so it's necessary to special-case. + let expr = match &expr.kind { + hir::ExprKind::Block(block, _) if block.stmts.is_empty() && block.expr.is_some() => { + block.expr.as_ref().unwrap() + } + _ => expr, + }; + debug!("expr.kind: {:?}", expr.kind); + + let ty = tcx.type_of(def.def_id_for_type_of()); + debug!(?ty); + + // FIXME(const_generics): We currently have to special case parameters because `min_const_generics` + // does not provide the parents generics to anonymous constants. We still allow generic const + // parameters by themselves however, e.g. `N`. These constants would cause an ICE if we were to + // ever try to substitute the generic parameters in their bodies. + // + // While this doesn't happen as these constants are always used as `ty::ConstKind::Param`, it does + // cause issues if we were to remove that special-case and try to evaluate the constant instead. + use hir::{def::DefKind::ConstParam, def::Res, ExprKind, Path, QPath}; + match expr.kind { + ExprKind::Path(QPath::Resolved(_, &Path { res: Res::Def(ConstParam, def_id), .. })) => { + // Find the name and index of the const parameter by indexing the generics of + // the parent item and construct a `ParamConst`. 
+ let hir_id = tcx.hir().local_def_id_to_hir_id(def_id.expect_local()); + let item_id = tcx.hir().get_parent_node(hir_id); + let item_def_id = tcx.hir().local_def_id(item_id); + let generics = tcx.generics_of(item_def_id.to_def_id()); + let index = generics.param_def_id_to_index[&def_id]; + let name = tcx.hir().name(hir_id); + let ty_const = tcx.mk_const(ty::ConstS { + kind: ty::ConstKind::Param(ty::ParamConst::new(index, name)), + ty, + }); + debug!(?ty_const); + + return Self::Ty(ty_const); + } + _ => {} + } + + let hir_id = tcx.hir().local_def_id_to_hir_id(def.did); + let parent_substs = if let Some(parent_hir_id) = tcx.hir().find_parent_node(hir_id) { + if let Some(parent_did) = tcx.hir().opt_local_def_id(parent_hir_id) { + InternalSubsts::identity_for_item(tcx, parent_did.to_def_id()) + } else { + tcx.mk_substs(Vec::>::new().into_iter()) + } + } else { + tcx.mk_substs(Vec::>::new().into_iter()) + }; + debug!(?parent_substs); + + let did = def.did.to_def_id(); + let child_substs = InternalSubsts::identity_for_item(tcx, did); + let substs = tcx.mk_substs(parent_substs.into_iter().chain(child_substs.into_iter())); + debug!(?substs); + + let hir_id = tcx.hir().local_def_id_to_hir_id(def.did); + let span = tcx.hir().span(hir_id); + let uneval = ty::Unevaluated::new(def.to_global(), substs); + debug!(?span, ?param_env); + + match tcx.const_eval_resolve(param_env, uneval, Some(span)) { + Ok(val) => { + debug!("evaluated const value: {:?}", val); + Self::Val(val, ty) + } + Err(_) => { + debug!("error encountered during evaluation"); + // Error was handled in `const_eval_resolve`. Here we just create a + // new unevaluated const and error hard later in codegen + let ty_const = tcx.mk_const(ty::ConstS { + kind: ty::ConstKind::Unevaluated(ty::Unevaluated { + def: def.to_global(), + substs: InternalSubsts::identity_for_item(tcx, def.did.to_def_id()), + promoted: None, + }), + ty, + }); + debug!(?ty_const); + + Self::Ty(ty_const) + } + } + } + + pub fn from_const(c: ty::Const<'tcx>, tcx: TyCtxt<'tcx>) -> Self { + match c.kind() { + ty::ConstKind::Value(valtree) => { + let const_val = tcx.valtree_to_const_val((c.ty(), valtree)); + Self::Val(const_val, c.ty()) + } + _ => Self::Ty(c), + } + } +} + +/// A collection of projections into user types. +/// +/// They are projections because a binding can occur a part of a +/// parent pattern that has been ascribed a type. +/// +/// Its a collection because there can be multiple type ascriptions on +/// the path from the root of the pattern down to the binding itself. +/// +/// An example: +/// +/// ```ignore (illustrative) +/// struct S<'a>((i32, &'a str), String); +/// let S((_, w): (i32, &'static str), _): S = ...; +/// // ------ ^^^^^^^^^^^^^^^^^^^ (1) +/// // --------------------------------- ^ (2) +/// ``` +/// +/// The highlights labelled `(1)` show the subpattern `(_, w)` being +/// ascribed the type `(i32, &'static str)`. +/// +/// The highlights labelled `(2)` show the whole pattern being +/// ascribed the type `S`. +/// +/// In this example, when we descend to `w`, we will have built up the +/// following two projected types: +/// +/// * base: `S`, projection: `(base.0).1` +/// * base: `(i32, &'static str)`, projection: `base.1` +/// +/// The first will lead to the constraint `w: &'1 str` (for some +/// inferred region `'1`). The second will lead to the constraint `w: +/// &'static str`. 
+#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct UserTypeProjections { + pub contents: Vec<(UserTypeProjection, Span)>, +} + +impl<'tcx> UserTypeProjections { + pub fn none() -> Self { + UserTypeProjections { contents: vec![] } + } + + pub fn is_empty(&self) -> bool { + self.contents.is_empty() + } + + pub fn projections_and_spans( + &self, + ) -> impl Iterator + ExactSizeIterator { + self.contents.iter() + } + + pub fn projections(&self) -> impl Iterator + ExactSizeIterator { + self.contents.iter().map(|&(ref user_type, _span)| user_type) + } + + pub fn push_projection(mut self, user_ty: &UserTypeProjection, span: Span) -> Self { + self.contents.push((user_ty.clone(), span)); + self + } + + fn map_projections( + mut self, + mut f: impl FnMut(UserTypeProjection) -> UserTypeProjection, + ) -> Self { + self.contents = self.contents.into_iter().map(|(proj, span)| (f(proj), span)).collect(); + self + } + + pub fn index(self) -> Self { + self.map_projections(|pat_ty_proj| pat_ty_proj.index()) + } + + pub fn subslice(self, from: u64, to: u64) -> Self { + self.map_projections(|pat_ty_proj| pat_ty_proj.subslice(from, to)) + } + + pub fn deref(self) -> Self { + self.map_projections(|pat_ty_proj| pat_ty_proj.deref()) + } + + pub fn leaf(self, field: Field) -> Self { + self.map_projections(|pat_ty_proj| pat_ty_proj.leaf(field)) + } + + pub fn variant(self, adt_def: AdtDef<'tcx>, variant_index: VariantIdx, field: Field) -> Self { + self.map_projections(|pat_ty_proj| pat_ty_proj.variant(adt_def, variant_index, field)) + } +} + +/// Encodes the effect of a user-supplied type annotation on the +/// subcomponents of a pattern. The effect is determined by applying the +/// given list of projections to some underlying base type. Often, +/// the projection element list `projs` is empty, in which case this +/// directly encodes a type in `base`. But in the case of complex patterns with +/// subpatterns and bindings, we want to apply only a *part* of the type to a variable, +/// in which case the `projs` vector is used. +/// +/// Examples: +/// +/// * `let x: T = ...` -- here, the `projs` vector is empty. +/// +/// * `let (x, _): T = ...` -- here, the `projs` vector would contain +/// `field[0]` (aka `.0`), indicating that the type of `s` is +/// determined by finding the type of the `.0` field from `T`. +#[derive(Clone, Debug, TyEncodable, TyDecodable, Hash, HashStable, PartialEq)] +pub struct UserTypeProjection { + pub base: UserTypeAnnotationIndex, + pub projs: Vec, +} + +impl Copy for ProjectionKind {} + +impl UserTypeProjection { + pub(crate) fn index(mut self) -> Self { + self.projs.push(ProjectionElem::Index(())); + self + } + + pub(crate) fn subslice(mut self, from: u64, to: u64) -> Self { + self.projs.push(ProjectionElem::Subslice { from, to, from_end: true }); + self + } + + pub(crate) fn deref(mut self) -> Self { + self.projs.push(ProjectionElem::Deref); + self + } + + pub(crate) fn leaf(mut self, field: Field) -> Self { + self.projs.push(ProjectionElem::Field(field, ())); + self + } + + pub(crate) fn variant( + mut self, + adt_def: AdtDef<'_>, + variant_index: VariantIdx, + field: Field, + ) -> Self { + self.projs.push(ProjectionElem::Downcast( + Some(adt_def.variant(variant_index).name), + variant_index, + )); + self.projs.push(ProjectionElem::Field(field, ())); + self + } +} + +TrivialTypeTraversalAndLiftImpls! 
{ ProjectionKind, } + +impl<'tcx> TypeFoldable<'tcx> for UserTypeProjection { + fn try_fold_with>(self, folder: &mut F) -> Result { + Ok(UserTypeProjection { + base: self.base.try_fold_with(folder)?, + projs: self.projs.try_fold_with(folder)?, + }) + } +} + +impl<'tcx> TypeVisitable<'tcx> for UserTypeProjection { + fn visit_with>(&self, visitor: &mut Vs) -> ControlFlow { + self.base.visit_with(visitor) + // Note: there's nothing in `self.proj` to visit. + } +} + +rustc_index::newtype_index! { + pub struct Promoted { + derive [HashStable] + DEBUG_FORMAT = "promoted[{}]" + } +} + +impl<'tcx> Debug for Constant<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + write!(fmt, "{}", self) + } +} + +impl<'tcx> Display for Constant<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + match self.ty().kind() { + ty::FnDef(..) => {} + _ => write!(fmt, "const ")?, + } + Display::fmt(&self.literal, fmt) + } +} + +impl<'tcx> Display for ConstantKind<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + match *self { + ConstantKind::Ty(c) => pretty_print_const(c, fmt, true), + ConstantKind::Val(val, ty) => pretty_print_const_value(val, ty, fmt, true), + } + } +} + +fn pretty_print_const<'tcx>( + c: ty::Const<'tcx>, + fmt: &mut Formatter<'_>, + print_types: bool, +) -> fmt::Result { + use crate::ty::print::PrettyPrinter; + ty::tls::with(|tcx| { + let literal = tcx.lift(c).unwrap(); + let mut cx = FmtPrinter::new(tcx, Namespace::ValueNS); + cx.print_alloc_ids = true; + let cx = cx.pretty_print_const(literal, print_types)?; + fmt.write_str(&cx.into_buffer())?; + Ok(()) + }) +} + +fn pretty_print_byte_str(fmt: &mut Formatter<'_>, byte_str: &[u8]) -> fmt::Result { + fmt.write_str("b\"")?; + for &c in byte_str { + for e in std::ascii::escape_default(c) { + fmt.write_char(e as char)?; + } + } + fmt.write_str("\"")?; + + Ok(()) +} + +fn comma_sep<'tcx>(fmt: &mut Formatter<'_>, elems: Vec>) -> fmt::Result { + let mut first = true; + for elem in elems { + if !first { + fmt.write_str(", ")?; + } + fmt.write_str(&format!("{}", elem))?; + first = false; + } + Ok(()) +} + +// FIXME: Move that into `mir/pretty.rs`. +fn pretty_print_const_value<'tcx>( + ct: ConstValue<'tcx>, + ty: Ty<'tcx>, + fmt: &mut Formatter<'_>, + print_ty: bool, +) -> fmt::Result { + use crate::ty::print::PrettyPrinter; + + ty::tls::with(|tcx| { + let ct = tcx.lift(ct).unwrap(); + let ty = tcx.lift(ty).unwrap(); + + if tcx.sess.verbose() { + fmt.write_str(&format!("ConstValue({:?}: {})", ct, ty))?; + return Ok(()); + } + + let u8_type = tcx.types.u8; + match (ct, ty.kind()) { + // Byte/string slices, printed as (byte) string literals. + (ConstValue::Slice { data, start, end }, ty::Ref(_, inner, _)) => { + match inner.kind() { + ty::Slice(t) => { + if *t == u8_type { + // The `inspect` here is okay since we checked the bounds, and there are + // no relocations (we have an active slice reference here). We don't use + // this result to affect interpreter execution. + let byte_str = data + .inner() + .inspect_with_uninit_and_ptr_outside_interpreter(start..end); + pretty_print_byte_str(fmt, byte_str)?; + return Ok(()); + } + } + ty::Str => { + // The `inspect` here is okay since we checked the bounds, and there are no + // relocations (we have an active `str` reference here). We don't use this + // result to affect interpreter execution. 
+ let slice = data + .inner() + .inspect_with_uninit_and_ptr_outside_interpreter(start..end); + fmt.write_str(&format!("{:?}", String::from_utf8_lossy(slice)))?; + return Ok(()); + } + _ => {} + } + } + (ConstValue::ByRef { alloc, offset }, ty::Array(t, n)) if *t == u8_type => { + let n = n.kind().try_to_bits(tcx.data_layout.pointer_size).unwrap(); + // cast is ok because we already checked for pointer size (32 or 64 bit) above + let range = AllocRange { start: offset, size: Size::from_bytes(n) }; + let byte_str = alloc.inner().get_bytes(&tcx, range).unwrap(); + fmt.write_str("*")?; + pretty_print_byte_str(fmt, byte_str)?; + return Ok(()); + } + // Aggregates, printed as array/tuple/struct/variant construction syntax. + // + // NB: the `has_param_types_or_consts` check ensures that we can use + // the `destructure_const` query with an empty `ty::ParamEnv` without + // introducing ICEs (e.g. via `layout_of`) from missing bounds. + // E.g. `transmute([0usize; 2]): (u8, *mut T)` needs to know `T: Sized` + // to be able to destructure the tuple into `(0u8, *mut T) + // + // FIXME(eddyb) for `--emit=mir`/`-Z dump-mir`, we should provide the + // correct `ty::ParamEnv` to allow printing *all* constant values. + (_, ty::Array(..) | ty::Tuple(..) | ty::Adt(..)) if !ty.has_param_types_or_consts() => { + let ct = tcx.lift(ct).unwrap(); + let ty = tcx.lift(ty).unwrap(); + if let Some(contents) = tcx.try_destructure_mir_constant( + ty::ParamEnv::reveal_all().and(ConstantKind::Val(ct, ty)), + ) { + let fields = contents.fields.iter().copied().collect::>(); + match *ty.kind() { + ty::Array(..) => { + fmt.write_str("[")?; + comma_sep(fmt, fields)?; + fmt.write_str("]")?; + } + ty::Tuple(..) => { + fmt.write_str("(")?; + comma_sep(fmt, fields)?; + if contents.fields.len() == 1 { + fmt.write_str(",")?; + } + fmt.write_str(")")?; + } + ty::Adt(def, _) if def.variants().is_empty() => { + fmt.write_str(&format!("{{unreachable(): {}}}", ty))?; + } + ty::Adt(def, substs) => { + let variant_idx = contents + .variant + .expect("destructed mir constant of adt without variant idx"); + let variant_def = &def.variant(variant_idx); + let substs = tcx.lift(substs).unwrap(); + let mut cx = FmtPrinter::new(tcx, Namespace::ValueNS); + cx.print_alloc_ids = true; + let cx = cx.print_value_path(variant_def.def_id, substs)?; + fmt.write_str(&cx.into_buffer())?; + + match variant_def.ctor_kind { + CtorKind::Const => {} + CtorKind::Fn => { + fmt.write_str("(")?; + comma_sep(fmt, fields)?; + fmt.write_str(")")?; + } + CtorKind::Fictive => { + fmt.write_str(" {{ ")?; + let mut first = true; + for (field_def, field) in iter::zip(&variant_def.fields, fields) + { + if !first { + fmt.write_str(", ")?; + } + fmt.write_str(&format!("{}: {}", field_def.name, field))?; + first = false; + } + fmt.write_str(" }}")?; + } + } + } + _ => unreachable!(), + } + return Ok(()); + } else { + // Fall back to debug pretty printing for invalid constants. 
+ fmt.write_str(&format!("{:?}", ct))?; + if print_ty { + fmt.write_str(&format!(": {}", ty))?; + } + return Ok(()); + }; + } + (ConstValue::Scalar(scalar), _) => { + let mut cx = FmtPrinter::new(tcx, Namespace::ValueNS); + cx.print_alloc_ids = true; + let ty = tcx.lift(ty).unwrap(); + cx = cx.pretty_print_const_scalar(scalar, ty, print_ty)?; + fmt.write_str(&cx.into_buffer())?; + return Ok(()); + } + (ConstValue::ZeroSized, ty::FnDef(d, s)) => { + let mut cx = FmtPrinter::new(tcx, Namespace::ValueNS); + cx.print_alloc_ids = true; + let cx = cx.print_value_path(*d, s)?; + fmt.write_str(&cx.into_buffer())?; + return Ok(()); + } + // FIXME(oli-obk): also pretty print arrays and other aggregate constants by reading + // their fields instead of just dumping the memory. + _ => {} + } + // fallback + fmt.write_str(&format!("{:?}", ct))?; + if print_ty { + fmt.write_str(&format!(": {}", ty))?; + } + Ok(()) + }) +} + +/// `Location` represents the position of the start of the statement; or, if +/// `statement_index` equals the number of statements, then the start of the +/// terminator. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, HashStable)] +pub struct Location { + /// The block that the location is within. + pub block: BasicBlock, + + pub statement_index: usize, +} + +impl fmt::Debug for Location { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "{:?}[{}]", self.block, self.statement_index) + } +} + +impl Location { + pub const START: Location = Location { block: START_BLOCK, statement_index: 0 }; + + /// Returns the location immediately after this one within the enclosing block. + /// + /// Note that if this location represents a terminator, then the + /// resulting location would be out of bounds and invalid. + pub fn successor_within_block(&self) -> Location { + Location { block: self.block, statement_index: self.statement_index + 1 } + } + + /// Returns `true` if `other` is earlier in the control flow graph than `self`. + pub fn is_predecessor_of<'tcx>(&self, other: Location, body: &Body<'tcx>) -> bool { + // If we are in the same block as the other location and are an earlier statement + // then we are a predecessor of `other`. + if self.block == other.block && self.statement_index < other.statement_index { + return true; + } + + let predecessors = body.basic_blocks.predecessors(); + + // If we're in another block, then we want to check that block is a predecessor of `other`. + let mut queue: Vec = predecessors[other.block].to_vec(); + let mut visited = FxHashSet::default(); + + while let Some(block) = queue.pop() { + // If we haven't visited this block before, then make sure we visit its predecessors. + if visited.insert(block) { + queue.extend(predecessors[block].iter().cloned()); + } else { + continue; + } + + // If we found the block that `self` is in, then we are a predecessor of `other` (since + // we found that block by looking at the predecessors of `other`). 
+ if self.block == block { + return true; + } + } + + false + } + + pub fn dominates(&self, other: Location, dominators: &Dominators) -> bool { + if self.block == other.block { + self.statement_index <= other.statement_index + } else { + dominators.is_dominated_by(other.block, self.block) + } + } +} diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs new file mode 100644 index 000000000..21ae121e1 --- /dev/null +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -0,0 +1,527 @@ +use crate::dep_graph::{DepNode, WorkProduct, WorkProductId}; +use crate::ty::{subst::InternalSubsts, Instance, InstanceDef, SymbolName, TyCtxt}; +use rustc_attr::InlineAttr; +use rustc_data_structures::base_n; +use rustc_data_structures::fingerprint::Fingerprint; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE}; +use rustc_hir::ItemId; +use rustc_index::vec::Idx; +use rustc_query_system::ich::StableHashingContext; +use rustc_session::config::OptLevel; +use rustc_span::source_map::Span; +use rustc_span::symbol::Symbol; +use std::fmt; +use std::hash::Hash; + +/// Describes how a monomorphization will be instantiated in object files. +#[derive(PartialEq)] +pub enum InstantiationMode { + /// There will be exactly one instance of the given MonoItem. It will have + /// external linkage so that it can be linked to from other codegen units. + GloballyShared { + /// In some compilation scenarios we may decide to take functions that + /// are typically `LocalCopy` and instead move them to `GloballyShared` + /// to avoid codegenning them a bunch of times. In this situation, + /// however, our local copy may conflict with other crates also + /// inlining the same function. + /// + /// This flag indicates that this situation is occurring, and informs + /// symbol name calculation that some extra mangling is needed to + /// avoid conflicts. Note that this may eventually go away entirely if + /// ThinLTO enables us to *always* have a globally shared instance of a + /// function within one crate's compilation. + may_conflict: bool, + }, + + /// Each codegen unit containing a reference to the given MonoItem will + /// have its own private copy of the function (with internal linkage). + LocalCopy, +} + +#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, HashStable)] +pub enum MonoItem<'tcx> { + Fn(Instance<'tcx>), + Static(DefId), + GlobalAsm(ItemId), +} + +impl<'tcx> MonoItem<'tcx> { + /// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims). + pub fn is_user_defined(&self) -> bool { + match *self { + MonoItem::Fn(instance) => matches!(instance.def, InstanceDef::Item(..)), + MonoItem::Static(..) | MonoItem::GlobalAsm(..) => true, + } + } + + pub fn size_estimate(&self, tcx: TyCtxt<'tcx>) -> usize { + match *self { + MonoItem::Fn(instance) => { + // Estimate the size of a function based on how many statements + // it contains. + tcx.instance_def_size_estimate(instance.def) + } + // Conservatively estimate the size of a static declaration + // or assembly to be 1. + MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1, + } + } + + pub fn is_generic_fn(&self) -> bool { + match *self { + MonoItem::Fn(ref instance) => instance.substs.non_erasable_generics().next().is_some(), + MonoItem::Static(..) | MonoItem::GlobalAsm(..) 
=> false, + } + } + + pub fn symbol_name(&self, tcx: TyCtxt<'tcx>) -> SymbolName<'tcx> { + match *self { + MonoItem::Fn(instance) => tcx.symbol_name(instance), + MonoItem::Static(def_id) => tcx.symbol_name(Instance::mono(tcx, def_id)), + MonoItem::GlobalAsm(item_id) => { + SymbolName::new(tcx, &format!("global_asm_{:?}", item_id.def_id)) + } + } + } + + pub fn instantiation_mode(&self, tcx: TyCtxt<'tcx>) -> InstantiationMode { + let generate_cgu_internal_copies = tcx + .sess + .opts + .unstable_opts + .inline_in_all_cgus + .unwrap_or_else(|| tcx.sess.opts.optimize != OptLevel::No) + && !tcx.sess.link_dead_code(); + + match *self { + MonoItem::Fn(ref instance) => { + let entry_def_id = tcx.entry_fn(()).map(|(id, _)| id); + // If this function isn't inlined or otherwise has an extern + // indicator, then we'll be creating a globally shared version. + if tcx.codegen_fn_attrs(instance.def_id()).contains_extern_indicator() + || !instance.def.generates_cgu_internal_copy(tcx) + || Some(instance.def_id()) == entry_def_id + { + return InstantiationMode::GloballyShared { may_conflict: false }; + } + + // At this point we don't have explicit linkage and we're an + // inlined function. If we're inlining into all CGUs then we'll + // be creating a local copy per CGU. + if generate_cgu_internal_copies { + return InstantiationMode::LocalCopy; + } + + // Finally, if this is `#[inline(always)]` we're sure to respect + // that with an inline copy per CGU, but otherwise we'll be + // creating one copy of this `#[inline]` function which may + // conflict with upstream crates as it could be an exported + // symbol. + match tcx.codegen_fn_attrs(instance.def_id()).inline { + InlineAttr::Always => InstantiationMode::LocalCopy, + _ => InstantiationMode::GloballyShared { may_conflict: true }, + } + } + MonoItem::Static(..) | MonoItem::GlobalAsm(..) => { + InstantiationMode::GloballyShared { may_conflict: false } + } + } + } + + pub fn explicit_linkage(&self, tcx: TyCtxt<'tcx>) -> Option { + let def_id = match *self { + MonoItem::Fn(ref instance) => instance.def_id(), + MonoItem::Static(def_id) => def_id, + MonoItem::GlobalAsm(..) => return None, + }; + + let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id); + codegen_fn_attrs.linkage + } + + /// Returns `true` if this instance is instantiable - whether it has no unsatisfied + /// predicates. + /// + /// In order to codegen an item, all of its predicates must hold, because + /// otherwise the item does not make sense. Type-checking ensures that + /// the predicates of every item that is *used by* a valid item *do* + /// hold, so we can rely on that. + /// + /// However, we codegen collector roots (reachable items) and functions + /// in vtables when they are seen, even if they are not used, and so they + /// might not be instantiable. For example, a programmer can define this + /// public function: + /// + /// pub fn foo<'a>(s: &'a mut ()) where &'a mut (): Clone { + /// <&mut () as Clone>::clone(&s); + /// } + /// + /// That function can't be codegened, because the method `<&mut () as Clone>::clone` + /// does not exist. Luckily for us, that function can't ever be used, + /// because that would require for `&'a mut (): Clone` to hold, so we + /// can just not emit any code, or even a linker reference for it. + /// + /// Similarly, if a vtable method has such a signature, and therefore can't + /// be used, we can just not emit it and have a placeholder (a null pointer, + /// which will never be accessed) in its place. 
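+ /// + /// Concretely, this returns `false` exactly when instantiating the item's predicates with its own substs produces an impossible predicate set.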
+ pub fn is_instantiable(&self, tcx: TyCtxt<'tcx>) -> bool { + debug!("is_instantiable({:?})", self); + let (def_id, substs) = match *self { + MonoItem::Fn(ref instance) => (instance.def_id(), instance.substs), + MonoItem::Static(def_id) => (def_id, InternalSubsts::empty()), + // global asm never has predicates + MonoItem::GlobalAsm(..) => return true, + }; + + !tcx.subst_and_check_impossible_predicates((def_id, &substs)) + } + + pub fn local_span(&self, tcx: TyCtxt<'tcx>) -> Option<Span> { + match *self { + MonoItem::Fn(Instance { def, .. }) => def.def_id().as_local(), + MonoItem::Static(def_id) => def_id.as_local(), + MonoItem::GlobalAsm(item_id) => Some(item_id.def_id), + } + .map(|def_id| tcx.def_span(def_id)) + } + + // Only used by rustc_codegen_cranelift + pub fn codegen_dep_node(&self, tcx: TyCtxt<'tcx>) -> DepNode { + crate::dep_graph::make_compile_mono_item(tcx, self) + } + + /// Returns the item's `CrateNum` + pub fn krate(&self) -> CrateNum { + match self { + MonoItem::Fn(ref instance) => instance.def_id().krate, + MonoItem::Static(def_id) => def_id.krate, + MonoItem::GlobalAsm(..) => LOCAL_CRATE, + } + } +} + +impl<'tcx> fmt::Display for MonoItem<'tcx> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + MonoItem::Fn(instance) => write!(f, "fn {}", instance), + MonoItem::Static(def_id) => { + write!(f, "static {}", Instance::new(def_id, InternalSubsts::empty())) + } + MonoItem::GlobalAsm(..) => write!(f, "global_asm"), + } + } +} + +#[derive(Debug)] +pub struct CodegenUnit<'tcx> { + /// A name for this CGU. Incremental compilation requires that + /// the name be unique amongst **all** crates. Therefore, it should + /// contain something unique to this crate (e.g., a module path) + /// as well as the crate name and disambiguator. + name: Symbol, + items: FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)>, + size_estimate: Option<usize>, + primary: bool, + /// True if this CGU is used to hold code coverage information for dead code, + /// false otherwise. + is_code_coverage_dead_code_cgu: bool, +} + +/// Specifies the linkage type for a `MonoItem`. +/// +/// See <https://llvm.org/docs/LangRef.html#linkage-types> for more details about these variants. +#[derive(Copy, Clone, PartialEq, Debug, TyEncodable, TyDecodable, HashStable)] +pub enum Linkage { + External, + AvailableExternally, + LinkOnceAny, + LinkOnceODR, + WeakAny, + WeakODR, + Appending, + Internal, + Private, + ExternalWeak, + Common, +} + +#[derive(Copy, Clone, PartialEq, Debug, HashStable)] +pub enum Visibility { + Default, + Hidden, + Protected, +} + +impl<'tcx> CodegenUnit<'tcx> { + #[inline] + pub fn new(name: Symbol) -> CodegenUnit<'tcx> { + CodegenUnit { + name, + items: Default::default(), + size_estimate: None, + primary: false, + is_code_coverage_dead_code_cgu: false, + } + } + + pub fn name(&self) -> Symbol { + self.name + } + + pub fn set_name(&mut self, name: Symbol) { + self.name = name; + } + + pub fn is_primary(&self) -> bool { + self.primary + } + + pub fn make_primary(&mut self) { + self.primary = true; + } + + pub fn items(&self) -> &FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> { + &self.items + } + + pub fn items_mut(&mut self) -> &mut FxHashMap<MonoItem<'tcx>, (Linkage, Visibility)> { + &mut self.items + } + + pub fn is_code_coverage_dead_code_cgu(&self) -> bool { + self.is_code_coverage_dead_code_cgu + } + + /// Marks this CGU as the one used to contain code coverage information for dead code.
+ pub fn make_code_coverage_dead_code_cgu(&mut self) { + self.is_code_coverage_dead_code_cgu = true; + } + + pub fn mangle_name(human_readable_name: &str) -> String { + // We generate an 80-bit hash from the name. This should be enough to + // avoid collisions and is still reasonably short for filenames. + let mut hasher = StableHasher::new(); + human_readable_name.hash(&mut hasher); + let hash: u128 = hasher.finish(); + let hash = hash & ((1u128 << 80) - 1); + base_n::encode(hash, base_n::CASE_INSENSITIVE) + } + + pub fn estimate_size(&mut self, tcx: TyCtxt<'tcx>) { + // Estimate the size of a codegen unit as (approximately) the number of MIR + // statements it corresponds to. + self.size_estimate = Some(self.items.keys().map(|mi| mi.size_estimate(tcx)).sum()); + } + + #[inline] + pub fn size_estimate(&self) -> usize { + // Should only be called if `estimate_size` has previously been called. + self.size_estimate.expect("estimate_size must be called before getting a size_estimate") + } + + pub fn modify_size_estimate(&mut self, delta: usize) { + assert!(self.size_estimate.is_some()); + if let Some(size_estimate) = self.size_estimate { + self.size_estimate = Some(size_estimate + delta); + } + } + + pub fn contains_item(&self, item: &MonoItem<'tcx>) -> bool { + self.items().contains_key(item) + } + + pub fn work_product_id(&self) -> WorkProductId { + WorkProductId::from_cgu_name(self.name().as_str()) + } + + pub fn previous_work_product(&self, tcx: TyCtxt<'_>) -> WorkProduct { + let work_product_id = self.work_product_id(); + tcx.dep_graph + .previous_work_product(&work_product_id) + .unwrap_or_else(|| panic!("Could not find work-product for CGU `{}`", self.name())) + } + + pub fn items_in_deterministic_order( + &self, + tcx: TyCtxt<'tcx>, + ) -> Vec<(MonoItem<'tcx>, (Linkage, Visibility))> { + // The codegen tests rely on items being processed in the same order as + // they appear in the file, so for local items, we sort by node_id first + #[derive(PartialEq, Eq, PartialOrd, Ord)] + pub struct ItemSortKey<'tcx>(Option<usize>, SymbolName<'tcx>); + + fn item_sort_key<'tcx>(tcx: TyCtxt<'tcx>, item: MonoItem<'tcx>) -> ItemSortKey<'tcx> { + ItemSortKey( + match item { + MonoItem::Fn(ref instance) => { + match instance.def { + // We only want to take HirIds of user-defined + // instances into account. The others don't matter for + // the codegen tests and can even make item order + // unstable. + InstanceDef::Item(def) => def.did.as_local().map(Idx::index), + InstanceDef::VTableShim(..) + | InstanceDef::ReifyShim(..) + | InstanceDef::Intrinsic(..) + | InstanceDef::FnPtrShim(..) + | InstanceDef::Virtual(..) + | InstanceDef::ClosureOnceShim { .. } + | InstanceDef::DropGlue(..) + | InstanceDef::CloneShim(..)
=> None, + } + } + MonoItem::Static(def_id) => def_id.as_local().map(Idx::index), + MonoItem::GlobalAsm(item_id) => Some(item_id.def_id.index()), + }, + item.symbol_name(tcx), + ) + } + + let mut items: Vec<_> = self.items().iter().map(|(&i, &l)| (i, l)).collect(); + items.sort_by_cached_key(|&(i, _)| item_sort_key(tcx, i)); + items + } + + pub fn codegen_dep_node(&self, tcx: TyCtxt<'tcx>) -> DepNode { + crate::dep_graph::make_compile_codegen_unit(tcx, self.name()) + } +} + +impl<'a, 'tcx> HashStable> for CodegenUnit<'tcx> { + fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) { + let CodegenUnit { + ref items, + name, + // The size estimate is not relevant to the hash + size_estimate: _, + primary: _, + is_code_coverage_dead_code_cgu, + } = *self; + + name.hash_stable(hcx, hasher); + is_code_coverage_dead_code_cgu.hash_stable(hcx, hasher); + + let mut items: Vec<(Fingerprint, _)> = items + .iter() + .map(|(mono_item, &attrs)| { + let mut hasher = StableHasher::new(); + mono_item.hash_stable(hcx, &mut hasher); + let mono_item_fingerprint = hasher.finish(); + (mono_item_fingerprint, attrs) + }) + .collect(); + + items.sort_unstable_by_key(|i| i.0); + items.hash_stable(hcx, hasher); + } +} + +pub struct CodegenUnitNameBuilder<'tcx> { + tcx: TyCtxt<'tcx>, + cache: FxHashMap, +} + +impl<'tcx> CodegenUnitNameBuilder<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>) -> Self { + CodegenUnitNameBuilder { tcx, cache: Default::default() } + } + + /// CGU names should fulfill the following requirements: + /// - They should be able to act as a file name on any kind of file system + /// - They should not collide with other CGU names, even for different versions + /// of the same crate. + /// + /// Consequently, we don't use special characters except for '.' and '-' and we + /// prefix each name with the crate-name and crate-disambiguator. + /// + /// This function will build CGU names of the form: + /// + /// ```text + /// .[-in-](-)*[.] + /// = . + /// ``` + /// + /// The '.' before `` makes sure that names with a special + /// suffix can never collide with a name built out of regular Rust + /// identifiers (e.g., module paths). + pub fn build_cgu_name( + &mut self, + cnum: CrateNum, + components: I, + special_suffix: Option, + ) -> Symbol + where + I: IntoIterator, + C: fmt::Display, + S: fmt::Display, + { + let cgu_name = self.build_cgu_name_no_mangle(cnum, components, special_suffix); + + if self.tcx.sess.opts.unstable_opts.human_readable_cgu_names { + cgu_name + } else { + Symbol::intern(&CodegenUnit::mangle_name(cgu_name.as_str())) + } + } + + /// Same as `CodegenUnit::build_cgu_name()` but will never mangle the + /// resulting name. + pub fn build_cgu_name_no_mangle( + &mut self, + cnum: CrateNum, + components: I, + special_suffix: Option, + ) -> Symbol + where + I: IntoIterator, + C: fmt::Display, + S: fmt::Display, + { + use std::fmt::Write; + + let mut cgu_name = String::with_capacity(64); + + // Start out with the crate name and disambiguator + let tcx = self.tcx; + let crate_prefix = self.cache.entry(cnum).or_insert_with(|| { + // Whenever the cnum is not LOCAL_CRATE we also mix in the + // local crate's ID. Otherwise there can be collisions between CGUs + // instantiating stuff for upstream crates. 
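+ // The prefix built below has the shape `crate-name.<8-hex-digit hash>`, with an extra `-in-local-crate-name.<hash>` component when `cnum` is an upstream crate.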
+ let local_crate_id = if cnum != LOCAL_CRATE { + let local_stable_crate_id = tcx.sess.local_stable_crate_id(); + format!( + "-in-{}.{:08x}", + tcx.crate_name(LOCAL_CRATE), + local_stable_crate_id.to_u64() as u32, + ) + } else { + String::new() + }; + + let stable_crate_id = tcx.sess.local_stable_crate_id(); + format!( + "{}.{:08x}{}", + tcx.crate_name(cnum), + stable_crate_id.to_u64() as u32, + local_crate_id, + ) + }); + + write!(cgu_name, "{}", crate_prefix).unwrap(); + + // Add the components + for component in components { + write!(cgu_name, "-{}", component).unwrap(); + } + + if let Some(special_suffix) = special_suffix { + // We add a dot in here so it cannot clash with anything in a regular + // Rust identifier + write!(cgu_name, ".{}", special_suffix).unwrap(); + } + + Symbol::intern(&cgu_name) + } +} diff --git a/compiler/rustc_middle/src/mir/patch.rs b/compiler/rustc_middle/src/mir/patch.rs new file mode 100644 index 000000000..15496842d --- /dev/null +++ b/compiler/rustc_middle/src/mir/patch.rs @@ -0,0 +1,196 @@ +use rustc_index::vec::{Idx, IndexVec}; +use rustc_middle::mir::*; +use rustc_middle::ty::Ty; +use rustc_span::Span; + +/// This struct represents a patch to MIR, which can add +/// new statements and basic blocks and patch over block +/// terminators. +pub struct MirPatch<'tcx> { + patch_map: IndexVec>>, + new_blocks: Vec>, + new_statements: Vec<(Location, StatementKind<'tcx>)>, + new_locals: Vec>, + resume_block: Option, + body_span: Span, + next_local: usize, +} + +impl<'tcx> MirPatch<'tcx> { + pub fn new(body: &Body<'tcx>) -> Self { + let mut result = MirPatch { + patch_map: IndexVec::from_elem(None, body.basic_blocks()), + new_blocks: vec![], + new_statements: vec![], + new_locals: vec![], + next_local: body.local_decls.len(), + resume_block: None, + body_span: body.span, + }; + + // Check if we already have a resume block + for (bb, block) in body.basic_blocks().iter_enumerated() { + if let TerminatorKind::Resume = block.terminator().kind && block.statements.is_empty() { + result.resume_block = Some(bb); + break; + } + } + + result + } + + pub fn resume_block(&mut self) -> BasicBlock { + if let Some(bb) = self.resume_block { + return bb; + } + + let bb = self.new_block(BasicBlockData { + statements: vec![], + terminator: Some(Terminator { + source_info: SourceInfo::outermost(self.body_span), + kind: TerminatorKind::Resume, + }), + is_cleanup: true, + }); + self.resume_block = Some(bb); + bb + } + + pub fn is_patched(&self, bb: BasicBlock) -> bool { + self.patch_map[bb].is_some() + } + + pub fn terminator_loc(&self, body: &Body<'tcx>, bb: BasicBlock) -> Location { + let offset = match bb.index().checked_sub(body.basic_blocks().len()) { + Some(index) => self.new_blocks[index].statements.len(), + None => body[bb].statements.len(), + }; + Location { block: bb, statement_index: offset } + } + + pub fn new_local_with_info( + &mut self, + ty: Ty<'tcx>, + span: Span, + local_info: Option>>, + ) -> Local { + let index = self.next_local; + self.next_local += 1; + let mut new_decl = LocalDecl::new(ty, span); + new_decl.local_info = local_info; + self.new_locals.push(new_decl); + Local::new(index as usize) + } + + pub fn new_temp(&mut self, ty: Ty<'tcx>, span: Span) -> Local { + self.new_local_with_info(ty, span, None) + } + + pub fn new_internal(&mut self, ty: Ty<'tcx>, span: Span) -> Local { + let index = self.next_local; + self.next_local += 1; + self.new_locals.push(LocalDecl::new(ty, span).internal()); + Local::new(index as usize) + } + + pub fn new_block(&mut self, 
data: BasicBlockData<'tcx>) -> BasicBlock { + let block = BasicBlock::new(self.patch_map.len()); + debug!("MirPatch: new_block: {:?}: {:?}", block, data); + self.new_blocks.push(data); + self.patch_map.push(None); + block + } + + pub fn patch_terminator(&mut self, block: BasicBlock, new: TerminatorKind<'tcx>) { + assert!(self.patch_map[block].is_none()); + debug!("MirPatch: patch_terminator({:?}, {:?})", block, new); + self.patch_map[block] = Some(new); + } + + pub fn add_statement(&mut self, loc: Location, stmt: StatementKind<'tcx>) { + debug!("MirPatch: add_statement({:?}, {:?})", loc, stmt); + self.new_statements.push((loc, stmt)); + } + + pub fn add_assign(&mut self, loc: Location, place: Place<'tcx>, rv: Rvalue<'tcx>) { + self.add_statement(loc, StatementKind::Assign(Box::new((place, rv)))); + } + + pub fn apply(self, body: &mut Body<'tcx>) { + debug!( + "MirPatch: {:?} new temps, starting from index {}: {:?}", + self.new_locals.len(), + body.local_decls.len(), + self.new_locals + ); + debug!( + "MirPatch: {} new blocks, starting from index {}", + self.new_blocks.len(), + body.basic_blocks().len() + ); + let bbs = if self.patch_map.is_empty() && self.new_blocks.is_empty() { + body.basic_blocks.as_mut_preserves_cfg() + } else { + body.basic_blocks.as_mut() + }; + bbs.extend(self.new_blocks); + body.local_decls.extend(self.new_locals); + for (src, patch) in self.patch_map.into_iter_enumerated() { + if let Some(patch) = patch { + debug!("MirPatch: patching block {:?}", src); + bbs[src].terminator_mut().kind = patch; + } + } + + let mut new_statements = self.new_statements; + new_statements.sort_by_key(|s| s.0); + + let mut delta = 0; + let mut last_bb = START_BLOCK; + let mut stmts_and_targets: Vec<(Statement<'_>, BasicBlock)> = Vec::new(); + for (mut loc, stmt) in new_statements { + if loc.block != last_bb { + delta = 0; + last_bb = loc.block; + } + debug!("MirPatch: adding statement {:?} at loc {:?}+{}", stmt, loc, delta); + loc.statement_index += delta; + let source_info = Self::source_info_for_index(&body[loc.block], loc); + + // For mir-opt `Derefer` to work in all cases we need to + // get terminator's targets and apply the statement to all of them. + if loc.statement_index > body[loc.block].statements.len() { + let term = body[loc.block].terminator(); + for i in term.successors() { + stmts_and_targets.push((Statement { source_info, kind: stmt.clone() }, i)); + } + delta += 1; + continue; + } + + body[loc.block] + .statements + .insert(loc.statement_index, Statement { source_info, kind: stmt }); + delta += 1; + } + + for (stmt, target) in stmts_and_targets.into_iter().rev() { + body[target].statements.insert(0, stmt); + } + } + + pub fn source_info_for_index(data: &BasicBlockData<'_>, loc: Location) -> SourceInfo { + match data.statements.get(loc.statement_index) { + Some(stmt) => stmt.source_info, + None => data.terminator().source_info, + } + } + + pub fn source_info_for_location(&self, body: &Body<'tcx>, loc: Location) -> SourceInfo { + let data = match loc.block.index().checked_sub(body.basic_blocks().len()) { + Some(new) => &self.new_blocks[new], + None => &body[loc.block], + }; + Self::source_info_for_index(data, loc) + } +} diff --git a/compiler/rustc_middle/src/mir/predecessors.rs b/compiler/rustc_middle/src/mir/predecessors.rs new file mode 100644 index 000000000..5f1fadaf3 --- /dev/null +++ b/compiler/rustc_middle/src/mir/predecessors.rs @@ -0,0 +1,78 @@ +//! Lazily compute the reverse control-flow graph for the MIR. 
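The inversion this module performs is easy to sketch without any compiler types. The following standalone example mirrors the loop in `PredecessorCache::compute` below, with `usize` standing in for `BasicBlock` and plain `Vec`s in place of rustc's `IndexVec`/`SmallVec`:

```rust
/// Build predecessor lists from successor lists, as `PredecessorCache::compute`
/// does for MIR basic blocks. `usize` stands in for `BasicBlock`.
fn compute_predecessors(successors: &[Vec<usize>]) -> Vec<Vec<usize>> {
    let mut preds = vec![Vec::new(); successors.len()];
    for (bb, succs) in successors.iter().enumerate() {
        for &succ in succs {
            // Every edge `bb -> succ` makes `bb` a predecessor of `succ`.
            preds[succ].push(bb);
        }
    }
    preds
}

fn main() {
    // bb0 branches to bb1 and bb2; bb1 falls through to bb2; bb2 returns.
    let successors = vec![vec![1, 2], vec![2], vec![]];
    assert_eq!(compute_predecessors(&successors), vec![vec![], vec![0], vec![0, 1]]);
}
```

A single pass over all terminators suffices, which is why the result can be computed lazily and memoized, as the cache below does.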
+ +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::OnceCell; +use rustc_index::vec::IndexVec; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use smallvec::SmallVec; + +use crate::mir::{BasicBlock, BasicBlockData}; + +// Typically 95%+ of basic blocks have 4 or fewer predecessors. +pub type Predecessors = IndexVec>; + +#[derive(Clone, Debug)] +pub(super) struct PredecessorCache { + cache: OnceCell, +} + +impl PredecessorCache { + #[inline] + pub(super) fn new() -> Self { + PredecessorCache { cache: OnceCell::new() } + } + + /// Invalidates the predecessor cache. + #[inline] + pub(super) fn invalidate(&mut self) { + // Invalidating the predecessor cache requires mutating the MIR, which in turn requires a + // unique reference (`&mut`) to the `mir::Body`. Because of this, we can assume that all + // callers of `invalidate` have a unique reference to the MIR and thus to the predecessor + // cache. This means we never need to do synchronization when `invalidate` is called, we can + // simply reinitialize the `OnceCell`. + self.cache = OnceCell::new(); + } + + /// Returns the predecessor graph for this MIR. + #[inline] + pub(super) fn compute( + &self, + basic_blocks: &IndexVec>, + ) -> &Predecessors { + self.cache.get_or_init(|| { + let mut preds = IndexVec::from_elem(SmallVec::new(), basic_blocks); + for (bb, data) in basic_blocks.iter_enumerated() { + if let Some(term) = &data.terminator { + for succ in term.successors() { + preds[succ].push(bb); + } + } + } + + preds + }) + } +} + +impl Encodable for PredecessorCache { + #[inline] + fn encode(&self, _s: &mut S) {} +} + +impl Decodable for PredecessorCache { + #[inline] + fn decode(_: &mut D) -> Self { + Self::new() + } +} + +impl HashStable for PredecessorCache { + #[inline] + fn hash_stable(&self, _: &mut CTX, _: &mut StableHasher) { + // do nothing + } +} + +TrivialTypeTraversalAndLiftImpls! { + PredecessorCache, +} diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs new file mode 100644 index 000000000..0ce41337b --- /dev/null +++ b/compiler/rustc_middle/src/mir/pretty.rs @@ -0,0 +1,1067 @@ +use std::collections::BTreeSet; +use std::fmt::Display; +use std::fmt::Write as _; +use std::fs; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; + +use super::graphviz::write_mir_fn_graphviz; +use super::spanview::write_mir_fn_spanview; +use either::Either; +use rustc_data_structures::fx::FxHashMap; +use rustc_hir::def_id::DefId; +use rustc_index::vec::Idx; +use rustc_middle::mir::interpret::{ + read_target_uint, AllocId, Allocation, ConstAllocation, ConstValue, GlobalAlloc, Pointer, + Provenance, +}; +use rustc_middle::mir::visit::Visitor; +use rustc_middle::mir::MirSource; +use rustc_middle::mir::*; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_target::abi::Size; + +const INDENT: &str = " "; +/// Alignment for lining up comments following MIR statements +pub(crate) const ALIGN: usize = 40; + +/// An indication of where we are in the control flow graph. Used for printing +/// extra information in `dump_mir` +pub enum PassWhere { + /// We have not started dumping the control flow graph, but we are about to. + BeforeCFG, + + /// We just finished dumping the control flow graph. This is right before EOF + AfterCFG, + + /// We are about to start dumping the given basic block. + BeforeBlock(BasicBlock), + + /// We are just about to dump the given statement or terminator. 
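+ /// `extra_data` callbacks receive this `Location` so they can emit extra lines ahead of that statement.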
+ BeforeLocation(Location), + + /// We just dumped the given statement or terminator. + AfterLocation(Location), + + /// We just dumped the terminator for a block but not the closing `}`. + AfterTerminator(BasicBlock), +} + +/// If the session is properly configured, dumps a human-readable +/// representation of the mir into: +/// +/// ```text +/// rustc.node<node_id>.<pass_num>.<pass_name>.<disambiguator> +/// ``` +/// +/// Output from this function is controlled by passing `-Z dump-mir=<filter>`, +/// where `<filter>` takes the following forms: +/// +/// - `all` -- dump MIR for all fns, all passes, all everything +/// - a filter defined by a set of substrings combined with `&` and `|` +/// (`&` has higher precedence). At least one of the `|`-separated groups +/// must match; an `|`-separated group matches if all of its `&`-separated +/// substrings are matched. +/// +/// Example: +/// +/// - `nll` == match if `nll` appears in the name +/// - `foo & nll` == match if `foo` and `nll` both appear in the name +/// - `foo & nll | typeck` == match if `foo` and `nll` both appear in the name +/// or `typeck` appears in the name. +/// - `foo & nll | bar & typeck` == match if `foo` and `nll` both appear in the name +/// or `typeck` and `bar` both appear in the name. +#[inline] +pub fn dump_mir<'tcx, F>( + tcx: TyCtxt<'tcx>, + pass_num: Option<&dyn Display>, + pass_name: &str, + disambiguator: &dyn Display, + body: &Body<'tcx>, + extra_data: F, +) where + F: FnMut(PassWhere, &mut dyn Write) -> io::Result<()>, +{ + if !dump_enabled(tcx, pass_name, body.source.def_id()) { + return; + } + + dump_matched_mir_node(tcx, pass_num, pass_name, disambiguator, body, extra_data); +} + +pub fn dump_enabled<'tcx>(tcx: TyCtxt<'tcx>, pass_name: &str, def_id: DefId) -> bool { + let Some(ref filters) = tcx.sess.opts.unstable_opts.dump_mir else { + return false; + }; + // see notes on #41697 below + let node_path = ty::print::with_forced_impl_filename_line!(tcx.def_path_str(def_id)); + filters.split('|').any(|or_filter| { + or_filter.split('&').all(|and_filter| { + let and_filter_trimmed = and_filter.trim(); + and_filter_trimmed == "all" + || pass_name.contains(and_filter_trimmed) + || node_path.contains(and_filter_trimmed) + }) + }) +} + +// #41697 -- we use `with_forced_impl_filename_line()` because +// `def_path_str()` would otherwise trigger `type_of`, and this can +// run while we are already attempting to evaluate `type_of`.
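Because `dump_enabled` above is the implementation of that filter grammar, the matching rule can be exercised in isolation. Here is a self-contained sketch of the same logic over plain strings; the pass and path names in `main` are hypothetical:

```rust
/// The `-Z dump-mir=<filter>` matching rule from `dump_enabled`: `|` separates
/// alternatives, and every `&`-separated substring of an alternative must match.
fn matches_filter(filters: &str, pass_name: &str, node_path: &str) -> bool {
    filters.split('|').any(|or_filter| {
        or_filter.split('&').all(|and_filter| {
            let f = and_filter.trim();
            f == "all" || pass_name.contains(f) || node_path.contains(f)
        })
    })
}

fn main() {
    // Hypothetical pass name and def path, for illustration only.
    let (pass, path) = ("nll_pass", "my_crate::foo");
    assert!(matches_filter("nll", pass, path));
    assert!(matches_filter("foo & nll | typeck", pass, path));
    assert!(!matches_filter("foo & typeck", pass, path));
}
```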
+ +fn dump_matched_mir_node<'tcx, F>( + tcx: TyCtxt<'tcx>, + pass_num: Option<&dyn Display>, + pass_name: &str, + disambiguator: &dyn Display, + body: &Body<'tcx>, + mut extra_data: F, +) where + F: FnMut(PassWhere, &mut dyn Write) -> io::Result<()>, +{ + let _: io::Result<()> = try { + let mut file = + create_dump_file(tcx, "mir", pass_num, pass_name, disambiguator, body.source)?; + // see notes on #41697 above + let def_path = + ty::print::with_forced_impl_filename_line!(tcx.def_path_str(body.source.def_id())); + write!(file, "// MIR for `{}", def_path)?; + match body.source.promoted { + None => write!(file, "`")?, + Some(promoted) => write!(file, "::{:?}`", promoted)?, + } + writeln!(file, " {} {}", disambiguator, pass_name)?; + if let Some(ref layout) = body.generator_layout() { + writeln!(file, "/* generator_layout = {:#?} */", layout)?; + } + writeln!(file)?; + extra_data(PassWhere::BeforeCFG, &mut file)?; + write_user_type_annotations(tcx, body, &mut file)?; + write_mir_fn(tcx, body, &mut extra_data, &mut file)?; + extra_data(PassWhere::AfterCFG, &mut file)?; + }; + + if tcx.sess.opts.unstable_opts.dump_mir_graphviz { + let _: io::Result<()> = try { + let mut file = + create_dump_file(tcx, "dot", pass_num, pass_name, disambiguator, body.source)?; + write_mir_fn_graphviz(tcx, body, false, &mut file)?; + }; + } + + if let Some(spanview) = tcx.sess.opts.unstable_opts.dump_mir_spanview { + let _: io::Result<()> = try { + let file_basename = + dump_file_basename(tcx, pass_num, pass_name, disambiguator, body.source); + let mut file = create_dump_file_with_basename(tcx, &file_basename, "html")?; + if body.source.def_id().is_local() { + write_mir_fn_spanview(tcx, body, spanview, &file_basename, &mut file)?; + } + }; + } +} + +/// Returns the file basename portion (without extension) of a filename path +/// where we should dump MIR representation output files. +fn dump_file_basename<'tcx>( + tcx: TyCtxt<'tcx>, + pass_num: Option<&dyn Display>, + pass_name: &str, + disambiguator: &dyn Display, + source: MirSource<'tcx>, +) -> String { + let promotion_id = match source.promoted { + Some(id) => format!("-{:?}", id), + None => String::new(), + }; + + let pass_num = if tcx.sess.opts.unstable_opts.dump_mir_exclude_pass_number { + String::new() + } else { + match pass_num { + None => ".-------".to_string(), + Some(pass_num) => format!(".{}", pass_num), + } + }; + + let crate_name = tcx.crate_name(source.def_id().krate); + let item_name = tcx.def_path(source.def_id()).to_filename_friendly_no_crate(); + // All drop shims have the same DefId, so we have to add the type + // to get unique file names. + let shim_disambiguator = match source.instance { + ty::InstanceDef::DropGlue(_, Some(ty)) => { + // Unfortunately, pretty-printed types are not very filename-friendly. + // We do some filtering. + let mut s = ".".to_owned(); + s.extend(ty.to_string().chars().filter_map(|c| match c { + ' ' => None, + ':' | '<' | '>' => Some('_'), + c => Some(c), + })); + s + } + _ => String::new(), + }; + + format!( + "{}.{}{}{}{}.{}.{}", + crate_name, item_name, shim_disambiguator, promotion_id, pass_num, pass_name, disambiguator, + ) +} + +/// Returns the path to the filename where we should dump a given MIR. +/// Also used by other bits of code (e.g., NLL inference) that dump +/// graphviz data or other things.
+fn dump_path(tcx: TyCtxt<'_>, basename: &str, extension: &str) -> PathBuf { + let mut file_path = PathBuf::new(); + file_path.push(Path::new(&tcx.sess.opts.unstable_opts.dump_mir_dir)); + + let file_name = format!("{}.{}", basename, extension,); + + file_path.push(&file_name); + + file_path +} + +/// Attempts to open the MIR dump file with the given name and extension. +fn create_dump_file_with_basename( + tcx: TyCtxt<'_>, + file_basename: &str, + extension: &str, +) -> io::Result> { + let file_path = dump_path(tcx, file_basename, extension); + if let Some(parent) = file_path.parent() { + fs::create_dir_all(parent).map_err(|e| { + io::Error::new( + e.kind(), + format!("IO error creating MIR dump directory: {:?}; {}", parent, e), + ) + })?; + } + Ok(io::BufWriter::new(fs::File::create(&file_path).map_err(|e| { + io::Error::new(e.kind(), format!("IO error creating MIR dump file: {:?}; {}", file_path, e)) + })?)) +} + +/// Attempts to open a file where we should dump a given MIR or other +/// bit of MIR-related data. Used by `mir-dump`, but also by other +/// bits of code (e.g., NLL inference) that dump graphviz data or +/// other things, and hence takes the extension as an argument. +pub fn create_dump_file<'tcx>( + tcx: TyCtxt<'tcx>, + extension: &str, + pass_num: Option<&dyn Display>, + pass_name: &str, + disambiguator: &dyn Display, + source: MirSource<'tcx>, +) -> io::Result> { + create_dump_file_with_basename( + tcx, + &dump_file_basename(tcx, pass_num, pass_name, disambiguator, source), + extension, + ) +} + +/// Write out a human-readable textual representation for the given MIR. +pub fn write_mir_pretty<'tcx>( + tcx: TyCtxt<'tcx>, + single: Option, + w: &mut dyn Write, +) -> io::Result<()> { + writeln!(w, "// WARNING: This output format is intended for human consumers only")?; + writeln!(w, "// and is subject to change without notice. Knock yourself out.")?; + + let mut first = true; + for def_id in dump_mir_def_ids(tcx, single) { + if first { + first = false; + } else { + // Put empty lines between all items + writeln!(w)?; + } + + let render_body = |w: &mut dyn Write, body| -> io::Result<()> { + write_mir_fn(tcx, body, &mut |_, _| Ok(()), w)?; + + for body in tcx.promoted_mir(def_id) { + writeln!(w)?; + write_mir_fn(tcx, body, &mut |_, _| Ok(()), w)?; + } + Ok(()) + }; + + // For `const fn` we want to render both the optimized MIR and the MIR for ctfe. + if tcx.is_const_fn_raw(def_id) { + render_body(w, tcx.optimized_mir(def_id))?; + writeln!(w)?; + writeln!(w, "// MIR FOR CTFE")?; + // Do not use `render_body`, as that would render the promoteds again, but these + // are shared between mir_for_ctfe and optimized_mir + write_mir_fn(tcx, tcx.mir_for_ctfe(def_id), &mut |_, _| Ok(()), w)?; + } else { + let instance_mir = + tcx.instance_mir(ty::InstanceDef::Item(ty::WithOptConstParam::unknown(def_id))); + render_body(w, instance_mir)?; + } + } + Ok(()) +} + +/// Write out a human-readable textual representation for the given function. 
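+/// Basic blocks are printed in order, separated by blank lines, and any allocations the body references are dumped after the closing brace.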
+pub fn write_mir_fn<'tcx, F>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + extra_data: &mut F, + w: &mut dyn Write, +) -> io::Result<()> +where + F: FnMut(PassWhere, &mut dyn Write) -> io::Result<()>, +{ + write_mir_intro(tcx, body, w)?; + for block in body.basic_blocks().indices() { + extra_data(PassWhere::BeforeBlock(block), w)?; + write_basic_block(tcx, block, body, extra_data, w)?; + if block.index() + 1 != body.basic_blocks().len() { + writeln!(w)?; + } + } + + writeln!(w, "}}")?; + + write_allocations(tcx, body, w)?; + + Ok(()) +} + +/// Write out a human-readable textual representation for the given basic block. +pub fn write_basic_block<'tcx, F>( + tcx: TyCtxt<'tcx>, + block: BasicBlock, + body: &Body<'tcx>, + extra_data: &mut F, + w: &mut dyn Write, +) -> io::Result<()> +where + F: FnMut(PassWhere, &mut dyn Write) -> io::Result<()>, +{ + let data = &body[block]; + + // Basic block label at the top. + let cleanup_text = if data.is_cleanup { " (cleanup)" } else { "" }; + writeln!(w, "{}{:?}{}: {{", INDENT, block, cleanup_text)?; + + // List of statements in the middle. + let mut current_location = Location { block, statement_index: 0 }; + for statement in &data.statements { + extra_data(PassWhere::BeforeLocation(current_location), w)?; + let indented_body = format!("{0}{0}{1:?};", INDENT, statement); + writeln!( + w, + "{:A$} // {}{}", + indented_body, + if tcx.sess.verbose() { format!("{:?}: ", current_location) } else { String::new() }, + comment(tcx, statement.source_info, body.span), + A = ALIGN, + )?; + + write_extra(tcx, w, |visitor| { + visitor.visit_statement(statement, current_location); + })?; + + extra_data(PassWhere::AfterLocation(current_location), w)?; + + current_location.statement_index += 1; + } + + // Terminator at the bottom. + extra_data(PassWhere::BeforeLocation(current_location), w)?; + let indented_terminator = format!("{0}{0}{1:?};", INDENT, data.terminator().kind); + writeln!( + w, + "{:A$} // {}{}", + indented_terminator, + if tcx.sess.verbose() { format!("{:?}: ", current_location) } else { String::new() }, + comment(tcx, data.terminator().source_info, body.span), + A = ALIGN, + )?; + + write_extra(tcx, w, |visitor| { + visitor.visit_terminator(data.terminator(), current_location); + })?; + + extra_data(PassWhere::AfterLocation(current_location), w)?; + extra_data(PassWhere::AfterTerminator(block), w)?; + + writeln!(w, "{}}}", INDENT) +} + +/// After we print the main statement, we sometimes dump extra +/// information. There's often a lot of little things "nuzzled up" in +/// a statement. +fn write_extra<'tcx, F>(tcx: TyCtxt<'tcx>, write: &mut dyn Write, mut visit_op: F) -> io::Result<()> +where + F: FnMut(&mut ExtraComments<'tcx>), +{ + let mut extra_comments = ExtraComments { tcx, comments: vec![] }; + visit_op(&mut extra_comments); + for comment in extra_comments.comments { + writeln!(write, "{:A$} // {}", "", comment, A = ALIGN)?; + } + Ok(()) +} + +struct ExtraComments<'tcx> { + tcx: TyCtxt<'tcx>, + comments: Vec, +} + +impl<'tcx> ExtraComments<'tcx> { + fn push(&mut self, lines: &str) { + for line in lines.split('\n') { + self.comments.push(line.to_string()); + } + } +} + +fn use_verbose<'tcx>(ty: Ty<'tcx>, fn_def: bool) -> bool { + match *ty.kind() { + ty::Int(_) | ty::Uint(_) | ty::Bool | ty::Char | ty::Float(_) => false, + // Unit type + ty::Tuple(g_args) if g_args.is_empty() => false, + ty::Tuple(g_args) => g_args.iter().any(|g_arg| use_verbose(g_arg, fn_def)), + ty::Array(ty, _) => use_verbose(ty, fn_def), + ty::FnDef(..) 
=> fn_def, + _ => true, + } +} + +impl<'tcx> Visitor<'tcx> for ExtraComments<'tcx> { + fn visit_constant(&mut self, constant: &Constant<'tcx>, _location: Location) { + let Constant { span, user_ty, literal } = constant; + if use_verbose(literal.ty(), true) { + self.push("mir::Constant"); + self.push(&format!( + "+ span: {}", + self.tcx.sess.source_map().span_to_embeddable_string(*span) + )); + if let Some(user_ty) = user_ty { + self.push(&format!("+ user_ty: {:?}", user_ty)); + } + + // FIXME: this is a poor version of `pretty_print_const_value`. + let fmt_val = |val: &ConstValue<'tcx>| match val { + ConstValue::ZeroSized => format!(""), + ConstValue::Scalar(s) => format!("Scalar({:?})", s), + ConstValue::Slice { .. } => format!("Slice(..)"), + ConstValue::ByRef { .. } => format!("ByRef(..)"), + }; + + let fmt_valtree = |valtree: &ty::ValTree<'tcx>| match valtree { + ty::ValTree::Leaf(leaf) => format!("ValTree::Leaf({:?})", leaf), + ty::ValTree::Branch(_) => format!("ValTree::Branch(..)"), + }; + + let val = match literal { + ConstantKind::Ty(ct) => match ct.kind() { + ty::ConstKind::Param(p) => format!("Param({})", p), + ty::ConstKind::Unevaluated(uv) => format!( + "Unevaluated({}, {:?}, {:?})", + self.tcx.def_path_str(uv.def.did), + uv.substs, + uv.promoted, + ), + ty::ConstKind::Value(val) => format!("Value({})", fmt_valtree(&val)), + ty::ConstKind::Error(_) => "Error".to_string(), + // These variants shouldn't exist in the MIR. + ty::ConstKind::Placeholder(_) + | ty::ConstKind::Infer(_) + | ty::ConstKind::Bound(..) => bug!("unexpected MIR constant: {:?}", literal), + }, + // To keep the diffs small, we render this like we render `ty::Const::Value`. + // + // This changes once `ty::Const::Value` is represented using valtrees. + ConstantKind::Val(val, _) => format!("Value({})", fmt_val(&val)), + }; + + // This reflects what `Const` looked liked before `val` was renamed + // as `kind`. We print it like this to avoid having to update + // expected output in a lot of tests. + self.push(&format!("+ literal: Const {{ ty: {}, val: {} }}", literal.ty(), val)); + } + } + + fn visit_rvalue(&mut self, rvalue: &Rvalue<'tcx>, location: Location) { + self.super_rvalue(rvalue, location); + if let Rvalue::Aggregate(kind, _) = rvalue { + match **kind { + AggregateKind::Closure(def_id, substs) => { + self.push("closure"); + self.push(&format!("+ def_id: {:?}", def_id)); + self.push(&format!("+ substs: {:#?}", substs)); + } + + AggregateKind::Generator(def_id, substs, movability) => { + self.push("generator"); + self.push(&format!("+ def_id: {:?}", def_id)); + self.push(&format!("+ substs: {:#?}", substs)); + self.push(&format!("+ movability: {:?}", movability)); + } + + AggregateKind::Adt(_, _, _, Some(user_ty), _) => { + self.push("adt"); + self.push(&format!("+ user_ty: {:?}", user_ty)); + } + + _ => {} + } + } + } +} + +fn comment(tcx: TyCtxt<'_>, SourceInfo { span, scope }: SourceInfo, function_span: Span) -> String { + let location = if tcx.sess.opts.unstable_opts.mir_pretty_relative_line_numbers { + tcx.sess.source_map().span_to_relative_line_string(span, function_span) + } else { + tcx.sess.source_map().span_to_embeddable_string(span) + }; + + format!("scope {} at {}", scope.index(), location,) +} + +/// Prints local variables in a scope tree. +fn write_scope_tree( + tcx: TyCtxt<'_>, + body: &Body<'_>, + scope_tree: &FxHashMap>, + w: &mut dyn Write, + parent: SourceScope, + depth: usize, +) -> io::Result<()> { + let indent = depth * INDENT.len(); + + // Local variable debuginfo. 
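+ // Each `VarDebugInfo` entry pairs a user-visible variable name with the place or constant holding its value; only entries declared in `parent`'s scope are printed here.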
+ for var_debug_info in &body.var_debug_info { + if var_debug_info.source_info.scope != parent { + // Not declared in this scope. + continue; + } + + let indented_debug_info = format!( + "{0:1$}debug {2} => {3:?};", + INDENT, indent, var_debug_info.name, var_debug_info.value, + ); + + writeln!( + w, + "{0:1$} // in {2}", + indented_debug_info, + ALIGN, + comment(tcx, var_debug_info.source_info, body.span), + )?; + } + + // Local variable types. + for (local, local_decl) in body.local_decls.iter_enumerated() { + if (1..body.arg_count + 1).contains(&local.index()) { + // Skip over argument locals, they're printed in the signature. + continue; + } + + if local_decl.source_info.scope != parent { + // Not declared in this scope. + continue; + } + + let mut_str = if local_decl.mutability == Mutability::Mut { "mut " } else { "" }; + + let mut indented_decl = + format!("{0:1$}let {2}{3:?}: {4:?}", INDENT, indent, mut_str, local, local_decl.ty); + if let Some(user_ty) = &local_decl.user_ty { + for user_ty in user_ty.projections() { + write!(indented_decl, " as {:?}", user_ty).unwrap(); + } + } + indented_decl.push(';'); + + let local_name = if local == RETURN_PLACE { " return place" } else { "" }; + + writeln!( + w, + "{0:1$} //{2} in {3}", + indented_decl, + ALIGN, + local_name, + comment(tcx, local_decl.source_info, body.span), + )?; + } + + let Some(children) = scope_tree.get(&parent) else { + return Ok(()); + }; + + for &child in children { + let child_data = &body.source_scopes[child]; + assert_eq!(child_data.parent_scope, Some(parent)); + + let (special, span) = if let Some((callee, callsite_span)) = child_data.inlined { + ( + format!( + " (inlined {}{})", + if callee.def.requires_caller_location(tcx) { "#[track_caller] " } else { "" }, + callee + ), + Some(callsite_span), + ) + } else { + (String::new(), None) + }; + + let indented_header = format!("{0:1$}scope {2}{3} {{", "", indent, child.index(), special); + + if let Some(span) = span { + writeln!( + w, + "{0:1$} // at {2}", + indented_header, + ALIGN, + tcx.sess.source_map().span_to_embeddable_string(span), + )?; + } else { + writeln!(w, "{}", indented_header)?; + } + + write_scope_tree(tcx, body, scope_tree, w, child, depth + 1)?; + writeln!(w, "{0:1$}}}", "", depth * INDENT.len())?; + } + + Ok(()) +} + +/// Write out a human-readable textual representation of the MIR's `fn` type and the types of its +/// local variables (both user-defined bindings and compiler temporaries). +pub fn write_mir_intro<'tcx>( + tcx: TyCtxt<'tcx>, + body: &Body<'_>, + w: &mut dyn Write, +) -> io::Result<()> { + write_mir_sig(tcx, body, w)?; + writeln!(w, "{{")?; + + // construct a scope tree and write it out + let mut scope_tree: FxHashMap> = Default::default(); + for (index, scope_data) in body.source_scopes.iter().enumerate() { + if let Some(parent) = scope_data.parent_scope { + scope_tree.entry(parent).or_default().push(SourceScope::new(index)); + } else { + // Only the argument scope has no parent, because it's the root. + assert_eq!(index, OUTERMOST_SOURCE_SCOPE.index()); + } + } + + write_scope_tree(tcx, body, &scope_tree, w, OUTERMOST_SOURCE_SCOPE, 1)?; + + // Add an empty line before the first block is printed. + writeln!(w)?; + + Ok(()) +} + +/// Find all `AllocId`s mentioned (recursively) in the MIR body and print their corresponding +/// allocations. 
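+/// Printing is driven by a worklist: an allocation's relocations can mention further `AllocId`s, which are deduplicated via `seen` and queued on `todo` until none remain.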
+pub fn write_allocations<'tcx>( + tcx: TyCtxt<'tcx>, + body: &Body<'_>, + w: &mut dyn Write, +) -> io::Result<()> { + fn alloc_ids_from_alloc( + alloc: ConstAllocation<'_>, + ) -> impl DoubleEndedIterator + '_ { + alloc.inner().relocations().values().map(|id| *id) + } + + fn alloc_ids_from_const_val(val: ConstValue<'_>) -> impl Iterator + '_ { + match val { + ConstValue::Scalar(interpret::Scalar::Ptr(ptr, _)) => { + Either::Left(Either::Left(std::iter::once(ptr.provenance))) + } + ConstValue::Scalar(interpret::Scalar::Int { .. }) => { + Either::Left(Either::Right(std::iter::empty())) + } + ConstValue::ZeroSized => Either::Left(Either::Right(std::iter::empty())), + ConstValue::ByRef { alloc, .. } | ConstValue::Slice { data: alloc, .. } => { + Either::Right(alloc_ids_from_alloc(alloc)) + } + } + } + struct CollectAllocIds(BTreeSet); + + impl<'tcx> Visitor<'tcx> for CollectAllocIds { + fn visit_constant(&mut self, c: &Constant<'tcx>, loc: Location) { + match c.literal { + ConstantKind::Ty(c) => self.visit_const(c, loc), + ConstantKind::Val(val, _) => { + self.0.extend(alloc_ids_from_const_val(val)); + } + } + } + } + + let mut visitor = CollectAllocIds(Default::default()); + visitor.visit_body(body); + + // `seen` contains all seen allocations, including the ones we have *not* printed yet. + // The protocol is to first `insert` into `seen`, and only if that returns `true` + // then push to `todo`. + let mut seen = visitor.0; + let mut todo: Vec<_> = seen.iter().copied().collect(); + while let Some(id) = todo.pop() { + let mut write_allocation_track_relocs = + |w: &mut dyn Write, alloc: ConstAllocation<'tcx>| -> io::Result<()> { + // `.rev()` because we are popping them from the back of the `todo` vector. + for id in alloc_ids_from_alloc(alloc).rev() { + if seen.insert(id) { + todo.push(id); + } + } + write!(w, "{}", display_allocation(tcx, alloc.inner())) + }; + write!(w, "\n{id:?}")?; + match tcx.try_get_global_alloc(id) { + // This can't really happen unless there are bugs, but it doesn't cost us anything to + // gracefully handle it and allow buggy rustc to be debugged via allocation printing. + None => write!(w, " (deallocated)")?, + Some(GlobalAlloc::Function(inst)) => write!(w, " (fn: {inst})")?, + Some(GlobalAlloc::VTable(ty, Some(trait_ref))) => { + write!(w, " (vtable: impl {trait_ref} for {ty})")? + } + Some(GlobalAlloc::VTable(ty, None)) => { + write!(w, " (vtable: impl for {ty})")? + } + Some(GlobalAlloc::Static(did)) if !tcx.is_foreign_item(did) => { + match tcx.eval_static_initializer(did) { + Ok(alloc) => { + write!(w, " (static: {}, ", tcx.def_path_str(did))?; + write_allocation_track_relocs(w, alloc)?; + } + Err(_) => write!( + w, + " (static: {}, error during initializer evaluation)", + tcx.def_path_str(did) + )?, + } + } + Some(GlobalAlloc::Static(did)) => { + write!(w, " (extern static: {})", tcx.def_path_str(did))? + } + Some(GlobalAlloc::Memory(alloc)) => { + write!(w, " (")?; + write_allocation_track_relocs(w, alloc)? + } + } + writeln!(w)?; + } + Ok(()) +} + +/// Dumps the size and metadata and content of an allocation to the given writer. +/// The expectation is that the caller first prints other relevant metadata, so the exact +/// format of this function is (*without* leading or trailing newline): +/// +/// ```text +/// size: {}, align: {}) { +/// +/// } +/// ``` +/// +/// The byte format is similar to how hex editors print bytes. Each line starts with the address of +/// the start of the line, followed by all bytes in hex format (space separated). 
+/// If the allocation is small enough to fit into a single line, no start address is given. +/// After the hex dump, an ascii dump follows, replacing all unprintable characters (control +/// characters or characters whose value is larger than 127) with a `.` +/// This also prints relocations adequately. +pub fn display_allocation<'a, 'tcx, Prov, Extra>( + tcx: TyCtxt<'tcx>, + alloc: &'a Allocation, +) -> RenderAllocation<'a, 'tcx, Prov, Extra> { + RenderAllocation { tcx, alloc } +} + +#[doc(hidden)] +pub struct RenderAllocation<'a, 'tcx, Prov, Extra> { + tcx: TyCtxt<'tcx>, + alloc: &'a Allocation, +} + +impl<'a, 'tcx, Prov: Provenance, Extra> std::fmt::Display + for RenderAllocation<'a, 'tcx, Prov, Extra> +{ + fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let RenderAllocation { tcx, alloc } = *self; + write!(w, "size: {}, align: {})", alloc.size().bytes(), alloc.align.bytes())?; + if alloc.size() == Size::ZERO { + // We are done. + return write!(w, " {{}}"); + } + // Write allocation bytes. + writeln!(w, " {{")?; + write_allocation_bytes(tcx, alloc, w, " ")?; + write!(w, "}}")?; + Ok(()) + } +} + +fn write_allocation_endline(w: &mut dyn std::fmt::Write, ascii: &str) -> std::fmt::Result { + for _ in 0..(BYTES_PER_LINE - ascii.chars().count()) { + write!(w, " ")?; + } + writeln!(w, " │ {}", ascii) +} + +/// Number of bytes to print per allocation hex dump line. +const BYTES_PER_LINE: usize = 16; + +/// Prints the line start address and returns the new line start address. +fn write_allocation_newline( + w: &mut dyn std::fmt::Write, + mut line_start: Size, + ascii: &str, + pos_width: usize, + prefix: &str, +) -> Result { + write_allocation_endline(w, ascii)?; + line_start += Size::from_bytes(BYTES_PER_LINE); + write!(w, "{}0x{:02$x} │ ", prefix, line_start.bytes(), pos_width)?; + Ok(line_start) +} + +/// The `prefix` argument allows callers to add an arbitrary prefix before each line (even if there +/// is only one line). Note that your prefix should contain a trailing space as the lines are +/// printed directly after it. +fn write_allocation_bytes<'tcx, Prov: Provenance, Extra>( + tcx: TyCtxt<'tcx>, + alloc: &Allocation, + w: &mut dyn std::fmt::Write, + prefix: &str, +) -> std::fmt::Result { + let num_lines = alloc.size().bytes_usize().saturating_sub(BYTES_PER_LINE); + // Number of chars needed to represent all line numbers. + let pos_width = hex_number_length(alloc.size().bytes()); + + if num_lines > 0 { + write!(w, "{}0x{:02$x} │ ", prefix, 0, pos_width)?; + } else { + write!(w, "{}", prefix)?; + } + + let mut i = Size::ZERO; + let mut line_start = Size::ZERO; + + let ptr_size = tcx.data_layout.pointer_size; + + let mut ascii = String::new(); + + let oversized_ptr = |target: &mut String, width| { + if target.len() > width { + write!(target, " ({} ptr bytes)", ptr_size.bytes()).unwrap(); + } + }; + + while i < alloc.size() { + // The line start already has a space. While we could remove that space from the line start + // printing and unconditionally print a space here, that would cause the single-line case + // to have a single space before it, which looks weird. 
+ if i != line_start { + write!(w, " ")?; + } + if let Some(&prov) = alloc.relocations().get(&i) { + // Memory with a relocation must be defined + assert!(alloc.init_mask().is_range_initialized(i, i + ptr_size).is_ok()); + let j = i.bytes_usize(); + let offset = alloc + .inspect_with_uninit_and_ptr_outside_interpreter(j..j + ptr_size.bytes_usize()); + let offset = read_target_uint(tcx.data_layout.endian, offset).unwrap(); + let offset = Size::from_bytes(offset); + let relocation_width = |bytes| bytes * 3; + let ptr = Pointer::new(prov, offset); + let mut target = format!("{:?}", ptr); + if target.len() > relocation_width(ptr_size.bytes_usize() - 1) { + // This is too long, try to save some space. + target = format!("{:#?}", ptr); + } + if ((i - line_start) + ptr_size).bytes_usize() > BYTES_PER_LINE { + // This branch handles the situation where a relocation starts in the current line + // but ends in the next one. + let remainder = Size::from_bytes(BYTES_PER_LINE) - (i - line_start); + let overflow = ptr_size - remainder; + let remainder_width = relocation_width(remainder.bytes_usize()) - 2; + let overflow_width = relocation_width(overflow.bytes_usize() - 1) + 1; + ascii.push('╾'); + for _ in 0..remainder.bytes() - 1 { + ascii.push('─'); + } + if overflow_width > remainder_width && overflow_width >= target.len() { + // The case where the relocation fits into the part in the next line + write!(w, "╾{0:─^1$}", "", remainder_width)?; + line_start = + write_allocation_newline(w, line_start, &ascii, pos_width, prefix)?; + ascii.clear(); + write!(w, "{0:─^1$}╼", target, overflow_width)?; + } else { + oversized_ptr(&mut target, remainder_width); + write!(w, "╾{0:─^1$}", target, remainder_width)?; + line_start = + write_allocation_newline(w, line_start, &ascii, pos_width, prefix)?; + write!(w, "{0:─^1$}╼", "", overflow_width)?; + ascii.clear(); + } + for _ in 0..overflow.bytes() - 1 { + ascii.push('─'); + } + ascii.push('╼'); + i += ptr_size; + continue; + } else { + // This branch handles a relocation that starts and ends in the current line. + let relocation_width = relocation_width(ptr_size.bytes_usize() - 1); + oversized_ptr(&mut target, relocation_width); + ascii.push('╾'); + write!(w, "╾{0:─^1$}╼", target, relocation_width)?; + for _ in 0..ptr_size.bytes() - 2 { + ascii.push('─'); + } + ascii.push('╼'); + i += ptr_size; + } + } else if alloc.init_mask().is_range_initialized(i, i + Size::from_bytes(1)).is_ok() { + let j = i.bytes_usize(); + + // Checked definedness (and thus range) and relocations. This access also doesn't + // influence interpreter execution but is only for debugging. + let c = alloc.inspect_with_uninit_and_ptr_outside_interpreter(j..j + 1)[0]; + write!(w, "{:02x}", c)?; + if c.is_ascii_control() || c >= 0x80 { + ascii.push('.'); + } else { + ascii.push(char::from(c)); + } + i += Size::from_bytes(1); + } else { + write!(w, "__")?; + ascii.push('░'); + i += Size::from_bytes(1); + } + // Print a new line header if the next line still has some bytes to print. 
+ if i == line_start + Size::from_bytes(BYTES_PER_LINE) && i != alloc.size() {
+ line_start = write_allocation_newline(w, line_start, &ascii, pos_width, prefix)?;
+ ascii.clear();
+ }
+ }
+ write_allocation_endline(w, &ascii)?;
+
+ Ok(())
+}
+
+fn write_mir_sig(tcx: TyCtxt<'_>, body: &Body<'_>, w: &mut dyn Write) -> io::Result<()> {
+ use rustc_hir::def::DefKind;
+
+ trace!("write_mir_sig: {:?}", body.source.instance);
+ let def_id = body.source.def_id();
+ let kind = tcx.def_kind(def_id);
+ let is_function = match kind {
+ DefKind::Fn | DefKind::AssocFn | DefKind::Ctor(..) => true,
+ _ => tcx.is_closure(def_id),
+ };
+ match (kind, body.source.promoted) {
+ (_, Some(i)) => write!(w, "{:?} in ", i)?,
+ (DefKind::Const | DefKind::AssocConst, _) => write!(w, "const ")?,
+ (DefKind::Static(hir::Mutability::Not), _) => write!(w, "static ")?,
+ (DefKind::Static(hir::Mutability::Mut), _) => write!(w, "static mut ")?,
+ (_, _) if is_function => write!(w, "fn ")?,
+ (DefKind::AnonConst | DefKind::InlineConst, _) => {} // things like anon const, not an item
+ _ => bug!("Unexpected def kind {:?}", kind),
+ }
+
+ ty::print::with_forced_impl_filename_line! {
+ // see notes on #41697 elsewhere
+ write!(w, "{}", tcx.def_path_str(def_id))?
+ }
+
+ if body.source.promoted.is_none() && is_function {
+ write!(w, "(")?;
+
+ // fn argument types.
+ for (i, arg) in body.args_iter().enumerate() {
+ if i != 0 {
+ write!(w, ", ")?;
+ }
+ write!(w, "{:?}: {}", Place::from(arg), body.local_decls[arg].ty)?;
+ }
+
+ write!(w, ") -> {}", body.return_ty())?;
+ } else {
+ assert_eq!(body.arg_count, 0);
+ write!(w, ": {} =", body.return_ty())?;
+ }
+
+ if let Some(yield_ty) = body.yield_ty() {
+ writeln!(w)?;
+ writeln!(w, "yields {}", yield_ty)?;
+ }
+
+ write!(w, " ")?;
+ // Next thing that gets printed is the opening {
+
+ Ok(())
+}
+
+fn write_user_type_annotations(
+ tcx: TyCtxt<'_>,
+ body: &Body<'_>,
+ w: &mut dyn Write,
+) -> io::Result<()> {
+ if !body.user_type_annotations.is_empty() {
+ writeln!(w, "| User Type Annotations")?;
+ }
+ for (index, annotation) in body.user_type_annotations.iter_enumerated() {
+ writeln!(
+ w,
+ "| {:?}: user_ty: {:?}, span: {}, inferred_ty: {:?}",
+ index.index(),
+ annotation.user_ty,
+ tcx.sess.source_map().span_to_embeddable_string(annotation.span),
+ annotation.inferred_ty,
+ )?;
+ }
+ if !body.user_type_annotations.is_empty() {
+ writeln!(w, "|")?;
+ }
+ Ok(())
+}
+
+pub fn dump_mir_def_ids(tcx: TyCtxt<'_>, single: Option<DefId>) -> Vec<DefId> {
+ if let Some(i) = single {
+ vec![i]
+ } else {
+ tcx.mir_keys(()).iter().map(|def_id| def_id.to_def_id()).collect()
+ }
+}
+
+/// Calculates the number of characters needed to print `x` as a hexadecimal number.
+///
+/// ```ignore (cannot-test-private-function)
+/// assert_eq!(1, hex_number_length(0));
+/// assert_eq!(1, hex_number_length(1));
+/// assert_eq!(2, hex_number_length(16));
+/// ```
+fn hex_number_length(x: u64) -> usize {
+ if x == 0 {
+ return 1;
+ }
+ let mut length = 0;
+ let mut x_left = x;
+ while x_left > 0 {
+ x_left /= 16;
+ length += 1;
+ }
+ length
+}
diff --git a/compiler/rustc_middle/src/mir/query.rs b/compiler/rustc_middle/src/mir/query.rs
new file mode 100644
index 000000000..dd9f8795f
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/query.rs
@@ -0,0 +1,476 @@
+//! Values computed by queries that use MIR.
+ +use crate::mir::{Body, ConstantKind, Promoted}; +use crate::ty::{self, OpaqueHiddenType, Ty, TyCtxt}; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::vec_map::VecMap; +use rustc_errors::ErrorGuaranteed; +use rustc_hir as hir; +use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_index::bit_set::BitMatrix; +use rustc_index::vec::IndexVec; +use rustc_span::Span; +use rustc_target::abi::VariantIdx; +use smallvec::SmallVec; +use std::cell::Cell; +use std::fmt::{self, Debug}; + +use super::{Field, SourceInfo}; + +#[derive(Copy, Clone, PartialEq, TyEncodable, TyDecodable, HashStable, Debug)] +pub enum UnsafetyViolationKind { + /// Unsafe operation outside `unsafe`. + General, + /// Unsafe operation in an `unsafe fn` but outside an `unsafe` block. + /// Has to be handled as a lint for backwards compatibility. + UnsafeFn, +} + +#[derive(Copy, Clone, PartialEq, TyEncodable, TyDecodable, HashStable, Debug)] +pub enum UnsafetyViolationDetails { + CallToUnsafeFunction, + UseOfInlineAssembly, + InitializingTypeWith, + CastOfPointerToInt, + UseOfMutableStatic, + UseOfExternStatic, + DerefOfRawPointer, + AccessToUnionField, + MutationOfLayoutConstrainedField, + BorrowOfLayoutConstrainedField, + CallToFunctionWith, +} + +impl UnsafetyViolationDetails { + pub fn description_and_note(&self) -> (&'static str, &'static str) { + use UnsafetyViolationDetails::*; + match self { + CallToUnsafeFunction => ( + "call to unsafe function", + "consult the function's documentation for information on how to avoid undefined \ + behavior", + ), + UseOfInlineAssembly => ( + "use of inline assembly", + "inline assembly is entirely unchecked and can cause undefined behavior", + ), + InitializingTypeWith => ( + "initializing type with `rustc_layout_scalar_valid_range` attr", + "initializing a layout restricted type's field with a value outside the valid \ + range is undefined behavior", + ), + CastOfPointerToInt => { + ("cast of pointer to int", "casting pointers to integers in constants") + } + UseOfMutableStatic => ( + "use of mutable static", + "mutable statics can be mutated by multiple threads: aliasing violations or data \ + races will cause undefined behavior", + ), + UseOfExternStatic => ( + "use of extern static", + "extern statics are not controlled by the Rust type system: invalid data, \ + aliasing violations or data races will cause undefined behavior", + ), + DerefOfRawPointer => ( + "dereference of raw pointer", + "raw pointers may be null, dangling or unaligned; they can violate aliasing rules \ + and cause data races: all of these are undefined behavior", + ), + AccessToUnionField => ( + "access to union field", + "the field may not be properly initialized: using uninitialized data will cause \ + undefined behavior", + ), + MutationOfLayoutConstrainedField => ( + "mutation of layout constrained field", + "mutating layout constrained fields cannot statically be checked for valid values", + ), + BorrowOfLayoutConstrainedField => ( + "borrow of layout constrained field with interior mutability", + "references to fields of layout constrained fields lose the constraints. 
Coupled \ + with interior mutability, the field can be changed to invalid values", + ), + CallToFunctionWith => ( + "call to function with `#[target_feature]`", + "can only be called if the required target features are available", + ), + } + } +} + +#[derive(Copy, Clone, PartialEq, TyEncodable, TyDecodable, HashStable, Debug)] +pub struct UnsafetyViolation { + pub source_info: SourceInfo, + pub lint_root: hir::HirId, + pub kind: UnsafetyViolationKind, + pub details: UnsafetyViolationDetails, +} + +#[derive(Copy, Clone, PartialEq, TyEncodable, TyDecodable, HashStable, Debug)] +pub enum UnusedUnsafe { + /// `unsafe` block contains no unsafe operations + /// > ``unnecessary `unsafe` block`` + Unused, + /// `unsafe` block nested under another (used) `unsafe` block + /// > ``… because it's nested under this `unsafe` block`` + InUnsafeBlock(hir::HirId), + /// `unsafe` block nested under `unsafe fn` + /// > ``… because it's nested under this `unsafe fn` `` + /// + /// the second HirId here indicates the first usage of the `unsafe` block, + /// which allows retrieval of the LintLevelSource for why that operation would + /// have been permitted without the block + InUnsafeFn(hir::HirId, hir::HirId), +} + +#[derive(Copy, Clone, PartialEq, TyEncodable, TyDecodable, HashStable, Debug)] +pub enum UsedUnsafeBlockData { + SomeDisallowedInUnsafeFn, + // the HirId here indicates the first usage of the `unsafe` block + // (i.e. the one that's first encountered in the MIR traversal of the unsafety check) + AllAllowedInUnsafeFn(hir::HirId), +} + +#[derive(TyEncodable, TyDecodable, HashStable, Debug)] +pub struct UnsafetyCheckResult { + /// Violations that are propagated *upwards* from this function. + pub violations: Vec, + + /// Used `unsafe` blocks in this function. This is used for the "unused_unsafe" lint. + /// + /// The keys are the used `unsafe` blocks, the UnusedUnsafeKind indicates whether + /// or not any of the usages happen at a place that doesn't allow `unsafe_op_in_unsafe_fn`. + pub used_unsafe_blocks: FxHashMap, + + /// This is `Some` iff the item is not a closure. + pub unused_unsafes: Option>, +} + +rustc_index::newtype_index! { + pub struct GeneratorSavedLocal { + derive [HashStable] + DEBUG_FORMAT = "_{}", + } +} + +/// The layout of generator state. +#[derive(Clone, TyEncodable, TyDecodable, HashStable, TypeFoldable, TypeVisitable)] +pub struct GeneratorLayout<'tcx> { + /// The type of every local stored inside the generator. + pub field_tys: IndexVec>, + + /// Which of the above fields are in each variant. Note that one field may + /// be stored in multiple variants. + pub variant_fields: IndexVec>, + + /// The source that led to each variant being created (usually, a yield or + /// await). + pub variant_source_info: IndexVec, + + /// Which saved locals are storage-live at the same time. Locals that do not + /// have conflicts with each other are allowed to overlap in the computed + /// layout. + pub storage_conflicts: BitMatrix, +} + +impl Debug for GeneratorLayout<'_> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + /// Prints an iterator of (key, value) tuples as a map. 
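The `MapPrinter` helper defined just below leans on a trick worth isolating: `Debug::fmt` only receives `&self`, but draining an iterator needs ownership, so the iterator is parked in a `Cell<Option<Box<dyn Iterator>>>` and taken out on the first (and only) format call. A standalone sketch of the same pattern, with a hypothetical `IterOnce` type standing in for `MapPrinter`:

```rust
use std::cell::Cell;
use std::fmt::{self, Debug};

// The iterator lives in a Cell<Option<...>> so that `fmt`, which only has
// `&self`, can still take ownership of it via `Cell::take`.
struct IterOnce<I>(Cell<Option<I>>);

impl<I: Iterator<Item = u32>> Debug for IterOnce<I> {
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt.debug_list().entries(self.0.take().unwrap()).finish()
    }
}

fn main() {
    let printer = IterOnce(Cell::new(Some(0..4u32)));
    assert_eq!(format!("{:?}", printer), "[0, 1, 2, 3]");
    // A second format call would panic: the iterator has been consumed.
}
```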
+ struct MapPrinter<'a, K, V>(Cell + 'a>>>); + impl<'a, K, V> MapPrinter<'a, K, V> { + fn new(iter: impl Iterator + 'a) -> Self { + Self(Cell::new(Some(Box::new(iter)))) + } + } + impl<'a, K: Debug, V: Debug> Debug for MapPrinter<'a, K, V> { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_map().entries(self.0.take().unwrap()).finish() + } + } + + /// Prints the generator variant name. + struct GenVariantPrinter(VariantIdx); + impl From for GenVariantPrinter { + fn from(idx: VariantIdx) -> Self { + GenVariantPrinter(idx) + } + } + impl Debug for GenVariantPrinter { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + let variant_name = ty::GeneratorSubsts::variant_name(self.0); + if fmt.alternate() { + write!(fmt, "{:9}({:?})", variant_name, self.0) + } else { + write!(fmt, "{}", variant_name) + } + } + } + + /// Forces its contents to print in regular mode instead of alternate mode. + struct OneLinePrinter(T); + impl Debug for OneLinePrinter { + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(fmt, "{:?}", self.0) + } + } + + fmt.debug_struct("GeneratorLayout") + .field("field_tys", &MapPrinter::new(self.field_tys.iter_enumerated())) + .field( + "variant_fields", + &MapPrinter::new( + self.variant_fields + .iter_enumerated() + .map(|(k, v)| (GenVariantPrinter(k), OneLinePrinter(v))), + ), + ) + .field("storage_conflicts", &self.storage_conflicts) + .finish() + } +} + +#[derive(Debug, TyEncodable, TyDecodable, HashStable)] +pub struct BorrowCheckResult<'tcx> { + /// All the opaque types that are restricted to concrete types + /// by this function. Unlike the value in `TypeckResults`, this has + /// unerased regions. + pub concrete_opaque_types: VecMap>, + pub closure_requirements: Option>, + pub used_mut_upvars: SmallVec<[Field; 8]>, + pub tainted_by_errors: Option, +} + +/// The result of the `mir_const_qualif` query. +/// +/// Each field (except `error_occurred`) corresponds to an implementer of the `Qualif` trait in +/// `rustc_const_eval/src/transform/check_consts/qualifs.rs`. See that file for more information on each +/// `Qualif`. +#[derive(Clone, Copy, Debug, Default, TyEncodable, TyDecodable, HashStable)] +pub struct ConstQualifs { + pub has_mut_interior: bool, + pub needs_drop: bool, + pub needs_non_const_drop: bool, + pub custom_eq: bool, + pub tainted_by_errors: Option, +} + +/// After we borrow check a closure, we are left with various +/// requirements that we have inferred between the free regions that +/// appear in the closure's signature or on its field types. These +/// requirements are then verified and proved by the closure's +/// creating function. This struct encodes those requirements. +/// +/// The requirements are listed as being between various `RegionVid`. The 0th +/// region refers to `'static`; subsequent region vids refer to the free +/// regions that appear in the closure (or generator's) type, in order of +/// appearance. (This numbering is actually defined by the `UniversalRegions` +/// struct in the NLL region checker. See for example +/// `UniversalRegions::closure_mapping`.) Note the free regions in the +/// closure's signature and captures are erased. +/// +/// Example: If type check produces a closure with the closure substs: +/// +/// ```text +/// ClosureSubsts = [ +/// 'a, // From the parent. 
+/// 'b, +/// i8, // the "closure kind" +/// for<'x> fn(&' &'x u32) -> &'x u32, // the "closure signature" +/// &' String, // some upvar +/// ] +/// ``` +/// +/// We would "renumber" each free region to a unique vid, as follows: +/// +/// ```text +/// ClosureSubsts = [ +/// '1, // From the parent. +/// '2, +/// i8, // the "closure kind" +/// for<'x> fn(&'3 &'x u32) -> &'x u32, // the "closure signature" +/// &'4 String, // some upvar +/// ] +/// ``` +/// +/// Now the code might impose a requirement like `'1: '2`. When an +/// instance of the closure is created, the corresponding free regions +/// can be extracted from its type and constrained to have the given +/// outlives relationship. +/// +/// In some cases, we have to record outlives requirements between types and +/// regions as well. In that case, if those types include any regions, those +/// regions are recorded using their external names (`ReStatic`, +/// `ReEarlyBound`, `ReFree`). We use these because in a query response we +/// cannot use `ReVar` (which is what we use internally within the rest of the +/// NLL code). +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub struct ClosureRegionRequirements<'tcx> { + /// The number of external regions defined on the closure. In our + /// example above, it would be 3 -- one for `'static`, then `'1` + /// and `'2`. This is just used for a sanity check later on, to + /// make sure that the number of regions we see at the callsite + /// matches. + pub num_external_vids: usize, + + /// Requirements between the various free regions defined in + /// indices. + pub outlives_requirements: Vec>, +} + +/// Indicates an outlives-constraint between a type or between two +/// free regions declared on the closure. +#[derive(Copy, Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub struct ClosureOutlivesRequirement<'tcx> { + // This region or type ... + pub subject: ClosureOutlivesSubject<'tcx>, + + // ... must outlive this one. + pub outlived_free_region: ty::RegionVid, + + // If not, report an error here ... + pub blame_span: Span, + + // ... due to this reason. + pub category: ConstraintCategory<'tcx>, +} + +// Make sure this enum doesn't unintentionally grow +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(ConstraintCategory<'_>, 16); + +/// Outlives-constraints can be categorized to determine whether and why they +/// are interesting (for error reporting). Order of variants indicates sort +/// order of the category, thereby influencing diagnostic output. +/// +/// See also `rustc_const_eval::borrow_check::constraints`. +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] +#[derive(TyEncodable, TyDecodable, HashStable)] +pub enum ConstraintCategory<'tcx> { + Return(ReturnConstraint), + Yield, + UseAsConst, + UseAsStatic, + TypeAnnotation, + Cast, + + /// A constraint that came from checking the body of a closure. + /// + /// We try to get the category that the closure used when reporting this. + ClosureBounds, + + /// Contains the function type if available. + CallArgument(Option>), + CopyBound, + SizedBound, + Assignment, + /// A constraint that came from a usage of a variable (e.g. 
in an ADT expression + /// like `Foo { field: my_val }`) + Usage, + OpaqueType, + ClosureUpvar(Field), + + /// A constraint from a user-written predicate + /// with the provided span, written on the item + /// with the given `DefId` + Predicate(Span), + + /// A "boring" constraint (caused by the given location) is one that + /// the user probably doesn't want to see described in diagnostics, + /// because it is kind of an artifact of the type system setup. + Boring, + // Boring and applicable everywhere. + BoringNoLocation, + + /// A constraint that doesn't correspond to anything the user sees. + Internal, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] +#[derive(TyEncodable, TyDecodable, HashStable)] +pub enum ReturnConstraint { + Normal, + ClosureUpvar(Field), +} + +/// The subject of a `ClosureOutlivesRequirement` -- that is, the thing +/// that must outlive some region. +#[derive(Copy, Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub enum ClosureOutlivesSubject<'tcx> { + /// Subject is a type, typically a type parameter, but could also + /// be a projection. Indicates a requirement like `T: 'a` being + /// passed to the caller, where the type here is `T`. + /// + /// The type here is guaranteed not to contain any free regions at + /// present. + Ty(Ty<'tcx>), + + /// Subject is a free region from the closure. Indicates a requirement + /// like `'a: 'b` being passed to the caller; the region here is `'a`. + Region(ty::RegionVid), +} + +/// The constituent parts of a type level constant of kind ADT or array. +#[derive(Copy, Clone, Debug, HashStable)] +pub struct DestructuredConst<'tcx> { + pub variant: Option, + pub fields: &'tcx [ty::Const<'tcx>], +} + +/// The constituent parts of a mir constant of kind ADT or array. +#[derive(Copy, Clone, Debug, HashStable)] +pub struct DestructuredMirConstant<'tcx> { + pub variant: Option, + pub fields: &'tcx [ConstantKind<'tcx>], +} + +/// Coverage information summarized from a MIR if instrumented for source code coverage (see +/// compiler option `-Cinstrument-coverage`). This information is generated by the +/// `InstrumentCoverage` MIR pass and can be retrieved via the `coverageinfo` query. +#[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable)] +pub struct CoverageInfo { + /// The total number of coverage region counters added to the MIR `Body`. + pub num_counters: u32, + + /// The total number of coverage region counter expressions added to the MIR `Body`. + pub num_expressions: u32, +} + +/// Shims which make dealing with `WithOptConstParam` easier. +/// +/// For more information on why this is needed, consider looking +/// at the docs for `WithOptConstParam` itself. 
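All three shims in the `impl` block that follows share one dispatch shape: if the extra const-param key is present, call the `_const_arg` flavor of the query; otherwise fall back to the plain one. Reduced to a runnable sketch with hypothetical stand-ins (`WithOptParam` and the `qualif_*` functions are illustrative, not rustc APIs):

```rust
// Hypothetical stand-ins for a query pair wrapped by one of the shims.
#[derive(Clone, Copy)]
struct WithOptParam {
    did: u32,
    param_did: Option<u32>,
}

fn qualif_plain(did: u32) -> String {
    format!("qualif({did})")
}

fn qualif_const_arg(key: (u32, u32)) -> String {
    format!("qualif({}, {})", key.0, key.1)
}

// The shim picks a query based on whether the extra key is present, the same
// shape as `mir_const_qualif_opt_const_arg` below.
fn qualif_opt_const_arg(def: WithOptParam) -> String {
    if let Some(param_did) = def.param_did {
        qualif_const_arg((def.did, param_did))
    } else {
        qualif_plain(def.did)
    }
}

fn main() {
    assert_eq!(qualif_opt_const_arg(WithOptParam { did: 7, param_did: None }), "qualif(7)");
    assert_eq!(qualif_opt_const_arg(WithOptParam { did: 7, param_did: Some(3) }), "qualif(7, 3)");
}
```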
+impl<'tcx> TyCtxt<'tcx> { + #[inline] + pub fn mir_const_qualif_opt_const_arg( + self, + def: ty::WithOptConstParam, + ) -> ConstQualifs { + if let Some(param_did) = def.const_param_did { + self.mir_const_qualif_const_arg((def.did, param_did)) + } else { + self.mir_const_qualif(def.did) + } + } + + #[inline] + pub fn promoted_mir_opt_const_arg( + self, + def: ty::WithOptConstParam, + ) -> &'tcx IndexVec> { + if let Some((did, param_did)) = def.as_const_arg() { + self.promoted_mir_of_const_arg((did, param_did)) + } else { + self.promoted_mir(def.did) + } + } + + #[inline] + pub fn mir_for_ctfe_opt_const_arg(self, def: ty::WithOptConstParam) -> &'tcx Body<'tcx> { + if let Some((did, param_did)) = def.as_const_arg() { + self.mir_for_ctfe_of_const_arg((did, param_did)) + } else { + self.mir_for_ctfe(def.did) + } + } +} diff --git a/compiler/rustc_middle/src/mir/spanview.rs b/compiler/rustc_middle/src/mir/spanview.rs new file mode 100644 index 000000000..4418b848e --- /dev/null +++ b/compiler/rustc_middle/src/mir/spanview.rs @@ -0,0 +1,691 @@ +use rustc_hir::def_id::DefId; +use rustc_middle::hir; +use rustc_middle::mir::*; +use rustc_middle::ty::TyCtxt; +use rustc_session::config::MirSpanview; +use rustc_span::{BytePos, Pos, Span, SyntaxContext}; + +use std::cmp; +use std::io::{self, Write}; + +pub const TOOLTIP_INDENT: &str = " "; + +const CARET: char = '\u{2038}'; // Unicode `CARET` +const ANNOTATION_LEFT_BRACKET: char = '\u{298a}'; // Unicode `Z NOTATION RIGHT BINDING BRACKET +const ANNOTATION_RIGHT_BRACKET: char = '\u{2989}'; // Unicode `Z NOTATION LEFT BINDING BRACKET` +const NEW_LINE_SPAN: &str = "\n"; +const HEADER: &str = r#" + +"#; +const START_BODY: &str = r#" +"#; +const FOOTER: &str = r#" +"#; + +const STYLE_SECTION: &str = r#""#; + +/// Metadata to highlight the span of a MIR BasicBlock, Statement, or Terminator. +#[derive(Clone, Debug)] +pub struct SpanViewable { + pub bb: BasicBlock, + pub span: Span, + pub id: String, + pub tooltip: String, +} + +/// Write a spanview HTML+CSS file to analyze MIR element spans. +pub fn write_mir_fn_spanview<'tcx, W>( + tcx: TyCtxt<'tcx>, + body: &Body<'tcx>, + spanview: MirSpanview, + title: &str, + w: &mut W, +) -> io::Result<()> +where + W: Write, +{ + let def_id = body.source.def_id(); + let hir_body = hir_body(tcx, def_id); + if hir_body.is_none() { + return Ok(()); + } + let body_span = hir_body.unwrap().value.span; + let mut span_viewables = Vec::new(); + for (bb, data) in body.basic_blocks().iter_enumerated() { + match spanview { + MirSpanview::Statement => { + for (i, statement) in data.statements.iter().enumerate() { + if let Some(span_viewable) = + statement_span_viewable(tcx, body_span, bb, i, statement) + { + span_viewables.push(span_viewable); + } + } + if let Some(span_viewable) = terminator_span_viewable(tcx, body_span, bb, data) { + span_viewables.push(span_viewable); + } + } + MirSpanview::Terminator => { + if let Some(span_viewable) = terminator_span_viewable(tcx, body_span, bb, data) { + span_viewables.push(span_viewable); + } + } + MirSpanview::Block => { + if let Some(span_viewable) = block_span_viewable(tcx, body_span, bb, data) { + span_viewables.push(span_viewable); + } + } + } + } + write_document(tcx, fn_span(tcx, def_id), span_viewables, title, w)?; + Ok(()) +} + +/// Generate a spanview HTML+CSS document for the given local function `def_id`, and a pre-generated +/// list `SpanViewable`s. 
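One point about the spanview code that follows: the comparator inside `write_document` below is easy to misread, because `lo()` sorts ascending while `hi()` sorts *descending*. An enclosing span therefore always comes before the spans nested inside it, and the nested ones land on higher layers. The same ordering distilled onto plain `(lo, hi)` pairs (illustrative only):

```rust
fn main() {
    // (lo, hi) byte offsets standing in for Span::lo()/Span::hi().
    let mut spans = [(4u32, 6u32), (0, 10), (0, 3), (4, 10)];
    // lo ascending; on ties, hi *descending*, so the longest span wins.
    spans.sort_unstable_by(|a, b| {
        if a.0 == b.0 { b.1.cmp(&a.1) } else { a.0.cmp(&b.0) }
    });
    assert_eq!(spans, [(0, 10), (0, 3), (4, 10), (4, 6)]);
}
```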
+pub fn write_document<'tcx, W>( + tcx: TyCtxt<'tcx>, + spanview_span: Span, + mut span_viewables: Vec, + title: &str, + w: &mut W, +) -> io::Result<()> +where + W: Write, +{ + let mut from_pos = spanview_span.lo(); + let end_pos = spanview_span.hi(); + let source_map = tcx.sess.source_map(); + let start = source_map.lookup_char_pos(from_pos); + let indent_to_initial_start_col = " ".repeat(start.col.to_usize()); + debug!( + "spanview_span={:?}; source is:\n{}{}", + spanview_span, + indent_to_initial_start_col, + source_map.span_to_snippet(spanview_span).expect("function should have printable source") + ); + writeln!(w, "{}", HEADER)?; + writeln!(w, "{}", title)?; + writeln!(w, "{}", STYLE_SECTION)?; + writeln!(w, "{}", START_BODY)?; + write!( + w, + r#"
{}"#, + start.line - 1, + indent_to_initial_start_col, + )?; + span_viewables.sort_unstable_by(|a, b| { + let a = a.span; + let b = b.span; + if a.lo() == b.lo() { + // Sort hi() in reverse order so shorter spans are attempted after longer spans. + // This should give shorter spans a higher "layer", so they are not covered by + // the longer spans. + b.hi().partial_cmp(&a.hi()) + } else { + a.lo().partial_cmp(&b.lo()) + } + .unwrap() + }); + let mut ordered_viewables = &span_viewables[..]; + const LOWEST_VIEWABLE_LAYER: usize = 1; + let mut alt = false; + while ordered_viewables.len() > 0 { + debug!( + "calling write_next_viewable with from_pos={}, end_pos={}, and viewables len={}", + from_pos.to_usize(), + end_pos.to_usize(), + ordered_viewables.len() + ); + let curr_id = &ordered_viewables[0].id; + let (next_from_pos, next_ordered_viewables) = write_next_viewable_with_overlaps( + tcx, + from_pos, + end_pos, + ordered_viewables, + alt, + LOWEST_VIEWABLE_LAYER, + w, + )?; + debug!( + "DONE calling write_next_viewable, with new from_pos={}, \ + and remaining viewables len={}", + next_from_pos.to_usize(), + next_ordered_viewables.len() + ); + assert!( + from_pos != next_from_pos || ordered_viewables.len() != next_ordered_viewables.len(), + "write_next_viewable_with_overlaps() must make a state change" + ); + from_pos = next_from_pos; + if next_ordered_viewables.len() != ordered_viewables.len() { + ordered_viewables = next_ordered_viewables; + if let Some(next_ordered_viewable) = ordered_viewables.first() { + if &next_ordered_viewable.id != curr_id { + alt = !alt; + } + } + } + } + if from_pos < end_pos { + write_coverage_gap(tcx, from_pos, end_pos, w)?; + } + writeln!(w, r#"
"#)?; + writeln!(w, "{}", FOOTER)?; + Ok(()) +} + +/// Format a string showing the start line and column, and end line and column within a file. +pub fn source_range_no_file<'tcx>(tcx: TyCtxt<'tcx>, span: Span) -> String { + let source_map = tcx.sess.source_map(); + let start = source_map.lookup_char_pos(span.lo()); + let end = source_map.lookup_char_pos(span.hi()); + format!("{}:{}-{}:{}", start.line, start.col.to_usize() + 1, end.line, end.col.to_usize() + 1) +} + +pub fn statement_kind_name(statement: &Statement<'_>) -> &'static str { + use StatementKind::*; + match statement.kind { + Assign(..) => "Assign", + FakeRead(..) => "FakeRead", + SetDiscriminant { .. } => "SetDiscriminant", + Deinit(..) => "Deinit", + StorageLive(..) => "StorageLive", + StorageDead(..) => "StorageDead", + Retag(..) => "Retag", + AscribeUserType(..) => "AscribeUserType", + Coverage(..) => "Coverage", + CopyNonOverlapping(..) => "CopyNonOverlapping", + Nop => "Nop", + } +} + +pub fn terminator_kind_name(term: &Terminator<'_>) -> &'static str { + use TerminatorKind::*; + match term.kind { + Goto { .. } => "Goto", + SwitchInt { .. } => "SwitchInt", + Resume => "Resume", + Abort => "Abort", + Return => "Return", + Unreachable => "Unreachable", + Drop { .. } => "Drop", + DropAndReplace { .. } => "DropAndReplace", + Call { .. } => "Call", + Assert { .. } => "Assert", + Yield { .. } => "Yield", + GeneratorDrop => "GeneratorDrop", + FalseEdge { .. } => "FalseEdge", + FalseUnwind { .. } => "FalseUnwind", + InlineAsm { .. } => "InlineAsm", + } +} + +fn statement_span_viewable<'tcx>( + tcx: TyCtxt<'tcx>, + body_span: Span, + bb: BasicBlock, + i: usize, + statement: &Statement<'tcx>, +) -> Option { + let span = statement.source_info.span; + if !body_span.contains(span) { + return None; + } + let id = format!("{}[{}]", bb.index(), i); + let tooltip = tooltip(tcx, &id, span, vec![statement.clone()], &None); + Some(SpanViewable { bb, span, id, tooltip }) +} + +fn terminator_span_viewable<'tcx>( + tcx: TyCtxt<'tcx>, + body_span: Span, + bb: BasicBlock, + data: &BasicBlockData<'tcx>, +) -> Option { + let term = data.terminator(); + let span = term.source_info.span; + if !body_span.contains(span) { + return None; + } + let id = format!("{}:{}", bb.index(), terminator_kind_name(term)); + let tooltip = tooltip(tcx, &id, span, vec![], &data.terminator); + Some(SpanViewable { bb, span, id, tooltip }) +} + +fn block_span_viewable<'tcx>( + tcx: TyCtxt<'tcx>, + body_span: Span, + bb: BasicBlock, + data: &BasicBlockData<'tcx>, +) -> Option { + let span = compute_block_span(data, body_span); + if !body_span.contains(span) { + return None; + } + let id = format!("{}", bb.index()); + let tooltip = tooltip(tcx, &id, span, data.statements.clone(), &data.terminator); + Some(SpanViewable { bb, span, id, tooltip }) +} + +fn compute_block_span<'tcx>(data: &BasicBlockData<'tcx>, body_span: Span) -> Span { + let mut span = data.terminator().source_info.span; + for statement_span in data.statements.iter().map(|statement| statement.source_info.span) { + // Only combine Spans from the root context, and within the function's body_span. + if statement_span.ctxt() == SyntaxContext::root() && body_span.contains(statement_span) { + span = span.to(statement_span); + } + } + span +} + +/// Recursively process each ordered span. Spans that overlap will have progressively varying +/// styles, such as increased padding for each overlap. 
Non-overlapping adjacent spans will +/// have alternating style choices, to help distinguish between them if, visually adjacent. +/// The `layer` is incremented for each overlap, and the `alt` bool alternates between true +/// and false, for each adjacent non-overlapping span. Source code between the spans (code +/// that is not in any coverage region) has neutral styling. +fn write_next_viewable_with_overlaps<'tcx, 'b, W>( + tcx: TyCtxt<'tcx>, + mut from_pos: BytePos, + mut to_pos: BytePos, + ordered_viewables: &'b [SpanViewable], + alt: bool, + layer: usize, + w: &mut W, +) -> io::Result<(BytePos, &'b [SpanViewable])> +where + W: Write, +{ + let debug_indent = " ".repeat(layer); + let (viewable, mut remaining_viewables) = + ordered_viewables.split_first().expect("ordered_viewables should have some"); + + if from_pos < viewable.span.lo() { + debug!( + "{}advance from_pos to next SpanViewable (from from_pos={} to viewable.span.lo()={} \ + of {:?}), with to_pos={}", + debug_indent, + from_pos.to_usize(), + viewable.span.lo().to_usize(), + viewable.span, + to_pos.to_usize() + ); + let hi = cmp::min(viewable.span.lo(), to_pos); + write_coverage_gap(tcx, from_pos, hi, w)?; + from_pos = hi; + if from_pos < viewable.span.lo() { + debug!( + "{}EARLY RETURN: stopped before getting to next SpanViewable, at {}", + debug_indent, + from_pos.to_usize() + ); + return Ok((from_pos, ordered_viewables)); + } + } + + if from_pos < viewable.span.hi() { + // Set to_pos to the end of this `viewable` to ensure the recursive calls stop writing + // with room to print the tail. + to_pos = cmp::min(viewable.span.hi(), to_pos); + debug!( + "{}update to_pos (if not closer) to viewable.span.hi()={}; to_pos is now {}", + debug_indent, + viewable.span.hi().to_usize(), + to_pos.to_usize() + ); + } + + let mut subalt = false; + while remaining_viewables.len() > 0 && remaining_viewables[0].span.overlaps(viewable.span) { + let overlapping_viewable = &remaining_viewables[0]; + debug!("{}overlapping_viewable.span={:?}", debug_indent, overlapping_viewable.span); + + let span = + trim_span(viewable.span, from_pos, cmp::min(overlapping_viewable.span.lo(), to_pos)); + let mut some_html_snippet = if from_pos <= viewable.span.hi() || viewable.span.is_empty() { + // `viewable` is not yet fully rendered, so start writing the span, up to either the + // `to_pos` or the next `overlapping_viewable`, whichever comes first. + debug!( + "{}make html_snippet (may not write it if early exit) for partial span {:?} \ + of viewable.span {:?}", + debug_indent, span, viewable.span + ); + from_pos = span.hi(); + make_html_snippet(tcx, span, Some(&viewable)) + } else { + None + }; + + // Defer writing the HTML snippet (until after early return checks) ONLY for empty spans. + // An empty Span with Some(html_snippet) is probably a tail marker. If there is an early + // exit, there should be another opportunity to write the tail marker. 
+ if !span.is_empty() { + if let Some(ref html_snippet) = some_html_snippet { + debug!( + "{}write html_snippet for that partial span of viewable.span {:?}", + debug_indent, viewable.span + ); + write_span(html_snippet, &viewable.tooltip, alt, layer, w)?; + } + some_html_snippet = None; + } + + if from_pos < overlapping_viewable.span.lo() { + debug!( + "{}EARLY RETURN: from_pos={} has not yet reached the \ + overlapping_viewable.span {:?}", + debug_indent, + from_pos.to_usize(), + overlapping_viewable.span + ); + // must have reached `to_pos` before reaching the start of the + // `overlapping_viewable.span` + return Ok((from_pos, ordered_viewables)); + } + + if from_pos == to_pos + && !(from_pos == overlapping_viewable.span.lo() && overlapping_viewable.span.is_empty()) + { + debug!( + "{}EARLY RETURN: from_pos=to_pos={} and overlapping_viewable.span {:?} is not \ + empty, or not from_pos", + debug_indent, + to_pos.to_usize(), + overlapping_viewable.span + ); + // `to_pos` must have occurred before the overlapping viewable. Return + // `ordered_viewables` so we can continue rendering the `viewable`, from after the + // `to_pos`. + return Ok((from_pos, ordered_viewables)); + } + + if let Some(ref html_snippet) = some_html_snippet { + debug!( + "{}write html_snippet for that partial span of viewable.span {:?}", + debug_indent, viewable.span + ); + write_span(html_snippet, &viewable.tooltip, alt, layer, w)?; + } + + debug!( + "{}recursively calling write_next_viewable with from_pos={}, to_pos={}, \ + and viewables len={}", + debug_indent, + from_pos.to_usize(), + to_pos.to_usize(), + remaining_viewables.len() + ); + // Write the overlaps (and the overlaps' overlaps, if any) up to `to_pos`. + let curr_id = &remaining_viewables[0].id; + let (next_from_pos, next_remaining_viewables) = write_next_viewable_with_overlaps( + tcx, + from_pos, + to_pos, + &remaining_viewables, + subalt, + layer + 1, + w, + )?; + debug!( + "{}DONE recursively calling write_next_viewable, with new from_pos={}, and remaining \ + viewables len={}", + debug_indent, + next_from_pos.to_usize(), + next_remaining_viewables.len() + ); + assert!( + from_pos != next_from_pos + || remaining_viewables.len() != next_remaining_viewables.len(), + "write_next_viewable_with_overlaps() must make a state change" + ); + from_pos = next_from_pos; + if next_remaining_viewables.len() != remaining_viewables.len() { + remaining_viewables = next_remaining_viewables; + if let Some(next_ordered_viewable) = remaining_viewables.first() { + if &next_ordered_viewable.id != curr_id { + subalt = !subalt; + } + } + } + } + if from_pos <= viewable.span.hi() { + let span = trim_span(viewable.span, from_pos, to_pos); + debug!( + "{}After overlaps, writing (end span?) 
{:?} of viewable.span {:?}", + debug_indent, span, viewable.span + ); + if let Some(ref html_snippet) = make_html_snippet(tcx, span, Some(&viewable)) { + from_pos = span.hi(); + write_span(html_snippet, &viewable.tooltip, alt, layer, w)?; + } + } + debug!("{}RETURN: No more overlap", debug_indent); + Ok(( + from_pos, + if from_pos < viewable.span.hi() { ordered_viewables } else { remaining_viewables }, + )) +} + +#[inline(always)] +fn write_coverage_gap<'tcx, W>( + tcx: TyCtxt<'tcx>, + lo: BytePos, + hi: BytePos, + w: &mut W, +) -> io::Result<()> +where + W: Write, +{ + let span = Span::with_root_ctxt(lo, hi); + if let Some(ref html_snippet) = make_html_snippet(tcx, span, None) { + write_span(html_snippet, "", false, 0, w) + } else { + Ok(()) + } +} + +fn write_span( + html_snippet: &str, + tooltip: &str, + alt: bool, + layer: usize, + w: &mut W, +) -> io::Result<()> +where + W: Write, +{ + let maybe_alt_class = if layer > 0 { + if alt { " odd" } else { " even" } + } else { + "" + }; + let maybe_title_attr = if !tooltip.is_empty() { + format!(" title=\"{}\"", escape_attr(tooltip)) + } else { + "".to_owned() + }; + if layer == 1 { + write!(w, "")?; + } + for (i, line) in html_snippet.lines().enumerate() { + if i > 0 { + write!(w, "{}", NEW_LINE_SPAN)?; + } + write!( + w, + r#"{}"#, + maybe_alt_class, layer, maybe_title_attr, line + )?; + } + // Check for and translate trailing newlines, because `str::lines()` ignores them + if html_snippet.ends_with('\n') { + write!(w, "{}", NEW_LINE_SPAN)?; + } + if layer == 1 { + write!(w, "")?; + } + Ok(()) +} + +fn make_html_snippet<'tcx>( + tcx: TyCtxt<'tcx>, + span: Span, + some_viewable: Option<&SpanViewable>, +) -> Option { + let source_map = tcx.sess.source_map(); + let snippet = source_map + .span_to_snippet(span) + .unwrap_or_else(|err| bug!("span_to_snippet error for span {:?}: {:?}", span, err)); + let html_snippet = if let Some(viewable) = some_viewable { + let is_head = span.lo() == viewable.span.lo(); + let is_tail = span.hi() == viewable.span.hi(); + let mut labeled_snippet = if is_head { + format!(r#"{}{}"#, viewable.id, ANNOTATION_LEFT_BRACKET) + } else { + "".to_owned() + }; + if span.is_empty() { + if is_head && is_tail { + labeled_snippet.push(CARET); + } + } else { + labeled_snippet.push_str(&escape_html(&snippet)); + }; + if is_tail { + labeled_snippet.push_str(&format!( + r#"{}{}"#, + ANNOTATION_RIGHT_BRACKET, viewable.id + )); + } + labeled_snippet + } else { + escape_html(&snippet) + }; + if html_snippet.is_empty() { None } else { Some(html_snippet) } +} + +fn tooltip<'tcx>( + tcx: TyCtxt<'tcx>, + spanview_id: &str, + span: Span, + statements: Vec>, + terminator: &Option>, +) -> String { + let source_map = tcx.sess.source_map(); + let mut text = Vec::new(); + text.push(format!("{}: {}:", spanview_id, &source_map.span_to_embeddable_string(span))); + for statement in statements { + let source_range = source_range_no_file(tcx, statement.source_info.span); + text.push(format!( + "\n{}{}: {}: {:?}", + TOOLTIP_INDENT, + source_range, + statement_kind_name(&statement), + statement + )); + } + if let Some(term) = terminator { + let source_range = source_range_no_file(tcx, term.source_info.span); + text.push(format!( + "\n{}{}: {}: {:?}", + TOOLTIP_INDENT, + source_range, + terminator_kind_name(term), + term.kind + )); + } + text.join("") +} + +fn trim_span(span: Span, from_pos: BytePos, to_pos: BytePos) -> Span { + trim_span_hi(trim_span_lo(span, from_pos), to_pos) +} + +fn trim_span_lo(span: Span, from_pos: BytePos) -> Span { + if 
from_pos <= span.lo() { span } else { span.with_lo(cmp::min(span.hi(), from_pos)) } +} + +fn trim_span_hi(span: Span, to_pos: BytePos) -> Span { + if to_pos >= span.hi() { span } else { span.with_hi(cmp::max(span.lo(), to_pos)) } +} + +fn fn_span<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> Span { + let fn_decl_span = tcx.def_span(def_id); + if let Some(body_span) = hir_body(tcx, def_id).map(|hir_body| hir_body.value.span) { + if fn_decl_span.eq_ctxt(body_span) { fn_decl_span.to(body_span) } else { body_span } + } else { + fn_decl_span + } +} + +fn hir_body<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> Option<&'tcx rustc_hir::Body<'tcx>> { + let hir_node = tcx.hir().get_if_local(def_id).expect("expected DefId is local"); + hir::map::associated_body(hir_node).map(|fn_body_id| tcx.hir().body(fn_body_id)) +} + +fn escape_html(s: &str) -> String { + s.replace('&', "&").replace('<', "<").replace('>', ">") +} + +fn escape_attr(s: &str) -> String { + s.replace('&', "&") + .replace('\"', """) + .replace('\'', "'") + .replace('<', "<") + .replace('>', ">") +} diff --git a/compiler/rustc_middle/src/mir/switch_sources.rs b/compiler/rustc_middle/src/mir/switch_sources.rs new file mode 100644 index 000000000..b91c0c257 --- /dev/null +++ b/compiler/rustc_middle/src/mir/switch_sources.rs @@ -0,0 +1,78 @@ +//! Lazily compute the inverse of each `SwitchInt`'s switch targets. Modeled after +//! `Predecessors`/`PredecessorCache`. + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::OnceCell; +use rustc_index::vec::IndexVec; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use smallvec::SmallVec; + +use crate::mir::{BasicBlock, BasicBlockData, Terminator, TerminatorKind}; + +pub type SwitchSources = FxHashMap<(BasicBlock, BasicBlock), SmallVec<[Option; 1]>>; + +#[derive(Clone, Debug)] +pub(super) struct SwitchSourceCache { + cache: OnceCell, +} + +impl SwitchSourceCache { + #[inline] + pub(super) fn new() -> Self { + SwitchSourceCache { cache: OnceCell::new() } + } + + /// Invalidates the switch source cache. + #[inline] + pub(super) fn invalidate(&mut self) { + self.cache = OnceCell::new(); + } + + /// Returns the switch sources for this MIR. + #[inline] + pub(super) fn compute( + &self, + basic_blocks: &IndexVec>, + ) -> &SwitchSources { + self.cache.get_or_init(|| { + let mut switch_sources: SwitchSources = FxHashMap::default(); + for (bb, data) in basic_blocks.iter_enumerated() { + if let Some(Terminator { + kind: TerminatorKind::SwitchInt { targets, .. }, .. + }) = &data.terminator + { + for (value, target) in targets.iter() { + switch_sources.entry((target, bb)).or_default().push(Some(value)); + } + switch_sources.entry((targets.otherwise(), bb)).or_default().push(None); + } + } + + switch_sources + }) + } +} + +impl Encodable for SwitchSourceCache { + #[inline] + fn encode(&self, _s: &mut S) {} +} + +impl Decodable for SwitchSourceCache { + #[inline] + fn decode(_: &mut D) -> Self { + Self::new() + } +} + +impl HashStable for SwitchSourceCache { + #[inline] + fn hash_stable(&self, _: &mut CTX, _: &mut StableHasher) { + // do nothing + } +} + +TrivialTypeTraversalAndLiftImpls! { + SwitchSourceCache, +} diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs new file mode 100644 index 000000000..eb90169d0 --- /dev/null +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -0,0 +1,1168 @@ +//! 
This defines the syntax of MIR, i.e., the set of available MIR operations, and other definitions
+//! closely related to MIR semantics.
+//! This is in a dedicated file so that changes to this file can be reviewed more carefully.
+//! The intention is that this file only contains datatype declarations, no code.
+
+use super::{BasicBlock, Constant, Field, Local, SwitchTargets, UserTypeProjection};
+
+use crate::mir::coverage::{CodeRegion, CoverageKind};
+use crate::ty::adjustment::PointerCast;
+use crate::ty::subst::SubstsRef;
+use crate::ty::{self, List, Ty};
+use crate::ty::{Region, UserTypeAnnotationIndex};
+
+use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
+use rustc_hir::def_id::DefId;
+use rustc_hir::{self as hir};
+use rustc_hir::{self, GeneratorKind};
+use rustc_target::abi::VariantIdx;
+
+use rustc_ast::Mutability;
+use rustc_span::def_id::LocalDefId;
+use rustc_span::symbol::Symbol;
+use rustc_span::Span;
+use rustc_target::asm::InlineAsmRegOrRegClass;
+
+/// The various "big phases" that MIR goes through.
+///
+/// These phases all describe dialects of MIR. Since all MIR uses the same data structures, the
+/// dialects forbid certain variants or values in certain phases. The sections below summarize the
+/// changes, but do not document them thoroughly. The full documentation is found in the appropriate
+/// documentation for the thing the change is affecting.
+///
+/// Warning: ordering of variants is significant.
+#[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, PartialOrd, Ord)]
+#[derive(HashStable)]
+pub enum MirPhase {
+ /// The dialect of MIR used during all phases before `DropsLowered` is the same. This is also
+ /// the MIR that analyses such as borrowck use.
+ ///
+ /// One important thing to remember about the behavior of this section of MIR is that drop terminators
+ /// (including drop and replace) are *conditional*. The elaborate drops pass will then replace each
+ /// instance of a drop terminator with a nop, an unconditional drop, or a drop conditioned on a drop
+ /// flag. Of course, this means that it is important that the drop elaboration can accurately recognize
+ /// when things are initialized and when things are de-initialized. That means any code running on this
+ /// version of MIR must be sure to produce output that drop elaboration can reason about. See the
+ /// section on the drop terminators for more details.
+ Built = 0,
+ // FIXME(oli-obk): it's unclear whether we still need this phase (and its corresponding query).
+ // We used to have this for pre-miri MIR based const eval.
+ Const = 1,
+ /// This phase checks the MIR for promotable elements and takes them out of the main MIR body
+ /// by creating a new MIR body per promoted element. After this phase (and thus the termination
+ /// of the `mir_promoted` query), these promoted elements are available in the `promoted_mir`
+ /// query.
+ ConstsPromoted = 2,
+ /// After this phase, projections may only contain deref projections as the first element.
+ Derefered = 3,
+ /// Beginning with this phase, the following variants are disallowed:
+ /// * [`TerminatorKind::DropAndReplace`]
+ /// * [`TerminatorKind::FalseUnwind`]
+ /// * [`TerminatorKind::FalseEdge`]
+ /// * [`StatementKind::FakeRead`]
+ /// * [`StatementKind::AscribeUserType`]
+ /// * [`Rvalue::Ref`] with `BorrowKind::Shallow`
+ ///
+ /// And the following variant is allowed:
+ /// * [`StatementKind::Retag`]
+ ///
+ /// Furthermore, `Drop` now uses explicit drop flags visible in the MIR and reaching a `Drop`
+ /// terminator means that the auto-generated drop glue will be invoked. Also, `Copy` operands
+ /// are allowed for non-`Copy` types.
+ DropsLowered = 4,
+ /// Beginning with this phase, the following variant is disallowed:
+ /// * [`Rvalue::Aggregate`] for any `AggregateKind` except `Array`
+ ///
+ /// And the following variant is allowed:
+ /// * [`StatementKind::SetDiscriminant`]
+ Deaggregated = 5,
+ /// Before this phase, generators are in the "source code" form, featuring `yield` statements
+ /// and such. With this phase change, they are transformed into a proper state machine. Running
+ /// optimizations before this change can be potentially dangerous because the source code is to
+ /// some extent a "lie." In particular, `yield` terminators effectively make the value of all
+ /// locals visible to the caller. This means that dead store elimination before them, or code
+ /// motion across them, is not correct in general. This is also exacerbated by type checking
+ /// having pre-computed a list of the types that it thinks are ok to be live across a yield
+ /// point - this is necessary to decide, e.g., whether autotraits are implemented. Introducing new
+ /// types across a yield point will lead to ICEs because of this.
+ ///
+ /// Beginning with this phase, the following variants are disallowed:
+ /// * [`TerminatorKind::Yield`]
+ /// * [`TerminatorKind::GeneratorDrop`]
+ /// * [`ProjectionElem::Deref`] of `Box`
+ GeneratorsLowered = 6,
+ Optimized = 7,
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Borrow kinds
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, TyEncodable, TyDecodable)]
+#[derive(Hash, HashStable)]
+pub enum BorrowKind {
+ /// Data must be immutable and is aliasable.
+ Shared,
+
+ /// The immediately borrowed place must be immutable, but projections from
+ /// it don't need to be. For example, a shallow borrow of `a.b` doesn't
+ /// conflict with a mutable borrow of `a.b.c`.
+ ///
+ /// This is used when lowering matches: when matching on a place we want to
+ /// ensure that the place has the same value from the start of the match until
+ /// an arm is selected. This prevents this code from compiling:
+ /// ```compile_fail,E0510
+ /// let mut x = &Some(0);
+ /// match *x {
+ /// None => (),
+ /// Some(_) if { x = &None; false } => (),
+ /// Some(_) => (),
+ /// }
+ /// ```
+ /// This can't be a shared borrow because mutably borrowing (*x as Some).0
+ /// should not prevent `if let None = x { ... }`, for example, because
+ /// mutating `(*x as Some).0` can't affect the discriminant of `x`.
+ /// We can also report errors with this kind of borrow differently.
+ Shallow,
+
+ /// Data must be immutable but not aliasable. This kind of borrow
+ /// cannot currently be expressed by the user and is used only in
+ /// implicit closure bindings. It is needed when the closure is
+ /// borrowing or mutating a mutable referent, e.g.:
+ /// ```
+ /// let mut z = 3;
+ /// let x: &mut isize = &mut z;
+ /// let y = || *x += 5;
+ /// ```
+ /// If we were to try to translate this closure into a more explicit
+ /// form, we'd encounter an error with the code as written:
+ /// ```compile_fail,E0594
+ /// struct Env<'a> { x: &'a &'a mut isize }
+ /// let mut z = 3;
+ /// let x: &mut isize = &mut z;
+ /// let y = (&mut Env { x: &x }, fn_ptr); // Closure is pair of env and fn
+ /// fn fn_ptr(env: &mut Env) { **env.x += 5; }
+ /// ```
+ /// This is then illegal because you cannot mutate an `&mut` found
+ /// in an aliasable location. To solve this, you'd have to translate with
+ /// an `&mut` borrow:
+ /// ```compile_fail,E0596
+ /// struct Env<'a> { x: &'a mut &'a mut isize }
+ /// let mut z = 3;
+ /// let x: &mut isize = &mut z;
+ /// let y = (&mut Env { x: &mut x }, fn_ptr); // changed from &x to &mut x
+ /// fn fn_ptr(env: &mut Env) { **env.x += 5; }
+ /// ```
+ /// Now the assignment to `**env.x` is legal, but creating a
+ /// mutable pointer to `x` is not because `x` is not mutable. We
+ /// could fix this by declaring `x` as `let mut x`. This is ok in
+ /// user code, if awkward, but extra weird for closures, since the
+ /// borrow is hidden.
+ ///
+ /// So we introduce a "unique imm" borrow -- the referent is
+ /// immutable, but not aliasable. This solves the problem. For
+ /// simplicity, we don't give users a way to express this
+ /// borrow; it's just used when translating closures.
+ Unique,
+
+ /// Data is mutable and not aliasable.
+ Mut {
+ /// `true` if this borrow arose from method-call auto-ref
+ /// (i.e., `adjustment::Adjust::Borrow`).
+ allow_two_phase_borrow: bool,
+ },
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Statements
+
+/// The various kinds of statements that can appear in MIR.
+///
+/// Not all of these are allowed at every [`MirPhase`]. Check the documentation there to see which
+/// ones you do not have to worry about. The MIR validator will generally enforce such restrictions,
+/// causing an ICE if they are violated.
+#[derive(Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
+#[derive(TypeFoldable, TypeVisitable)]
+pub enum StatementKind<'tcx> {
+ /// Assign statements roughly correspond to an assignment in Rust proper (`x = ...`) except
+ /// without the possibility of dropping the previous value (that must be done separately, if at
+ /// all). The *exact* way this works is undecided. It probably does something like evaluating
+ /// the LHS to a place and the RHS to a value, and then storing the value to the place. Various
+ /// parts of this may do type-specific things that are more complicated than simply copying
+ /// bytes.
+ ///
+ /// **Needs clarification**: The implication of the above idea would be that assignment implies
+ /// that the resulting value is initialized. I believe we could commit to this separately from
+ /// committing to whatever part of the memory model we would need to decide on to make the above
+ /// paragraph precise. Do we want to?
+ ///
+ /// Assignments in which the types of the place and rvalue differ are not well-formed.
+ ///
+ /// **Needs clarification**: Do we ever want to worry about non-free (in the body) lifetimes for
+ /// the typing requirement in post drop-elaboration MIR? I think probably not - I'm not sure we
+ /// could meaningfully require this anyway. How about free lifetimes? Is ignoring this
+    /// interesting for optimizations? Do we want to allow such optimizations?
+    ///
+    /// **Needs clarification**: We currently require that the LHS place not overlap with any place
+    /// read as part of computation of the RHS for some rvalues (generally those not producing
+    /// primitives). This requirement is under discussion in [#68364]. As a part of this discussion,
+    /// it is also unclear in what order the components are evaluated.
+    ///
+    /// [#68364]: https://github.com/rust-lang/rust/issues/68364
+    ///
+    /// See the [`Rvalue`] documentation for details on each of those.
+    Assign(Box<(Place<'tcx>, Rvalue<'tcx>)>),
+
+    /// This represents all the reading that a pattern match may do (e.g., inspecting constants and
+    /// discriminant values), and the kind of pattern it comes from. This is in order to adapt
+    /// potential error messages to these specific patterns.
+    ///
+    /// Note that this is also emitted for regular `let` bindings to ensure that locals that are
+    /// never accessed still get some sanity checks for, e.g., `let x: ! = ..;`.
+    ///
+    /// When executed at runtime this is a nop.
+    ///
+    /// Disallowed after drop elaboration.
+    FakeRead(Box<(FakeReadCause, Place<'tcx>)>),
+
+    /// Write the discriminant for a variant to the enum Place.
+    ///
+    /// This is permitted for both generators and ADTs. This does not necessarily write to the
+    /// entire place; instead, it writes to the minimum set of bytes as required by the layout for
+    /// the type.
+    SetDiscriminant { place: Box<Place<'tcx>>, variant_index: VariantIdx },
+
+    /// Deinitializes the place.
+    ///
+    /// This writes `uninit` bytes to the entire place.
+    Deinit(Box<Place<'tcx>>),
+
+    /// `StorageLive` and `StorageDead` statements mark the live range of a local.
+    ///
+    /// At any point during the execution of a function, each local is either allocated or
+    /// unallocated. Except as noted below, all locals except function parameters are initially
+    /// unallocated. `StorageLive` statements cause memory to be allocated for the local while
+    /// `StorageDead` statements cause the memory to be freed. Using a local in any way (not only
+    /// reading/writing from it) while it is unallocated is UB.
+    ///
+    /// Some locals have no `StorageLive` or `StorageDead` statements within the entire MIR body.
+    /// These locals are implicitly allocated for the full duration of the function. There is a
+    /// convenience method at `rustc_mir_dataflow::storage::always_storage_live_locals` for
+    /// computing these locals.
+    ///
+    /// If the local is already allocated, calling `StorageLive` again is UB. However, for an
+    /// unallocated local an additional `StorageDead` call is simply a nop.
+    StorageLive(Local),
+
+    /// See `StorageLive` above.
+    StorageDead(Local),
+
+    /// Retag references in the given place, ensuring they get fresh tags.
+    ///
+    /// This is part of the Stacked Borrows model. These statements are currently only interpreted
+    /// by miri and only generated when `-Z mir-emit-retag` is passed. See the Stacked Borrows
+    /// documentation for more details.
+    ///
+    /// For code that is not specific to stacked borrows, you should consider retags as reading
+    /// and modifying the place in an opaque way.
+    Retag(RetagKind, Box<Place<'tcx>>),
+
+    /// Encodes a user's type ascription. These need to be preserved
+    /// intact so that NLL can respect them. For example:
+    /// ```ignore (illustrative)
+    /// let a: T = y;
+    /// ```
+    /// The effect of this annotation is to relate the type `T_y` of the place `y`
+    /// to the user-given type `T`. The effect depends on the specified variance:
+    ///
+    /// - `Covariant` -- requires that `T_y <: T`
+    /// - `Contravariant` -- requires that `T_y :> T`
+    /// - `Invariant` -- requires that `T_y == T`
+    /// - `Bivariant` -- no effect
+    ///
+    /// When executed at runtime this is a nop.
+    ///
+    /// Disallowed after drop elaboration.
+    AscribeUserType(Box<(Place<'tcx>, UserTypeProjection)>, ty::Variance),
+
+    /// Marks the start of a "coverage region", injected with `-C instrument-coverage`. A
+    /// `Coverage` statement carries metadata about the coverage region, used to inject a coverage
+    /// map into the binary. If `Coverage::kind` is a `Counter`, the statement also generates
+    /// executable code that increments a counter variable at runtime each time the code region is
+    /// executed.
+    Coverage(Box<Coverage>),
+
+    /// Denotes a call to the intrinsic function `copy_nonoverlapping`.
+    ///
+    /// First, all three operands are evaluated. `src` and `dest` must each be a reference, pointer,
+    /// or `Box` pointing to the same type `T`. `count` must evaluate to a `usize`. Then, `src` and
+    /// `dest` are dereferenced, and `count * size_of::<T>()` bytes beginning with the first byte of
+    /// the `src` place are copied to the contiguous range of bytes beginning with the first byte
+    /// of `dest`.
+    ///
+    /// **Needs clarification**: In what order are operands computed and dereferenced? It should
+    /// probably match the order for assignment, but that is also undecided.
+    ///
+    /// **Needs clarification**: Is this typed or not, i.e., is there a typed load and store involved?
+    /// I vaguely remember Ralf saying somewhere that he thought it should not be.
+    CopyNonOverlapping(Box<CopyNonOverlapping<'tcx>>),
+
+    /// No-op. Useful for deleting instructions without affecting statement indices.
+    Nop,
+}
+
+/// Describes what kind of retag is to be performed.
+#[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, Hash, HashStable)]
+#[rustc_pass_by_value]
+pub enum RetagKind {
+    /// The initial retag when entering a function.
+    FnEntry,
+    /// Retag preparing for a two-phase borrow.
+    TwoPhase,
+    /// Retagging raw pointers.
+    Raw,
+    /// A "normal" retag.
+    Default,
+}
+
+/// The `FakeReadCause` describes the kind of pattern that caused a FakeRead statement to exist.
+#[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, Hash, HashStable, PartialEq)]
+pub enum FakeReadCause {
+    /// Inject a fake read of the borrowed input at the end of each guard's
+    /// code.
+    ///
+    /// This should ensure that you cannot change the variant for an enum while
+    /// you are in the midst of matching on it.
+    ForMatchGuard,
+
+    /// `let x: !; match x {}` doesn't generate any read of x so we need to
+    /// generate a read of x to check that it is initialized and safe.
+    ///
+    /// If a closure pattern matches a Place starting with an Upvar, then we introduce a
+    /// FakeRead for that Place outside the closure, in such a case this option would be
+    /// Some(closure_def_id).
+    /// Otherwise, the value of the optional LocalDefId will be None.
+    //
+    // We can use LocalDefId here since fake read statements are removed
+    // before codegen in the `CleanupNonCodegenStatements` pass.
+    ForMatchedPlace(Option<LocalDefId>),
+
+    /// A fake read of the RefWithinGuard version of a bind-by-value variable
+    /// in a match guard to ensure that its value hasn't changed by the time
+    /// we create the OutsideGuard version.
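+    ///
+    /// A hypothetical illustration (for exposition only, not from the original source):
+    /// ```ignore (illustrative)
+    /// let v = vec![1, 2, 3];
+    /// match v {
+    ///     // `w` binds by value; the guard reads it through a special
+    ///     // `RefWithinGuard` borrow, and this fake read checks that the
+    ///     // value is still intact when the `OutsideGuard` binding is made.
+    ///     w if w.len() > 2 => drop(w),
+    ///     _ => (),
+    /// }
+    /// ```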
+    ForGuardBinding,
+
+    /// Officially, the semantics of
+    ///
+    /// `let pattern = <expr>;`
+    ///
+    /// is that `<expr>` is evaluated into a temporary and then this temporary is
+    /// matched against the pattern.
+    ///
+    /// However, if we see the simple pattern `let var = <expr>`, we optimize this to
+    /// evaluate `<expr>` directly into the variable `var`. This is mostly unobservable,
+    /// but in some cases it can affect the borrow checker, as in #53695.
+    /// Therefore, we insert a "fake read" here to ensure that we get
+    /// appropriate errors.
+    ///
+    /// If a closure pattern matches a Place starting with an Upvar, then we introduce a
+    /// FakeRead for that Place outside the closure, in such a case this option would be
+    /// Some(closure_def_id).
+    /// Otherwise, the value of the optional LocalDefId will be None.
+    ForLet(Option<LocalDefId>),
+
+    /// If we have an index expression like
+    ///
+    /// (*x)[1][{ x = y; 4 }]
+    ///
+    /// then the first bounds check is invalidated when we evaluate the second
+    /// index expression. Thus we create a fake borrow of `x` across the second
+    /// indexer, which will cause a borrow check error.
+    ForIndex,
+}
+
+#[derive(Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
+#[derive(TypeFoldable, TypeVisitable)]
+pub struct Coverage {
+    pub kind: CoverageKind,
+    pub code_region: Option<CodeRegion>,
+}
+
+#[derive(Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
+#[derive(TypeFoldable, TypeVisitable)]
+pub struct CopyNonOverlapping<'tcx> {
+    pub src: Operand<'tcx>,
+    pub dst: Operand<'tcx>,
+    /// Number of elements to copy from src to dest, not bytes.
+    pub count: Operand<'tcx>,
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Terminators
+
+/// The various kinds of terminators, representing ways of exiting from a basic block.
+///
+/// A note on unwinding: Panics may occur during the execution of some terminators. Depending on the
+/// `-C panic` flag, this may either cause the program to abort or the call stack to unwind. Such
+/// terminators have a `cleanup: Option<BasicBlock>` field on them. If stack unwinding occurs, then
+/// once the current function is reached, execution continues at the given basic block, if any. If
+/// `cleanup` is `None` then no cleanup is performed, and the stack continues unwinding. This is
+/// equivalent to the execution of a `Resume` terminator.
+///
+/// The basic block pointed to by a `cleanup` field must have its `cleanup` flag set. `cleanup`
+/// basic blocks have a couple of restrictions:
+/// 1. All `cleanup` fields in them must be `None`.
+/// 2. `Return` terminators are not allowed in them. `Abort` and `Resume` terminators are.
+/// 3. All other basic blocks (in the current body) that are reachable from `cleanup` basic blocks
+///    must also be `cleanup`. This is a part of the type system and checked statically, so it is
+///    still an error to have such an edge in the CFG even if it's known that it won't be taken at
+///    runtime.
+#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq)]
+pub enum TerminatorKind<'tcx> {
+    /// Block has one successor; we continue execution there.
+    Goto { target: BasicBlock },
+
+    /// Switches based on the computed value.
+    ///
+    /// First, evaluates the `discr` operand. The type of the operand must be a signed or unsigned
+    /// integer, char, or bool, and must match the given type. Then, if the list of switch targets
+    /// contains the computed value, continues execution at the associated basic block. Otherwise,
+    /// continues execution at the "otherwise" basic block.
+    ///
+    /// Target values may not appear more than once.
+    SwitchInt {
+        /// The discriminant value being tested.
+        discr: Operand<'tcx>,
+
+        /// The type of value being tested.
+        /// This is always the same as the type of `discr`.
+        /// FIXME: remove this redundant information. Currently, it is relied on by pretty-printing.
+        switch_ty: Ty<'tcx>,
+
+        targets: SwitchTargets,
+    },
+
+    /// Indicates that the landing pad is finished and that the process should continue unwinding.
+    ///
+    /// Like a return, this marks the end of this invocation of the function.
+    ///
+    /// Only permitted in cleanup blocks. `Resume` is not permitted with `-C unwind=abort` after
+    /// deaggregation runs.
+    Resume,
+
+    /// Indicates that the landing pad is finished and that the process should abort.
+    ///
+    /// Used to prevent unwinding for foreign items or with `-C unwind=abort`. Only permitted in
+    /// cleanup blocks.
+    Abort,
+
+    /// Returns from the function.
+    ///
+    /// Like function calls, the exact semantics of returns in Rust are unclear. Returning very
+    /// likely at least assigns the value currently in the return place (`_0`) to the place
+    /// specified in the associated `Call` terminator in the calling function, as if assigned via
+    /// `dest = move _0`. It might additionally do other things, like have side-effects in the
+    /// aliasing model.
+    ///
+    /// If the body is a generator body, this has slightly different semantics; it instead causes a
+    /// `GeneratorState::Returned(_0)` to be created (as if by an `Aggregate` rvalue) and assigned
+    /// to the return place.
+    Return,
+
+    /// Indicates a terminator that can never be reached.
+    ///
+    /// Executing this terminator is UB.
+    Unreachable,
+
+    /// The behavior of this statement differs significantly before and after drop elaboration.
+    /// After drop elaboration, `Drop` executes the drop glue for the specified place, after which
+    /// it continues execution/unwinds at the given basic blocks. It is possible that executing drop
+    /// glue is special - this would be part of Rust's memory model. (**FIXME**: do we have an
+    /// issue tracking whether drop glue has any interesting semantics in addition to those of a
+    /// function call?)
+    ///
+    /// `Drop` before drop elaboration is a *conditional* execution of the drop glue. Specifically, the
+    /// `Drop` will be executed if...
+    ///
+    /// **Needs clarification**: End of that sentence. This in effect should document the exact
+    /// behavior of drop elaboration. The following sounds vaguely right, but I'm not quite sure:
+    ///
+    /// > The drop glue is executed if, among all statements executed within this `Body`, an assignment to
+    /// > the place or one of its "parents" occurred more recently than a move out of it. This does not
+    /// > consider indirect assignments.
+    Drop { place: Place<'tcx>, target: BasicBlock, unwind: Option<BasicBlock> },
+
+    /// Drops the place and assigns a new value to it.
+    ///
+    /// This first performs the exact same operation as the pre drop-elaboration `Drop` terminator;
+    /// it then additionally assigns the `value` to the `place` as if by an assignment statement.
+    /// This assignment occurs both in the unwind and the regular code paths. The semantics are best
+    /// explained by the elaboration:
+    ///
+    /// ```ignore (MIR)
+    /// BB0 {
+    ///     DropAndReplace(P <- V, goto BB1, unwind BB2)
+    /// }
+    /// ```
+    ///
+    /// becomes
+    ///
+    /// ```ignore (MIR)
+    /// BB0 {
+    ///     Drop(P, goto BB1, unwind BB2)
+    /// }
+    /// BB1 {
+    ///     // P is now uninitialized
+    ///     P <- V
+    /// }
+    /// BB2 {
+    ///     // P is now uninitialized -- its dtor panicked
+    ///     P <- V
+    /// }
+    /// ```
+    ///
+    /// Disallowed after drop elaboration.
+    DropAndReplace {
+        place: Place<'tcx>,
+        value: Operand<'tcx>,
+        target: BasicBlock,
+        unwind: Option<BasicBlock>,
+    },
+
+    /// Roughly speaking, evaluates the `func` operand and the arguments, and starts execution of
+    /// the referred-to function. The operand types must match the argument types of the function.
+    /// The return place type must match the return type. The type of the `func` operand must be
+    /// callable, meaning either a function pointer, a function type, or a closure type.
+    ///
+    /// **Needs clarification**: The exact semantics of this. Current backends rely on `move`
+    /// operands not aliasing the return place. It is unclear how this is justified in MIR, see
+    /// [#71117].
+    ///
+    /// [#71117]: https://github.com/rust-lang/rust/issues/71117
+    Call {
+        /// The function that’s being called.
+        func: Operand<'tcx>,
+        /// Arguments the function is called with.
+        /// These are owned by the callee, which is free to modify them.
+        /// This allows the memory occupied by "by-value" arguments to be
+        /// reused across function calls without duplicating the contents.
+        args: Vec<Operand<'tcx>>,
+        /// Where the returned value will be written.
+        destination: Place<'tcx>,
+        /// Where to go after this call returns. If none, the call necessarily diverges.
+        target: Option<BasicBlock>,
+        /// Cleanups to be done if the call unwinds.
+        cleanup: Option<BasicBlock>,
+        /// `true` if this is from a call in HIR rather than from an overloaded
+        /// operator. Also `true` for overloaded function calls, i.e., closure
+        /// or `Fn`-trait calls written with call syntax.
+        from_hir_call: bool,
+        /// This `Span` is the span of the function, without the dot and receiver
+        /// (e.g. `foo(a, b)` in `x.foo(a, b)`).
+        fn_span: Span,
+    },
+
+    /// Evaluates the operand, which must have type `bool`. If it is not equal to `expected`,
+    /// initiates a panic. Initiating a panic corresponds to a `Call` terminator with some
+    /// unspecified constant as the function to call, all the operands stored in the `AssertMessage`
+    /// as parameters, and `None` for the destination. Keep in mind that the `cleanup` path is not
+    /// necessarily executed even in the case of a panic, for example in `-C panic=abort`. If the
+    /// assertion does not fail, execution continues at the specified basic block.
+    Assert {
+        cond: Operand<'tcx>,
+        expected: bool,
+        msg: AssertMessage<'tcx>,
+        target: BasicBlock,
+        cleanup: Option<BasicBlock>,
+    },
+
+    /// Marks a suspend point.
+    ///
+    /// Like `Return` terminators in generator bodies, this computes `value` and then a
+    /// `GeneratorState::Yielded(value)` as if by an `Aggregate` rvalue. That value is then assigned to
+    /// the return place of the function calling this one, and execution continues in the calling
+    /// function. When next invoked with the same first argument, execution of this function
+    /// continues at the `resume` basic block, with the second argument written to the `resume_arg`
+    /// place. If the generator is dropped before then, the `drop` basic block is invoked.
+    ///
+    /// Not permitted in bodies that are not generator bodies, or after generator lowering.
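+    ///
+    /// In pretty-printed MIR, a yield looks roughly like this (an illustrative
+    /// sketch, not real compiler output):
+    /// ```ignore (MIR)
+    /// _3 = yield(move _4) -> [resume: bb2, drop: bb3]
+    /// ```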
+    ///
+    /// **Needs clarification**: What about the evaluation order of the `resume_arg` and `value`?
+    Yield {
+        /// The value to return.
+        value: Operand<'tcx>,
+        /// Where to resume to.
+        resume: BasicBlock,
+        /// The place to store the resume argument in.
+        resume_arg: Place<'tcx>,
+        /// Cleanup to be done if the generator is dropped at this suspend point.
+        drop: Option<BasicBlock>,
+    },
+
+    /// Indicates the end of dropping a generator.
+    ///
+    /// Semantically just a `return` (from the generator's drop glue). Only permitted in the same
+    /// situations as `yield`.
+    ///
+    /// **Needs clarification**: Is that even correct? The generator drop code is always confusing
+    /// to me, because it's not even really in the current body.
+    ///
+    /// **Needs clarification**: Are there type system constraints on these terminators? Should
+    /// there be a "block type" like `cleanup` blocks for them?
+    GeneratorDrop,
+
+    /// A block where control flow only ever takes one real path, but borrowck needs to be more
+    /// conservative.
+    ///
+    /// At runtime this is semantically just a goto.
+    ///
+    /// Disallowed after drop elaboration.
+    FalseEdge {
+        /// The target normal control flow will take.
+        real_target: BasicBlock,
+        /// A block control flow could conceptually jump to, but won't in
+        /// practice.
+        imaginary_target: BasicBlock,
+    },
+
+    /// A terminator for blocks that only take one path in reality, but where we reserve the right
+    /// to unwind in borrowck, even if it won't happen in practice. This can arise in infinite loops
+    /// with no function calls, for example.
+    ///
+    /// At runtime this is semantically just a goto.
+    ///
+    /// Disallowed after drop elaboration.
+    FalseUnwind {
+        /// The target normal control flow will take.
+        real_target: BasicBlock,
+        /// The imaginary cleanup block link. This particular path will never be taken
+        /// in practice, but in order to avoid fragility we want to always
+        /// consider it in borrowck. We don't want to accept programs which
+        /// pass borrowck only when `panic=abort` or some assertions are disabled
+        /// due to release vs. debug mode builds. This needs to be an `Option` because
+        /// of the `remove_noop_landing_pads` and `abort_unwinding_calls` passes.
+        unwind: Option<BasicBlock>,
+    },
+
+    /// Block ends with an inline assembly block. This is a terminator since
+    /// inline assembly is allowed to diverge.
+    InlineAsm {
+        /// The template for the inline assembly, with placeholders.
+        template: &'tcx [InlineAsmTemplatePiece],
+
+        /// The operands for the inline assembly, as `Operand`s or `Place`s.
+        operands: Vec<InlineAsmOperand<'tcx>>,
+
+        /// Miscellaneous options for the inline assembly.
+        options: InlineAsmOptions,
+
+        /// Source spans for each line of the inline assembly code. These are
+        /// used to map assembler errors back to the line in the source code.
+        line_spans: &'tcx [Span],
+
+        /// Destination block after the inline assembly returns, unless it is
+        /// diverging (`InlineAsmOptions::NORETURN`).
+        destination: Option<BasicBlock>,
+
+        /// Cleanup to be done if the inline assembly unwinds. This is present
+        /// if and only if `InlineAsmOptions::MAY_UNWIND` is set.
+        cleanup: Option<BasicBlock>,
+    },
+}
+
+/// Information about an assertion failure.
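+///
+/// For example, the bounds check for `v[i]` produces a `BoundsCheck { len, index }`
+/// message, which renders in MIR output roughly as follows (an illustrative
+/// sketch, not real compiler output):
+/// ```ignore (MIR)
+/// assert(move _5, "index out of bounds: the len is {} but the index is {}", move _3, _4) -> [success: bb2, unwind: bb1]
+/// ```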
+#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, PartialOrd)]
+pub enum AssertKind<O> {
+    BoundsCheck { len: O, index: O },
+    Overflow(BinOp, O, O),
+    OverflowNeg(O),
+    DivisionByZero(O),
+    RemainderByZero(O),
+    ResumedAfterReturn(GeneratorKind),
+    ResumedAfterPanic(GeneratorKind),
+}
+
+#[derive(Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
+#[derive(TypeFoldable, TypeVisitable)]
+pub enum InlineAsmOperand<'tcx> {
+    In {
+        reg: InlineAsmRegOrRegClass,
+        value: Operand<'tcx>,
+    },
+    Out {
+        reg: InlineAsmRegOrRegClass,
+        late: bool,
+        place: Option<Place<'tcx>>,
+    },
+    InOut {
+        reg: InlineAsmRegOrRegClass,
+        late: bool,
+        in_value: Operand<'tcx>,
+        out_place: Option<Place<'tcx>>,
+    },
+    Const {
+        value: Box<Constant<'tcx>>,
+    },
+    SymFn {
+        value: Box<Constant<'tcx>>,
+    },
+    SymStatic {
+        def_id: DefId,
+    },
+}
+
+/// Type for MIR `Assert` terminator error messages.
+pub type AssertMessage<'tcx> = AssertKind<Operand<'tcx>>;
+
+///////////////////////////////////////////////////////////////////////////
+// Places
+
+/// Places roughly correspond to a "location in memory." Places in MIR are the same mathematical
+/// object as places in Rust. This of course means that what exactly they are is undecided and part
+/// of the Rust memory model. However, they will likely contain at least the following pieces of
+/// information in some form:
+///
+/// 1. The address in memory that the place refers to.
+/// 2. The provenance with which the place is being accessed.
+/// 3. The type of the place and an optional variant index. See [`PlaceTy`][super::tcx::PlaceTy].
+/// 4. Optionally, some metadata. This exists if and only if the type of the place is not `Sized`.
+///
+/// We'll give a description below of how all pieces of the place except for the provenance are
+/// calculated. We cannot give a description of the provenance, because that is part of the
+/// undecided aliasing model - we only include it here at all to acknowledge its existence.
+///
+/// Each local naturally corresponds to the place `Place { local, projection: [] }`. This place has
+/// the address of the local's allocation and the type of the local.
+///
+/// **Needs clarification:** Unsized locals seem to present a bit of an issue. Their allocation
+/// can't actually be created on `StorageLive`, because it's unclear how big to make the allocation.
+/// Furthermore, MIR produces assignments to unsized locals, although that is not permitted under
+/// `#![feature(unsized_locals)]` in Rust. Besides just putting "unsized locals are special and
+/// different" in a bunch of places, I (JakobDegen) don't know how to incorporate this behavior into
+/// the current MIR semantics in a clean way - possibly this needs some design work first.
+///
+/// For places that are not locals, i.e., they have a non-empty list of projections, we define the
+/// values as a function of the parent place, that is the place with its last [`ProjectionElem`]
+/// stripped. The way this is computed of course depends on the kind of that last projection
+/// element:
+///
+/// - [`Downcast`](ProjectionElem::Downcast): This projection sets the place's variant index to the
+///   given one, and makes no other changes. A `Downcast` projection on a place with its variant
+///   index already set is not well-formed.
+/// - [`Field`](ProjectionElem::Field): `Field` projections take their parent place and create a
+///   place referring to one of the fields of the type. The resulting address is the parent
+///   address, plus the offset of the field. The type becomes the type of the field. If the parent
+///   was unsized and so had metadata associated with it, then the metadata is retained if the
+///   field is unsized and thrown out if it is sized.
+///
+///   These projections are only legal for tuples, ADTs, closures, and generators. If the ADT or
+///   generator has more than one variant, the parent place's variant index must be set, indicating
+///   which variant is being used. If it has just one variant, the variant index may or may not be
+///   included - the single possible variant is inferred if it is not included.
+/// - [`ConstantIndex`](ProjectionElem::ConstantIndex): Computes an offset in units of `T` into the
+///   place as described in the documentation for the `ProjectionElem`. The resulting address is
+///   the parent's address plus that offset, and the type is `T`. This is only legal if the parent
+///   place has type `[T; N]` or `[T]` (*not* `&[T]`). Since such a `T` is always sized, any
+///   resulting metadata is thrown out.
+/// - [`Subslice`](ProjectionElem::Subslice): This projection calculates an offset and a new
+///   address in a similar manner as `ConstantIndex`. It is also only legal on `[T; N]` and `[T]`.
+///   However, this yields a `Place` of type `[T]`, and additionally sets the metadata to be the
+///   length of the subslice.
+/// - [`Index`](ProjectionElem::Index): Like `ConstantIndex`, only legal on `[T; N]` or `[T]`.
+///   However, `Index` additionally takes a local from which the value of the index is computed at
+///   runtime. Computing the value of the index involves interpreting the `Local` as a
+///   `Place { local, projection: [] }`, and then computing its value as if done via
+///   [`Operand::Copy`]. The array/slice is then indexed with the resulting value. The local must
+///   have type `usize`.
+/// - [`Deref`](ProjectionElem::Deref): Derefs are the last type of projection, and the most
+///   complicated. They are only legal on parent places that are references, pointers, or `Box`. A
+///   `Deref` projection begins by loading a value from the parent place, as if by
+///   [`Operand::Copy`]. It then dereferences the resulting pointer, creating a place of the
+///   pointee's type. The resulting address is the address that was stored in the pointer. If the
+///   pointee type is unsized, the pointer additionally stores the value of the metadata.
+///
+/// Computing a place may cause UB. One possibility is that the pointer used for a `Deref` may not
+/// be suitably aligned. Another possibility is that the place is not in bounds, meaning it does not
+/// point to an actual allocation.
+///
+/// However, whether this is actually UB, and when the UB kicks in, is undecided. This is being
+/// discussed in [UCG#319]. The options include that every place must obey those rules, that only
+/// some places must obey them, or that places impose no rules of their own.
+///
+/// [UCG#319]: https://github.com/rust-lang/unsafe-code-guidelines/issues/319
+///
+/// Rust currently requires that every place obey those two rules. This is checked by Miri and taken
+/// advantage of by codegen (via `gep inbounds`). That is possibly subject to change.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, TyEncodable, HashStable)]
+pub struct Place<'tcx> {
+    pub local: Local,
+
+    /// projection out of a place (access a field, deref a pointer, etc)
+    pub projection: &'tcx List<PlaceElem<'tcx>>,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(TyEncodable, TyDecodable, HashStable)]
+pub enum ProjectionElem<V, T> {
+    Deref,
+    Field(Field, T),
+    /// Index into a slice/array.
+    ///
+    /// Note that this does not also dereference, and so it does not exactly correspond to slice
+    /// indexing in Rust. In other words, in the Rust code below:
+    ///
+    /// ```rust
+    /// let x = &[1, 2, 3, 4];
+    /// let i = 2;
+    /// x[i];
+    /// ```
+    ///
+    /// the `x[i]` is turned into a `Deref` followed by an `Index`, not just an `Index`. The same
+    /// thing is true of the `ConstantIndex` and `Subslice` projections below.
+    Index(V),
+
+    /// These indices are generated by slice patterns. Easiest to explain
+    /// by example:
+    ///
+    /// ```ignore (illustrative)
+    /// [X, _, .._, _, _] => { offset: 0, min_length: 4, from_end: false },
+    /// [_, X, .._, _, _] => { offset: 1, min_length: 4, from_end: false },
+    /// [_, _, .._, X, _] => { offset: 2, min_length: 4, from_end: true },
+    /// [_, _, .._, _, X] => { offset: 1, min_length: 4, from_end: true },
+    /// ```
+    ConstantIndex {
+        /// index or -index (in Python terms), depending on from_end
+        offset: u64,
+        /// The thing being indexed must be at least this long. For arrays this
+        /// is always the exact length.
+        min_length: u64,
+        /// Counting backwards from end? This is always false when indexing an
+        /// array.
+        from_end: bool,
+    },
+
+    /// These indices are generated by slice patterns.
+    ///
+    /// If `from_end` is true, `slice[from..slice.len() - to]`.
+    /// Otherwise `array[from..to]`.
+    Subslice {
+        from: u64,
+        to: u64,
+        /// Whether `to` counts from the start or end of the array/slice.
+        /// For `PlaceElem`s this is `true` if and only if the base is a slice.
+        /// For `ProjectionKind`, this can also be `true` for arrays.
+        from_end: bool,
+    },
+
+    /// "Downcast" to a variant of an enum or a generator.
+    ///
+    /// The included Symbol is the name of the variant, used for printing MIR.
+    Downcast(Option<Symbol>, VariantIdx),
+}
+
+/// Alias for projections as they appear in places, where the base is a place
+/// and the index is a local.
+pub type PlaceElem<'tcx> = ProjectionElem<Local, Ty<'tcx>>;
+
+///////////////////////////////////////////////////////////////////////////
+// Operands
+
+/// An operand in MIR represents a "value" in Rust, the definition of which is undecided and part of
+/// the memory model. One proposal for a definition of values can be found [on UCG][value-def].
+///
+/// [value-def]: https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/value-domain.md
+///
+/// The most common way to create values is via loading a place. Loading a place is an operation
+/// which reads the memory of the place and converts it to a value. This is a fundamentally *typed*
+/// operation. The nature of the value produced depends on the type of the conversion. Furthermore,
+/// there may be other effects: if the type has a validity constraint, loading the place might be UB
+/// if the validity constraint is not met.
+///
+/// **Needs clarification:** Ralf proposes that loading a place not have side-effects.
+/// This is what is implemented in miri today. Are these the semantics we want for MIR? Is this
+/// something we can even decide without knowing more about Rust's memory model?
+///
+/// **Needs clarification:** Is loading a place that has its variant index set well-formed? Miri
+/// currently implements it, but it seems like this may be something to check against in the
+/// validator.
+#[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)]
+pub enum Operand<'tcx> {
+    /// Creates a value by loading the given place.
+    ///
+    /// Before drop elaboration, the type of the place must be `Copy`. After drop elaboration there
+    /// is no such requirement.
+    Copy(Place<'tcx>),
+
+    /// Creates a value by loading the place, just like the `Copy` operand.
+    ///
+    /// This *may* additionally overwrite the place with `uninit` bytes, depending on how we decide
+    /// in [UCG#188]. You should not emit MIR that may attempt a subsequent second load of this
+    /// place without first re-initializing it.
+    ///
+    /// [UCG#188]: https://github.com/rust-lang/unsafe-code-guidelines/issues/188
+    Move(Place<'tcx>),
+
+    /// Constants are already semantically values, and remain unchanged.
+    Constant(Box<Constant<'tcx>>),
+}
+
+///////////////////////////////////////////////////////////////////////////
+// Rvalues
+
+/// The various kinds of rvalues that can appear in MIR.
+///
+/// Not all of these are allowed at every [`MirPhase`] - when this is the case, it's stated below.
+///
+/// Computing any rvalue begins by evaluating the places and operands in some order (**Needs
+/// clarification**: Which order?). These are then used to produce a "value" - the same kind of
+/// value that an [`Operand`] produces.
+#[derive(Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq)]
+pub enum Rvalue<'tcx> {
+    /// Yields the operand unchanged.
+    Use(Operand<'tcx>),
+
+    /// Creates an array where each element is the value of the operand.
+    ///
+    /// This is the cause of a bug in the case where the repetition count is zero because the value
+    /// is not dropped, see [#74836].
+    ///
+    /// Corresponds to source code like `[x; 32]`.
+    ///
+    /// [#74836]: https://github.com/rust-lang/rust/issues/74836
+    Repeat(Operand<'tcx>, ty::Const<'tcx>),
+
+    /// Creates a reference of the indicated kind to the place.
+    ///
+    /// There is not much to document here, because besides the obvious parts the semantics of this
+    /// are essentially entirely a part of the aliasing model. There are many UCG issues discussing
+    /// exactly what the behavior of this operation should be.
+    ///
+    /// `Shallow` borrows are disallowed after drop lowering.
+    Ref(Region<'tcx>, BorrowKind, Place<'tcx>),
+
+    /// Creates a pointer/reference to the given thread local.
+    ///
+    /// The yielded type is a `*mut T` if the static is mutable, otherwise if the static is extern a
+    /// `*const T`, and if neither of those apply a `&T`.
+    ///
+    /// **Note:** This is a runtime operation that actually executes code and is in this sense more
+    /// like a function call. Also, eliminating dead stores of this rvalue causes `fn main() {}` to
+    /// SIGILL for some reason that I (JakobDegen) never got a chance to look into.
+    ///
+    /// **Needs clarification**: Are there weird additional semantics here related to the runtime
+    /// nature of this operation?
+    ThreadLocalRef(DefId),
+
+    /// Creates a pointer with the indicated mutability to the place.
+    ///
+    /// This is generated by pointer casts like `&v as *const _` or raw address of expressions like
+    /// `&raw v` or `addr_of!(v)`.
+    ///
+    /// Like with references, the semantics of this operation are heavily dependent on the aliasing
+    /// model.
+    AddressOf(Mutability, Place<'tcx>),
+
+    /// Yields the length of the place, as a `usize`.
+    ///
+    /// If the type of the place is an array, this is the array length. For slices (`[T]`, not
+    /// `&[T]`) this accesses the place's metadata to determine the length. This rvalue is
+    /// ill-formed for places of other types.
+    Len(Place<'tcx>),
+
+    /// Performs essentially all of the casts that can be performed via `as`.
+    ///
+    /// This allows for casts from/to a variety of types.
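+    ///
+    /// A few source-level examples that lower to `Cast` (illustrative only; the
+    /// precise `CastKind` of each is left unspecified here):
+    /// ```rust
+    /// let a = 5_i32 as i64;   // integer widening
+    /// let b = 'x' as u32;     // char to integer
+    /// let c = 2.5_f64 as f32; // float narrowing
+    /// ```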
+    ///
+    /// **FIXME**: Document exactly which `CastKind`s allow which types of casts. Figure out why
+    /// `ArrayToPointer` and `MutToConstPointer` are special.
+    Cast(CastKind, Operand<'tcx>, Ty<'tcx>),
+
+    /// * `Offset` has the same semantics as [`offset`](pointer::offset), except that the second
+    ///   parameter may be a `usize` as well.
+    /// * The comparison operations accept `bool`s, `char`s, signed or unsigned integers, floats,
+    ///   raw pointers, or function pointers and return a `bool`. The types of the operands must be
+    ///   matching, up to the usual caveat of the lifetimes in function pointers.
+    /// * Left and right shift operations accept signed or unsigned integers not necessarily of the
+    ///   same type and return a value of the same type as their LHS. Like in Rust, the RHS is
+    ///   truncated as needed.
+    /// * The `Bit*` operations accept signed integers, unsigned integers, or bools with matching
+    ///   types and return a value of that type.
+    /// * The remaining operations accept signed integers, unsigned integers, or floats with
+    ///   matching types and return a value of that type.
+    BinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>),
+
+    /// Same as `BinaryOp`, but yields `(T, bool)` with a `bool` indicating an error condition.
+    ///
+    /// When overflow checking is disabled and we are generating run-time code, the error condition
+    /// is false. Otherwise, and always during CTFE, the error condition is determined as described
+    /// below.
+    ///
+    /// For addition, subtraction, and multiplication on integers the error condition is set when
+    /// the infinite precision result would be unequal to the actual result.
+    ///
+    /// For shift operations on integers the error condition is set when the value of the right-hand
+    /// side is greater than or equal to the number of bits in the type of the left-hand side, or
+    /// when the value of the right-hand side is negative.
+    ///
+    /// Other combinations of types and operators are unsupported.
+    CheckedBinaryOp(BinOp, Box<(Operand<'tcx>, Operand<'tcx>)>),
+
+    /// Computes a value as described by the operation.
+    NullaryOp(NullOp, Ty<'tcx>),
+
+    /// Exactly like `BinaryOp`, but with fewer operands.
+    ///
+    /// Also does two's-complement arithmetic. Negation requires a signed integer or a float;
+    /// bitwise not requires a signed integer, unsigned integer, or bool. Both operation kinds
+    /// return a value with the same type as their operand.
+    UnaryOp(UnOp, Operand<'tcx>),
+
+    /// Computes the discriminant of the place, returning it as an integer of type
+    /// [`discriminant_ty`]. Returns zero for types without a discriminant.
+    ///
+    /// The validity requirements for the underlying value are undecided for this rvalue, see
+    /// [#91095]. Note too that the value of the discriminant is not the same thing as the
+    /// variant index; use [`discriminant_for_variant`] to convert.
+    ///
+    /// [`discriminant_ty`]: crate::ty::Ty::discriminant_ty
+    /// [#91095]: https://github.com/rust-lang/rust/issues/91095
+    /// [`discriminant_for_variant`]: crate::ty::Ty::discriminant_for_variant
+    Discriminant(Place<'tcx>),
+
+    /// Creates an aggregate value, like a tuple or struct.
+    ///
+    /// This is needed because dataflow analysis needs to distinguish
+    /// `dest = Foo { x: ..., y: ... }` from `dest.x = ...; dest.y = ...;` in the case that `Foo`
+    /// has a destructor.
+    ///
+    /// Disallowed after deaggregation for all aggregate kinds except `Array` and `Generator`. After
+    /// generator lowering, `Generator` aggregate kinds are disallowed too.
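+    ///
+    /// For example (an illustrative sketch), `let x = (a, b);` becomes something like:
+    /// ```ignore (MIR)
+    /// _3 = (move _1, move _2);
+    /// ```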
+    Aggregate(Box<AggregateKind<'tcx>>, Vec<Operand<'tcx>>),
+
+    /// Transmutes a `*mut u8` into a shallow-initialized `Box<T>`.
+    ///
+    /// This is different from a normal transmute because dataflow analysis will treat the box as
+    /// initialized but its content as uninitialized. Like other pointer casts, this in general
+    /// affects alias analysis.
+    ShallowInitBox(Operand<'tcx>, Ty<'tcx>),
+
+    /// A CopyForDeref is equivalent to a read from a place at the
+    /// codegen level, but is treated specially by drop elaboration. When such a read happens, it
+    /// is guaranteed (by the `Derefer` MIR pass in rustc_mir_transform/src/deref_separator)
+    /// that the only use of the returned value is a deref operation, immediately
+    /// followed by one or more projections. Drop elaboration treats this rvalue as if the
+    /// read never happened and just projects further. This allows simplifying various MIR
+    /// optimizations and codegen backends that previously had to handle deref operations anywhere
+    /// in a place.
+    CopyForDeref(Place<'tcx>),
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, TyEncodable, TyDecodable, Hash, HashStable)]
+pub enum CastKind {
+    /// An exposing pointer-to-address cast. A cast between a pointer and an integer type, or
+    /// between a function pointer and an integer type.
+    /// See the docs on `expose_addr` for more details.
+    PointerExposeAddress,
+    /// An address-to-pointer cast that picks up an exposed provenance.
+    /// See the docs on `from_exposed_addr` for more details.
+    PointerFromExposedAddress,
+    /// All sorts of pointer-to-pointer casts. Note that reference-to-raw-ptr casts are
+    /// translated into `&raw mut/const *r`, i.e., they are not actually casts.
+    Pointer(PointerCast),
+    /// Remaining unclassified casts.
+    Misc,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, TyEncodable, TyDecodable, Hash, HashStable)]
+pub enum AggregateKind<'tcx> {
+    /// The type of the array's elements.
+    Array(Ty<'tcx>),
+    Tuple,
+
+    /// The second field is the variant index. It's equal to 0 for struct
+    /// and union expressions. The last field is the
+    /// active field index and is present only for union expressions
+    /// -- e.g., for a union expression `SomeUnion { c: .. }`, the
+    /// active field index would identify the field `c`.
+    Adt(DefId, VariantIdx, SubstsRef<'tcx>, Option<UserTypeAnnotationIndex>, Option<Field>),
+
+    // Note: We can use LocalDefId since closures and generators are deaggregated
+    // before codegen.
+    Closure(LocalDefId, SubstsRef<'tcx>),
+    Generator(LocalDefId, SubstsRef<'tcx>, hir::Movability),
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, TyEncodable, TyDecodable, Hash, HashStable)]
+pub enum NullOp {
+    /// Returns the size of a value of that type.
+    SizeOf,
+    /// Returns the minimum alignment of a type.
+    AlignOf,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, TyEncodable, TyDecodable, Hash, HashStable)]
+pub enum UnOp {
+    /// The `!` operator for logical inversion.
+    Not,
+    /// The `-` operator for negation.
+    Neg,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, TyEncodable, TyDecodable, Hash, HashStable)]
+pub enum BinOp {
+    /// The `+` operator (addition)
+    Add,
+    /// The `-` operator (subtraction)
+    Sub,
+    /// The `*` operator (multiplication)
+    Mul,
+    /// The `/` operator (division)
+    ///
+    /// Division by zero is UB, because the compiler should have inserted checks
+    /// prior to this.
+    Div,
+    /// The `%` operator (modulus)
+    ///
+    /// Using zero as the modulus (second operand) is UB, because the compiler
+    /// should have inserted checks prior to this.
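+    ///
+    /// For example, integer `%` in Rust emits a zero-divisor guard before the
+    /// `Rem` itself, roughly like this (an illustrative sketch, not real
+    /// compiler output):
+    /// ```ignore (MIR)
+    /// _4 = Eq(move _2, const 0_i32);
+    /// assert(!move _4, "attempt to calculate the remainder with a divisor of zero") -> [success: bb2, unwind: bb1]
+    /// ```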
+ Rem, + /// The `^` operator (bitwise xor) + BitXor, + /// The `&` operator (bitwise and) + BitAnd, + /// The `|` operator (bitwise or) + BitOr, + /// The `<<` operator (shift left) + /// + /// The offset is truncated to the size of the first operand before shifting. + Shl, + /// The `>>` operator (shift right) + /// + /// The offset is truncated to the size of the first operand before shifting. + Shr, + /// The `==` operator (equality) + Eq, + /// The `<` operator (less than) + Lt, + /// The `<=` operator (less than or equal to) + Le, + /// The `!=` operator (not equal to) + Ne, + /// The `>=` operator (greater than or equal to) + Ge, + /// The `>` operator (greater than) + Gt, + /// The `ptr.offset` operator + Offset, +} + +// Some nodes are used a lot. Make sure they don't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +mod size_asserts { + use super::*; + // These are in alphabetical order, which is easy to maintain. + static_assert_size!(AggregateKind<'_>, 48); + static_assert_size!(Operand<'_>, 24); + static_assert_size!(Place<'_>, 16); + static_assert_size!(PlaceElem<'_>, 24); + static_assert_size!(Rvalue<'_>, 40); +} diff --git a/compiler/rustc_middle/src/mir/tcx.rs b/compiler/rustc_middle/src/mir/tcx.rs new file mode 100644 index 000000000..405003156 --- /dev/null +++ b/compiler/rustc_middle/src/mir/tcx.rs @@ -0,0 +1,307 @@ +/*! + * Methods for the various MIR types. These are intended for use after + * building is complete. + */ + +use crate::mir::*; +use crate::ty::subst::Subst; +use crate::ty::{self, Ty, TyCtxt}; +use rustc_hir as hir; +use rustc_target::abi::VariantIdx; + +#[derive(Copy, Clone, Debug, TypeFoldable, TypeVisitable)] +pub struct PlaceTy<'tcx> { + pub ty: Ty<'tcx>, + /// Downcast to a particular variant of an enum or a generator, if included. + pub variant_index: Option, +} + +// At least on 64 bit systems, `PlaceTy` should not be larger than two or three pointers. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +static_assert_size!(PlaceTy<'_>, 16); + +impl<'tcx> PlaceTy<'tcx> { + #[inline] + pub fn from_ty(ty: Ty<'tcx>) -> PlaceTy<'tcx> { + PlaceTy { ty, variant_index: None } + } + + /// `place_ty.field_ty(tcx, f)` computes the type at a given field + /// of a record or enum-variant. (Most clients of `PlaceTy` can + /// instead just extract the relevant type directly from their + /// `PlaceElem`, but some instances of `ProjectionElem` do + /// not carry a `Ty` for `T`.) + /// + /// Note that the resulting type has not been normalized. + pub fn field_ty(self, tcx: TyCtxt<'tcx>, f: Field) -> Ty<'tcx> { + let answer = match self.ty.kind() { + ty::Adt(adt_def, substs) => { + let variant_def = match self.variant_index { + None => adt_def.non_enum_variant(), + Some(variant_index) => { + assert!(adt_def.is_enum()); + &adt_def.variant(variant_index) + } + }; + let field_def = &variant_def.fields[f.index()]; + field_def.ty(tcx, substs) + } + ty::Tuple(tys) => tys[f.index()], + _ => bug!("extracting field of non-tuple non-adt: {:?}", self), + }; + debug!("field_ty self: {:?} f: {:?} yields: {:?}", self, f, answer); + answer + } + + /// Convenience wrapper around `projection_ty_core` for + /// `PlaceElem`, where we can just use the `Ty` that is already + /// stored inline on field projection elems. 
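+    ///
+    /// An illustrative sketch (assuming `tcx` and a `PlaceTy` named `ref_place_ty`
+    /// for some `&[T]` are in scope):
+    /// ```ignore (illustrative)
+    /// // Dereferencing a `&[T]`-typed place yields a `PlaceTy` for `[T]`.
+    /// let slice_ty = ref_place_ty.projection_ty(tcx, ProjectionElem::Deref);
+    /// ```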
+ pub fn projection_ty(self, tcx: TyCtxt<'tcx>, elem: PlaceElem<'tcx>) -> PlaceTy<'tcx> { + self.projection_ty_core(tcx, ty::ParamEnv::empty(), &elem, |_, _, ty| ty) + } + + /// `place_ty.projection_ty_core(tcx, elem, |...| { ... })` + /// projects `place_ty` onto `elem`, returning the appropriate + /// `Ty` or downcast variant corresponding to that projection. + /// The `handle_field` callback must map a `Field` to its `Ty`, + /// (which should be trivial when `T` = `Ty`). + pub fn projection_ty_core( + self, + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + elem: &ProjectionElem, + mut handle_field: impl FnMut(&Self, Field, T) -> Ty<'tcx>, + ) -> PlaceTy<'tcx> + where + V: ::std::fmt::Debug, + T: ::std::fmt::Debug + Copy, + { + if self.variant_index.is_some() && !matches!(elem, ProjectionElem::Field(..)) { + bug!("cannot use non field projection on downcasted place") + } + let answer = match *elem { + ProjectionElem::Deref => { + let ty = self + .ty + .builtin_deref(true) + .unwrap_or_else(|| { + bug!("deref projection of non-dereferenceable ty {:?}", self) + }) + .ty; + PlaceTy::from_ty(ty) + } + ProjectionElem::Index(_) | ProjectionElem::ConstantIndex { .. } => { + PlaceTy::from_ty(self.ty.builtin_index().unwrap()) + } + ProjectionElem::Subslice { from, to, from_end } => { + PlaceTy::from_ty(match self.ty.kind() { + ty::Slice(..) => self.ty, + ty::Array(inner, _) if !from_end => tcx.mk_array(*inner, (to - from) as u64), + ty::Array(inner, size) if from_end => { + let size = size.eval_usize(tcx, param_env); + let len = size - (from as u64) - (to as u64); + tcx.mk_array(*inner, len) + } + _ => bug!("cannot subslice non-array type: `{:?}`", self), + }) + } + ProjectionElem::Downcast(_name, index) => { + PlaceTy { ty: self.ty, variant_index: Some(index) } + } + ProjectionElem::Field(f, fty) => PlaceTy::from_ty(handle_field(&self, f, fty)), + }; + debug!("projection_ty self: {:?} elem: {:?} yields: {:?}", self, elem, answer); + answer + } +} + +impl<'tcx> Place<'tcx> { + pub fn ty_from( + local: Local, + projection: &[PlaceElem<'tcx>], + local_decls: &D, + tcx: TyCtxt<'tcx>, + ) -> PlaceTy<'tcx> + where + D: HasLocalDecls<'tcx>, + { + projection + .iter() + .fold(PlaceTy::from_ty(local_decls.local_decls()[local].ty), |place_ty, &elem| { + place_ty.projection_ty(tcx, elem) + }) + } + + pub fn ty(&self, local_decls: &D, tcx: TyCtxt<'tcx>) -> PlaceTy<'tcx> + where + D: HasLocalDecls<'tcx>, + { + Place::ty_from(self.local, &self.projection, local_decls, tcx) + } +} + +impl<'tcx> PlaceRef<'tcx> { + pub fn ty(&self, local_decls: &D, tcx: TyCtxt<'tcx>) -> PlaceTy<'tcx> + where + D: HasLocalDecls<'tcx>, + { + Place::ty_from(self.local, &self.projection, local_decls, tcx) + } +} + +pub enum RvalueInitializationState { + Shallow, + Deep, +} + +impl<'tcx> Rvalue<'tcx> { + pub fn ty(&self, local_decls: &D, tcx: TyCtxt<'tcx>) -> Ty<'tcx> + where + D: HasLocalDecls<'tcx>, + { + match *self { + Rvalue::Use(ref operand) => operand.ty(local_decls, tcx), + Rvalue::Repeat(ref operand, count) => { + tcx.mk_ty(ty::Array(operand.ty(local_decls, tcx), count)) + } + Rvalue::ThreadLocalRef(did) => { + let static_ty = tcx.type_of(did); + if tcx.is_mutable_static(did) { + tcx.mk_mut_ptr(static_ty) + } else if tcx.is_foreign_item(did) { + tcx.mk_imm_ptr(static_ty) + } else { + // FIXME: These things don't *really* have 'static lifetime. 
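+                    // Plain statics (neither mutable nor foreign) are referenced
+                    // as `&'static T`, matching the `&T` case documented on
+                    // `Rvalue::ThreadLocalRef`.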
+ tcx.mk_imm_ref(tcx.lifetimes.re_static, static_ty) + } + } + Rvalue::Ref(reg, bk, ref place) => { + let place_ty = place.ty(local_decls, tcx).ty; + tcx.mk_ref(reg, ty::TypeAndMut { ty: place_ty, mutbl: bk.to_mutbl_lossy() }) + } + Rvalue::AddressOf(mutability, ref place) => { + let place_ty = place.ty(local_decls, tcx).ty; + tcx.mk_ptr(ty::TypeAndMut { ty: place_ty, mutbl: mutability }) + } + Rvalue::Len(..) => tcx.types.usize, + Rvalue::Cast(.., ty) => ty, + Rvalue::BinaryOp(op, box (ref lhs, ref rhs)) => { + let lhs_ty = lhs.ty(local_decls, tcx); + let rhs_ty = rhs.ty(local_decls, tcx); + op.ty(tcx, lhs_ty, rhs_ty) + } + Rvalue::CheckedBinaryOp(op, box (ref lhs, ref rhs)) => { + let lhs_ty = lhs.ty(local_decls, tcx); + let rhs_ty = rhs.ty(local_decls, tcx); + let ty = op.ty(tcx, lhs_ty, rhs_ty); + tcx.intern_tup(&[ty, tcx.types.bool]) + } + Rvalue::UnaryOp(UnOp::Not | UnOp::Neg, ref operand) => operand.ty(local_decls, tcx), + Rvalue::Discriminant(ref place) => place.ty(local_decls, tcx).ty.discriminant_ty(tcx), + Rvalue::NullaryOp(NullOp::SizeOf | NullOp::AlignOf, _) => tcx.types.usize, + Rvalue::Aggregate(ref ak, ref ops) => match **ak { + AggregateKind::Array(ty) => tcx.mk_array(ty, ops.len() as u64), + AggregateKind::Tuple => tcx.mk_tup(ops.iter().map(|op| op.ty(local_decls, tcx))), + AggregateKind::Adt(did, _, substs, _, _) => { + tcx.bound_type_of(did).subst(tcx, substs) + } + AggregateKind::Closure(did, substs) => tcx.mk_closure(did.to_def_id(), substs), + AggregateKind::Generator(did, substs, movability) => { + tcx.mk_generator(did.to_def_id(), substs, movability) + } + }, + Rvalue::ShallowInitBox(_, ty) => tcx.mk_box(ty), + Rvalue::CopyForDeref(ref place) => place.ty(local_decls, tcx).ty, + } + } + + #[inline] + /// Returns `true` if this rvalue is deeply initialized (most rvalues) or + /// whether its only shallowly initialized (`Rvalue::Box`). + pub fn initialization_state(&self) -> RvalueInitializationState { + match *self { + Rvalue::ShallowInitBox(_, _) => RvalueInitializationState::Shallow, + _ => RvalueInitializationState::Deep, + } + } +} + +impl<'tcx> Operand<'tcx> { + pub fn ty(&self, local_decls: &D, tcx: TyCtxt<'tcx>) -> Ty<'tcx> + where + D: HasLocalDecls<'tcx>, + { + match self { + &Operand::Copy(ref l) | &Operand::Move(ref l) => l.ty(local_decls, tcx).ty, + &Operand::Constant(ref c) => c.literal.ty(), + } + } +} + +impl<'tcx> BinOp { + pub fn ty(&self, tcx: TyCtxt<'tcx>, lhs_ty: Ty<'tcx>, rhs_ty: Ty<'tcx>) -> Ty<'tcx> { + // FIXME: handle SIMD correctly + match self { + &BinOp::Add + | &BinOp::Sub + | &BinOp::Mul + | &BinOp::Div + | &BinOp::Rem + | &BinOp::BitXor + | &BinOp::BitAnd + | &BinOp::BitOr => { + // these should be integers or floats of the same size. + assert_eq!(lhs_ty, rhs_ty); + lhs_ty + } + &BinOp::Shl | &BinOp::Shr | &BinOp::Offset => { + lhs_ty // lhs_ty can be != rhs_ty + } + &BinOp::Eq | &BinOp::Lt | &BinOp::Le | &BinOp::Ne | &BinOp::Ge | &BinOp::Gt => { + tcx.types.bool + } + } + } +} + +impl BorrowKind { + pub fn to_mutbl_lossy(self) -> hir::Mutability { + match self { + BorrowKind::Mut { .. } => hir::Mutability::Mut, + BorrowKind::Shared => hir::Mutability::Not, + + // We have no type corresponding to a unique imm borrow, so + // use `&mut`. It gives all the capabilities of a `&uniq` + // and hence is a safe "over approximation". + BorrowKind::Unique => hir::Mutability::Mut, + + // We have no type corresponding to a shallow borrow, so use + // `&` as an approximation. 
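+            // Shallow borrows are disallowed after drop lowering (see the
+            // `Rvalue::Ref` documentation), so this lossy mapping is only
+            // exercised on MIR from earlier phases.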
+ BorrowKind::Shallow => hir::Mutability::Not, + } + } +} + +impl BinOp { + pub fn to_hir_binop(self) -> hir::BinOpKind { + match self { + BinOp::Add => hir::BinOpKind::Add, + BinOp::Sub => hir::BinOpKind::Sub, + BinOp::Mul => hir::BinOpKind::Mul, + BinOp::Div => hir::BinOpKind::Div, + BinOp::Rem => hir::BinOpKind::Rem, + BinOp::BitXor => hir::BinOpKind::BitXor, + BinOp::BitAnd => hir::BinOpKind::BitAnd, + BinOp::BitOr => hir::BinOpKind::BitOr, + BinOp::Shl => hir::BinOpKind::Shl, + BinOp::Shr => hir::BinOpKind::Shr, + BinOp::Eq => hir::BinOpKind::Eq, + BinOp::Ne => hir::BinOpKind::Ne, + BinOp::Lt => hir::BinOpKind::Lt, + BinOp::Gt => hir::BinOpKind::Gt, + BinOp::Le => hir::BinOpKind::Le, + BinOp::Ge => hir::BinOpKind::Ge, + BinOp::Offset => unreachable!(), + } + } +} diff --git a/compiler/rustc_middle/src/mir/terminator.rs b/compiler/rustc_middle/src/mir/terminator.rs new file mode 100644 index 000000000..9ccf5aea6 --- /dev/null +++ b/compiler/rustc_middle/src/mir/terminator.rs @@ -0,0 +1,448 @@ +use crate::mir; +use crate::mir::interpret::Scalar; +use crate::ty::{self, Ty, TyCtxt}; +use smallvec::{smallvec, SmallVec}; + +use super::{BasicBlock, InlineAsmOperand, Operand, SourceInfo, TerminatorKind}; +use rustc_ast::InlineAsmTemplatePiece; +pub use rustc_ast::Mutability; +use rustc_macros::HashStable; +use std::borrow::Cow; +use std::fmt::{self, Debug, Formatter, Write}; +use std::iter; +use std::slice; + +pub use super::query::*; + +#[derive(Debug, Clone, TyEncodable, TyDecodable, Hash, HashStable, PartialEq, PartialOrd)] +pub struct SwitchTargets { + /// Possible values. The locations to branch to in each case + /// are found in the corresponding indices from the `targets` vector. + values: SmallVec<[u128; 1]>, + + /// Possible branch sites. The last element of this vector is used + /// for the otherwise branch, so targets.len() == values.len() + 1 + /// should hold. + // + // This invariant is quite non-obvious and also could be improved. + // One way to make this invariant is to have something like this instead: + // + // branches: Vec<(ConstInt, BasicBlock)>, + // otherwise: Option // exhaustive if None + // + // However we’ve decided to keep this as-is until we figure a case + // where some other approach seems to be strictly better than other. + targets: SmallVec<[BasicBlock; 2]>, +} + +impl SwitchTargets { + /// Creates switch targets from an iterator of values and target blocks. + /// + /// The iterator may be empty, in which case the `SwitchInt` instruction is equivalent to + /// `goto otherwise;`. + pub fn new(targets: impl Iterator, otherwise: BasicBlock) -> Self { + let (values, mut targets): (SmallVec<_>, SmallVec<_>) = targets.unzip(); + targets.push(otherwise); + Self { values, targets } + } + + /// Builds a switch targets definition that jumps to `then` if the tested value equals `value`, + /// and to `else_` if not. + pub fn static_if(value: u128, then: BasicBlock, else_: BasicBlock) -> Self { + Self { values: smallvec![value], targets: smallvec![then, else_] } + } + + /// Returns the fallback target that is jumped to when none of the values match the operand. + pub fn otherwise(&self) -> BasicBlock { + *self.targets.last().unwrap() + } + + /// Returns an iterator over the switch targets. + /// + /// The iterator will yield tuples containing the value and corresponding target to jump to, not + /// including the `otherwise` fallback target. + /// + /// Note that this may yield 0 elements. Only the `otherwise` branch is mandatory. 
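+    ///
+    /// A small usage sketch (illustrative; `bb_then`/`bb_else` are hypothetical blocks):
+    /// ```ignore (illustrative)
+    /// let targets = SwitchTargets::static_if(0, bb_then, bb_else);
+    /// for (value, target) in targets.iter() {
+    ///     // Yields exactly one pair here: `(0, bb_then)`.
+    /// }
+    /// ```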
+ pub fn iter(&self) -> SwitchTargetsIter<'_> { + SwitchTargetsIter { inner: iter::zip(&self.values, &self.targets) } + } + + /// Returns a slice with all possible jump targets (including the fallback target). + pub fn all_targets(&self) -> &[BasicBlock] { + &self.targets + } + + pub fn all_targets_mut(&mut self) -> &mut [BasicBlock] { + &mut self.targets + } + + /// Finds the `BasicBlock` to which this `SwitchInt` will branch given the + /// specific value. This cannot fail, as it'll return the `otherwise` + /// branch if there's not a specific match for the value. + pub fn target_for_value(&self, value: u128) -> BasicBlock { + self.iter().find_map(|(v, t)| (v == value).then_some(t)).unwrap_or_else(|| self.otherwise()) + } +} + +pub struct SwitchTargetsIter<'a> { + inner: iter::Zip, slice::Iter<'a, BasicBlock>>, +} + +impl<'a> Iterator for SwitchTargetsIter<'a> { + type Item = (u128, BasicBlock); + + fn next(&mut self) -> Option { + self.inner.next().map(|(val, bb)| (*val, *bb)) + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +impl<'a> ExactSizeIterator for SwitchTargetsIter<'a> {} + +#[derive(Clone, Debug, TyEncodable, TyDecodable, HashStable)] +pub struct Terminator<'tcx> { + pub source_info: SourceInfo, + pub kind: TerminatorKind<'tcx>, +} + +pub type Successors<'a> = impl Iterator + 'a; +pub type SuccessorsMut<'a> = + iter::Chain, slice::IterMut<'a, BasicBlock>>; + +impl<'tcx> Terminator<'tcx> { + pub fn successors(&self) -> Successors<'_> { + self.kind.successors() + } + + pub fn successors_mut(&mut self) -> SuccessorsMut<'_> { + self.kind.successors_mut() + } + + pub fn unwind(&self) -> Option<&Option> { + self.kind.unwind() + } + + pub fn unwind_mut(&mut self) -> Option<&mut Option> { + self.kind.unwind_mut() + } +} + +impl<'tcx> TerminatorKind<'tcx> { + pub fn if_( + tcx: TyCtxt<'tcx>, + cond: Operand<'tcx>, + t: BasicBlock, + f: BasicBlock, + ) -> TerminatorKind<'tcx> { + TerminatorKind::SwitchInt { + discr: cond, + switch_ty: tcx.types.bool, + targets: SwitchTargets::static_if(0, f, t), + } + } + + pub fn successors(&self) -> Successors<'_> { + use self::TerminatorKind::*; + match *self { + Resume + | Abort + | GeneratorDrop + | Return + | Unreachable + | Call { target: None, cleanup: None, .. } + | InlineAsm { destination: None, cleanup: None, .. } => { + None.into_iter().chain((&[]).into_iter().copied()) + } + Goto { target: t } + | Call { target: None, cleanup: Some(t), .. } + | Call { target: Some(t), cleanup: None, .. } + | Yield { resume: t, drop: None, .. } + | DropAndReplace { target: t, unwind: None, .. } + | Drop { target: t, unwind: None, .. } + | Assert { target: t, cleanup: None, .. } + | FalseUnwind { real_target: t, unwind: None } + | InlineAsm { destination: Some(t), cleanup: None, .. } + | InlineAsm { destination: None, cleanup: Some(t), .. } => { + Some(t).into_iter().chain((&[]).into_iter().copied()) + } + Call { target: Some(t), cleanup: Some(ref u), .. } + | Yield { resume: t, drop: Some(ref u), .. } + | DropAndReplace { target: t, unwind: Some(ref u), .. } + | Drop { target: t, unwind: Some(ref u), .. } + | Assert { target: t, cleanup: Some(ref u), .. } + | FalseUnwind { real_target: t, unwind: Some(ref u) } + | InlineAsm { destination: Some(t), cleanup: Some(ref u), .. } => { + Some(t).into_iter().chain(slice::from_ref(u).into_iter().copied()) + } + SwitchInt { ref targets, .. 
} => { + None.into_iter().chain(targets.targets.iter().copied()) + } + FalseEdge { real_target, ref imaginary_target } => Some(real_target) + .into_iter() + .chain(slice::from_ref(imaginary_target).into_iter().copied()), + } + } + + pub fn successors_mut(&mut self) -> SuccessorsMut<'_> { + use self::TerminatorKind::*; + match *self { + Resume + | Abort + | GeneratorDrop + | Return + | Unreachable + | Call { target: None, cleanup: None, .. } + | InlineAsm { destination: None, cleanup: None, .. } => None.into_iter().chain(&mut []), + Goto { target: ref mut t } + | Call { target: None, cleanup: Some(ref mut t), .. } + | Call { target: Some(ref mut t), cleanup: None, .. } + | Yield { resume: ref mut t, drop: None, .. } + | DropAndReplace { target: ref mut t, unwind: None, .. } + | Drop { target: ref mut t, unwind: None, .. } + | Assert { target: ref mut t, cleanup: None, .. } + | FalseUnwind { real_target: ref mut t, unwind: None } + | InlineAsm { destination: Some(ref mut t), cleanup: None, .. } + | InlineAsm { destination: None, cleanup: Some(ref mut t), .. } => { + Some(t).into_iter().chain(&mut []) + } + Call { target: Some(ref mut t), cleanup: Some(ref mut u), .. } + | Yield { resume: ref mut t, drop: Some(ref mut u), .. } + | DropAndReplace { target: ref mut t, unwind: Some(ref mut u), .. } + | Drop { target: ref mut t, unwind: Some(ref mut u), .. } + | Assert { target: ref mut t, cleanup: Some(ref mut u), .. } + | FalseUnwind { real_target: ref mut t, unwind: Some(ref mut u) } + | InlineAsm { destination: Some(ref mut t), cleanup: Some(ref mut u), .. } => { + Some(t).into_iter().chain(slice::from_mut(u)) + } + SwitchInt { ref mut targets, .. } => None.into_iter().chain(&mut targets.targets), + FalseEdge { ref mut real_target, ref mut imaginary_target } => { + Some(real_target).into_iter().chain(slice::from_mut(imaginary_target)) + } + } + } + + pub fn unwind(&self) -> Option<&Option> { + match *self { + TerminatorKind::Goto { .. } + | TerminatorKind::Resume + | TerminatorKind::Abort + | TerminatorKind::Return + | TerminatorKind::Unreachable + | TerminatorKind::GeneratorDrop + | TerminatorKind::Yield { .. } + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::FalseEdge { .. } => None, + TerminatorKind::Call { cleanup: ref unwind, .. } + | TerminatorKind::Assert { cleanup: ref unwind, .. } + | TerminatorKind::DropAndReplace { ref unwind, .. } + | TerminatorKind::Drop { ref unwind, .. } + | TerminatorKind::FalseUnwind { ref unwind, .. } + | TerminatorKind::InlineAsm { cleanup: ref unwind, .. } => Some(unwind), + } + } + + pub fn unwind_mut(&mut self) -> Option<&mut Option> { + match *self { + TerminatorKind::Goto { .. } + | TerminatorKind::Resume + | TerminatorKind::Abort + | TerminatorKind::Return + | TerminatorKind::Unreachable + | TerminatorKind::GeneratorDrop + | TerminatorKind::Yield { .. } + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::FalseEdge { .. } => None, + TerminatorKind::Call { cleanup: ref mut unwind, .. } + | TerminatorKind::Assert { cleanup: ref mut unwind, .. } + | TerminatorKind::DropAndReplace { ref mut unwind, .. } + | TerminatorKind::Drop { ref mut unwind, .. } + | TerminatorKind::FalseUnwind { ref mut unwind, .. } + | TerminatorKind::InlineAsm { cleanup: ref mut unwind, .. 
} => Some(unwind), + } + } + + pub fn as_switch(&self) -> Option<(&Operand<'tcx>, Ty<'tcx>, &SwitchTargets)> { + match self { + TerminatorKind::SwitchInt { discr, switch_ty, targets } => { + Some((discr, *switch_ty, targets)) + } + _ => None, + } + } + + pub fn as_goto(&self) -> Option { + match self { + TerminatorKind::Goto { target } => Some(*target), + _ => None, + } + } +} + +impl<'tcx> Debug for TerminatorKind<'tcx> { + fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { + self.fmt_head(fmt)?; + let successor_count = self.successors().count(); + let labels = self.fmt_successor_labels(); + assert_eq!(successor_count, labels.len()); + + match successor_count { + 0 => Ok(()), + + 1 => write!(fmt, " -> {:?}", self.successors().next().unwrap()), + + _ => { + write!(fmt, " -> [")?; + for (i, target) in self.successors().enumerate() { + if i > 0 { + write!(fmt, ", ")?; + } + write!(fmt, "{}: {:?}", labels[i], target)?; + } + write!(fmt, "]") + } + } + } +} + +impl<'tcx> TerminatorKind<'tcx> { + /// Writes the "head" part of the terminator; that is, its name and the data it uses to pick the + /// successor basic block, if any. The only information not included is the list of possible + /// successors, which may be rendered differently between the text and the graphviz format. + pub fn fmt_head(&self, fmt: &mut W) -> fmt::Result { + use self::TerminatorKind::*; + match self { + Goto { .. } => write!(fmt, "goto"), + SwitchInt { discr, .. } => write!(fmt, "switchInt({:?})", discr), + Return => write!(fmt, "return"), + GeneratorDrop => write!(fmt, "generator_drop"), + Resume => write!(fmt, "resume"), + Abort => write!(fmt, "abort"), + Yield { value, resume_arg, .. } => write!(fmt, "{:?} = yield({:?})", resume_arg, value), + Unreachable => write!(fmt, "unreachable"), + Drop { place, .. } => write!(fmt, "drop({:?})", place), + DropAndReplace { place, value, .. } => { + write!(fmt, "replace({:?} <- {:?})", place, value) + } + Call { func, args, destination, .. } => { + write!(fmt, "{:?} = ", destination)?; + write!(fmt, "{:?}(", func)?; + for (index, arg) in args.iter().enumerate() { + if index > 0 { + write!(fmt, ", ")?; + } + write!(fmt, "{:?}", arg)?; + } + write!(fmt, ")") + } + Assert { cond, expected, msg, .. } => { + write!(fmt, "assert(")?; + if !expected { + write!(fmt, "!")?; + } + write!(fmt, "{:?}, ", cond)?; + msg.fmt_assert_args(fmt)?; + write!(fmt, ")") + } + FalseEdge { .. } => write!(fmt, "falseEdge"), + FalseUnwind { .. } => write!(fmt, "falseUnwind"), + InlineAsm { template, ref operands, options, .. 
} => { + write!(fmt, "asm!(\"{}\"", InlineAsmTemplatePiece::to_string(template))?; + for op in operands { + write!(fmt, ", ")?; + let print_late = |&late| if late { "late" } else { "" }; + match op { + InlineAsmOperand::In { reg, value } => { + write!(fmt, "in({}) {:?}", reg, value)?; + } + InlineAsmOperand::Out { reg, late, place: Some(place) } => { + write!(fmt, "{}out({}) {:?}", print_late(late), reg, place)?; + } + InlineAsmOperand::Out { reg, late, place: None } => { + write!(fmt, "{}out({}) _", print_late(late), reg)?; + } + InlineAsmOperand::InOut { + reg, + late, + in_value, + out_place: Some(out_place), + } => { + write!( + fmt, + "in{}out({}) {:?} => {:?}", + print_late(late), + reg, + in_value, + out_place + )?; + } + InlineAsmOperand::InOut { reg, late, in_value, out_place: None } => { + write!(fmt, "in{}out({}) {:?} => _", print_late(late), reg, in_value)?; + } + InlineAsmOperand::Const { value } => { + write!(fmt, "const {:?}", value)?; + } + InlineAsmOperand::SymFn { value } => { + write!(fmt, "sym_fn {:?}", value)?; + } + InlineAsmOperand::SymStatic { def_id } => { + write!(fmt, "sym_static {:?}", def_id)?; + } + } + } + write!(fmt, ", options({:?}))", options) + } + } + } + + /// Returns the list of labels for the edges to the successor basic blocks. + pub fn fmt_successor_labels(&self) -> Vec> { + use self::TerminatorKind::*; + match *self { + Return | Resume | Abort | Unreachable | GeneratorDrop => vec![], + Goto { .. } => vec!["".into()], + SwitchInt { ref targets, switch_ty, .. } => ty::tls::with(|tcx| { + let param_env = ty::ParamEnv::empty(); + let switch_ty = tcx.lift(switch_ty).unwrap(); + let size = tcx.layout_of(param_env.and(switch_ty)).unwrap().size; + targets + .values + .iter() + .map(|&u| { + mir::ConstantKind::from_scalar(tcx, Scalar::from_uint(u, size), switch_ty) + .to_string() + .into() + }) + .chain(iter::once("otherwise".into())) + .collect() + }), + Call { target: Some(_), cleanup: Some(_), .. } => { + vec!["return".into(), "unwind".into()] + } + Call { target: Some(_), cleanup: None, .. } => vec!["return".into()], + Call { target: None, cleanup: Some(_), .. } => vec!["unwind".into()], + Call { target: None, cleanup: None, .. } => vec![], + Yield { drop: Some(_), .. } => vec!["resume".into(), "drop".into()], + Yield { drop: None, .. } => vec!["resume".into()], + DropAndReplace { unwind: None, .. } | Drop { unwind: None, .. } => { + vec!["return".into()] + } + DropAndReplace { unwind: Some(_), .. } | Drop { unwind: Some(_), .. } => { + vec!["return".into(), "unwind".into()] + } + Assert { cleanup: None, .. } => vec!["".into()], + Assert { .. } => vec!["success".into(), "unwind".into()], + FalseEdge { .. } => vec!["real".into(), "imaginary".into()], + FalseUnwind { unwind: Some(_), .. } => vec!["real".into(), "cleanup".into()], + FalseUnwind { unwind: None, .. } => vec!["real".into()], + InlineAsm { destination: Some(_), cleanup: Some(_), .. } => { + vec!["return".into(), "unwind".into()] + } + InlineAsm { destination: Some(_), cleanup: None, .. } => vec!["return".into()], + InlineAsm { destination: None, cleanup: Some(_), .. } => vec!["unwind".into()], + InlineAsm { destination: None, cleanup: None, .. 
} => vec![], + } + } +} diff --git a/compiler/rustc_middle/src/mir/traversal.rs b/compiler/rustc_middle/src/mir/traversal.rs new file mode 100644 index 000000000..627dc32f3 --- /dev/null +++ b/compiler/rustc_middle/src/mir/traversal.rs @@ -0,0 +1,388 @@ +use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; +use rustc_data_structures::sync::OnceCell; +use rustc_index::bit_set::BitSet; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; + +use super::*; + +/// Preorder traversal of a graph. +/// +/// Preorder traversal is when each node is visited after at least one of its predecessors. If you +/// are familiar with some basic graph theory, then this performs a depth first search and returns +/// nodes in order of discovery time. +/// +/// ```text +/// +/// A +/// / \ +/// / \ +/// B C +/// \ / +/// \ / +/// D +/// ``` +/// +/// A preorder traversal of this graph is either `A B D C` or `A C D B` +#[derive(Clone)] +pub struct Preorder<'a, 'tcx> { + body: &'a Body<'tcx>, + visited: BitSet, + worklist: Vec, + root_is_start_block: bool, +} + +impl<'a, 'tcx> Preorder<'a, 'tcx> { + pub fn new(body: &'a Body<'tcx>, root: BasicBlock) -> Preorder<'a, 'tcx> { + let worklist = vec![root]; + + Preorder { + body, + visited: BitSet::new_empty(body.basic_blocks().len()), + worklist, + root_is_start_block: root == START_BLOCK, + } + } +} + +pub fn preorder<'a, 'tcx>(body: &'a Body<'tcx>) -> Preorder<'a, 'tcx> { + Preorder::new(body, START_BLOCK) +} + +impl<'a, 'tcx> Iterator for Preorder<'a, 'tcx> { + type Item = (BasicBlock, &'a BasicBlockData<'tcx>); + + fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> { + while let Some(idx) = self.worklist.pop() { + if !self.visited.insert(idx) { + continue; + } + + let data = &self.body[idx]; + + if let Some(ref term) = data.terminator { + self.worklist.extend(term.successors()); + } + + return Some((idx, data)); + } + + None + } + + fn size_hint(&self) -> (usize, Option) { + // All the blocks, minus the number of blocks we've visited. + let upper = self.body.basic_blocks().len() - self.visited.count(); + + let lower = if self.root_is_start_block { + // We will visit all remaining blocks exactly once. + upper + } else { + self.worklist.len() + }; + + (lower, Some(upper)) + } +} + +/// Postorder traversal of a graph. +/// +/// Postorder traversal is when each node is visited after all of its successors, except when the +/// successor is only reachable by a back-edge. If you are familiar with some basic graph theory, +/// then this performs a depth first search and returns nodes in order of completion time. +/// +/// +/// ```text +/// +/// A +/// / \ +/// / \ +/// B C +/// \ / +/// \ / +/// D +/// ``` +/// +/// A Postorder traversal of this graph is `D B C A` or `D C B A` +pub struct Postorder<'a, 'tcx> { + basic_blocks: &'a IndexVec>, + visited: BitSet, + visit_stack: Vec<(BasicBlock, Successors<'a>)>, + root_is_start_block: bool, +} + +impl<'a, 'tcx> Postorder<'a, 'tcx> { + pub fn new( + basic_blocks: &'a IndexVec>, + root: BasicBlock, + ) -> Postorder<'a, 'tcx> { + let mut po = Postorder { + basic_blocks, + visited: BitSet::new_empty(basic_blocks.len()), + visit_stack: Vec::new(), + root_is_start_block: root == START_BLOCK, + }; + + let data = &po.basic_blocks[root]; + + if let Some(ref term) = data.terminator { + po.visited.insert(root); + po.visit_stack.push((root, term.successors())); + po.traverse_successor(); + } + + po + } + + fn traverse_successor(&mut self) { + // This is quite a complex loop due to 1. 
the borrow checker not liking it much,
+        // and 2. what exactly is going on being otherwise hard to follow.
+        //
+        // It does the actual traversal of the graph, while the `next` method on the iterator
+        // just pops off of the stack. `visit_stack` is a stack containing pairs of nodes and
+        // iterators over the successors of those nodes. Each iteration attempts to get the next
+        // node from the top of the stack, then pushes that node and an iterator over the
+        // successors to the top of the stack. This loop only grows `visit_stack`, stopping when
+        // we reach a child that has no children that we haven't already visited.
+        //
+        // For a graph that looks like this:
+        //
+        //         A
+        //        / \
+        //       /   \
+        //      B     C
+        //      |     |
+        //      |     |
+        //      D     |
+        //       \   /
+        //        \ /
+        //         E
+        //
+        // The state of the stack starts out with just the root node (`A` in this case);
+        //     [(A, [B, C])]
+        //
+        // When the first call to `traverse_successor` happens, the following happens:
+        //
+        //     [(B, [D]), // `B` taken from the successors of `A`, pushed to the
+        //                // top of the stack along with the successors of `B`
+        //      (A, [C])]
+        //
+        //     [(D, [E]), // `D` taken from successors of `B`, pushed to stack
+        //      (B, []),
+        //      (A, [C])]
+        //
+        //     [(E, []),  // `E` taken from successors of `D`, pushed to stack
+        //      (D, []),
+        //      (B, []),
+        //      (A, [C])]
+        //
+        // Now that the top of the stack has no successors we can traverse, each item will
+        // be popped off during iteration until we get back to `A`. This yields [E, D, B].
+        //
+        // When we yield `B` and call `traverse_successor`, we push `C` to the stack, but
+        // since we've already visited `E`, that child isn't added to the stack. The last
+        // two iterations yield `C` and finally `A` for a final traversal of [E, D, B, C, A]
+        loop {
+            let bb = if let Some(&mut (_, ref mut iter)) = self.visit_stack.last_mut() {
+                if let Some(bb) = iter.next() {
+                    bb
+                } else {
+                    break;
+                }
+            } else {
+                break;
+            };
+
+            if self.visited.insert(bb) {
+                if let Some(term) = &self.basic_blocks[bb].terminator {
+                    self.visit_stack.push((bb, term.successors()));
+                }
+            }
+        }
+    }
+}
+
+pub fn postorder<'a, 'tcx>(body: &'a Body<'tcx>) -> Postorder<'a, 'tcx> {
+    Postorder::new(&body.basic_blocks, START_BLOCK)
+}
+
+impl<'a, 'tcx> Iterator for Postorder<'a, 'tcx> {
+    type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
+
+    fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
+        let next = self.visit_stack.pop();
+        if next.is_some() {
+            self.traverse_successor();
+        }
+
+        next.map(|(bb, _)| (bb, &self.basic_blocks[bb]))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // All the blocks, minus the number of blocks we've visited.
+        let upper = self.basic_blocks.len() - self.visited.count();
+
+        let lower = if self.root_is_start_block {
+            // We will visit all remaining blocks exactly once.
+            upper
+        } else {
+            self.visit_stack.len()
+        };
+
+        (lower, Some(upper))
+    }
+}
+
+/// Reverse postorder traversal of a graph
+///
+/// Reverse postorder is the reverse order of a postorder traversal.
+/// This is different from a preorder traversal and represents a natural
+/// linearization of control-flow.
+///
+/// ```text
+///
+///         A
+///        / \
+///       /   \
+///      B     C
+///       \   /
+///        \ /
+///         D
+/// ```
+///
+/// A reverse postorder traversal of this graph is either `A B C D` or `A C B D`.
+/// Note that for a graph containing no loops (i.e., a DAG), this is equivalent to
+/// a topological sort.
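+///
+/// For illustration: reversing the postorder `D B C A` of the diamond above
+/// yields `A C B D`, one of the two orders listed.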
+///
+/// Construction of a `ReversePostorder` traversal requires doing a full
+/// postorder traversal of the graph, so this traversal should be
+/// constructed as few times as possible.
+#[derive(Clone)]
+pub struct ReversePostorder<'a, 'tcx> {
+    body: &'a Body<'tcx>,
+    blocks: Vec<BasicBlock>,
+    idx: usize,
+}
+
+impl<'a, 'tcx> ReversePostorder<'a, 'tcx> {
+    pub fn new(body: &'a Body<'tcx>, root: BasicBlock) -> ReversePostorder<'a, 'tcx> {
+        let blocks: Vec<_> = Postorder::new(&body.basic_blocks, root).map(|(bb, _)| bb).collect();
+        let len = blocks.len();
+        ReversePostorder { body, blocks, idx: len }
+    }
+}
+
+impl<'a, 'tcx> Iterator for ReversePostorder<'a, 'tcx> {
+    type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
+
+    fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
+        if self.idx == 0 {
+            return None;
+        }
+        self.idx -= 1;
+
+        self.blocks.get(self.idx).map(|&bb| (bb, &self.body[bb]))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.idx, Some(self.idx))
+    }
+}
+
+impl<'a, 'tcx> ExactSizeIterator for ReversePostorder<'a, 'tcx> {}
+
+/// Returns an iterator over all basic blocks reachable from the `START_BLOCK` in no particular
+/// order.
+///
+/// This is clearer than writing `preorder` in cases where the order doesn't matter.
+pub fn reachable<'a, 'tcx>(
+    body: &'a Body<'tcx>,
+) -> impl 'a + Iterator<Item = (BasicBlock, &'a BasicBlockData<'tcx>)> {
+    preorder(body)
+}
+
+/// Returns a `BitSet` containing all basic blocks reachable from the `START_BLOCK`.
+pub fn reachable_as_bitset<'tcx>(body: &Body<'tcx>) -> BitSet<BasicBlock> {
+    let mut iter = preorder(body);
+    (&mut iter).for_each(drop);
+    iter.visited
+}
+
+#[derive(Clone)]
+pub struct ReversePostorderIter<'a, 'tcx> {
+    body: &'a Body<'tcx>,
+    blocks: &'a [BasicBlock],
+    idx: usize,
+}
+
+impl<'a, 'tcx> Iterator for ReversePostorderIter<'a, 'tcx> {
+    type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
+
+    fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
+        if self.idx == 0 {
+            return None;
+        }
+        self.idx -= 1;
+
+        self.blocks.get(self.idx).map(|&bb| (bb, &self.body[bb]))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.idx, Some(self.idx))
+    }
+}
+
+impl<'a, 'tcx> ExactSizeIterator for ReversePostorderIter<'a, 'tcx> {}
+
+pub fn reverse_postorder<'a, 'tcx>(body: &'a Body<'tcx>) -> ReversePostorderIter<'a, 'tcx> {
+    let blocks = body.basic_blocks.postorder();
+    let len = blocks.len();
+    ReversePostorderIter { body, blocks, idx: len }
+}
+
+#[derive(Clone, Debug)]
+pub(super) struct PostorderCache {
+    cache: OnceCell<Vec<BasicBlock>>,
+}
+
+impl PostorderCache {
+    #[inline]
+    pub(super) fn new() -> Self {
+        PostorderCache { cache: OnceCell::new() }
+    }
+
+    /// Invalidates the postorder cache.
+    #[inline]
+    pub(super) fn invalidate(&mut self) {
+        self.cache = OnceCell::new();
+    }
+
+    /// Returns a `&[BasicBlock]` representing the postorder traversal of the CFG of this MIR.
+    #[inline]
+    pub(super) fn compute(&self, body: &IndexVec<BasicBlock, BasicBlockData<'_>>) -> &[BasicBlock] {
+        self.cache.get_or_init(|| Postorder::new(body, START_BLOCK).map(|(bb, _)| bb).collect())
+    }
+}
+
+impl<S: Encoder> Encodable<S> for PostorderCache {
+    #[inline]
+    fn encode(&self, _s: &mut S) {}
+}
+
+impl<D: Decoder> Decodable<D> for PostorderCache {
+    #[inline]
+    fn decode(_: &mut D) -> Self {
+        Self::new()
+    }
+}
+
+impl<CTX> HashStable<CTX> for PostorderCache {
+    #[inline]
+    fn hash_stable(&self, _: &mut CTX, _: &mut StableHasher) {
+        // do nothing
+    }
+}
+
+TrivialTypeTraversalAndLiftImpls!
{ + PostorderCache, +} diff --git a/compiler/rustc_middle/src/mir/type_foldable.rs b/compiler/rustc_middle/src/mir/type_foldable.rs new file mode 100644 index 000000000..82a6b0c50 --- /dev/null +++ b/compiler/rustc_middle/src/mir/type_foldable.rs @@ -0,0 +1,240 @@ +//! `TypeFoldable` implementations for MIR types + +use super::*; +use crate::ty; +use rustc_data_structures::functor::IdFunctor; + +TrivialTypeTraversalAndLiftImpls! { + BlockTailInfo, + MirPhase, + SourceInfo, + FakeReadCause, + RetagKind, + SourceScope, + SourceScopeLocalData, + UserTypeAnnotationIndex, +} + +impl<'tcx> TypeFoldable<'tcx> for Terminator<'tcx> { + fn try_fold_with>(self, folder: &mut F) -> Result { + use crate::mir::TerminatorKind::*; + + let kind = match self.kind { + Goto { target } => Goto { target }, + SwitchInt { discr, switch_ty, targets } => SwitchInt { + discr: discr.try_fold_with(folder)?, + switch_ty: switch_ty.try_fold_with(folder)?, + targets, + }, + Drop { place, target, unwind } => { + Drop { place: place.try_fold_with(folder)?, target, unwind } + } + DropAndReplace { place, value, target, unwind } => DropAndReplace { + place: place.try_fold_with(folder)?, + value: value.try_fold_with(folder)?, + target, + unwind, + }, + Yield { value, resume, resume_arg, drop } => Yield { + value: value.try_fold_with(folder)?, + resume, + resume_arg: resume_arg.try_fold_with(folder)?, + drop, + }, + Call { func, args, destination, target, cleanup, from_hir_call, fn_span } => Call { + func: func.try_fold_with(folder)?, + args: args.try_fold_with(folder)?, + destination: destination.try_fold_with(folder)?, + target, + cleanup, + from_hir_call, + fn_span, + }, + Assert { cond, expected, msg, target, cleanup } => { + use AssertKind::*; + let msg = match msg { + BoundsCheck { len, index } => BoundsCheck { + len: len.try_fold_with(folder)?, + index: index.try_fold_with(folder)?, + }, + Overflow(op, l, r) => { + Overflow(op, l.try_fold_with(folder)?, r.try_fold_with(folder)?) 
+ } + OverflowNeg(op) => OverflowNeg(op.try_fold_with(folder)?), + DivisionByZero(op) => DivisionByZero(op.try_fold_with(folder)?), + RemainderByZero(op) => RemainderByZero(op.try_fold_with(folder)?), + ResumedAfterReturn(_) | ResumedAfterPanic(_) => msg, + }; + Assert { cond: cond.try_fold_with(folder)?, expected, msg, target, cleanup } + } + GeneratorDrop => GeneratorDrop, + Resume => Resume, + Abort => Abort, + Return => Return, + Unreachable => Unreachable, + FalseEdge { real_target, imaginary_target } => { + FalseEdge { real_target, imaginary_target } + } + FalseUnwind { real_target, unwind } => FalseUnwind { real_target, unwind }, + InlineAsm { template, operands, options, line_spans, destination, cleanup } => { + InlineAsm { + template, + operands: operands.try_fold_with(folder)?, + options, + line_spans, + destination, + cleanup, + } + } + }; + Ok(Terminator { source_info: self.source_info, kind }) + } +} + +impl<'tcx> TypeFoldable<'tcx> for GeneratorKind { + fn try_fold_with>(self, _: &mut F) -> Result { + Ok(self) + } +} + +impl<'tcx> TypeFoldable<'tcx> for Place<'tcx> { + fn try_fold_with>(self, folder: &mut F) -> Result { + Ok(Place { + local: self.local.try_fold_with(folder)?, + projection: self.projection.try_fold_with(folder)?, + }) + } +} + +impl<'tcx> TypeFoldable<'tcx> for &'tcx ty::List> { + fn try_fold_with>(self, folder: &mut F) -> Result { + ty::util::fold_list(self, folder, |tcx, v| tcx.intern_place_elems(v)) + } +} + +impl<'tcx> TypeFoldable<'tcx> for Rvalue<'tcx> { + fn try_fold_with>(self, folder: &mut F) -> Result { + use crate::mir::Rvalue::*; + Ok(match self { + Use(op) => Use(op.try_fold_with(folder)?), + Repeat(op, len) => Repeat(op.try_fold_with(folder)?, len.try_fold_with(folder)?), + ThreadLocalRef(did) => ThreadLocalRef(did.try_fold_with(folder)?), + Ref(region, bk, place) => { + Ref(region.try_fold_with(folder)?, bk, place.try_fold_with(folder)?) + } + CopyForDeref(place) => CopyForDeref(place.try_fold_with(folder)?), + AddressOf(mutability, place) => AddressOf(mutability, place.try_fold_with(folder)?), + Len(place) => Len(place.try_fold_with(folder)?), + Cast(kind, op, ty) => Cast(kind, op.try_fold_with(folder)?, ty.try_fold_with(folder)?), + BinaryOp(op, box (rhs, lhs)) => { + BinaryOp(op, Box::new((rhs.try_fold_with(folder)?, lhs.try_fold_with(folder)?))) + } + CheckedBinaryOp(op, box (rhs, lhs)) => CheckedBinaryOp( + op, + Box::new((rhs.try_fold_with(folder)?, lhs.try_fold_with(folder)?)), + ), + UnaryOp(op, val) => UnaryOp(op, val.try_fold_with(folder)?), + Discriminant(place) => Discriminant(place.try_fold_with(folder)?), + NullaryOp(op, ty) => NullaryOp(op, ty.try_fold_with(folder)?), + Aggregate(kind, fields) => { + let kind = kind.try_map_id(|kind| { + Ok(match kind { + AggregateKind::Array(ty) => AggregateKind::Array(ty.try_fold_with(folder)?), + AggregateKind::Tuple => AggregateKind::Tuple, + AggregateKind::Adt(def, v, substs, user_ty, n) => AggregateKind::Adt( + def, + v, + substs.try_fold_with(folder)?, + user_ty.try_fold_with(folder)?, + n, + ), + AggregateKind::Closure(id, substs) => { + AggregateKind::Closure(id, substs.try_fold_with(folder)?) + } + AggregateKind::Generator(id, substs, movablity) => { + AggregateKind::Generator(id, substs.try_fold_with(folder)?, movablity) + } + }) + })?; + Aggregate(kind, fields.try_fold_with(folder)?) + } + ShallowInitBox(op, ty) => { + ShallowInitBox(op.try_fold_with(folder)?, ty.try_fold_with(folder)?) 
+ } + }) + } +} + +impl<'tcx> TypeFoldable<'tcx> for Operand<'tcx> { + fn try_fold_with>(self, folder: &mut F) -> Result { + Ok(match self { + Operand::Copy(place) => Operand::Copy(place.try_fold_with(folder)?), + Operand::Move(place) => Operand::Move(place.try_fold_with(folder)?), + Operand::Constant(c) => Operand::Constant(c.try_fold_with(folder)?), + }) + } +} + +impl<'tcx> TypeFoldable<'tcx> for PlaceElem<'tcx> { + fn try_fold_with>(self, folder: &mut F) -> Result { + use crate::mir::ProjectionElem::*; + + Ok(match self { + Deref => Deref, + Field(f, ty) => Field(f, ty.try_fold_with(folder)?), + Index(v) => Index(v.try_fold_with(folder)?), + Downcast(symbol, variantidx) => Downcast(symbol, variantidx), + ConstantIndex { offset, min_length, from_end } => { + ConstantIndex { offset, min_length, from_end } + } + Subslice { from, to, from_end } => Subslice { from, to, from_end }, + }) + } +} + +impl<'tcx> TypeFoldable<'tcx> for Field { + fn try_fold_with>(self, _: &mut F) -> Result { + Ok(self) + } +} + +impl<'tcx> TypeFoldable<'tcx> for GeneratorSavedLocal { + fn try_fold_with>(self, _: &mut F) -> Result { + Ok(self) + } +} + +impl<'tcx, R: Idx, C: Idx> TypeFoldable<'tcx> for BitMatrix { + fn try_fold_with>(self, _: &mut F) -> Result { + Ok(self) + } +} + +impl<'tcx> TypeFoldable<'tcx> for Constant<'tcx> { + fn try_fold_with>(self, folder: &mut F) -> Result { + Ok(Constant { + span: self.span, + user_ty: self.user_ty.try_fold_with(folder)?, + literal: self.literal.try_fold_with(folder)?, + }) + } +} + +impl<'tcx> TypeFoldable<'tcx> for ConstantKind<'tcx> { + #[inline(always)] + fn try_fold_with>(self, folder: &mut F) -> Result { + folder.try_fold_mir_const(self) + } +} + +impl<'tcx> TypeSuperFoldable<'tcx> for ConstantKind<'tcx> { + fn try_super_fold_with>( + self, + folder: &mut F, + ) -> Result { + match self { + ConstantKind::Ty(c) => Ok(ConstantKind::Ty(c.try_fold_with(folder)?)), + ConstantKind::Val(v, t) => Ok(ConstantKind::Val(v, t.try_fold_with(folder)?)), + } + } +} diff --git a/compiler/rustc_middle/src/mir/type_visitable.rs b/compiler/rustc_middle/src/mir/type_visitable.rs new file mode 100644 index 000000000..6a0801cb0 --- /dev/null +++ b/compiler/rustc_middle/src/mir/type_visitable.rs @@ -0,0 +1,190 @@ +//! `TypeVisitable` implementations for MIR types + +use super::*; +use crate::ty; + +impl<'tcx> TypeVisitable<'tcx> for Terminator<'tcx> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + use crate::mir::TerminatorKind::*; + + match self.kind { + SwitchInt { ref discr, switch_ty, .. } => { + discr.visit_with(visitor)?; + switch_ty.visit_with(visitor) + } + Drop { ref place, .. } => place.visit_with(visitor), + DropAndReplace { ref place, ref value, .. } => { + place.visit_with(visitor)?; + value.visit_with(visitor) + } + Yield { ref value, .. } => value.visit_with(visitor), + Call { ref func, ref args, ref destination, .. } => { + destination.visit_with(visitor)?; + func.visit_with(visitor)?; + args.visit_with(visitor) + } + Assert { ref cond, ref msg, .. } => { + cond.visit_with(visitor)?; + use AssertKind::*; + match msg { + BoundsCheck { ref len, ref index } => { + len.visit_with(visitor)?; + index.visit_with(visitor) + } + Overflow(_, l, r) => { + l.visit_with(visitor)?; + r.visit_with(visitor) + } + OverflowNeg(op) | DivisionByZero(op) | RemainderByZero(op) => { + op.visit_with(visitor) + } + ResumedAfterReturn(_) | ResumedAfterPanic(_) => ControlFlow::CONTINUE, + } + } + InlineAsm { ref operands, .. } => operands.visit_with(visitor), + Goto { .. 
} + | Resume + | Abort + | Return + | GeneratorDrop + | Unreachable + | FalseEdge { .. } + | FalseUnwind { .. } => ControlFlow::CONTINUE, + } + } +} + +impl<'tcx> TypeVisitable<'tcx> for GeneratorKind { + fn visit_with>(&self, _: &mut V) -> ControlFlow { + ControlFlow::CONTINUE + } +} + +impl<'tcx> TypeVisitable<'tcx> for Place<'tcx> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + self.local.visit_with(visitor)?; + self.projection.visit_with(visitor) + } +} + +impl<'tcx> TypeVisitable<'tcx> for &'tcx ty::List> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + self.iter().try_for_each(|t| t.visit_with(visitor)) + } +} + +impl<'tcx> TypeVisitable<'tcx> for Rvalue<'tcx> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + use crate::mir::Rvalue::*; + match *self { + Use(ref op) => op.visit_with(visitor), + CopyForDeref(ref place) => { + let op = &Operand::Copy(*place); + op.visit_with(visitor) + } + Repeat(ref op, _) => op.visit_with(visitor), + ThreadLocalRef(did) => did.visit_with(visitor), + Ref(region, _, ref place) => { + region.visit_with(visitor)?; + place.visit_with(visitor) + } + AddressOf(_, ref place) => place.visit_with(visitor), + Len(ref place) => place.visit_with(visitor), + Cast(_, ref op, ty) => { + op.visit_with(visitor)?; + ty.visit_with(visitor) + } + BinaryOp(_, box (ref rhs, ref lhs)) | CheckedBinaryOp(_, box (ref rhs, ref lhs)) => { + rhs.visit_with(visitor)?; + lhs.visit_with(visitor) + } + UnaryOp(_, ref val) => val.visit_with(visitor), + Discriminant(ref place) => place.visit_with(visitor), + NullaryOp(_, ty) => ty.visit_with(visitor), + Aggregate(ref kind, ref fields) => { + match **kind { + AggregateKind::Array(ty) => { + ty.visit_with(visitor)?; + } + AggregateKind::Tuple => {} + AggregateKind::Adt(_, _, substs, user_ty, _) => { + substs.visit_with(visitor)?; + user_ty.visit_with(visitor)?; + } + AggregateKind::Closure(_, substs) => { + substs.visit_with(visitor)?; + } + AggregateKind::Generator(_, substs, _) => { + substs.visit_with(visitor)?; + } + } + fields.visit_with(visitor) + } + ShallowInitBox(ref op, ty) => { + op.visit_with(visitor)?; + ty.visit_with(visitor) + } + } + } +} + +impl<'tcx> TypeVisitable<'tcx> for Operand<'tcx> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + match *self { + Operand::Copy(ref place) | Operand::Move(ref place) => place.visit_with(visitor), + Operand::Constant(ref c) => c.visit_with(visitor), + } + } +} + +impl<'tcx> TypeVisitable<'tcx> for PlaceElem<'tcx> { + fn visit_with>(&self, visitor: &mut Vs) -> ControlFlow { + use crate::mir::ProjectionElem::*; + + match self { + Field(_, ty) => ty.visit_with(visitor), + Index(v) => v.visit_with(visitor), + _ => ControlFlow::CONTINUE, + } + } +} + +impl<'tcx> TypeVisitable<'tcx> for Field { + fn visit_with>(&self, _: &mut V) -> ControlFlow { + ControlFlow::CONTINUE + } +} + +impl<'tcx> TypeVisitable<'tcx> for GeneratorSavedLocal { + fn visit_with>(&self, _: &mut V) -> ControlFlow { + ControlFlow::CONTINUE + } +} + +impl<'tcx, R: Idx, C: Idx> TypeVisitable<'tcx> for BitMatrix { + fn visit_with>(&self, _: &mut V) -> ControlFlow { + ControlFlow::CONTINUE + } +} + +impl<'tcx> TypeVisitable<'tcx> for Constant<'tcx> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + self.literal.visit_with(visitor)?; + self.user_ty.visit_with(visitor) + } +} + +impl<'tcx> TypeVisitable<'tcx> for ConstantKind<'tcx> { + fn visit_with>(&self, visitor: &mut V) -> ControlFlow { + visitor.visit_mir_const(*self) + } +} + +impl<'tcx> 
TypeSuperVisitable<'tcx> for ConstantKind<'tcx> {
+    fn super_visit_with<V: TypeVisitor<'tcx>>(&self, visitor: &mut V) -> ControlFlow<V::BreakTy> {
+        match *self {
+            ConstantKind::Ty(c) => c.visit_with(visitor),
+            ConstantKind::Val(_, t) => t.visit_with(visitor),
+        }
+    }
+}
diff --git a/compiler/rustc_middle/src/mir/visit.rs b/compiler/rustc_middle/src/mir/visit.rs
new file mode 100644
index 000000000..891608764
--- /dev/null
+++ b/compiler/rustc_middle/src/mir/visit.rs
@@ -0,0 +1,1330 @@
+//! # The MIR Visitor
+//!
+//! ## Overview
+//!
+//! There are two visitors, one for immutable and one for mutable references,
+//! but both are generated by the following macro. The code is written according
+//! to the following conventions:
+//!
+//! - introduce a `visit_foo` and a `super_foo` method for every MIR type
+//! - `visit_foo`, by default, calls `super_foo`
+//! - `super_foo`, by default, destructures the `foo` and calls `visit_foo`
+//!
+//! This allows you as a user to override `visit_foo` for the types you are
+//! interested in, and to invoke (within that method) `self.super_foo` to get
+//! the default behavior. Just as in an OO language, you should never call
+//! `super` methods ordinarily except in that circumstance.
+//!
+//! For the most part, we do not destructure things external to the
+//! MIR, e.g., types, spans, etc., but simply visit them and stop. This
+//! avoids duplication with other visitors like `TypeFoldable`.
+//!
+//! ## Updating
+//!
+//! The code is written in a very deliberate style intended to minimize
+//! the chance of things being overlooked. You'll notice that we always
+//! use pattern matching to reference fields and we ensure that all
+//! matches are exhaustive.
+//!
+//! For example, the `super_basic_block_data` method begins like this:
+//!
+//! ```ignore (pseudo-rust)
+//! fn super_basic_block_data(
+//!     &mut self,
+//!     block: BasicBlock,
+//!     data: & $($mutability)? BasicBlockData<'tcx>
+//! ) {
+//!     let BasicBlockData {
+//!         statements,
+//!         terminator,
+//!         is_cleanup: _
+//!     } = *data;
+//!
+//!     for statement in statements {
+//!         self.visit_statement(block, statement);
+//!     }
+//!
+//!     ...
+//! }
+//! ```
+//!
+//! Here we used `let BasicBlockData { <fields> } = *data` deliberately,
+//! rather than writing `data.statements` in the body. This is because if one
+//! adds a new field to `BasicBlockData`, one will be forced to revise this code,
+//! and hence one will (hopefully) invoke the correct visit methods (if any).
+//!
+//! For this to work, ALL MATCHES MUST BE EXHAUSTIVE IN FIELDS AND VARIANTS.
+//! That means you never write `..` to skip over fields, nor do you write `_`
+//! to skip over variants in a `match`.
+//!
+//! The only place that `_` is acceptable is to match a field (or
+//! variant argument) that does not require visiting, as in
+//! `is_cleanup` above.
+
+use crate::mir::*;
+use crate::ty::subst::SubstsRef;
+use crate::ty::{CanonicalUserTypeAnnotation, Ty};
+use rustc_span::Span;
+
+macro_rules! make_mir_visitor {
+    ($visitor_trait_name:ident, $($mutability:ident)?) => {
+        pub trait $visitor_trait_name<'tcx> {
+            // Override these, and call `self.super_xxx` to revert back to the
+            // default behavior.
+
+            fn visit_body(
+                &mut self,
+                body: &$($mutability)? Body<'tcx>,
+            ) {
+                self.super_body(body);
+            }
+
+            fn visit_basic_block_data(
+                &mut self,
+                block: BasicBlock,
+                data: & $($mutability)? BasicBlockData<'tcx>,
+            ) {
+                self.super_basic_block_data(block, data);
+            }
+
+            fn visit_source_scope_data(
+                &mut self,
+                scope_data: & $($mutability)?
SourceScopeData<'tcx>, + ) { + self.super_source_scope_data(scope_data); + } + + fn visit_statement( + &mut self, + statement: & $($mutability)? Statement<'tcx>, + location: Location, + ) { + self.super_statement(statement, location); + } + + fn visit_assign( + &mut self, + place: & $($mutability)? Place<'tcx>, + rvalue: & $($mutability)? Rvalue<'tcx>, + location: Location, + ) { + self.super_assign(place, rvalue, location); + } + + fn visit_terminator( + &mut self, + terminator: & $($mutability)? Terminator<'tcx>, + location: Location, + ) { + self.super_terminator(terminator, location); + } + + fn visit_assert_message( + &mut self, + msg: & $($mutability)? AssertMessage<'tcx>, + location: Location, + ) { + self.super_assert_message(msg, location); + } + + fn visit_rvalue( + &mut self, + rvalue: & $($mutability)? Rvalue<'tcx>, + location: Location, + ) { + self.super_rvalue(rvalue, location); + } + + fn visit_operand( + &mut self, + operand: & $($mutability)? Operand<'tcx>, + location: Location, + ) { + self.super_operand(operand, location); + } + + fn visit_ascribe_user_ty( + &mut self, + place: & $($mutability)? Place<'tcx>, + variance: $(& $mutability)? ty::Variance, + user_ty: & $($mutability)? UserTypeProjection, + location: Location, + ) { + self.super_ascribe_user_ty(place, variance, user_ty, location); + } + + fn visit_coverage( + &mut self, + coverage: & $($mutability)? Coverage, + location: Location, + ) { + self.super_coverage(coverage, location); + } + + fn visit_retag( + &mut self, + kind: $(& $mutability)? RetagKind, + place: & $($mutability)? Place<'tcx>, + location: Location, + ) { + self.super_retag(kind, place, location); + } + + fn visit_place( + &mut self, + place: & $($mutability)? Place<'tcx>, + context: PlaceContext, + location: Location, + ) { + self.super_place(place, context, location); + } + + visit_place_fns!($($mutability)?); + + fn visit_constant( + &mut self, + constant: & $($mutability)? Constant<'tcx>, + location: Location, + ) { + self.super_constant(constant, location); + } + + fn visit_span( + &mut self, + span: $(& $mutability)? Span, + ) { + self.super_span(span); + } + + fn visit_source_info( + &mut self, + source_info: & $($mutability)? SourceInfo, + ) { + self.super_source_info(source_info); + } + + fn visit_ty( + &mut self, + ty: $(& $mutability)? Ty<'tcx>, + _: TyContext, + ) { + self.super_ty(ty); + } + + fn visit_user_type_projection( + &mut self, + ty: & $($mutability)? UserTypeProjection, + ) { + self.super_user_type_projection(ty); + } + + fn visit_user_type_annotation( + &mut self, + index: UserTypeAnnotationIndex, + ty: & $($mutability)? CanonicalUserTypeAnnotation<'tcx>, + ) { + self.super_user_type_annotation(index, ty); + } + + fn visit_region( + &mut self, + region: $(& $mutability)? ty::Region<'tcx>, + _: Location, + ) { + self.super_region(region); + } + + fn visit_const( + &mut self, + constant: $(& $mutability)? ty::Const<'tcx>, + _: Location, + ) { + self.super_const(constant); + } + + fn visit_substs( + &mut self, + substs: & $($mutability)? SubstsRef<'tcx>, + _: Location, + ) { + self.super_substs(substs); + } + + fn visit_local_decl( + &mut self, + local: Local, + local_decl: & $($mutability)? LocalDecl<'tcx>, + ) { + self.super_local_decl(local, local_decl); + } + + fn visit_var_debug_info( + &mut self, + var_debug_info: & $($mutability)* VarDebugInfo<'tcx>, + ) { + self.super_var_debug_info(var_debug_info); + } + + fn visit_local( + &mut self, + _local: $(& $mutability)? 
Local, + _context: PlaceContext, + _location: Location, + ) {} + + fn visit_source_scope( + &mut self, + scope: $(& $mutability)? SourceScope, + ) { + self.super_source_scope(scope); + } + + // The `super_xxx` methods comprise the default behavior and are + // not meant to be overridden. + + fn super_body( + &mut self, + body: &$($mutability)? Body<'tcx>, + ) { + let span = body.span; + if let Some(gen) = &$($mutability)? body.generator { + if let Some(yield_ty) = $(& $mutability)? gen.yield_ty { + self.visit_ty( + yield_ty, + TyContext::YieldTy(SourceInfo::outermost(span)) + ); + } + } + + // for best performance, we want to use an iterator rather + // than a for-loop, to avoid calling `body::Body::invalidate` for + // each basic block. + #[allow(unused_macro_rules)] + macro_rules! basic_blocks { + (mut) => (body.basic_blocks_mut().iter_enumerated_mut()); + () => (body.basic_blocks().iter_enumerated()); + } + for (bb, data) in basic_blocks!($($mutability)?) { + self.visit_basic_block_data(bb, data); + } + + for scope in &$($mutability)? body.source_scopes { + self.visit_source_scope_data(scope); + } + + self.visit_ty( + $(& $mutability)? body.return_ty(), + TyContext::ReturnTy(SourceInfo::outermost(body.span)) + ); + + for local in body.local_decls.indices() { + self.visit_local_decl(local, & $($mutability)? body.local_decls[local]); + } + + #[allow(unused_macro_rules)] + macro_rules! type_annotations { + (mut) => (body.user_type_annotations.iter_enumerated_mut()); + () => (body.user_type_annotations.iter_enumerated()); + } + + for (index, annotation) in type_annotations!($($mutability)?) { + self.visit_user_type_annotation( + index, annotation + ); + } + + for var_debug_info in &$($mutability)? body.var_debug_info { + self.visit_var_debug_info(var_debug_info); + } + + self.visit_span($(& $mutability)? body.span); + + for const_ in &$($mutability)? body.required_consts { + let location = START_BLOCK.start_location(); + self.visit_constant(const_, location); + } + } + + fn super_basic_block_data(&mut self, + block: BasicBlock, + data: & $($mutability)? BasicBlockData<'tcx>) { + let BasicBlockData { + statements, + terminator, + is_cleanup: _ + } = data; + + let mut index = 0; + for statement in statements { + let location = Location { block, statement_index: index }; + self.visit_statement(statement, location); + index += 1; + } + + if let Some(terminator) = terminator { + let location = Location { block, statement_index: index }; + self.visit_terminator(terminator, location); + } + } + + fn super_source_scope_data( + &mut self, + scope_data: & $($mutability)? SourceScopeData<'tcx>, + ) { + let SourceScopeData { + span, + parent_scope, + inlined, + inlined_parent_scope, + local_data: _, + } = scope_data; + + self.visit_span($(& $mutability)? *span); + if let Some(parent_scope) = parent_scope { + self.visit_source_scope($(& $mutability)? *parent_scope); + } + if let Some((callee, callsite_span)) = inlined { + let location = START_BLOCK.start_location(); + + self.visit_span($(& $mutability)? 
*callsite_span); + + let ty::Instance { def: callee_def, substs: callee_substs } = callee; + match callee_def { + ty::InstanceDef::Item(_def_id) => {} + + ty::InstanceDef::Intrinsic(_def_id) | + ty::InstanceDef::VTableShim(_def_id) | + ty::InstanceDef::ReifyShim(_def_id) | + ty::InstanceDef::Virtual(_def_id, _) | + ty::InstanceDef::ClosureOnceShim { call_once: _def_id, track_caller: _ } | + ty::InstanceDef::DropGlue(_def_id, None) => {} + + ty::InstanceDef::FnPtrShim(_def_id, ty) | + ty::InstanceDef::DropGlue(_def_id, Some(ty)) | + ty::InstanceDef::CloneShim(_def_id, ty) => { + // FIXME(eddyb) use a better `TyContext` here. + self.visit_ty($(& $mutability)? *ty, TyContext::Location(location)); + } + } + self.visit_substs(callee_substs, location); + } + if let Some(inlined_parent_scope) = inlined_parent_scope { + self.visit_source_scope($(& $mutability)? *inlined_parent_scope); + } + } + + fn super_statement(&mut self, + statement: & $($mutability)? Statement<'tcx>, + location: Location) { + let Statement { + source_info, + kind, + } = statement; + + self.visit_source_info(source_info); + match kind { + StatementKind::Assign( + box (place, rvalue) + ) => { + self.visit_assign(place, rvalue, location); + } + StatementKind::FakeRead(box (_, place)) => { + self.visit_place( + place, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Inspect), + location + ); + } + StatementKind::SetDiscriminant { place, .. } => { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::SetDiscriminant), + location + ); + } + StatementKind::Deinit(place) => { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::Deinit), + location + ) + } + StatementKind::StorageLive(local) => { + self.visit_local( + $(& $mutability)? *local, + PlaceContext::NonUse(NonUseContext::StorageLive), + location + ); + } + StatementKind::StorageDead(local) => { + self.visit_local( + $(& $mutability)? *local, + PlaceContext::NonUse(NonUseContext::StorageDead), + location + ); + } + StatementKind::Retag(kind, place) => { + self.visit_retag($(& $mutability)? *kind, place, location); + } + StatementKind::AscribeUserType( + box (place, user_ty), + variance + ) => { + self.visit_ascribe_user_ty(place, $(& $mutability)? *variance, user_ty, location); + } + StatementKind::Coverage(coverage) => { + self.visit_coverage( + coverage, + location + ) + } + StatementKind::CopyNonOverlapping(box crate::mir::CopyNonOverlapping{ + src, + dst, + count, + }) => { + self.visit_operand(src, location); + self.visit_operand(dst, location); + self.visit_operand(count, location) + } + StatementKind::Nop => {} + } + } + + fn super_assign(&mut self, + place: &$($mutability)? Place<'tcx>, + rvalue: &$($mutability)? Rvalue<'tcx>, + location: Location) { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::Store), + location + ); + self.visit_rvalue(rvalue, location); + } + + fn super_terminator(&mut self, + terminator: &$($mutability)? Terminator<'tcx>, + location: Location) { + let Terminator { source_info, kind } = terminator; + + self.visit_source_info(source_info); + match kind { + TerminatorKind::Goto { .. } | + TerminatorKind::Resume | + TerminatorKind::Abort | + TerminatorKind::GeneratorDrop | + TerminatorKind::Unreachable | + TerminatorKind::FalseEdge { .. } | + TerminatorKind::FalseUnwind { .. } => {} + + TerminatorKind::Return => { + // `return` logically moves from the return place `_0`. Note that the place + // cannot be changed by any visitor, though. + let $($mutability)? 
local = RETURN_PLACE; + self.visit_local( + $(& $mutability)? local, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Move), + location, + ); + + assert_eq!( + local, + RETURN_PLACE, + "`MutVisitor` tried to mutate return place of `return` terminator" + ); + } + + TerminatorKind::SwitchInt { + discr, + switch_ty, + targets: _ + } => { + self.visit_operand(discr, location); + self.visit_ty($(& $mutability)? *switch_ty, TyContext::Location(location)); + } + + TerminatorKind::Drop { + place, + target: _, + unwind: _, + } => { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::Drop), + location + ); + } + + TerminatorKind::DropAndReplace { + place, + value, + target: _, + unwind: _, + } => { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::Drop), + location + ); + self.visit_operand(value, location); + } + + TerminatorKind::Call { + func, + args, + destination, + target: _, + cleanup: _, + from_hir_call: _, + fn_span: _ + } => { + self.visit_operand(func, location); + for arg in args { + self.visit_operand(arg, location); + } + self.visit_place( + destination, + PlaceContext::MutatingUse(MutatingUseContext::Call), + location + ); + } + + TerminatorKind::Assert { + cond, + expected: _, + msg, + target: _, + cleanup: _, + } => { + self.visit_operand(cond, location); + self.visit_assert_message(msg, location); + } + + TerminatorKind::Yield { + value, + resume: _, + resume_arg, + drop: _, + } => { + self.visit_operand(value, location); + self.visit_place( + resume_arg, + PlaceContext::MutatingUse(MutatingUseContext::Yield), + location, + ); + } + + TerminatorKind::InlineAsm { + template: _, + operands, + options: _, + line_spans: _, + destination: _, + cleanup: _, + } => { + for op in operands { + match op { + InlineAsmOperand::In { value, .. } => { + self.visit_operand(value, location); + } + InlineAsmOperand::Out { place: Some(place), .. } => { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::AsmOutput), + location, + ); + } + InlineAsmOperand::InOut { in_value, out_place, .. } => { + self.visit_operand(in_value, location); + if let Some(out_place) = out_place { + self.visit_place( + out_place, + PlaceContext::MutatingUse(MutatingUseContext::AsmOutput), + location, + ); + } + } + InlineAsmOperand::Const { value } + | InlineAsmOperand::SymFn { value } => { + self.visit_constant(value, location); + } + InlineAsmOperand::Out { place: None, .. } + | InlineAsmOperand::SymStatic { def_id: _ } => {} + } + } + } + } + } + + fn super_assert_message(&mut self, + msg: & $($mutability)? AssertMessage<'tcx>, + location: Location) { + use crate::mir::AssertKind::*; + match msg { + BoundsCheck { len, index } => { + self.visit_operand(len, location); + self.visit_operand(index, location); + } + Overflow(_, l, r) => { + self.visit_operand(l, location); + self.visit_operand(r, location); + } + OverflowNeg(op) | DivisionByZero(op) | RemainderByZero(op) => { + self.visit_operand(op, location); + } + ResumedAfterReturn(_) | ResumedAfterPanic(_) => { + // Nothing to visit + } + } + } + + fn super_rvalue(&mut self, + rvalue: & $($mutability)? Rvalue<'tcx>, + location: Location) { + match rvalue { + Rvalue::Use(operand) => { + self.visit_operand(operand, location); + } + + Rvalue::Repeat(value, _) => { + self.visit_operand(value, location); + } + + Rvalue::ThreadLocalRef(_) => {} + + Rvalue::Ref(r, bk, path) => { + self.visit_region($(& $mutability)? 
*r, location); + let ctx = match bk { + BorrowKind::Shared => PlaceContext::NonMutatingUse( + NonMutatingUseContext::SharedBorrow + ), + BorrowKind::Shallow => PlaceContext::NonMutatingUse( + NonMutatingUseContext::ShallowBorrow + ), + BorrowKind::Unique => PlaceContext::NonMutatingUse( + NonMutatingUseContext::UniqueBorrow + ), + BorrowKind::Mut { .. } => + PlaceContext::MutatingUse(MutatingUseContext::Borrow), + }; + self.visit_place(path, ctx, location); + } + Rvalue::CopyForDeref(place) => { + self.visit_place( + place, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Inspect), + location + ); + } + + Rvalue::AddressOf(m, path) => { + let ctx = match m { + Mutability::Mut => PlaceContext::MutatingUse( + MutatingUseContext::AddressOf + ), + Mutability::Not => PlaceContext::NonMutatingUse( + NonMutatingUseContext::AddressOf + ), + }; + self.visit_place(path, ctx, location); + } + + Rvalue::Len(path) => { + self.visit_place( + path, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Inspect), + location + ); + } + + Rvalue::Cast(_cast_kind, operand, ty) => { + self.visit_operand(operand, location); + self.visit_ty($(& $mutability)? *ty, TyContext::Location(location)); + } + + Rvalue::BinaryOp(_bin_op, box(lhs, rhs)) + | Rvalue::CheckedBinaryOp(_bin_op, box(lhs, rhs)) => { + self.visit_operand(lhs, location); + self.visit_operand(rhs, location); + } + + Rvalue::UnaryOp(_un_op, op) => { + self.visit_operand(op, location); + } + + Rvalue::Discriminant(place) => { + self.visit_place( + place, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Inspect), + location + ); + } + + Rvalue::NullaryOp(_op, ty) => { + self.visit_ty($(& $mutability)? *ty, TyContext::Location(location)); + } + + Rvalue::Aggregate(kind, operands) => { + let kind = &$($mutability)? **kind; + match kind { + AggregateKind::Array(ty) => { + self.visit_ty($(& $mutability)? *ty, TyContext::Location(location)); + } + AggregateKind::Tuple => { + } + AggregateKind::Adt( + _adt_def, + _variant_index, + substs, + _user_substs, + _active_field_index + ) => { + self.visit_substs(substs, location); + } + AggregateKind::Closure( + _, + closure_substs + ) => { + self.visit_substs(closure_substs, location); + } + AggregateKind::Generator( + _, + generator_substs, + _movability, + ) => { + self.visit_substs(generator_substs, location); + } + } + + for operand in operands { + self.visit_operand(operand, location); + } + } + + Rvalue::ShallowInitBox(operand, ty) => { + self.visit_operand(operand, location); + self.visit_ty($(& $mutability)? *ty, TyContext::Location(location)); + } + } + } + + fn super_operand(&mut self, + operand: & $($mutability)? Operand<'tcx>, + location: Location) { + match operand { + Operand::Copy(place) => { + self.visit_place( + place, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy), + location + ); + } + Operand::Move(place) => { + self.visit_place( + place, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Move), + location + ); + } + Operand::Constant(constant) => { + self.visit_constant(constant, location); + } + } + } + + fn super_ascribe_user_ty(&mut self, + place: & $($mutability)? Place<'tcx>, + _variance: $(& $mutability)? ty::Variance, + user_ty: & $($mutability)? UserTypeProjection, + location: Location) { + self.visit_place( + place, + PlaceContext::NonUse(NonUseContext::AscribeUserTy), + location + ); + self.visit_user_type_projection(user_ty); + } + + fn super_coverage(&mut self, + _coverage: & $($mutability)? 
Coverage, + _location: Location) { + } + + fn super_retag(&mut self, + _kind: $(& $mutability)? RetagKind, + place: & $($mutability)? Place<'tcx>, + location: Location) { + self.visit_place( + place, + PlaceContext::MutatingUse(MutatingUseContext::Retag), + location, + ); + } + + fn super_local_decl(&mut self, + local: Local, + local_decl: & $($mutability)? LocalDecl<'tcx>) { + let LocalDecl { + mutability: _, + ty, + user_ty, + source_info, + internal: _, + local_info: _, + is_block_tail: _, + } = local_decl; + + self.visit_ty($(& $mutability)? *ty, TyContext::LocalDecl { + local, + source_info: *source_info, + }); + if let Some(user_ty) = user_ty { + for (user_ty, _) in & $($mutability)? user_ty.contents { + self.visit_user_type_projection(user_ty); + } + } + self.visit_source_info(source_info); + } + + fn super_var_debug_info( + &mut self, + var_debug_info: & $($mutability)? VarDebugInfo<'tcx> + ) { + let VarDebugInfo { + name: _, + source_info, + value, + } = var_debug_info; + + self.visit_source_info(source_info); + let location = START_BLOCK.start_location(); + match value { + VarDebugInfoContents::Const(c) => self.visit_constant(c, location), + VarDebugInfoContents::Place(place) => + self.visit_place( + place, + PlaceContext::NonUse(NonUseContext::VarDebugInfo), + location + ), + } + } + + fn super_source_scope( + &mut self, + _scope: $(& $mutability)? SourceScope + ) {} + + fn super_constant( + &mut self, + constant: & $($mutability)? Constant<'tcx>, + location: Location + ) { + let Constant { + span, + user_ty, + literal, + } = constant; + + self.visit_span($(& $mutability)? *span); + drop(user_ty); // no visit method for this + match literal { + ConstantKind::Ty(ct) => self.visit_const($(& $mutability)? *ct, location), + ConstantKind::Val(_, ty) => self.visit_ty($(& $mutability)? *ty, TyContext::Location(location)), + } + } + + fn super_span(&mut self, _span: $(& $mutability)? Span) { + } + + fn super_source_info(&mut self, source_info: & $($mutability)? SourceInfo) { + let SourceInfo { + span, + scope, + } = source_info; + + self.visit_span($(& $mutability)? *span); + self.visit_source_scope($(& $mutability)? *scope); + } + + fn super_user_type_projection( + &mut self, + _ty: & $($mutability)? UserTypeProjection, + ) { + } + + fn super_user_type_annotation( + &mut self, + _index: UserTypeAnnotationIndex, + ty: & $($mutability)? CanonicalUserTypeAnnotation<'tcx>, + ) { + self.visit_span($(& $mutability)? ty.span); + self.visit_ty($(& $mutability)? ty.inferred_ty, TyContext::UserTy(ty.span)); + } + + fn super_ty(&mut self, _ty: $(& $mutability)? Ty<'tcx>) { + } + + fn super_region(&mut self, _region: $(& $mutability)? ty::Region<'tcx>) { + } + + fn super_const(&mut self, _const: $(& $mutability)? ty::Const<'tcx>) { + } + + fn super_substs(&mut self, _substs: & $($mutability)? SubstsRef<'tcx>) { + } + + // Convenience methods + + fn visit_location( + &mut self, + body: &$($mutability)? Body<'tcx>, + location: Location + ) { + #[allow(unused_macro_rules)] + macro_rules! basic_blocks { + (mut) => (body.basic_blocks_mut()); + () => (body.basic_blocks()); + } + let basic_block = & $($mutability)? basic_blocks!($($mutability)?)[location.block]; + if basic_block.statements.len() == location.statement_index { + if let Some(ref $($mutability)? terminator) = basic_block.terminator { + self.visit_terminator(terminator, location) + } + } else { + let statement = & $($mutability)? 
+ basic_block.statements[location.statement_index]; + self.visit_statement(statement, location) + } + } + } + } +} + +macro_rules! visit_place_fns { + (mut) => { + fn tcx<'a>(&'a self) -> TyCtxt<'tcx>; + + fn super_place( + &mut self, + place: &mut Place<'tcx>, + context: PlaceContext, + location: Location, + ) { + self.visit_local(&mut place.local, context, location); + + if let Some(new_projection) = self.process_projection(&place.projection, location) { + place.projection = self.tcx().intern_place_elems(&new_projection); + } + } + + fn process_projection<'a>( + &mut self, + projection: &'a [PlaceElem<'tcx>], + location: Location, + ) -> Option>> { + let mut projection = Cow::Borrowed(projection); + + for i in 0..projection.len() { + if let Some(&elem) = projection.get(i) { + if let Some(elem) = self.process_projection_elem(elem, location) { + // This converts the borrowed projection into `Cow::Owned(_)` and returns a + // clone of the projection so we can mutate and reintern later. + let vec = projection.to_mut(); + vec[i] = elem; + } + } + } + + match projection { + Cow::Borrowed(_) => None, + Cow::Owned(vec) => Some(vec), + } + } + + fn process_projection_elem( + &mut self, + elem: PlaceElem<'tcx>, + location: Location, + ) -> Option> { + match elem { + PlaceElem::Index(local) => { + let mut new_local = local; + self.visit_local( + &mut new_local, + PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy), + location, + ); + + if new_local == local { None } else { Some(PlaceElem::Index(new_local)) } + } + PlaceElem::Field(field, ty) => { + let mut new_ty = ty; + self.visit_ty(&mut new_ty, TyContext::Location(location)); + if ty != new_ty { Some(PlaceElem::Field(field, new_ty)) } else { None } + } + PlaceElem::Deref + | PlaceElem::ConstantIndex { .. } + | PlaceElem::Subslice { .. } + | PlaceElem::Downcast(..) => None, + } + } + }; + + () => { + fn visit_projection( + &mut self, + place_ref: PlaceRef<'tcx>, + context: PlaceContext, + location: Location, + ) { + self.super_projection(place_ref, context, location); + } + + fn visit_projection_elem( + &mut self, + local: Local, + proj_base: &[PlaceElem<'tcx>], + elem: PlaceElem<'tcx>, + context: PlaceContext, + location: Location, + ) { + self.super_projection_elem(local, proj_base, elem, context, location); + } + + fn super_place(&mut self, place: &Place<'tcx>, context: PlaceContext, location: Location) { + let mut context = context; + + if !place.projection.is_empty() { + if context.is_use() { + // ^ Only change the context if it is a real use, not a "use" in debuginfo. 
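+                    // Illustrative example (not exercised here): for `x.y = 0`, the
+                    // base local `x` is visited with
+                    // `PlaceContext::MutatingUse(MutatingUseContext::Projection)`, while
+                    // for `f(&x.y)` it is visited with the corresponding
+                    // `NonMutatingUse(NonMutatingUseContext::Projection)` context.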
+                    context = if context.is_mutating_use() {
+                        PlaceContext::MutatingUse(MutatingUseContext::Projection)
+                    } else {
+                        PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection)
+                    };
+                }
+            }
+
+            self.visit_local(place.local, context, location);
+
+            self.visit_projection(place.as_ref(), context, location);
+        }
+
+        fn super_projection(
+            &mut self,
+            place_ref: PlaceRef<'tcx>,
+            context: PlaceContext,
+            location: Location,
+        ) {
+            for (base, elem) in place_ref.iter_projections().rev() {
+                let base_proj = base.projection;
+                self.visit_projection_elem(place_ref.local, base_proj, elem, context, location);
+            }
+        }
+
+        fn super_projection_elem(
+            &mut self,
+            _local: Local,
+            _proj_base: &[PlaceElem<'tcx>],
+            elem: PlaceElem<'tcx>,
+            _context: PlaceContext,
+            location: Location,
+        ) {
+            match elem {
+                ProjectionElem::Field(_field, ty) => {
+                    self.visit_ty(ty, TyContext::Location(location));
+                }
+                ProjectionElem::Index(local) => {
+                    self.visit_local(
+                        local,
+                        PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy),
+                        location,
+                    );
+                }
+                ProjectionElem::Deref
+                | ProjectionElem::Subslice { from: _, to: _, from_end: _ }
+                | ProjectionElem::ConstantIndex { offset: _, min_length: _, from_end: _ }
+                | ProjectionElem::Downcast(_, _) => {}
+            }
+        }
+    };
+}
+
+make_mir_visitor!(Visitor,);
+make_mir_visitor!(MutVisitor, mut);
+
+pub trait MirVisitable<'tcx> {
+    fn apply(&self, location: Location, visitor: &mut dyn Visitor<'tcx>);
+}
+
+impl<'tcx> MirVisitable<'tcx> for Statement<'tcx> {
+    fn apply(&self, location: Location, visitor: &mut dyn Visitor<'tcx>) {
+        visitor.visit_statement(self, location)
+    }
+}
+
+impl<'tcx> MirVisitable<'tcx> for Terminator<'tcx> {
+    fn apply(&self, location: Location, visitor: &mut dyn Visitor<'tcx>) {
+        visitor.visit_terminator(self, location)
+    }
+}
+
+impl<'tcx> MirVisitable<'tcx> for Option<Terminator<'tcx>> {
+    fn apply(&self, location: Location, visitor: &mut dyn Visitor<'tcx>) {
+        visitor.visit_terminator(self.as_ref().unwrap(), location)
+    }
+}
+
+/// Extra information passed to `visit_ty` and friends to give context
+/// about where the type etc appears.
+#[derive(Debug)]
+pub enum TyContext {
+    LocalDecl {
+        /// The index of the local variable we are visiting.
+        local: Local,
+
+        /// The source location where this local variable was declared.
+        source_info: SourceInfo,
+    },
+
+    /// The inferred type of a user type annotation.
+    UserTy(Span),
+
+    /// The return type of the function.
+    ReturnTy(SourceInfo),
+
+    YieldTy(SourceInfo),
+
+    /// A type found at some location.
+    Location(Location),
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum NonMutatingUseContext {
+    /// Being inspected in some way, like loading a len.
+    Inspect,
+    /// Consumed as part of an operand.
+    Copy,
+    /// Consumed as part of an operand.
+    Move,
+    /// Shared borrow.
+    SharedBorrow,
+    /// Shallow borrow.
+    ShallowBorrow,
+    /// Unique borrow.
+    UniqueBorrow,
+    /// AddressOf for *const pointer.
+    AddressOf,
+    /// Used as base for another place, e.g., `x` in `x.y`. Will not mutate the place.
+    /// For example, the projection `x.y` is not marked as a mutation in these cases:
+    /// ```ignore (illustrative)
+    /// z = x.y;
+    /// f(&x.y);
+    /// ```
+    Projection,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum MutatingUseContext {
+    /// Appears as LHS of an assignment.
+    Store,
+    /// Appears on `SetDiscriminant`
+    SetDiscriminant,
+    /// Appears on `Deinit`
+    Deinit,
+    /// Output operand of an inline assembly block.
+    AsmOutput,
+    /// Destination of a call.
+    Call,
+    /// Destination of a yield.
+    Yield,
+    /// Being dropped.
+    Drop,
+    /// Mutable borrow.
+    Borrow,
+    /// AddressOf for *mut pointer.
+    AddressOf,
+    /// Used as base for another place, e.g., `x` in `x.y`. Could potentially mutate the place.
+    /// For example, the projection `x.y` is marked as a mutation in these cases:
+    /// ```ignore (illustrative)
+    /// x.y = ...;
+    /// f(&mut x.y);
+    /// ```
+    Projection,
+    /// Retagging, a "Stacked Borrows" shadow state operation
+    Retag,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum NonUseContext {
+    /// Starting a storage live range.
+    StorageLive,
+    /// Ending a storage live range.
+    StorageDead,
+    /// User type annotation assertions for NLL.
+    AscribeUserTy,
+    /// The data of a user variable, for debug info.
+    VarDebugInfo,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum PlaceContext {
+    NonMutatingUse(NonMutatingUseContext),
+    MutatingUse(MutatingUseContext),
+    NonUse(NonUseContext),
+}
+
+impl PlaceContext {
+    /// Returns `true` if this place context represents a drop.
+    #[inline]
+    pub fn is_drop(&self) -> bool {
+        matches!(self, PlaceContext::MutatingUse(MutatingUseContext::Drop))
+    }
+
+    /// Returns `true` if this place context represents a borrow.
+    pub fn is_borrow(&self) -> bool {
+        matches!(
+            self,
+            PlaceContext::NonMutatingUse(
+                NonMutatingUseContext::SharedBorrow
+                    | NonMutatingUseContext::ShallowBorrow
+                    | NonMutatingUseContext::UniqueBorrow
+            ) | PlaceContext::MutatingUse(MutatingUseContext::Borrow)
+        )
+    }
+
+    /// Returns `true` if this place context represents a storage live or storage dead marker.
+    #[inline]
+    pub fn is_storage_marker(&self) -> bool {
+        matches!(
+            self,
+            PlaceContext::NonUse(NonUseContext::StorageLive | NonUseContext::StorageDead)
+        )
+    }
+
+    /// Returns `true` if this place context represents a use that potentially changes the value.
+    #[inline]
+    pub fn is_mutating_use(&self) -> bool {
+        matches!(self, PlaceContext::MutatingUse(..))
+    }
+
+    /// Returns `true` if this place context represents a use.
+    #[inline]
+    pub fn is_use(&self) -> bool {
+        !matches!(self, PlaceContext::NonUse(..))
+    }
+
+    /// Returns `true` if this place context represents an assignment statement.
+    pub fn is_place_assignment(&self) -> bool {
+        matches!(
+            self,
+            PlaceContext::MutatingUse(
+                MutatingUseContext::Store
+                | MutatingUseContext::Call
+                | MutatingUseContext::AsmOutput,
+            )
+        )
+    }
+}
-- 
cgit v1.2.3
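
For orientation, a minimal sketch of how the `Visitor` trait and the `PlaceContext` queries added above are typically consumed (assuming a context where `rustc_middle` is available, e.g. inside the compiler or a rustc driver; `UseCounter` and `count_mutating_uses` are illustrative names, not part of this patch):

    use rustc_middle::mir::visit::{PlaceContext, Visitor};
    use rustc_middle::mir::{Body, Local, Location};

    /// Counts how many times any local is used in a context that may mutate it.
    struct UseCounter {
        mutating_uses: usize,
    }

    impl<'tcx> Visitor<'tcx> for UseCounter {
        // `visit_local` is the leaf hook: every place visit bottoms out here,
        // carrying the `PlaceContext` computed by `super_place`/`super_projection`.
        fn visit_local(&mut self, _local: Local, context: PlaceContext, _location: Location) {
            if context.is_mutating_use() {
                self.mutating_uses += 1;
            }
        }
    }

    fn count_mutating_uses<'tcx>(body: &Body<'tcx>) -> usize {
        let mut counter = UseCounter { mutating_uses: 0 };
        // `visit_body` drives the traversal; the default `super_*` methods walk
        // every statement and terminator, so only the hooks of interest need
        // overriding.
        counter.visit_body(body);
        counter.mutating_uses
    }

A `MutVisitor` implementation would look much the same, except that the `mut` arm of `visit_place_fns!` additionally requires `fn tcx<'a>(&'a self) -> TyCtxt<'tcx>`, so that projections rewritten by `process_projection_elem` can be re-interned via `intern_place_elems`.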