path: root/third_party/rust/cranelift-codegen/src/regalloc
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-28 14:29:10 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-28 14:29:10 +0000
commit     2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree       b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/cranelift-codegen/src/regalloc
parent     Initial commit. (diff)
Adding upstream version 86.0.1. (upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/cranelift-codegen/src/regalloc')
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/affinity.rs  126
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs  169
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs  1107
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/coloring.rs  1324
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/context.rs  252
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/diversion.rs  315
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs  344
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/liveness.rs  443
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/liverange.rs  720
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/mod.rs  26
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/pressure.rs  371
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/register_set.rs  391
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/reload.rs  485
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs  65
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/solver.rs  1383
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/spilling.rs  637
-rw-r--r--  third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs  505
17 files changed, 8663 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/affinity.rs b/third_party/rust/cranelift-codegen/src/regalloc/affinity.rs
new file mode 100644
index 0000000000..efcc4dabfa
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/affinity.rs
@@ -0,0 +1,126 @@
+//! Value affinity for register allocation.
+//!
+//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
+//! of allocation that is likely to require the fewest fixup moves to satisfy
+//! instruction operand constraints.
+//!
+//! For values that want to be in registers, the affinity hint includes a register class or
+//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
+//! larger register class instead.
+
+use crate::ir::{AbiParam, ArgumentLoc};
+use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
+use core::fmt;
+
+/// Preferred register allocation for an SSA value.
+#[derive(Clone, Copy, Debug)]
+pub enum Affinity {
+ /// No affinity.
+ ///
+ /// This indicates a value that is not defined or used by any real instructions. It is a ghost
+ /// value that won't appear in the final program.
+ Unassigned,
+
+ /// This value should be placed in a spill slot on the stack.
+ Stack,
+
+ /// This value prefers a register from the given register class.
+ Reg(RegClassIndex),
+}
+
+impl Default for Affinity {
+ fn default() -> Self {
+ Self::Unassigned
+ }
+}
+
+impl Affinity {
+ /// Create an affinity that satisfies a single constraint.
+ ///
+ /// This will never create an `Affinity::Unassigned`.
+ /// Use the `Default` implementation for that.
+ pub fn new(constraint: &OperandConstraint) -> Self {
+ if constraint.kind == ConstraintKind::Stack {
+ Self::Stack
+ } else {
+ Self::Reg(constraint.regclass.into())
+ }
+ }
+
+ /// Create an affinity that matches an ABI argument for `isa`.
+ pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self {
+ match arg.location {
+ ArgumentLoc::Unassigned => Self::Unassigned,
+ ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
+ ArgumentLoc::Stack(_) => Self::Stack,
+ }
+ }
+
+ /// Is this the `Unassigned` affinity?
+ pub fn is_unassigned(self) -> bool {
+ match self {
+ Self::Unassigned => true,
+ _ => false,
+ }
+ }
+
+ /// Is this the `Reg` affinity?
+ pub fn is_reg(self) -> bool {
+ match self {
+ Self::Reg(_) => true,
+ _ => false,
+ }
+ }
+
+ /// Is this the `Stack` affinity?
+ pub fn is_stack(self) -> bool {
+ match self {
+ Self::Stack => true,
+ _ => false,
+ }
+ }
+
+ /// Merge an operand constraint into this affinity.
+ ///
+ /// Note that this does not guarantee that the register allocator will pick a register that
+ /// satisfies the constraint.
+ pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
+ match *self {
+ Self::Unassigned => *self = Self::new(constraint),
+ Self::Reg(rc) => {
+ // If the preferred register class is a subclass of the constraint, there's no need
+ // to change anything.
+ if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
+ {
+ // If the register classes overlap, try to shrink our preferred register class.
+ if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
+ *self = Self::Reg(subclass);
+ }
+ }
+ }
+ Self::Stack => {}
+ }
+ }
+
+ /// Return an object that can display this value affinity, using the register info from the
+ /// target ISA.
+ pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
+ DisplayAffinity(self, regs.into())
+ }
+}
+
+/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
+pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayAffinity<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self.0 {
+ Affinity::Unassigned => write!(f, "unassigned"),
+ Affinity::Stack => write!(f, "stack"),
+ Affinity::Reg(rci) => match self.1 {
+ Some(regs) => write!(f, "{}", regs.rc(rci)),
+ None => write!(f, "{}", rci),
+ },
+ }
+ }
+}
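
The merge rule in `Affinity::merge` above can be pictured with plain bit sets standing in for register classes. This is only a rough, self-contained sketch — Cranelift's actual `RegClassIndex`/`RegInfo` types encode a fixed class hierarchy, not arbitrary bit sets — but it shows how a hint starts unassigned and is narrowed toward the intersection of overlapping register constraints:

#[derive(Clone, Copy, Debug, PartialEq)]
enum Pref {
    Unassigned,
    Stack,
    /// Preferred register class, modelled here as a bit set of register units.
    Reg(u64),
}

impl Pref {
    /// Merge a register-class constraint (as a bit set) into the current hint,
    /// mirroring `Affinity::merge`: shrink the preference to the intersection
    /// when the classes overlap, otherwise keep the existing hint unchanged.
    fn merge_reg_constraint(&mut self, constraint: u64) {
        *self = match *self {
            Pref::Unassigned => Pref::Reg(constraint),
            Pref::Stack => Pref::Stack,
            Pref::Reg(rc) => {
                let common = rc & constraint;
                if common != 0 {
                    Pref::Reg(common)
                } else {
                    Pref::Reg(rc)
                }
            }
        };
    }
}

fn main() {
    let mut hint = Pref::Unassigned;
    hint.merge_reg_constraint(0b1111_0000); // first constraint sets the hint
    hint.merge_reg_constraint(0b0011_1100); // overlapping class narrows it
    assert_eq!(hint, Pref::Reg(0b0011_0000));
    println!("{:?}", hint);
}

As in the real pass, the result stays a hint: the allocator may still pick a register from a larger class if that avoids fixup moves.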
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs b/third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs
new file mode 100644
index 0000000000..4e9a159f3e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs
@@ -0,0 +1,169 @@
+//! Split the outgoing edges of conditional branches that pass parameters.
+//!
+//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove` instructions
+//! between a conditional branch and the following terminator.
+use alloc::vec::Vec;
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
+use crate::isa::TargetIsa;
+use crate::topo_order::TopoOrder;
+
+pub fn run(
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ cfg: &mut ControlFlowGraph,
+ domtree: &mut DominatorTree,
+ topo: &mut TopoOrder,
+) {
+ let mut ctx = Context {
+ has_new_blocks: false,
+ cur: EncCursor::new(func, isa),
+ domtree,
+ topo,
+ cfg,
+ };
+ ctx.run()
+}
+
+struct Context<'a> {
+ /// True if new blocks were inserted.
+ has_new_blocks: bool,
+
+ /// Current instruction as well as reference to function and ISA.
+ cur: EncCursor<'a>,
+
+ /// References to contextual data structures we need.
+ domtree: &'a mut DominatorTree,
+ topo: &'a mut TopoOrder,
+ cfg: &'a mut ControlFlowGraph,
+}
+
+impl<'a> Context<'a> {
+ fn run(&mut self) {
+ // Any block order will do.
+ self.topo.reset(self.cur.func.layout.blocks());
+ while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
+ // Branches can only be at the last or second to last position in an extended basic
+ // block.
+ self.cur.goto_last_inst(block);
+ let terminator_inst = self.cur.current_inst().expect("terminator");
+ if let Some(inst) = self.cur.prev_inst() {
+ let opcode = self.cur.func.dfg[inst].opcode();
+ if opcode.is_branch() {
+ self.visit_conditional_branch(inst, opcode);
+ self.cur.goto_inst(terminator_inst);
+ self.visit_terminator_branch(terminator_inst);
+ }
+ }
+ }
+
+ // If blocks were added, the cfg and domtree are inconsistent and must be recomputed.
+ if self.has_new_blocks {
+ self.cfg.compute(&self.cur.func);
+ self.domtree.compute(&self.cur.func, self.cfg);
+ }
+ }
+
+ fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
+ // TODO: target = dfg[branch].branch_destination().expect("conditional branch");
+ let target = match self.cur.func.dfg[branch] {
+ InstructionData::Branch { destination, .. }
+ | InstructionData::BranchIcmp { destination, .. }
+ | InstructionData::BranchInt { destination, .. }
+ | InstructionData::BranchFloat { destination, .. } => destination,
+ _ => panic!("Unexpected instruction in visit_conditional_branch"),
+ };
+
+ // If there are any parameters, split the edge.
+ if self.should_split_edge(target) {
+ // Create the block the branch will jump to.
+ let new_block = self.cur.func.dfg.make_block();
+
+ // Insert the new block before the destination, such that it can fall through into the
+ // target block.
+ assert_ne!(Some(target), self.cur.layout().entry_block());
+ self.cur.layout_mut().insert_block(new_block, target);
+ self.has_new_blocks = true;
+
+ // Extract the arguments of the branch instruction, split the Block parameters and the
+ // branch arguments
+ let num_fixed = opcode.constraints().num_fixed_value_arguments();
+ let dfg = &mut self.cur.func.dfg;
+ let old_args: Vec<_> = {
+ let args = dfg[branch].take_value_list().expect("block parameters");
+ args.as_slice(&dfg.value_lists).iter().copied().collect()
+ };
+ let (branch_args, block_params) = old_args.split_at(num_fixed);
+
+ // Replace the branch destination by the new Block created with no parameters, and restore
+ // the branch arguments, without the original Block parameters.
+ {
+ let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
+ let data = &mut dfg[branch];
+ *data.branch_destination_mut().expect("branch") = new_block;
+ data.put_value_list(branch_args);
+ }
+ let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
+ debug_assert!(ok);
+
+ // Insert a jump to the original target with its arguments into the new block.
+ self.cur.goto_first_insertion_point(new_block);
+ self.cur.ins().jump(target, block_params);
+
+ // Reset the cursor to point to the branch.
+ self.cur.goto_inst(branch);
+ }
+ }
+
+ fn visit_terminator_branch(&mut self, inst: Inst) {
+ let inst_data = &self.cur.func.dfg[inst];
+ let opcode = inst_data.opcode();
+ if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
+ // This opcode is ignored as it does not have any block parameters.
+ if opcode != Opcode::IndirectJumpTableBr {
+ debug_assert!(!opcode.is_branch())
+ }
+ return;
+ }
+
+ let target = match inst_data {
+ InstructionData::Jump { destination, .. } => destination,
+ _ => panic!(
+ "Unexpected instruction {} in visit_terminator_branch",
+ self.cur.display_inst(inst)
+ ),
+ };
+ debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
+
+ // If there are any parameters, split the edge.
+ if self.should_split_edge(*target) {
+ // Create the block the branch will jump to.
+ let new_block = self.cur.func.dfg.make_block();
+ self.has_new_blocks = true;
+
+ // Split the current block before its terminator, and insert a new jump instruction to
+ // jump to it.
+ let jump = self.cur.ins().jump(new_block, &[]);
+ self.cur.insert_block(new_block);
+
+ // Reset the cursor to point to the new terminator of the old block.
+ self.cur.goto_inst(jump);
+ }
+ }
+
+ /// Returns whether we should introduce a new branch.
+ fn should_split_edge(&self, target: Block) -> bool {
+ // We should split the edge if the target has any parameters.
+ if !self.cur.func.dfg.block_params(target).is_empty() {
+ return true;
+ };
+
+ // Or, if the target has more than one block reaching it.
+ debug_assert!(self.cfg.pred_iter(target).next() != None);
+
+ self.cfg.pred_iter(target).nth(1).is_some()
+ }
+}
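
Concretely, `visit_conditional_branch` above rewrites something like `brnz v1, block1(v10)` into `brnz v1, block2` plus a new `block2` containing `jump block1(v10)`. Below is a toy, self-contained sketch of that rewrite on an ad-hoc CFG type; the structs and the `split_cond_edges` helper are made up for illustration and ignore the fixed-argument split, encodings, and the CFG/domtree recomputation the real pass performs:

#[derive(Debug)]
enum Term {
    /// Conditional branch: condition value, target block, args for the target's params.
    BrIf(u32, usize, Vec<u32>),
    /// Unconditional jump: target block, args for the target's params.
    Jump(usize, Vec<u32>),
}

#[derive(Debug)]
struct Block {
    params: Vec<u32>,
    terms: Vec<Term>,
}

/// Split the edge of every conditional branch that passes arguments: retarget
/// the branch at a fresh, parameterless block that forwards the arguments.
fn split_cond_edges(blocks: &mut Vec<Block>) {
    for b in 0..blocks.len() {
        for t in 0..blocks[b].terms.len() {
            let (target, args) = match &mut blocks[b].terms[t] {
                Term::BrIf(_, target, args) if !args.is_empty() => {
                    (*target, std::mem::take(args))
                }
                _ => continue,
            };
            // New block with no parameters that jumps on with the arguments.
            let new_block = blocks.len();
            blocks.push(Block {
                params: vec![],
                terms: vec![Term::Jump(target, args)],
            });
            if let Term::BrIf(_, tgt, _) = &mut blocks[b].terms[t] {
                *tgt = new_block;
            }
        }
    }
}

fn main() {
    // block0: brif v1, block1(v10); jump block1(v20)    block1(v3): ...
    let mut cfg = vec![
        Block { params: vec![], terms: vec![Term::BrIf(1, 1, vec![10]), Term::Jump(1, vec![20])] },
        Block { params: vec![3], terms: vec![] },
    ];
    split_cond_edges(&mut cfg);
    // The brif now targets a new block2 that jumps to block1(v10).
    println!("{:#?}", cfg);
}

The split block gives later passes (copies, regmoves) a place to insert code on that edge without affecting the other outgoing edge of the conditional branch.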
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs b/third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs
new file mode 100644
index 0000000000..23c7bb14b4
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs
@@ -0,0 +1,1107 @@
+//! Constructing Conventional SSA form.
+//!
+//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related
+//! values do not interfere. We construct CSSA by building virtual registers that are as large as
+//! possible and inserting copies where necessary such that all argument values passed to a block
+//! parameter will belong to the same virtual register as the block parameter value itself.
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dbg::DisplayList;
+use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::fx::FxHashMap;
+use crate::ir::{self, InstBuilder, ProgramOrder};
+use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value};
+use crate::isa::{EncInfo, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::virtregs::{VirtReg, VirtRegs};
+use crate::timing;
+use alloc::vec::Vec;
+use core::cmp;
+use core::fmt;
+use core::iter;
+use core::slice;
+use log::debug;
+
+// # Implementation
+//
+// The coalescing algorithm implemented follows this paper fairly closely:
+//
+// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and
+// live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534
+//
+// We use a more efficient dominator forest representation (a linear stack) described here:
+//
+// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for
+// correctness, code quality and efficiency.
+//
+// The algorithm has two main phases:
+//
+// Phase 1: Union-find.
+//
+// We use the union-find support in `VirtRegs` to build virtual registers such that block parameter
+// values always belong to the same virtual register as their corresponding block arguments at the
+// predecessor branches. Trivial interferences between parameter and argument value live ranges are
+// detected and resolved before unioning congruence classes, but non-trivial interferences between
+// values that end up in the same congruence class are possible.
+//
+// Phase 2: Dominator forests.
+//
+// The virtual registers formed in phase 1 can contain interferences that we need to detect and
+// eliminate. By ordering the values in a virtual register according to a dominator tree pre-order,
+// we can identify all interferences in the virtual register in linear time.
+//
+// Interfering values are isolated and virtual registers rebuilt.
+
+/// Data structures to be used by the coalescing pass.
+pub struct Coalescing {
+ preorder: DominatorTreePreorder,
+ forest: DomForest,
+ vcopies: VirtualCopies,
+ values: Vec<Value>,
+ predecessors: Vec<Inst>,
+ backedges: Vec<Inst>,
+}
+
+/// One-shot context created once per invocation.
+struct Context<'a> {
+ isa: &'a dyn TargetIsa,
+ encinfo: EncInfo,
+
+ func: &'a mut Function,
+ cfg: &'a ControlFlowGraph,
+ domtree: &'a DominatorTree,
+ preorder: &'a DominatorTreePreorder,
+ liveness: &'a mut Liveness,
+ virtregs: &'a mut VirtRegs,
+
+ forest: &'a mut DomForest,
+ vcopies: &'a mut VirtualCopies,
+ values: &'a mut Vec<Value>,
+ predecessors: &'a mut Vec<Inst>,
+ backedges: &'a mut Vec<Inst>,
+}
+
+impl Coalescing {
+ /// Create a new coalescing pass.
+ pub fn new() -> Self {
+ Self {
+ forest: DomForest::new(),
+ preorder: DominatorTreePreorder::new(),
+ vcopies: VirtualCopies::new(),
+ values: Vec::new(),
+ predecessors: Vec::new(),
+ backedges: Vec::new(),
+ }
+ }
+
+ /// Clear all data structures in this coalescing pass.
+ pub fn clear(&mut self) {
+ self.forest.clear();
+ self.vcopies.clear();
+ self.values.clear();
+ self.predecessors.clear();
+ self.backedges.clear();
+ }
+
+ /// Convert `func` to Conventional SSA form and build virtual registers in the process.
+ pub fn conventional_ssa(
+ &mut self,
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ cfg: &ControlFlowGraph,
+ domtree: &DominatorTree,
+ liveness: &mut Liveness,
+ virtregs: &mut VirtRegs,
+ ) {
+ let _tt = timing::ra_cssa();
+ debug!("Coalescing for:\n{}", func.display(isa));
+ self.preorder.compute(domtree, &func.layout);
+ let mut context = Context {
+ isa,
+ encinfo: isa.encoding_info(),
+ func,
+ cfg,
+ domtree,
+ preorder: &self.preorder,
+ liveness,
+ virtregs,
+ forest: &mut self.forest,
+ vcopies: &mut self.vcopies,
+ values: &mut self.values,
+ predecessors: &mut self.predecessors,
+ backedges: &mut self.backedges,
+ };
+
+ // Run phase 1 (union-find) of the coalescing algorithm on the current function.
+ for &block in domtree.cfg_postorder() {
+ context.union_find_block(block);
+ }
+ context.finish_union_find();
+
+ // Run phase 2 (dominator forests) on the current function.
+ context.process_vregs();
+ }
+}
+
+/// Phase 1: Union-find.
+///
+/// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`.
+impl<'a> Context<'a> {
+ /// Run the union-find algorithm on the parameter values on `block`.
+ ///
+ /// This ensures that all block parameters will belong to the same virtual register as their
+ /// corresponding arguments at all predecessor branches.
+ pub fn union_find_block(&mut self, block: Block) {
+ let num_params = self.func.dfg.num_block_params(block);
+ if num_params == 0 {
+ return;
+ }
+
+ self.isolate_conflicting_params(block, num_params);
+
+ for i in 0..num_params {
+ self.union_pred_args(block, i);
+ }
+ }
+
+ // Identify block parameter values that are live at one of the predecessor branches.
+ //
+ // Such a parameter value will conflict with any argument value at the predecessor branch, so
+ // it must be isolated by inserting a copy.
+ fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) {
+ debug_assert_eq!(num_params, self.func.dfg.num_block_params(block));
+ // The only way a parameter value can interfere with a predecessor branch is if the block
+ // dominates the predecessor branch. That is, we are looking for loop back-edges.
+ for BlockPredecessor {
+ block: pred_block,
+ inst: pred_inst,
+ } in self.cfg.pred_iter(block)
+ {
+ // The quick pre-order dominance check is accurate because the block parameter is defined
+ // at the top of the block before any branches.
+ if !self.preorder.dominates(block, pred_block) {
+ continue;
+ }
+
+ debug!(
+ " - checking {} params at back-edge {}: {}",
+ num_params,
+ pred_block,
+ self.func.dfg.display_inst(pred_inst, self.isa)
+ );
+
+ // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values
+ // to be live at the use.
+ for i in 0..num_params {
+ let param = self.func.dfg.block_params(block)[i];
+ if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) {
+ self.isolate_param(block, param);
+ }
+ }
+ }
+ }
+
+ // Union block parameter value `argnum` with the corresponding block arguments on the predecessor
+ // branches.
+ //
+ // Detect cases where the argument value is live-in to `block` so it conflicts with any block
+ // parameter. Isolate the argument in those cases before unioning it with the parameter value.
+ fn union_pred_args(&mut self, block: Block, argnum: usize) {
+ let param = self.func.dfg.block_params(block)[argnum];
+
+ for BlockPredecessor {
+ block: pred_block,
+ inst: pred_inst,
+ } in self.cfg.pred_iter(block)
+ {
+ let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
+
+ // Never coalesce incoming function parameters on the stack. These parameters are
+ // pre-spilled, and the rest of the virtual register would be forced to spill to the
+ // `incoming_arg` stack slot too.
+ if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) {
+ if Some(def_block) == self.func.layout.entry_block()
+ && self.func.signature.params[def_num].location.is_stack()
+ {
+ debug!("-> isolating function stack parameter {}", arg);
+ let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
+ self.virtregs.union(param, new_arg);
+ continue;
+ }
+ }
+
+ // Check for basic interference: If `arg` overlaps a value defined at the entry to
+ // `block`, it can never be used as a block argument.
+ let interference = {
+ let lr = &self.liveness[arg];
+
+ // There are two ways the argument value can interfere with `block`:
+ //
+ // 1. It is defined in a dominating block and live-in to `block`.
+ // 2. It is itself a parameter value for `block`. This case should already have been
+ // eliminated by `isolate_conflicting_params()`.
+ debug_assert!(
+ lr.def() != block.into(),
+ "{} parameter {} was missed by isolate_conflicting_params()",
+ block,
+ arg
+ );
+
+ // The only other possibility is that `arg` is live-in to `block`.
+ lr.is_livein(block, &self.func.layout)
+ };
+
+ if interference {
+ let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
+ self.virtregs.union(param, new_arg);
+ } else {
+ self.virtregs.union(param, arg);
+ }
+ }
+ }
+
+ // Isolate block parameter value `param` on `block`.
+ //
+ // When `param=v10`:
+ //
+ // block1(v10: i32):
+ // foo
+ //
+ // becomes:
+ //
+ // block1(v11: i32):
+ // v10 = copy v11
+ // foo
+ //
+ // This function inserts the copy and updates the live ranges of the old and new parameter
+ // values. Returns the new parameter value.
+ fn isolate_param(&mut self, block: Block, param: Value) -> Value {
+ debug_assert_eq!(
+ self.func.dfg.value_def(param).pp(),
+ ExpandedProgramPoint::Block(block)
+ );
+ let ty = self.func.dfg.value_type(param);
+ let new_val = self.func.dfg.replace_block_param(param, ty);
+
+ // Insert a copy instruction at the top of `block`.
+ let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block);
+ if let Some(inst) = pos.current_inst() {
+ pos.use_srcloc(inst);
+ }
+ pos.ins().with_result(param).copy(new_val);
+ let inst = pos.built_inst();
+ self.liveness.move_def_locally(param, inst);
+
+ debug!(
+ "-> inserted {}, following {}({}: {})",
+ pos.display_inst(inst),
+ block,
+ new_val,
+ ty
+ );
+
+ // Create a live range for the new value.
+ // TODO: Should we handle ghost values?
+ let affinity = Affinity::new(
+ &self
+ .encinfo
+ .operand_constraints(pos.func.encodings[inst])
+ .expect("Bad copy encoding")
+ .outs[0],
+ );
+ self.liveness.create_dead(new_val, block, affinity);
+ self.liveness
+ .extend_locally(new_val, block, inst, &pos.func.layout);
+
+ new_val
+ }
+
+ // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`.
+ //
+ // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th block
+ // argument is `pred_val`. Since the argument value interferes with the corresponding block
+ // parameter at the destination, a copy is used instead:
+ //
+ // brnz v1, block2(v10)
+ //
+ // Becomes:
+ //
+ // v11 = copy v10
+ // brnz v1, block2(v11)
+ //
+ // This way the interference with the block parameter is avoided.
+ //
+ // A live range for the new value is created while the live range for `pred_val` is left
+ // unaltered.
+ //
+ // The new argument value is returned.
+ fn isolate_arg(
+ &mut self,
+ pred_block: Block,
+ pred_inst: Inst,
+ argnum: usize,
+ pred_val: Value,
+ ) -> Value {
+ let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst);
+ pos.use_srcloc(pred_inst);
+ let copy = pos.ins().copy(pred_val);
+ let inst = pos.built_inst();
+
+ // Create a live range for the new value.
+ // TODO: Handle affinity for ghost values.
+ let affinity = Affinity::new(
+ &self
+ .encinfo
+ .operand_constraints(pos.func.encodings[inst])
+ .expect("Bad copy encoding")
+ .outs[0],
+ );
+ self.liveness.create_dead(copy, inst, affinity);
+ self.liveness
+ .extend_locally(copy, pred_block, pred_inst, &pos.func.layout);
+
+ pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy;
+
+ debug!(
+ "-> inserted {}, before {}: {}",
+ pos.display_inst(inst),
+ pred_block,
+ pos.display_inst(pred_inst)
+ );
+
+ copy
+ }
+
+ /// Finish the union-find part of the coalescing algorithm.
+ ///
+ /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric
+ /// closure of the relation formed by block parameter-argument pairs found by `union_find_block()`.
+ fn finish_union_find(&mut self) {
+ self.virtregs.finish_union_find(None);
+ debug!("After union-find phase:{}", self.virtregs);
+ }
+}
+
+/// Phase 2: Dominator forests.
+///
+/// The main entry point is `process_vregs()`.
+impl<'a> Context<'a> {
+ /// Check all virtual registers for interference and fix conflicts.
+ pub fn process_vregs(&mut self) {
+ for vreg in self.virtregs.all_virtregs() {
+ self.process_vreg(vreg);
+ }
+ }
+
+ // Check `vreg` for interferences and fix conflicts.
+ fn process_vreg(&mut self, vreg: VirtReg) {
+ if !self.check_vreg(vreg) {
+ self.synthesize_vreg(vreg);
+ }
+ }
+
+ // Check `vreg` for interferences.
+ //
+ // We use a Budimlic dominator forest to check for interferences between the values in `vreg`
+ // and identify values that should be isolated.
+ //
+ // Returns true if `vreg` is free of interference.
+ fn check_vreg(&mut self, vreg: VirtReg) -> bool {
+ // Order the values according to the dominator pre-order of their definition.
+ let values = self.virtregs.sort_values(vreg, self.func, self.preorder);
+ debug!("Checking {} = {}", vreg, DisplayList(values));
+
+ // Now push the values in order to the dominator forest.
+ // This gives us the closest dominating value def for each of the values.
+ self.forest.clear();
+ for &value in values {
+ let node = Node::value(value, 0, self.func);
+
+ // Push this value and get the nearest dominating def back.
+ let parent = match self
+ .forest
+ .push_node(node, self.func, self.domtree, self.preorder)
+ {
+ None => continue,
+ Some(n) => n,
+ };
+
+ // Check for interference between `parent` and `value`. Since `parent` dominates
+ // `value`, we only have to check if it overlaps the definition.
+ if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) {
+ // The two values are interfering, so they can't be in the same virtual register.
+ debug!("-> interference: {} overlaps def of {}", parent, value);
+ return false;
+ }
+ }
+
+ // No interference found.
+ true
+ }
+
+ /// Destroy and rebuild `vreg` by iterative coalescing.
+ ///
+ /// When detecting that a virtual register formed in phase 1 contains interference, we have to
+ /// start over in a more careful way. We'll split the vreg into individual values and then
+ /// reassemble virtual registers using an iterative algorithm of pairwise merging.
+ ///
+ /// It is possible to recover multiple large virtual registers this way while still avoiding
+ /// a lot of copies.
+ fn synthesize_vreg(&mut self, vreg: VirtReg) {
+ self.vcopies.initialize(
+ self.virtregs.values(vreg),
+ self.func,
+ self.cfg,
+ self.preorder,
+ );
+ debug!(
+ "Synthesizing {} from {} branches and params {}",
+ vreg,
+ self.vcopies.branches.len(),
+ DisplayList(&self.vcopies.params)
+ );
+ self.virtregs.remove(vreg);
+
+ while let Some(param) = self.vcopies.next_param() {
+ self.merge_param(param);
+ self.vcopies.merged_param(param, self.func);
+ }
+ }
+
+ /// Merge block parameter value `param` with virtual registers at its predecessors.
+ fn merge_param(&mut self, param: Value) {
+ let (block, argnum) = match self.func.dfg.value_def(param) {
+ ir::ValueDef::Param(e, n) => (e, n),
+ ir::ValueDef::Result(_, _) => panic!("Expected parameter"),
+ };
+
+ // Collect all the predecessors and rearrange them.
+ //
+ // The order we process the predecessors matters because once one predecessor's virtual
+ // register is merged, it can cause interference with following merges. This means that the
+ // first predecessors processed are more likely to be copy-free. We want an ordering that
+ // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses
+ // instruction numbers which is not great for reproducible test cases.
+ //
+ // First merge loop back-edges in layout order, on the theory that shorter back-edges are
+ // more sensitive to inserted copies.
+ //
+ // Second everything else in reverse layout order. Again, short forward branches get merged
+ // first. There can also be backwards branches mixed in here, though, as long as they are
+ // not loop backedges.
+ debug_assert!(self.predecessors.is_empty());
+ debug_assert!(self.backedges.is_empty());
+ for BlockPredecessor {
+ block: pred_block,
+ inst: pred_inst,
+ } in self.cfg.pred_iter(block)
+ {
+ if self.preorder.dominates(block, pred_block) {
+ self.backedges.push(pred_inst);
+ } else {
+ self.predecessors.push(pred_inst);
+ }
+ }
+ // Order instructions in reverse order so we can pop them off the back.
+ {
+ let l = &self.func.layout;
+ self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a));
+ self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b));
+ self.predecessors.extend_from_slice(&self.backedges);
+ self.backedges.clear();
+ }
+
+ while let Some(pred_inst) = self.predecessors.pop() {
+ let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
+
+ // We want to merge the vreg containing `param` with the vreg containing `arg`.
+ if self.try_merge_vregs(param, arg) {
+ continue;
+ }
+
+ // Can't merge because of interference. Insert a copy instead.
+ let pred_block = self.func.layout.pp_block(pred_inst);
+ let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
+ self.virtregs
+ .insert_single(param, new_arg, self.func, self.preorder);
+ }
+ }
+
+ /// Merge the virtual registers containing `param` and `arg` if possible.
+ ///
+ /// Use self.vcopies to check for virtual copy interference too.
+ ///
+ /// Returns true if the virtual registers are successfully merged.
+ fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
+ if self.virtregs.same_class(param, arg) {
+ return true;
+ }
+
+ if !self.can_merge_vregs(param, arg) {
+ return false;
+ }
+
+ let _vreg = self.virtregs.unify(self.values);
+ debug!("-> merged into {} = {}", _vreg, DisplayList(self.values));
+ true
+ }
+
+ /// Check if it is possible to merge two virtual registers.
+ ///
+ /// Also leave `self.values` with the ordered list of values in the merged vreg.
+ fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
+ // We only need an immutable function reference.
+ let func = &*self.func;
+ let domtree = self.domtree;
+ let preorder = self.preorder;
+
+ // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties
+ // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`.
+ self.vcopies
+ .set_filter([param, arg], func, self.virtregs, preorder);
+
+ // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual
+ // registers and the filtered virtual copies.
+ let v0 = self.virtregs.congruence_class(&param);
+ let v1 = self.virtregs.congruence_class(&arg);
+ debug!(
+ " - set 0: {}\n - set 1: {}",
+ DisplayList(v0),
+ DisplayList(v1)
+ );
+ let nodes = MergeNodes::new(
+ func,
+ preorder,
+ MergeNodes::new(
+ func,
+ preorder,
+ v0.iter().map(|&value| Node::value(value, 0, func)),
+ v1.iter().map(|&value| Node::value(value, 1, func)),
+ ),
+ self.vcopies.iter(func),
+ );
+
+ // Now push the values in order to the dominator forest.
+ // This gives us the closest dominating value def for each of the values.
+ self.forest.clear();
+ self.values.clear();
+ for node in nodes {
+ // Accumulate ordered values for the new vreg.
+ if node.is_value() {
+ self.values.push(node.value);
+ }
+
+ // Push this value and get the nearest dominating def back.
+ let parent = match self.forest.push_node(node, func, domtree, preorder) {
+ None => {
+ if node.is_vcopy {
+ self.forest.pop_last();
+ }
+ continue;
+ }
+ Some(n) => n,
+ };
+
+ if node.is_vcopy {
+ // Vcopy nodes don't represent interference if they are copies of the parent value.
+ // In that case, the node must be removed because the parent value can still be
+ // live beyond the vcopy.
+ if parent.is_vcopy || node.value == parent.value {
+ self.forest.pop_last();
+ continue;
+ }
+
+ // Check if the parent value interferes with the virtual copy.
+ let inst = node.def.unwrap_inst();
+ if node.set_id != parent.set_id
+ && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout)
+ {
+ debug!(
+ " - interference: {} overlaps vcopy at {}:{}",
+ parent,
+ node.block,
+ self.func.dfg.display_inst(inst, self.isa)
+ );
+ return false;
+ }
+
+ // Keep this vcopy on the stack. It will save us a few interference checks.
+ continue;
+ }
+
+ // Parent vcopies never represent any interference. We only keep them on the stack to
+ // avoid an interference check against a value higher up.
+ if parent.is_vcopy {
+ continue;
+ }
+
+ // Both node and parent are values, so check for interference.
+ debug_assert!(node.is_value() && parent.is_value());
+ if node.set_id != parent.set_id
+ && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout)
+ {
+ // The two values are interfering.
+ debug!(" - interference: {} overlaps def of {}", parent, node.value);
+ return false;
+ }
+ }
+
+ // The values vector should receive all values.
+ debug_assert_eq!(v0.len() + v1.len(), self.values.len());
+
+ // No interference found.
+ true
+ }
+}
+
+/// Dominator forest.
+///
+/// This is a utility type used for detecting interference in virtual registers, where each virtual
+/// register is a list of values ordered according to the dominator tree pre-order.
+///
+/// The idea of a dominator forest was introduced in the Budimlic paper and the linear stack
+/// representation in the Boissinot paper. Our version of the linear stack is slightly modified
+/// because we have a pre-order of the dominator tree at the block granularity, not basic block
+/// granularity.
+///
+/// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed,
+/// `push_node` will return the nearest previously pushed value that dominates the definition.
+#[allow(dead_code)]
+struct DomForest {
+ // Stack representing the rightmost edge of the dominator forest so far, ending in the last
+ // element of `values`.
+ //
+ // At all times, the block of each element in the stack dominates the block of the next one.
+ stack: Vec<Node>,
+}
+
+/// A node in the dominator forest.
+#[derive(Clone, Copy, Debug)]
+#[allow(dead_code)]
+struct Node {
+ /// The program point where the live range is defined.
+ def: ExpandedProgramPoint,
+ /// block containing `def`.
+ block: Block,
+ /// Is this a virtual copy or a value?
+ is_vcopy: bool,
+ /// Set identifier.
+ set_id: u8,
+ /// For a value node: The value defined at `def`.
+ /// For a vcopy node: The relevant branch argument at `def`.
+ value: Value,
+}
+
+impl Node {
+ /// Create a node representing `value`.
+ pub fn value(value: Value, set_id: u8, func: &Function) -> Self {
+ let def = func.dfg.value_def(value).pp();
+ let block = func.layout.pp_block(def);
+ Self {
+ def,
+ block,
+ is_vcopy: false,
+ set_id,
+ value,
+ }
+ }
+
+ /// Create a node representing a virtual copy.
+ pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self {
+ let def = branch.into();
+ let block = func.layout.pp_block(def);
+ Self {
+ def,
+ block,
+ is_vcopy: true,
+ set_id,
+ value,
+ }
+ }
+
+ /// Is this a value node?
+ pub fn is_value(&self) -> bool {
+ !self.is_vcopy
+ }
+}
+
+impl fmt::Display for Node {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ if self.is_vcopy {
+ write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block)
+ } else {
+ write!(f, "{}:{}@{}", self.set_id, self.value, self.block)
+ }
+ }
+}
+
+impl DomForest {
+ /// Create a new empty dominator forest.
+ pub fn new() -> Self {
+ Self { stack: Vec::new() }
+ }
+
+ /// Clear all data structures in this dominator forest.
+ pub fn clear(&mut self) {
+ self.stack.clear();
+ }
+
+ /// Add a single node to the forest.
+ ///
+ /// Update the stack so its dominance invariants are preserved. Detect a parent node on the
+ /// stack which is the closest one dominating the new node and return it.
+ fn push_node(
+ &mut self,
+ node: Node,
+ func: &Function,
+ domtree: &DominatorTree,
+ preorder: &DominatorTreePreorder,
+ ) -> Option<Node> {
+ // The stack contains the current sequence of dominating defs. Pop elements until we
+ // find one whose block dominates `node.block`.
+ while let Some(top) = self.stack.pop() {
+ if preorder.dominates(top.block, node.block) {
+ // This is the right insertion spot for `node`.
+ self.stack.push(top);
+ self.stack.push(node);
+
+ // We know here that `top.block` dominates `node.block`, and thus `node.def`. This does
+ // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def`
+ // program point may be below the last branch in `top.block` that dominates
+ // `node.def`.
+ //
+ // We do know, though, that if there is a nearest value dominating `node.def`, it
+ // will be on the stack. We just need to find the last stack entry that actually
+ // dominates.
+ let mut last_dom = node.def;
+ for &n in self.stack.iter().rev().skip(1) {
+ // If the node is defined at the block header, it does in fact dominate
+ // everything else pushed on the stack.
+ let def_inst = match n.def {
+ ExpandedProgramPoint::Block(_) => return Some(n),
+ ExpandedProgramPoint::Inst(i) => i,
+ };
+
+ // We need to find the last program point in `n.block` to dominate `node.def`.
+ last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) {
+ None => n.block.into(),
+ Some(inst) => {
+ if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater {
+ return Some(n);
+ }
+ inst.into()
+ }
+ };
+ }
+
+ // No real dominator found on the stack.
+ return None;
+ }
+ }
+
+ // No dominators, start a new tree in the forest.
+ self.stack.push(node);
+ None
+ }
+
+ pub fn pop_last(&mut self) {
+ self.stack.pop().expect("Stack is empty");
+ }
+}
+
+/// Virtual copies.
+///
+/// When building a full virtual register at once, like phase 1 does with union-find, it is good
+/// enough to check for interference between the values in the full virtual register like
+/// `check_vreg()` does. However, in phase 2 we are doing pairwise merges of partial virtual
+/// registers that don't represent the full transitive closure of the block argument-parameter
+/// relation. This means that just checking for interference between values is inadequate.
+///
+/// Example:
+///
+/// v1 = iconst.i32 1
+/// brnz v10, block1(v1)
+/// v2 = iconst.i32 2
+/// brnz v11, block1(v2)
+/// return v1
+///
+/// block1(v3: i32):
+/// v4 = iadd v3, v1
+///
+/// With just value interference checking, we could build the virtual register [v3, v1] since those
+/// two values don't interfere. We can't merge v2 into this virtual register because v1 and v2
+/// interfere. However, we can't resolve that interference either by inserting a copy:
+///
+/// v1 = iconst.i32 1
+/// brnz v10, block1(v1)
+/// v2 = iconst.i32 2
+/// v20 = copy v2 <-- new value
+/// brnz v11, block1(v20)
+/// return v1
+///
+/// block1(v3: i32):
+/// v4 = iadd v3, v1
+///
+/// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We
+/// shouldn't have placed v1 and v3 in the same virtual register to begin with.
+///
+/// LLVM detects this form of interference by inserting copies in the predecessors of all phi
+/// instructions, then attempting to delete the copies. This is quite expensive because it involves
+/// creating a large number of copies and values.
+///
+/// We'll detect this form of interference with *virtual copies*: Each block parameter value that
+/// hasn't yet been fully merged with its block argument values is given a set of virtual copies at
+/// the predecessors. Any candidate value to be merged is checked for interference against both the
+/// virtual register and the virtual copies.
+///
+/// In the general case, we're checking if two virtual registers can be merged, and both can
+/// contain incomplete block parameter values with associated virtual copies.
+///
+/// The `VirtualCopies` struct represents a set of incomplete parameters and their associated
+/// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes
+/// representing the virtual copies in both vregs.
+struct VirtualCopies {
+ // Incomplete block parameters. These don't need to belong to the same virtual register.
+ params: Vec<Value>,
+
+ // Set of `(branch, destination)` pairs. These are all the predecessor branches for the blocks
+ // whose parameters can be found in `params`.
+ //
+ // Ordered by dominator tree pre-order of the branch instructions.
+ branches: Vec<(Inst, Block)>,
+
+ // Filter for the currently active node iterator.
+ //
+ // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with
+ // branch argument number `num`.
+ filter: FxHashMap<Block, (u8, usize)>,
+}
+
+impl VirtualCopies {
+ /// Create an empty VirtualCopies struct.
+ pub fn new() -> Self {
+ Self {
+ params: Vec::new(),
+ branches: Vec::new(),
+ filter: FxHashMap(),
+ }
+ }
+
+ /// Clear all state.
+ pub fn clear(&mut self) {
+ self.params.clear();
+ self.branches.clear();
+ self.filter.clear();
+ }
+
+ /// Initialize virtual copies from the (interfering) values in a union-find virtual register
+ /// that is going to be broken up and reassembled iteratively.
+ ///
+ /// The values are assumed to be in domtree pre-order.
+ ///
+ /// This will extract the block parameter values and associate virtual copies with all of them.
+ pub fn initialize(
+ &mut self,
+ values: &[Value],
+ func: &Function,
+ cfg: &ControlFlowGraph,
+ preorder: &DominatorTreePreorder,
+ ) {
+ self.clear();
+
+ let mut last_block = None;
+ for &val in values {
+ if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) {
+ self.params.push(val);
+
+ // We may have multiple parameters from the same block, but we only need to collect
+ // predecessors once. Also verify the ordering of values.
+ if let Some(last) = last_block {
+ match preorder.pre_cmp_block(last, block) {
+ cmp::Ordering::Less => {}
+ cmp::Ordering::Equal => continue,
+ cmp::Ordering::Greater => panic!("values in wrong order"),
+ }
+ }
+
+ // This block hasn't been seen before.
+ for BlockPredecessor {
+ inst: pred_inst, ..
+ } in cfg.pred_iter(block)
+ {
+ self.branches.push((pred_inst, block));
+ }
+ last_block = Some(block);
+ }
+ }
+
+ // Reorder the predecessor branches as required by the dominator forest.
+ self.branches
+ .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout));
+ }
+
+ /// Get the next unmerged parameter value.
+ pub fn next_param(&self) -> Option<Value> {
+ self.params.last().cloned()
+ }
+
+ /// Indicate that `param` is now fully merged.
+ pub fn merged_param(&mut self, param: Value, func: &Function) {
+ let popped = self.params.pop();
+ debug_assert_eq!(popped, Some(param));
+
+ // The domtree pre-order in `self.params` guarantees that all parameters defined at the
+ // same block will be adjacent. This means we can see when all parameters at a block have been
+ // merged.
+ //
+ // We don't care about the last parameter - when that is merged we are done.
+ let last = match self.params.last() {
+ None => return,
+ Some(x) => *x,
+ };
+ let block = func.dfg.value_def(param).unwrap_block();
+ if func.dfg.value_def(last).unwrap_block() == block {
+ // We're not done with `block` parameters yet.
+ return;
+ }
+
+ // Alright, we know there are no remaining `block` parameters in `self.params`. This means we
+ // can get rid of the `block` predecessors in `self.branches`. We don't have to, the
+ // `VCopyIter` will just skip them, but this reduces its workload.
+ self.branches.retain(|&(_, dest)| dest != block);
+ }
+
+ /// Set a filter for the virtual copy nodes we're generating.
+ ///
+ /// Only generate nodes for parameter values that are in the same congruence class as `reprs`.
+ /// Assign a set_id to each node corresponding to the index into `reprs` of the parameter's
+ /// congruence class.
+ pub fn set_filter(
+ &mut self,
+ reprs: [Value; 2],
+ func: &Function,
+ virtregs: &VirtRegs,
+ preorder: &DominatorTreePreorder,
+ ) {
+ self.filter.clear();
+
+ // Parameters in `self.params` are ordered according to the domtree pre-order, and they are
+ // removed from the back once they are fully merged. This means we can stop looking for
+ // parameters once we're beyond the last one.
+ let last_param = *self.params.last().expect("No more parameters");
+ let limit = func.dfg.value_def(last_param).unwrap_block();
+
+ for (set_id, repr) in reprs.iter().enumerate() {
+ let set_id = set_id as u8;
+ for &value in virtregs.congruence_class(repr) {
+ if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) {
+ if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater {
+ // Stop once we're outside the bounds of `self.params`.
+ break;
+ }
+ self.filter.insert(block, (set_id, num));
+ }
+ }
+ }
+ }
+
+ /// Look up the set_id and argument number for `block` in the current filter.
+ ///
+ /// Returns `None` if none of the currently active parameters are defined at `block`. Otherwise
+ /// returns `(set_id, argnum)` for an active parameter defined at `block`.
+ fn lookup(&self, block: Block) -> Option<(u8, usize)> {
+ self.filter.get(&block).cloned()
+ }
+
+ /// Get an iterator of dom-forest nodes corresponding to the current filter.
+ pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter {
+ VCopyIter {
+ func,
+ vcopies: self,
+ branches: self.branches.iter(),
+ }
+ }
+}
+
+/// Virtual copy iterator.
+///
+/// This iterator produces dom-forest nodes corresponding to the current filter in the virtual
+/// copies container.
+struct VCopyIter<'a> {
+ func: &'a Function,
+ vcopies: &'a VirtualCopies,
+ branches: slice::Iter<'a, (Inst, Block)>,
+}
+
+impl<'a> Iterator for VCopyIter<'a> {
+ type Item = Node;
+
+ fn next(&mut self) -> Option<Node> {
+ while let Some(&(branch, dest)) = self.branches.next() {
+ if let Some((set_id, argnum)) = self.vcopies.lookup(dest) {
+ let arg = self.func.dfg.inst_variable_args(branch)[argnum];
+ return Some(Node::vcopy(branch, arg, set_id, self.func));
+ }
+ }
+ None
+ }
+}
+
+/// Node-merging iterator.
+///
+/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them.
+struct MergeNodes<'a, IA, IB>
+where
+ IA: Iterator<Item = Node>,
+ IB: Iterator<Item = Node>,
+{
+ a: iter::Peekable<IA>,
+ b: iter::Peekable<IB>,
+ layout: &'a ir::Layout,
+ preorder: &'a DominatorTreePreorder,
+}
+
+impl<'a, IA, IB> MergeNodes<'a, IA, IB>
+where
+ IA: Iterator<Item = Node>,
+ IB: Iterator<Item = Node>,
+{
+ pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self {
+ MergeNodes {
+ a: a.peekable(),
+ b: b.peekable(),
+ layout: &func.layout,
+ preorder,
+ }
+ }
+}
+
+impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB>
+where
+ IA: Iterator<Item = Node>,
+ IB: Iterator<Item = Node>,
+{
+ type Item = Node;
+
+ fn next(&mut self) -> Option<Node> {
+ let ord = match (self.a.peek(), self.b.peek()) {
+ (Some(a), Some(b)) => {
+ let layout = self.layout;
+ self.preorder
+ .pre_cmp_block(a.block, b.block)
+ .then_with(|| layout.cmp(a.def, b.def))
+ }
+ (Some(_), None) => cmp::Ordering::Less,
+ (None, Some(_)) => cmp::Ordering::Greater,
+ (None, None) => return None,
+ };
+ // When the nodes compare equal, prefer the `a` side.
+ if ord != cmp::Ordering::Greater {
+ self.a.next()
+ } else {
+ self.b.next()
+ }
+ }
+}
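
The linear-stack dominator forest used by `check_vreg` and `can_merge_vregs` above boils down to: push values in dominator-tree pre-order, pop stack entries that do not dominate the incoming node, and report the surviving top as the nearest dominating def. A minimal sketch of that stack discipline follows, with dominance reduced to plain ancestry in a parent-array tree (an assumption; the real pass refines the answer to program points inside a block):

/// True if `a` dominates `b`, where `parent[i]` is the immediate dominator of
/// block `i` and block 0 is the root (its own parent).
fn dominates(parent: &[usize], a: usize, mut b: usize) -> bool {
    loop {
        if a == b {
            return true;
        }
        if b == 0 {
            return false;
        }
        b = parent[b];
    }
}

/// Push nodes in dominator-tree pre-order; return the nearest previously
/// pushed node that dominates the new one. The stack keeps the invariant that
/// every entry dominates the entry above it.
fn push_node(stack: &mut Vec<usize>, parent: &[usize], node: usize) -> Option<usize> {
    while let Some(&top) = stack.last() {
        if dominates(parent, top, node) {
            stack.push(node);
            return Some(top);
        }
        stack.pop();
    }
    stack.push(node);
    None
}

fn main() {
    // Dominator tree: 0 is the root, 1 and 3 are children of 0, 2 is a child of 1.
    let parent = [0, 0, 1, 0];
    let mut stack = Vec::new();
    // Pre-order traversal: 0, 1, 2, 3.
    assert_eq!(push_node(&mut stack, &parent, 0), None);
    assert_eq!(push_node(&mut stack, &parent, 1), Some(0));
    assert_eq!(push_node(&mut stack, &parent, 2), Some(1));
    assert_eq!(push_node(&mut stack, &parent, 3), Some(0)); // pops 2 and 1 first
    assert_eq!(stack, vec![0, 3]);
}

In the real pass, each returned parent is then checked for live-range overlap with the new node's definition to detect interference inside the virtual register.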
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/coloring.rs b/third_party/rust/cranelift-codegen/src/regalloc/coloring.rs
new file mode 100644
index 0000000000..f9d65b8fa1
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/coloring.rs
@@ -0,0 +1,1324 @@
+//! Register allocator coloring pass.
+//!
+//! The coloring pass assigns a physical register to every SSA value with a register affinity,
+//! under the assumption that the register pressure has been lowered sufficiently by spilling and
+//! splitting.
+//!
+//! # Preconditions
+//!
+//! The coloring pass doesn't work on arbitrary code. Certain preconditions must be satisfied:
+//!
+//! 1. All instructions must be legalized and assigned an encoding. The encoding recipe guides the
+//! register assignments and provides exact constraints.
+//!
+//! 2. Instructions with tied operands must be in a coloring-friendly state. Specifically, the
+//! values used by the tied operands must be killed by the instruction. This can be achieved by
+//! inserting a `copy` to a new value immediately before the two-address instruction.
+//!
+//! 3. If a value is bound to more than one operand on the same instruction, the operand
+//! constraints must be compatible. This can also be achieved by inserting copies so the
+//! incompatible operands get different values.
+//!
+//! 4. The register pressure must be lowered sufficiently by inserting spill code. Register
+//! operands are allowed to read spilled values, but each such instance must be counted as using
+//! a register.
+//!
+//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed
+//! as arguments when branching to a block must belong to the same virtual register as the
+//! corresponding block argument value.
+//!
+//! # Iteration order
+//!
+//! The SSA property guarantees that whenever the live range of two values overlap, one of the
+//! values will be live at the definition point of the other value. If we visit the instructions in
+//! a topological order relative to the dominance relation, we can assign colors to the values
+//! defined by the instruction and only consider the colors of other values that are live at the
+//! instruction.
+//!
+//! The first time we see a branch to a block, the block's argument values are colored to match the
+//! registers currently holding branch argument values passed to the predecessor branch. By
+//! visiting blocks in a CFG topological order, we guarantee that at least one predecessor branch has
+//! been visited before the destination block. Therefore, the block's arguments are already colored.
+//!
+//! The exception is the entry block whose arguments are colored from the ABI requirements.
+
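
The iteration-order claim above can be checked on a toy CFG: in any DFS-based reverse post-order, every reachable non-entry block is preceded by the predecessor through which the DFS first discovered it, so its parameters are already colored when it is visited. A small self-contained sketch, using ad-hoc adjacency lists rather than Cranelift's `ControlFlowGraph` or `DominatorTree`:

fn postorder(succs: &[Vec<usize>], entry: usize) -> Vec<usize> {
    fn dfs(n: usize, succs: &[Vec<usize>], seen: &mut Vec<bool>, out: &mut Vec<usize>) {
        seen[n] = true;
        for &s in &succs[n] {
            if !seen[s] {
                dfs(s, succs, seen, out);
            }
        }
        out.push(n);
    }
    let mut seen = vec![false; succs.len()];
    let mut out = Vec::new();
    dfs(entry, succs, &mut seen, &mut out);
    out
}

fn main() {
    // Diamond with a tail: 0 -> {1}, 1 -> {2, 3}, 2 -> {3}.
    let succs = vec![vec![1], vec![2, 3], vec![3], vec![]];
    let mut rpo = postorder(&succs, 0);
    rpo.reverse();
    println!("visit order: {:?}", rpo); // [0, 1, 2, 3]

    // The property the coloring pass relies on: when a non-entry block is
    // visited, at least one of its predecessors has already been visited.
    let mut visited = vec![false; succs.len()];
    for &b in &rpo {
        if b != 0 {
            assert!(succs
                .iter()
                .enumerate()
                .any(|(p, ss)| visited[p] && ss.contains(&b)));
        }
        visited[b] = true;
    }
}

The entry block is the only exception, which is why its parameters are colored from the ABI constraints instead.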
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{ArgumentLoc, InstBuilder, ValueDef};
+use crate::ir::{Block, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc};
+use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit};
+use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa};
+use crate::packed_option::PackedOption;
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::diversion::RegDiversions;
+use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::liverange::LiveRange;
+use crate::regalloc::register_set::RegisterSet;
+use crate::regalloc::solver::{Solver, SolverError};
+use crate::timing;
+use core::mem;
+use log::debug;
+
+/// Data structures for the coloring pass.
+///
+/// These are scratch space data structures that can be reused between invocations.
+pub struct Coloring {
+ divert: RegDiversions,
+ solver: Solver,
+}
+
+/// Kinds of ABI parameters.
+enum AbiParams {
+ Parameters(SigRef),
+ Returns,
+}
+
+/// Bundle of references that the coloring algorithm needs.
+///
+/// Some of the needed mutable references are passed around as explicit function arguments so we
+/// can avoid many fights with the borrow checker over mutable borrows of `self`. This includes the
+/// `Function` and `LiveValueTracker` references.
+///
+/// Immutable context information and mutable references that don't need to be borrowed across
+/// method calls should go in this struct.
+struct Context<'a> {
+ // Current instruction as well as reference to function and ISA.
+ cur: EncCursor<'a>,
+
+ // Cached ISA information.
+ // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
+ reginfo: RegInfo,
+ encinfo: EncInfo,
+
+ // References to contextual data structures we need.
+ cfg: &'a ControlFlowGraph,
+ domtree: &'a DominatorTree,
+ liveness: &'a mut Liveness,
+
+ // References to working set data structures.
+ // If we need to borrow out of a data structure across a method call, it must be passed as a
+ // function argument instead, see the `LiveValueTracker` arguments.
+ divert: &'a mut RegDiversions,
+ solver: &'a mut Solver,
+
+ // Pristine set of registers that the allocator can use.
+ // This set remains immutable, we make clones.
+ usable_regs: RegisterSet,
+
+ uses_pinned_reg: bool,
+}
+
+impl Coloring {
+ /// Allocate scratch space data structures for the coloring pass.
+ pub fn new() -> Self {
+ Self {
+ divert: RegDiversions::new(),
+ solver: Solver::new(),
+ }
+ }
+
+ /// Clear all data structures in this coloring pass.
+ pub fn clear(&mut self) {
+ self.divert.clear();
+ self.solver.clear();
+ }
+
+ /// Run the coloring algorithm over `func`.
+ pub fn run(
+ &mut self,
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ cfg: &ControlFlowGraph,
+ domtree: &DominatorTree,
+ liveness: &mut Liveness,
+ tracker: &mut LiveValueTracker,
+ ) {
+ let _tt = timing::ra_coloring();
+ debug!("Coloring for:\n{}", func.display(isa));
+ let mut ctx = Context {
+ usable_regs: isa.allocatable_registers(func),
+ uses_pinned_reg: isa.flags().enable_pinned_reg(),
+ cur: EncCursor::new(func, isa),
+ reginfo: isa.register_info(),
+ encinfo: isa.encoding_info(),
+ cfg,
+ domtree,
+ liveness,
+ divert: &mut self.divert,
+ solver: &mut self.solver,
+ };
+ ctx.run(tracker)
+ }
+}
+
+impl<'a> Context<'a> {
+ /// Is the pinned register usage enabled, and is this register the pinned register?
+ #[inline]
+ fn is_pinned_reg(&self, rc: RegClass, reg: RegUnit) -> bool {
+ rc.is_pinned_reg(self.uses_pinned_reg, reg)
+ }
+
+ /// Run the coloring algorithm.
+ fn run(&mut self, tracker: &mut LiveValueTracker) {
+ self.cur
+ .func
+ .locations
+ .resize(self.cur.func.dfg.num_values());
+
+ // Visit blocks in reverse post-order. We need to ensure that at least one predecessor has
+ // been visited before each block. That guarantees that the block arguments have been colored.
+ for &block in self.domtree.cfg_postorder().iter().rev() {
+ self.visit_block(block, tracker);
+ }
+ }
+
+ /// Visit `block`, assuming that the immediate dominator has already been visited.
+ fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+ debug!("Coloring {}:", block);
+ let mut regs = self.visit_block_header(block, tracker);
+ tracker.drop_dead_params();
+
+ // Now go through the instructions in `block` and color the values they define.
+ self.cur.goto_top(block);
+ while let Some(inst) = self.cur.next_inst() {
+ self.cur.use_srcloc(inst);
+ let opcode = self.cur.func.dfg[inst].opcode();
+ if !opcode.is_ghost() {
+ // This is an instruction which either has an encoding or carries ABI-related
+ // register allocation constraints.
+ let enc = self.cur.func.encodings[inst];
+ let constraints = self.encinfo.operand_constraints(enc);
+ if self.visit_inst(inst, constraints, tracker, &mut regs) {
+ self.replace_global_defines(inst, tracker);
+ // Restore cursor location after `replace_global_defines` moves it.
+ // We want to revisit the copy instructions it inserted.
+ self.cur.goto_inst(inst);
+ }
+ } else {
+ // This is a ghost instruction with no encoding and no extra constraints.
+ let (_throughs, kills) = tracker.process_ghost(inst);
+ self.process_ghost_kills(kills, &mut regs);
+ }
+ tracker.drop_dead(inst);
+
+ // We are not able to insert any regmove for diversion or un-diversion after the first
+ // branch. Instead, we record the diversion to be restored at the entry of the next block,
+ // which should have a single predecessor.
+ if opcode.is_branch() {
+ // The next instruction is necessarily an unconditional branch.
+ if let Some(branch) = self.cur.next_inst() {
+ debug!(
+ "Skip coloring {}\n from {}\n with diversions {}",
+ self.cur.display_inst(branch),
+ regs.input.display(&self.reginfo),
+ self.divert.display(&self.reginfo)
+ );
+ use crate::ir::instructions::BranchInfo::*;
+ let target = match self.cur.func.dfg.analyze_branch(branch) {
+ NotABranch | Table(_, _) => panic!(
+ "unexpected instruction {} after a conditional branch",
+ self.cur.display_inst(branch)
+ ),
+ SingleDest(block, _) => block,
+ };
+
+ // We have a single branch with a single target, and a block with a single
+ // predecessor. Thus we can forward the diversion set to the next block.
+ if self.cfg.pred_iter(target).count() == 1 {
+ // Transfer the diversion to the next block.
+ self.divert
+ .save_for_block(&mut self.cur.func.entry_diversions, target);
+ debug!(
+ "Set entry-diversion for {} to\n {}",
+ target,
+ self.divert.display(&self.reginfo)
+ );
+ } else {
+ debug_assert!(
+ self.divert.is_empty(),
+ "Divert set is non-empty after the terminator."
+ );
+ }
+ assert_eq!(
+ self.cur.next_inst(),
+ None,
+ "Unexpected instruction after a branch group."
+ );
+ } else {
+ assert!(opcode.is_terminator());
+ }
+ }
+ }
+ }
+
+ /// Visit the `block` header.
+ ///
+ /// Initialize the set of live registers and color the arguments to `block`.
+ fn visit_block_header(
+ &mut self,
+ block: Block,
+ tracker: &mut LiveValueTracker,
+ ) -> AvailableRegs {
+ // Reposition the live value tracker and deal with the block arguments.
+ tracker.block_top(
+ block,
+ &self.cur.func.dfg,
+ self.liveness,
+ &self.cur.func.layout,
+ self.domtree,
+ );
+
+ // Copy the content of the registered diversions to be reused at the
+ // entry of this basic block.
+ self.divert.at_block(&self.cur.func.entry_diversions, block);
+ debug!(
+ "Start {} with entry-diversion set to\n {}",
+ block,
+ self.divert.display(&self.reginfo)
+ );
+
+ if self.cur.func.layout.entry_block() == Some(block) {
+ // Parameters on the entry block have ABI constraints.
+ self.color_entry_params(tracker.live())
+ } else {
+ // The live-ins and parameters of a non-entry block have already been assigned a register.
+ // Reconstruct the allocatable set.
+ self.livein_regs(tracker.live())
+ }
+ }
+
+ /// Initialize a set of allocatable registers from the values that are live-in to a block.
+ /// These values must already have been colored when the dominating blocks were processed.
+ ///
+ /// Also process the block arguments which were colored when the first predecessor branch was
+ /// encountered.
+ fn livein_regs(&self, live: &[LiveValue]) -> AvailableRegs {
+ // Start from the registers that are actually usable. We don't want to include any reserved
+ // registers in the set.
+ let mut regs = AvailableRegs::new(&self.usable_regs);
+
+ for lv in live.iter().filter(|lv| !lv.is_dead) {
+ debug!(
+ "Live-in: {}:{} in {}",
+ lv.value,
+ lv.affinity.display(&self.reginfo),
+ self.divert
+ .get(lv.value, &self.cur.func.locations)
+ .display(&self.reginfo)
+ );
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ let loc = self.cur.func.locations[lv.value];
+ let reg = match loc {
+ ValueLoc::Reg(reg) => reg,
+ ValueLoc::Unassigned => panic!("Live-in {} wasn't assigned", lv.value),
+ ValueLoc::Stack(ss) => {
+ panic!("Live-in {} is in {}, should be register", lv.value, ss)
+ }
+ };
+ if lv.is_local {
+ regs.take(rc, reg, lv.is_local);
+ } else {
+ let loc = self.divert.get(lv.value, &self.cur.func.locations);
+ let reg_divert = match loc {
+ ValueLoc::Reg(reg) => reg,
+ ValueLoc::Unassigned => {
+ panic!("Diversion: Live-in {} wasn't assigned", lv.value)
+ }
+ ValueLoc::Stack(ss) => panic!(
+ "Diversion: Live-in {} is in {}, should be register",
+ lv.value, ss
+ ),
+ };
+ regs.take_divert(rc, reg, reg_divert);
+ }
+ }
+ }
+
+ regs
+ }
+
+ /// Color the parameters on the entry block.
+ ///
+ /// These are function parameters that should already have assigned register units in the
+ /// function signature.
+ ///
+ /// Return the set of remaining allocatable registers after filtering out the dead arguments.
+ fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs {
+ let sig = &self.cur.func.signature;
+ debug_assert_eq!(sig.params.len(), args.len());
+
+ let mut regs = AvailableRegs::new(&self.usable_regs);
+
+ for (lv, abi) in args.iter().zip(&sig.params) {
+ match lv.affinity {
+ Affinity::Reg(rci) => {
+ let rc = self.reginfo.rc(rci);
+ if let ArgumentLoc::Reg(reg) = abi.location {
+ if !lv.is_dead {
+ regs.take(rc, reg, lv.is_local);
+ }
+ self.cur.func.locations[lv.value] = ValueLoc::Reg(reg);
+ } else {
+ // This should have been fixed by the reload pass.
+ panic!(
+ "Entry arg {} has {} affinity, but ABI {}",
+ lv.value,
+ lv.affinity.display(&self.reginfo),
+ abi.display(&self.reginfo)
+ );
+ }
+ }
+ // The spiller will have assigned an incoming stack slot already.
+ Affinity::Stack => debug_assert!(abi.location.is_stack()),
+ // This is a ghost value, unused in the function. Don't assign it to a location
+ // either.
+ Affinity::Unassigned => {}
+ }
+ }
+
+ regs
+ }
+
+ /// Program the input-side ABI constraints for `inst` into the constraint solver.
+ ///
+ /// ABI constraints are the fixed register assignments used for calls and returns.
+ fn program_input_abi(&mut self, inst: Inst, abi_params: AbiParams) {
+ let abi_types = match abi_params {
+ AbiParams::Parameters(sig) => &self.cur.func.dfg.signatures[sig].params,
+ AbiParams::Returns => &self.cur.func.signature.returns,
+ };
+
+ for (abi, &value) in abi_types
+ .iter()
+ .zip(self.cur.func.dfg.inst_variable_args(inst))
+ {
+ if let ArgumentLoc::Reg(reg) = abi.location {
+ if let Affinity::Reg(rci) = self
+ .liveness
+ .get(value)
+ .expect("ABI register must have live range")
+ .affinity
+ {
+ let rc = self.reginfo.rc(rci);
+ let cur_reg = self.divert.reg(value, &self.cur.func.locations);
+ self.solver.reassign_in(value, rc, cur_reg, reg);
+ } else {
+ panic!("ABI argument {} should be in a register", value);
+ }
+ }
+ }
+ }
+
+ /// Color the values defined by `inst` and insert any necessary shuffle code to satisfy
+ /// instruction constraints.
+ ///
+ /// Update `regs` to reflect the allocated registers after `inst`, including removing any dead
+ /// or killed values from the set.
+ ///
+ /// Returns true when the global values defined by `inst` must be replaced by local values.
+ fn visit_inst(
+ &mut self,
+ inst: Inst,
+ constraints: Option<&RecipeConstraints>,
+ tracker: &mut LiveValueTracker,
+ regs: &mut AvailableRegs,
+ ) -> bool {
+ debug!(
+ "Coloring {}\n from {}",
+ self.cur.display_inst(inst),
+ regs.input.display(&self.reginfo),
+ );
+
+ // block whose arguments should be colored to match the current branch instruction's
+ // arguments.
+ let mut color_dest_args = None;
+
+ // Program the solver with register constraints for the input side.
+ self.solver.reset(&regs.input);
+
+ if let Some(constraints) = constraints {
+ self.program_input_constraints(inst, constraints.ins);
+ }
+
+ let call_sig = self.cur.func.dfg.call_signature(inst);
+ if let Some(sig) = call_sig {
+ self.program_input_abi(inst, AbiParams::Parameters(sig));
+ } else if self.cur.func.dfg[inst].opcode().is_return() {
+ self.program_input_abi(inst, AbiParams::Returns);
+ } else if self.cur.func.dfg[inst].opcode().is_branch() {
+ // This is a branch, so we need to make sure that globally live values are in their
+ // global registers. For blocks that take arguments, we also need to place the argument
+ // values in the expected registers.
+ if let Some(dest) = self.cur.func.dfg[inst].branch_destination() {
+ if self.program_block_arguments(inst, dest) {
+ color_dest_args = Some(dest);
+ }
+ } else {
+ // This is a multi-way branch like `br_table`. We only support arguments on
+ // single-destination branches.
+ debug_assert_eq!(
+ self.cur.func.dfg.inst_variable_args(inst).len(),
+ 0,
+ "Can't handle block arguments: {}",
+ self.cur.display_inst(inst)
+ );
+ self.undivert_regs(|lr, _| !lr.is_local());
+ }
+ }
+
+ if self.solver.has_fixed_input_conflicts() {
+ self.divert_fixed_input_conflicts(tracker.live());
+ }
+
+ self.solver.inputs_done();
+
+ // Update the live value tracker with this instruction.
+ let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
+
+ // Get rid of the killed values.
+ for lv in kills {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ let reg = self.divert.reg(lv.value, &self.cur.func.locations);
+
+ if self.is_pinned_reg(rc, reg) {
+ // Don't kill the pinned reg, either in the local or global register sets.
+ debug_assert!(lv.is_local, "pinned register SSA value can't be global");
+ continue;
+ }
+
+ debug!(
+ " kill {} in {} ({} {})",
+ lv.value,
+ self.reginfo.display_regunit(reg),
+ if lv.is_local { "local" } else { "global" },
+ rc
+ );
+ self.solver.add_kill(lv.value, rc, reg);
+
+ // Update the global register set which has no diversions.
+ if !lv.is_local {
+ regs.global
+ .free(rc, self.cur.func.locations[lv.value].unwrap_reg());
+ }
+ }
+ }
+
+ // This aligns with the " from" line at the top of the function.
+ debug!(" glob {}", regs.global.display(&self.reginfo));
+
+ // This flag is set when the solver failed to find a solution for the global defines that
+ // doesn't interfere with `regs.global`. We need to rewrite all of `inst`s global defines
+ // as local defines followed by copies.
+ let mut replace_global_defines = false;
+
+ // Program the fixed output constraints before the general defines. This allows us to
+ // detect conflicts between fixed outputs and tied operands where the input value hasn't
+ // been converted to a solver variable.
+ if let Some(constraints) = constraints {
+ if constraints.fixed_outs {
+ self.program_fixed_outputs(
+ constraints.outs,
+ defs,
+ throughs,
+ &mut replace_global_defines,
+ &regs.global,
+ );
+ }
+ }
+
+ if let Some(sig) = call_sig {
+ self.program_output_abi(
+ sig,
+ defs,
+ throughs,
+ &mut replace_global_defines,
+ &regs.global,
+ );
+ }
+
+ if let Some(constraints) = constraints {
+ self.program_output_constraints(
+ inst,
+ constraints.outs,
+ defs,
+ &mut replace_global_defines,
+ &regs.global,
+ );
+ }
+
+ // Finally, we've fully programmed the constraint solver.
+ // We expect a quick solution in most cases.
+ let is_reload = match &self.cur.func.dfg[inst] {
+ InstructionData::Unary {
+ opcode: Opcode::Fill,
+ ..
+ } => true,
+ _ => false,
+ };
+
+ let output_regs = self
+ .solver
+ .quick_solve(&regs.global, is_reload)
+ .unwrap_or_else(|_| {
+ debug!("quick_solve failed for {}", self.solver);
+ self.iterate_solution(
+ throughs,
+ &regs.global,
+ &mut replace_global_defines,
+ is_reload,
+ )
+ });
+
+ // The solution and/or fixed input constraints may require us to shuffle the set of live
+ // registers around.
+ self.shuffle_inputs(&mut regs.input);
+
+ // If this is the first time we branch to `dest`, color its arguments to match the current
+ // register state.
+ if let Some(dest) = color_dest_args {
+ self.color_block_params(inst, dest);
+ }
+
+ // Apply the solution to the defs.
+ for v in self.solver.vars().iter().filter(|&v| v.is_define()) {
+ self.cur.func.locations[v.value] = ValueLoc::Reg(v.solution);
+ }
+
+ // Tied defs are not part of the solution above.
+ // Copy register assignments from tied inputs to tied outputs.
+ if let Some(constraints) = constraints {
+ if constraints.tied_ops {
+ for (constraint, lv) in constraints.outs.iter().zip(defs) {
+ if let ConstraintKind::Tied(num) = constraint.kind {
+ let arg = self.cur.func.dfg.inst_args(inst)[num as usize];
+ let reg = self.divert.reg(arg, &self.cur.func.locations);
+ self.cur.func.locations[lv.value] = ValueLoc::Reg(reg);
+ }
+ }
+ }
+ }
+
+ // Update `regs` for the next instruction.
+ regs.input = output_regs;
+ for lv in defs {
+ let loc = self.cur.func.locations[lv.value];
+ debug!(
+ " color {} -> {}{}",
+ lv.value,
+ loc.display(&self.reginfo),
+ if lv.is_local {
+ ""
+ } else if replace_global_defines {
+ " (global to be replaced)"
+ } else {
+ " (global)"
+ }
+ );
+
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ let reg = loc.unwrap_reg();
+
+ debug_assert!(
+ !self.is_pinned_reg(rc, reg)
+ || self.cur.func.dfg[inst].opcode() == Opcode::GetPinnedReg,
+ "pinned register may not be part of outputs for '{}'.",
+ self.cur.func.dfg[inst].opcode()
+ );
+
+ if self.is_pinned_reg(rc, reg) {
+ continue;
+ }
+
+ // Remove the dead defs.
+ if lv.endpoint == inst {
+ regs.input.free(rc, reg);
+ debug_assert!(lv.is_local);
+ }
+
+ // Track globals in their undiverted locations.
+ if !lv.is_local && !replace_global_defines {
+ regs.global.take(rc, reg);
+ }
+ }
+ }
+
+ self.forget_diverted(kills);
+
+ replace_global_defines
+ }
+
+ /// Program the input-side constraints for `inst` into the constraint solver.
+ fn program_input_constraints(&mut self, inst: Inst, constraints: &[OperandConstraint]) {
+ for (constraint, &arg_val) in constraints
+ .iter()
+ .zip(self.cur.func.dfg.inst_args(inst))
+ .filter(|&(constraint, _)| constraint.kind != ConstraintKind::Stack)
+ {
+ // The reload pass is supposed to ensure that all arguments to register operands are
+ // already in registers.
+ let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations);
+ match constraint.kind {
+ ConstraintKind::FixedReg(regunit) => {
+ // Add the fixed constraint even if `cur_reg == regunit`.
+ // It is possible that we will want to convert the value to a variable later,
+ // and this identity assignment prevents that from happening.
+ self.solver
+ .reassign_in(arg_val, constraint.regclass, cur_reg, regunit);
+ }
+ ConstraintKind::FixedTied(regunit) => {
+ // The pinned register may not be part of a fixed tied requirement. If this
+ // becomes the case, then it must be changed to a different register.
+ debug_assert!(
+ !self.is_pinned_reg(constraint.regclass, regunit),
+ "see comment above"
+ );
+ // See comment right above.
+ self.solver
+ .reassign_in(arg_val, constraint.regclass, cur_reg, regunit);
+ }
+ ConstraintKind::Tied(_) => {
+ if self.is_pinned_reg(constraint.regclass, cur_reg) {
+ // Divert the pinned register; it shouldn't be reused for a tied input.
+ if self.solver.can_add_var(constraint.regclass, cur_reg) {
+ self.solver.add_var(arg_val, constraint.regclass, cur_reg);
+ }
+ } else if !constraint.regclass.contains(cur_reg) {
+ self.solver.add_var(arg_val, constraint.regclass, cur_reg);
+ }
+ }
+ ConstraintKind::Reg => {
+ if !constraint.regclass.contains(cur_reg) {
+ self.solver.add_var(arg_val, constraint.regclass, cur_reg);
+ }
+ }
+ ConstraintKind::Stack => unreachable!(),
+ }
+ }
+ }
+
+ /// Program the complete set of input constraints into the solver.
+ ///
+ /// The `program_input_constraints()` function above will not tell the solver about any values
+ /// that are already assigned to appropriate registers. This is normally fine, but if we want
+ /// to add additional variables to help the solver, we need to make sure that they are
+ /// constrained properly.
+ ///
+ /// This function completes the work of `program_input_constraints()` by calling `add_var` for
+ /// all values used by the instruction.
+ fn program_complete_input_constraints(&mut self) {
+ let inst = self.cur.current_inst().expect("Not on an instruction");
+ let constraints = self
+ .encinfo
+ .operand_constraints(self.cur.func.encodings[inst])
+ .expect("Current instruction not encoded")
+ .ins;
+
+ for (constraint, &arg_val) in constraints.iter().zip(self.cur.func.dfg.inst_args(inst)) {
+ match constraint.kind {
+ ConstraintKind::Reg | ConstraintKind::Tied(_) => {
+ let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations);
+
+ // This is the opposite condition of `program_input_constraints()`. The pinned
+ // register mustn't be added back as a variable.
+ if constraint.regclass.contains(cur_reg)
+ && !self.is_pinned_reg(constraint.regclass, cur_reg)
+ {
+ // This code runs after calling `solver.inputs_done()` so we must identify
+ // the new variable as killed or live-through.
+ let layout = &self.cur.func.layout;
+ if self.liveness[arg_val].killed_at(inst, layout.pp_block(inst), layout) {
+ self.solver
+ .add_killed_var(arg_val, constraint.regclass, cur_reg);
+ } else {
+ self.solver
+ .add_through_var(arg_val, constraint.regclass, cur_reg);
+ }
+ }
+ }
+ ConstraintKind::FixedReg(_)
+ | ConstraintKind::FixedTied(_)
+ | ConstraintKind::Stack => {}
+ }
+ }
+ }
+
+ /// Prepare for a branch to `dest`.
+ ///
+ /// 1. Any values that are live-in to `dest` must be un-diverted so they live in their globally
+ /// assigned register.
+ /// 2. If the `dest` block takes arguments, reassign the branch argument values to the matching
+ /// registers.
+ ///
+ /// Returns true if this is the first time a branch to `dest` is seen, so the `dest` argument
+ /// values should be colored after `shuffle_inputs`.
+ fn program_block_arguments(&mut self, inst: Inst, dest: Block) -> bool {
+ // Find diverted registers that are live-in to `dest` and reassign them to their global
+ // home.
+ //
+ // Values with a global live range that are not live in to `dest` could appear as branch
+ // arguments, so they can't always be un-diverted.
+ self.undivert_regs(|lr, layout| lr.is_livein(dest, layout));
+
+ // Now handle the block arguments.
+ let br_args = self.cur.func.dfg.inst_variable_args(inst);
+ let dest_args = self.cur.func.dfg.block_params(dest);
+ debug_assert_eq!(br_args.len(), dest_args.len());
+ for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
+ // The first time we encounter a branch to `dest`, we get to pick the location. The
+ // following times we see a branch to `dest`, we must follow suit.
+ match self.cur.func.locations[dest_arg] {
+ ValueLoc::Unassigned => {
+ // This is the first branch to `dest`, so we should color `dest_arg` instead of
+ // `br_arg`. However, we don't know where `br_arg` will end up until
+ // after `shuffle_inputs`. See `color_block_params` below.
+ //
+ // It is possible for `dest_arg` to have no affinity, and then it should simply
+ // be ignored.
+ if self.liveness[dest_arg].affinity.is_reg() {
+ return true;
+ }
+ }
+ ValueLoc::Reg(dest_reg) => {
+ // We've branched to `dest` before. Make sure we use the correct argument
+ // registers by reassigning `br_arg`.
+ if let Affinity::Reg(rci) = self.liveness[br_arg].affinity {
+ let rc = self.reginfo.rc(rci);
+ let br_reg = self.divert.reg(br_arg, &self.cur.func.locations);
+ self.solver.reassign_in(br_arg, rc, br_reg, dest_reg);
+ } else {
+ panic!("Branch argument {} is not in a register", br_arg);
+ }
+ }
+ ValueLoc::Stack(ss) => {
+ // The spiller should already have given us identical stack slots.
+ debug_assert_eq!(ValueLoc::Stack(ss), self.cur.func.locations[br_arg]);
+ }
+ }
+ }
+
+ // No `dest` arguments need coloring.
+ false
+ }
+
+ /// Knowing that we've never seen a branch to `dest` before, color its parameters to match our
+ /// register state.
+ ///
+ /// This function is only called when `program_block_arguments()` returned `true`.
+ fn color_block_params(&mut self, inst: Inst, dest: Block) {
+ let br_args = self.cur.func.dfg.inst_variable_args(inst);
+ let dest_args = self.cur.func.dfg.block_params(dest);
+ debug_assert_eq!(br_args.len(), dest_args.len());
+ for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
+ match self.cur.func.locations[dest_arg] {
+ ValueLoc::Unassigned => {
+ if self.liveness[dest_arg].affinity.is_reg() {
+ let br_reg = self.divert.reg(br_arg, &self.cur.func.locations);
+ self.cur.func.locations[dest_arg] = ValueLoc::Reg(br_reg);
+ }
+ }
+ ValueLoc::Reg(_) => panic!("{} arg {} already colored", dest, dest_arg),
+ // Spilled value consistency is verified by `program_block_arguments()` above.
+ ValueLoc::Stack(_) => {}
+ }
+ }
+ }
+
+ /// Find all diverted registers where `pred` returns `true` and undo their diversion so they
+ /// are reallocated to their global register assignments.
+ fn undivert_regs<Pred>(&mut self, mut pred: Pred)
+ where
+ Pred: FnMut(&LiveRange, &Layout) -> bool,
+ {
+ for (&value, rdiv) in self.divert.iter() {
+ let lr = self
+ .liveness
+ .get(value)
+ .expect("Missing live range for diverted register");
+ if pred(lr, &self.cur.func.layout) {
+ if let Affinity::Reg(rci) = lr.affinity {
+ let rc = self.reginfo.rc(rci);
+ // Stack diversions should not be possible here. They only live transiently
+ // during `shuffle_inputs()`.
+ self.solver.reassign_in(
+ value,
+ rc,
+ rdiv.to.unwrap_reg(),
+ rdiv.from.unwrap_reg(),
+ );
+ } else {
+ panic!(
+ "Diverted register {} with {} affinity",
+ value,
+ lr.affinity.display(&self.reginfo)
+ );
+ }
+ }
+ }
+ }
+
+ /// Find existing live values that conflict with the fixed input register constraints programmed
+ /// into the constraint solver. Convert them to solver variables so they can be diverted.
+ fn divert_fixed_input_conflicts(&mut self, live: &[LiveValue]) {
+ for lv in live {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let toprc = self.reginfo.toprc(rci);
+ let reg = self.divert.reg(lv.value, &self.cur.func.locations);
+ if self.solver.is_fixed_input_conflict(toprc, reg) {
+ debug!(
+ "adding var to divert fixed input conflict for {}",
+ toprc.info.display_regunit(reg)
+ );
+ self.solver.add_var(lv.value, toprc, reg);
+ }
+ }
+ }
+ }
+
+ /// Program any fixed-register output constraints into the solver. This may also detect
+ /// conflicts between live-through registers and fixed output registers. These live-through
+ /// values need to be turned into solver variables so they can be reassigned.
+ fn program_fixed_outputs(
+ &mut self,
+ constraints: &[OperandConstraint],
+ defs: &[LiveValue],
+ throughs: &[LiveValue],
+ replace_global_defines: &mut bool,
+ global_regs: &RegisterSet,
+ ) {
+ for (constraint, lv) in constraints.iter().zip(defs) {
+ match constraint.kind {
+ ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => {
+ self.add_fixed_output(lv.value, constraint.regclass, reg, throughs);
+ if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) {
+ debug!(
+ "Fixed output {} in {}:{} is not available in global regs",
+ lv.value,
+ constraint.regclass,
+ self.reginfo.display_regunit(reg)
+ );
+ *replace_global_defines = true;
+ }
+ }
+ ConstraintKind::Reg | ConstraintKind::Tied(_) | ConstraintKind::Stack => {}
+ }
+ }
+ }
+
+ /// Program the output-side ABI constraints for `inst` into the constraint solver.
+ ///
+ /// That means return values for a call instruction.
+ fn program_output_abi(
+ &mut self,
+ sig: SigRef,
+ defs: &[LiveValue],
+ throughs: &[LiveValue],
+ replace_global_defines: &mut bool,
+ global_regs: &RegisterSet,
+ ) {
+ // It's technically possible for a call instruction to have fixed results before the
+ // variable list of results, but we have no known instances of that.
+ // Just assume all results are variable return values.
+ debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len());
+ for (i, lv) in defs.iter().enumerate() {
+ let abi = self.cur.func.dfg.signatures[sig].returns[i];
+ if let ArgumentLoc::Reg(reg) = abi.location {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ self.add_fixed_output(lv.value, rc, reg, throughs);
+ if !lv.is_local && !global_regs.is_avail(rc, reg) {
+ debug!(
+ "ABI output {} in {}:{} is not available in global regs",
+ lv.value,
+ rc,
+ self.reginfo.display_regunit(reg)
+ );
+ *replace_global_defines = true;
+ }
+ } else {
+ panic!("ABI argument {} should be in a register", lv.value);
+ }
+ }
+ }
+ }
+
+ /// Add a single fixed output value to the solver.
+ fn add_fixed_output(
+ &mut self,
+ value: Value,
+ rc: RegClass,
+ reg: RegUnit,
+ throughs: &[LiveValue],
+ ) {
+ // The pinned register is already unavailable in the solver, since the set of available
+ // registers copied in on entry excludes it.
+ if !self.is_pinned_reg(rc, reg) && !self.solver.add_fixed_output(rc, reg) {
+ // The fixed output conflicts with some of the live-through registers.
+ for lv in throughs {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let toprc2 = self.reginfo.toprc(rci);
+ let reg2 = self.divert.reg(lv.value, &self.cur.func.locations);
+ if regs_overlap(rc, reg, toprc2, reg2) {
+ // This live-through value is interfering with the fixed output assignment.
+ // Convert it to a solver variable.
+ self.solver.add_through_var(lv.value, toprc2, reg2);
+ }
+ }
+ }
+
+ let ok = self.solver.add_fixed_output(rc, reg);
+ debug_assert!(ok, "Couldn't clear fixed output interference for {}", value);
+ }
+ self.cur.func.locations[value] = ValueLoc::Reg(reg);
+ }
+
+ /// Program the output-side constraints for `inst` into the constraint solver.
+ ///
+ /// It is assumed that all fixed outputs have already been handled.
+ fn program_output_constraints(
+ &mut self,
+ inst: Inst,
+ constraints: &[OperandConstraint],
+ defs: &[LiveValue],
+ replace_global_defines: &mut bool,
+ global_regs: &RegisterSet,
+ ) {
+ for (constraint, lv) in constraints.iter().zip(defs) {
+ match constraint.kind {
+ ConstraintKind::FixedReg(_)
+ | ConstraintKind::FixedTied(_)
+ | ConstraintKind::Stack => continue,
+ ConstraintKind::Reg => {
+ self.solver
+ .add_def(lv.value, constraint.regclass, !lv.is_local);
+ }
+ ConstraintKind::Tied(num) => {
+ // Find the input operand we're tied to.
+ // The solver doesn't care about the output value.
+ let arg = self.cur.func.dfg.inst_args(inst)[num as usize];
+ let reg = self.divert.reg(arg, &self.cur.func.locations);
+
+ if let Some(reg) =
+ self.solver
+ .add_tied_input(arg, constraint.regclass, reg, !lv.is_local)
+ {
+ // The value we're tied to has been assigned to a fixed register.
+ // We need to make sure that fixed output register is compatible with the
+ // global register set.
+ if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) {
+ debug!(
+ "Tied output {} in {}:{} is not available in global regs",
+ lv.value,
+ constraint.regclass,
+ self.reginfo.display_regunit(reg)
+ );
+ *replace_global_defines = true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /// Try harder to find a solution to the constraint problem since `quick_solve()` failed.
+ ///
+ /// We may need to move more registers around before a solution is possible. Use an iterative
+ /// algorithm that adds one more variable until a solution can be found.
+ fn iterate_solution(
+ &mut self,
+ throughs: &[LiveValue],
+ global_regs: &RegisterSet,
+ replace_global_defines: &mut bool,
+ is_reload: bool,
+ ) -> RegisterSet {
+ // Make sure `try_add_var()` below doesn't create a variable with overly loose constraints.
+ self.program_complete_input_constraints();
+
+ loop {
+ match self.solver.real_solve(global_regs, is_reload) {
+ Ok(regs) => return regs,
+ Err(SolverError::Divert(rc)) => {
+ // Do we have any live-through `rc` registers that are not already variables?
+ let added = self.try_add_var(rc, throughs);
+ debug_assert!(added, "Ran out of registers in {}", rc);
+ }
+ Err(SolverError::Global(_value)) => {
+ debug!(
+ "Not enough global registers for {}, trying as local",
+ _value
+ );
+ // We'll clear the `is_global` flag on all solver variables and instead make a
+ // note to replace all global defines with local defines followed by a copy.
+ *replace_global_defines = true;
+ self.solver.clear_all_global_flags();
+ }
+ };
+ }
+ }
+
+ /// Try to add an `rc` variable to the solver from the `throughs` set.
+ fn try_add_var(&mut self, rc: RegClass, throughs: &[LiveValue]) -> bool {
+ debug!("Trying to add a {} reg from {} values", rc, throughs.len());
+
+ for lv in throughs {
+ if let Affinity::Reg(rci) = lv.affinity {
+ // The new variable gets to roam the whole top-level register class because it is
+ // not actually constrained by the instruction. We just want it out of the way.
+ let toprc2 = self.reginfo.toprc(rci);
+ let reg2 = self.divert.reg(lv.value, &self.cur.func.locations);
+ if rc.contains(reg2)
+ && self.solver.can_add_var(toprc2, reg2)
+ && !self.is_live_on_outgoing_edge(lv.value)
+ {
+ self.solver.add_through_var(lv.value, toprc2, reg2);
+ return true;
+ }
+ }
+ }
+
+ false
+ }
+
+ /// Determine if `value` is live on a CFG edge from the current instruction.
+ ///
+ /// This means that the current instruction is a branch and `value` is live-in to one of the
+ /// branch destinations. Branch arguments and block parameters are not considered live on the
+ /// edge.
+ fn is_live_on_outgoing_edge(&self, value: Value) -> bool {
+ use crate::ir::instructions::BranchInfo::*;
+
+ let inst = self.cur.current_inst().expect("Not on an instruction");
+ let layout = &self.cur.func.layout;
+ match self.cur.func.dfg.analyze_branch(inst) {
+ NotABranch => false,
+ SingleDest(block, _) => {
+ let lr = &self.liveness[value];
+ lr.is_livein(block, layout)
+ }
+ Table(jt, block) => {
+ let lr = &self.liveness[value];
+ !lr.is_local()
+ && (block.map_or(false, |block| lr.is_livein(block, layout))
+ || self.cur.func.jump_tables[jt]
+ .iter()
+ .any(|block| lr.is_livein(*block, layout)))
+ }
+ }
+ }
+
+ /// Emit `regmove` instructions as needed to move the live registers into place before the
+ /// instruction. Also update `self.divert` accordingly.
+ ///
+ /// The `self.cur` cursor is expected to point at the instruction. The register moves are
+ /// inserted before.
+ ///
+ /// The solver needs to be reminded of the available registers before any moves are inserted.
+ fn shuffle_inputs(&mut self, regs: &mut RegisterSet) {
+ use crate::regalloc::solver::Move::*;
+
+ let spills = self.solver.schedule_moves(regs);
+
+ // The move operations returned by `schedule_moves` refer to emergency spill slots by
+ // consecutive indexes starting from 0. Map these to real stack slots.
+ // It is very unlikely (impossible?) that we would need more than one spill per top-level
+ // register class, so avoid allocation by using a fixed array here.
+ let mut slot = [PackedOption::default(); 8];
+ debug_assert!(spills <= slot.len(), "Too many spills ({})", spills);
+
+ for m in self.solver.moves() {
+ match *m {
+ Reg {
+ value,
+ from,
+ to,
+ rc,
+ } => {
+ debug_assert!(
+ !self.is_pinned_reg(rc, to),
+ "pinned register used in a regmove"
+ );
+ self.divert.regmove(value, from, to);
+ self.cur.ins().regmove(value, from, to);
+ }
+ Spill {
+ value,
+ from,
+ to_slot,
+ ..
+ } => {
+ debug_assert_eq!(slot[to_slot].expand(), None, "Overwriting slot in use");
+ let ss = self
+ .cur
+ .func
+ .stack_slots
+ .get_emergency_slot(self.cur.func.dfg.value_type(value), &slot[0..spills]);
+ slot[to_slot] = ss.into();
+ self.divert.regspill(value, from, ss);
+ self.cur.ins().regspill(value, from, ss);
+ }
+ Fill {
+ value,
+ from_slot,
+ to,
+ rc,
+ } => {
+ debug_assert!(
+ !self.is_pinned_reg(rc, to),
+ "pinned register used in a regfill"
+ );
+ // These slots are single use, so mark `ss` as available again.
+ let ss = slot[from_slot].take().expect("Using unallocated slot");
+ self.divert.regfill(value, ss, to);
+ self.cur.ins().regfill(value, ss, to);
+ }
+ }
+ }
+ }
+
+ /// Forget about any register diversions in `kills`.
+ fn forget_diverted(&mut self, kills: &[LiveValue]) {
+ if self.divert.is_empty() {
+ return;
+ }
+
+ for lv in kills {
+ if lv.affinity.is_reg() {
+ self.divert.remove(lv.value);
+ }
+ }
+ }
+
+ /// Replace all global values defined by `inst` with local values that are then copied into the
+ /// global value:
+ ///
+ /// v1 = foo
+ ///
+ /// becomes:
+ ///
+ /// v20 = foo
+ /// v1 = copy v20
+ ///
+ /// This is sometimes necessary when there are no global registers available that can satisfy
+ /// the constraints on the instruction operands.
+ ///
+ fn replace_global_defines(&mut self, inst: Inst, tracker: &mut LiveValueTracker) {
+ debug!("Replacing global defs on {}", self.cur.display_inst(inst));
+
+ // We'll insert copies *after* `inst`. Our caller will move the cursor back.
+ self.cur.next_inst();
+
+ // The tracker keeps the defs from `inst` at the end. Any dead defs have already been
+ // removed, so it's not obvious how many defs to process.
+ for lv in tracker.live_mut().iter_mut().rev() {
+ // Keep going until we reach a value that is not defined by `inst`.
+ if match self.cur.func.dfg.value_def(lv.value) {
+ ValueDef::Result(i, _) => i != inst,
+ _ => true,
+ } {
+ break;
+ }
+ if lv.is_local || !lv.affinity.is_reg() {
+ continue;
+ }
+
+ // Now `lv.value` is globally live and defined by `inst`. Replace it with a local live
+ // range that is copied after `inst`.
+ let ty = self.cur.func.dfg.value_type(lv.value);
+ let local = self.cur.func.dfg.replace_result(lv.value, ty);
+ self.cur.ins().with_result(lv.value).copy(local);
+ let copy = self.cur.built_inst();
+
+ // Create a live range for `local: inst -> copy`.
+ self.liveness.create_dead(local, inst, lv.affinity);
+ self.liveness.extend_locally(
+ local,
+ self.cur.func.layout.pp_block(inst),
+ copy,
+ &self.cur.func.layout,
+ );
+
+ // Move the definition of the global `lv.value`.
+ self.liveness.move_def_locally(lv.value, copy);
+
+ // Transfer the register coloring to `local`.
+ let loc = mem::replace(&mut self.cur.func.locations[lv.value], ValueLoc::default());
+ self.cur.func.locations[local] = loc;
+
+ // Update `lv` to reflect the new `local` live range.
+ lv.value = local;
+ lv.endpoint = copy;
+ lv.is_local = true;
+
+ debug!(
+ " + {} with {} in {}",
+ self.cur.display_inst(copy),
+ local,
+ loc.display(&self.reginfo)
+ );
+ }
+ debug!("Done: {}", self.cur.display_inst(inst));
+ }
+
+ /// Process kills on a ghost instruction.
+ /// - Forget diversions.
+ /// - Free killed registers.
+ fn process_ghost_kills(&mut self, kills: &[LiveValue], regs: &mut AvailableRegs) {
+ for lv in kills {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ let loc = match self.divert.remove(lv.value) {
+ Some(loc) => loc,
+ None => self.cur.func.locations[lv.value],
+ };
+ regs.input.free(rc, loc.unwrap_reg());
+ if !lv.is_local {
+ regs.global
+ .free(rc, self.cur.func.locations[lv.value].unwrap_reg());
+ }
+ }
+ }
+ }
+}
+
+/// Keep track of the set of available registers in two interference domains: all registers
+/// considering diversions and global registers not considering diversions.
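+///
+/// # Example (illustrative sketch)
+///
+/// A minimal sketch of how the coloring pass uses this set; `usable_regs`, `rc`, `reg`, and
+/// `other_reg` stand in for values produced by the surrounding pass and are not defined here.
+///
+/// ```ignore
+/// // Start from the ISA's allocatable registers.
+/// let mut regs = AvailableRegs::new(&usable_regs);
+/// // A globally live value removes its register from both domains.
+/// regs.take(rc, reg, /* is_local = */ false);
+/// // A purely local value only removes it from the diversion-aware input set.
+/// regs.take(rc, other_reg, /* is_local = */ true);
+/// ```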
+struct AvailableRegs {
+ /// The exact set of registers available on the input side of the current instruction. This
+ /// takes into account register diversions, and it includes both local and global live ranges.
+ input: RegisterSet,
+
+ /// Registers available for allocating globally live values. This set ignores any local values,
+ /// and it does not account for register diversions.
+ ///
+ /// Global values must be allocated out of this set because conflicts with other global values
+ /// can't be resolved with local diversions.
+ global: RegisterSet,
+}
+
+impl AvailableRegs {
+ /// Initialize both the input and global sets from `regs`.
+ pub fn new(regs: &RegisterSet) -> Self {
+ Self {
+ input: regs.clone(),
+ global: regs.clone(),
+ }
+ }
+
+ /// Take an un-diverted register from one or both sets.
+ pub fn take(&mut self, rc: RegClass, reg: RegUnit, is_local: bool) {
+ self.input.take(rc, reg);
+ if !is_local {
+ self.global.take(rc, reg);
+ }
+ }
+
+ /// Take a diverted register from both sets for a non-local allocation.
+ pub fn take_divert(&mut self, rc: RegClass, reg: RegUnit, reg_divert: RegUnit) {
+ self.input.take(rc, reg_divert);
+ self.global.take(rc, reg);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/context.rs b/third_party/rust/cranelift-codegen/src/regalloc/context.rs
new file mode 100644
index 0000000000..505b1d127a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/context.rs
@@ -0,0 +1,252 @@
+//! Register allocator context.
+//!
+//! The `Context` struct contains data structures that should be preserved across invocations of
+//! the register allocator algorithm. This doesn't preserve any data between functions, but it
+//! avoids allocating data structures independently for each function being compiled.
+
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::Function;
+use crate::isa::TargetIsa;
+use crate::regalloc::branch_splitting;
+use crate::regalloc::coalescing::Coalescing;
+use crate::regalloc::coloring::Coloring;
+use crate::regalloc::live_value_tracker::LiveValueTracker;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::reload::Reload;
+use crate::regalloc::safepoint::emit_stack_maps;
+use crate::regalloc::spilling::Spilling;
+use crate::regalloc::virtregs::VirtRegs;
+use crate::result::CodegenResult;
+use crate::timing;
+use crate::topo_order::TopoOrder;
+use crate::verifier::{
+ verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
+};
+
+/// Persistent memory allocations for register allocation.
+pub struct Context {
+ liveness: Liveness,
+ virtregs: VirtRegs,
+ coalescing: Coalescing,
+ topo: TopoOrder,
+ tracker: LiveValueTracker,
+ spilling: Spilling,
+ reload: Reload,
+ coloring: Coloring,
+}
+
+impl Context {
+ /// Create a new context for register allocation.
+ ///
+ /// This context should be reused for multiple functions in order to avoid repeated memory
+ /// allocations.
+ pub fn new() -> Self {
+ Self {
+ liveness: Liveness::new(),
+ virtregs: VirtRegs::new(),
+ coalescing: Coalescing::new(),
+ topo: TopoOrder::new(),
+ tracker: LiveValueTracker::new(),
+ spilling: Spilling::new(),
+ reload: Reload::new(),
+ coloring: Coloring::new(),
+ }
+ }
+
+ /// Clear all data structures in this context.
+ pub fn clear(&mut self) {
+ self.liveness.clear();
+ self.virtregs.clear();
+ self.coalescing.clear();
+ self.topo.clear();
+ self.tracker.clear();
+ self.spilling.clear();
+ self.reload.clear();
+ self.coloring.clear();
+ }
+
+ /// Current values liveness state.
+ pub fn liveness(&self) -> &Liveness {
+ &self.liveness
+ }
+
+ /// Allocate registers in `func`.
+ ///
+ /// After register allocation, all values in `func` have been assigned to a register or stack
+ /// location that is consistent with instruction encoding constraints.
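+ ///
+ /// # Example (illustrative sketch)
+ ///
+ /// A rough sketch of driving the allocator. The `isa` and `func` values, and the
+ /// `with_function` constructors used to build the CFG and dominator tree, are assumptions
+ /// about the surrounding crate rather than part of this module.
+ ///
+ /// ```ignore
+ /// let mut cfg = ControlFlowGraph::with_function(&func);
+ /// let mut domtree = DominatorTree::with_function(&func, &cfg);
+ /// let mut ctx = Context::new();
+ /// // Reuse `ctx` across functions; it keeps its allocations between runs.
+ /// ctx.run(isa, &mut func, &mut cfg, &mut domtree)?;
+ /// ```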
+ pub fn run(
+ &mut self,
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ cfg: &mut ControlFlowGraph,
+ domtree: &mut DominatorTree,
+ ) -> CodegenResult<()> {
+ let _tt = timing::regalloc();
+ debug_assert!(domtree.is_valid());
+
+ let mut errors = VerifierErrors::default();
+
+ // `Liveness` and `Coloring` are self-clearing.
+ self.virtregs.clear();
+
+ // Tracker state (dominator live sets) is actually reused between the spilling and coloring
+ // phases.
+ self.tracker.clear();
+
+ // Pass: Split branches, adding room for the copy and regmove instructions inserted later.
+ branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
+
+ // Pass: Liveness analysis.
+ self.liveness.compute(isa, func, cfg);
+
+ if isa.flags().enable_verifier() {
+ let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
+
+ if !ok {
+ return Err(errors.into());
+ }
+ }
+
+ // Pass: Coalesce and create Conventional SSA form.
+ self.coalescing.conventional_ssa(
+ isa,
+ func,
+ cfg,
+ domtree,
+ &mut self.liveness,
+ &mut self.virtregs,
+ );
+
+ if isa.flags().enable_verifier() {
+ let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+ && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+ && verify_cssa(
+ func,
+ cfg,
+ domtree,
+ &self.liveness,
+ &self.virtregs,
+ &mut errors,
+ )
+ .is_ok();
+
+ if !ok {
+ return Err(errors.into());
+ }
+ }
+
+ // Pass: Spilling.
+ self.spilling.run(
+ isa,
+ func,
+ domtree,
+ &mut self.liveness,
+ &self.virtregs,
+ &mut self.topo,
+ &mut self.tracker,
+ );
+
+ if isa.flags().enable_verifier() {
+ let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+ && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+ && verify_cssa(
+ func,
+ cfg,
+ domtree,
+ &self.liveness,
+ &self.virtregs,
+ &mut errors,
+ )
+ .is_ok();
+
+ if !ok {
+ return Err(errors.into());
+ }
+ }
+
+ // Pass: Reload.
+ self.reload.run(
+ isa,
+ func,
+ domtree,
+ &mut self.liveness,
+ &mut self.topo,
+ &mut self.tracker,
+ );
+
+ if isa.flags().enable_verifier() {
+ let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+ && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+ && verify_cssa(
+ func,
+ cfg,
+ domtree,
+ &self.liveness,
+ &self.virtregs,
+ &mut errors,
+ )
+ .is_ok();
+
+ if !ok {
+ return Err(errors.into());
+ }
+ }
+
+ // Pass: Coloring.
+ self.coloring.run(
+ isa,
+ func,
+ cfg,
+ domtree,
+ &mut self.liveness,
+ &mut self.tracker,
+ );
+
+ // If there are any reference types used, encode safepoints and emit
+ // stack maps.
+ //
+ // This function runs after register allocation has taken place, meaning
+ // values have locations assigned already, which is necessary for
+ // creating the stack maps.
+ let safepoints_enabled = isa.flags().enable_safepoints();
+ for val in func.dfg.values() {
+ let ty = func.dfg.value_type(val);
+ if ty.lane_type().is_ref() {
+ assert!(
+ safepoints_enabled,
+ "reference types were found but safepoints were not enabled"
+ );
+ emit_stack_maps(func, domtree, &self.liveness, &mut self.tracker, isa);
+ break;
+ }
+ }
+
+ if isa.flags().enable_verifier() {
+ let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+ && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+ && verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok()
+ && verify_cssa(
+ func,
+ cfg,
+ domtree,
+ &self.liveness,
+ &self.virtregs,
+ &mut errors,
+ )
+ .is_ok();
+
+ if !ok {
+ return Err(errors.into());
+ }
+ }
+
+ // Even if we arrive here, (non-fatal) errors might have been reported, so we
+ // must make sure absolutely nothing is wrong.
+ if errors.is_empty() {
+ Ok(())
+ } else {
+ Err(errors.into())
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/diversion.rs b/third_party/rust/cranelift-codegen/src/regalloc/diversion.rs
new file mode 100644
index 0000000000..be7b7fa1ee
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/diversion.rs
@@ -0,0 +1,315 @@
+//! Register diversions.
+//!
+//! Normally, a value is assigned to a single register or stack location by the register allocator.
+//! Sometimes, it is necessary to move register values to a different register in order to satisfy
+//! instruction constraints.
+//!
+//! These register diversions are local to a block. No values can be diverted when entering a new
+//! block.
+
+use crate::fx::FxHashMap;
+use crate::hash_map::{Entry, Iter};
+use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations};
+use crate::ir::{InstructionData, Opcode};
+use crate::isa::{RegInfo, RegUnit};
+use core::fmt;
+use cranelift_entity::{SparseMap, SparseMapValue};
+
+/// A diversion of a value from its original location to a new register or stack location.
+///
+/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
+/// same value.
+///
+/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
+/// the current one.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Diversion {
+ /// The original value location.
+ pub from: ValueLoc,
+ /// The current value location.
+ pub to: ValueLoc,
+}
+
+impl Diversion {
+ /// Make a new diversion.
+ pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
+ debug_assert!(from.is_assigned() && to.is_assigned());
+ Self { from, to }
+ }
+}
+
+/// Keep track of diversions in a block.
+#[derive(Clone)]
+pub struct RegDiversions {
+ current: FxHashMap<Value, Diversion>,
+}
+
+/// Keep track of diversions at the entry of block.
+#[derive(Clone)]
+struct EntryRegDiversionsValue {
+ key: Block,
+ divert: RegDiversions,
+}
+
+/// Map each block to its matching `RegDiversions` at basic block entry.
+pub struct EntryRegDiversions {
+ map: SparseMap<Block, EntryRegDiversionsValue>,
+}
+
+impl RegDiversions {
+ /// Create a new empty diversion tracker.
+ pub fn new() -> Self {
+ Self {
+ current: FxHashMap::default(),
+ }
+ }
+
+ /// Clear the content of the diversions, to reset the state of the compiler.
+ pub fn clear(&mut self) {
+ self.current.clear()
+ }
+
+ /// Are there any diversions?
+ pub fn is_empty(&self) -> bool {
+ self.current.is_empty()
+ }
+
+ /// Get the current diversion of `value`, if any.
+ pub fn diversion(&self, value: Value) -> Option<&Diversion> {
+ self.current.get(&value)
+ }
+
+ /// Get all current diversions.
+ pub fn iter(&self) -> Iter<'_, Value, Diversion> {
+ self.current.iter()
+ }
+
+ /// Get the current location for `value`. Fall back to the assignment map for non-diverted
+ /// values.
+ pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
+ match self.diversion(value) {
+ Some(d) => d.to,
+ None => locations[value],
+ }
+ }
+
+ /// Get the current register location for `value`, or panic if `value` isn't in a register.
+ pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
+ self.get(value, locations).unwrap_reg()
+ }
+
+ /// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
+ pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
+ self.get(value, locations).unwrap_stack()
+ }
+
+ /// Record any kind of move.
+ ///
+ /// The `from` location must match an existing `to` location, if any.
+ fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
+ debug_assert!(from.is_assigned() && to.is_assigned());
+ match self.current.entry(value) {
+ Entry::Occupied(mut e) => {
+ // TODO: non-lexical lifetimes should allow removal of the scope and early return.
+ {
+ let d = e.get_mut();
+ debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
+ if d.from != to {
+ d.to = to;
+ return;
+ }
+ }
+ e.remove();
+ }
+ Entry::Vacant(e) => {
+ e.insert(Diversion::new(from, to));
+ }
+ }
+ }
+
+ /// Record a register -> register move.
+ pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
+ self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
+ }
+
+ /// Record a register -> stack move.
+ pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
+ self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
+ }
+
+ /// Record a stack -> register move.
+ pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
+ self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
+ }
+
+ /// Apply the effect of `inst`.
+ ///
+ /// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
+ /// match.
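+ ///
+ /// # Example (illustrative sketch)
+ ///
+ /// A minimal sketch, assuming a hand-built `regmove` instruction; the value number and
+ /// register units below are arbitrary placeholders.
+ ///
+ /// ```ignore
+ /// let mut divs = RegDiversions::new();
+ /// let v1 = Value::new(1);
+ /// // Equivalent to calling `divs.regmove(v1, 10, 12)`.
+ /// divs.apply(&InstructionData::RegMove {
+ ///     opcode: Opcode::Regmove,
+ ///     arg: v1,
+ ///     src: 10,
+ ///     dst: 12,
+ /// });
+ /// assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(12));
+ /// ```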
+ pub fn apply(&mut self, inst: &InstructionData) {
+ match *inst {
+ InstructionData::RegMove {
+ opcode: Opcode::Regmove,
+ arg,
+ src,
+ dst,
+ } => self.regmove(arg, src, dst),
+ InstructionData::RegSpill {
+ opcode: Opcode::Regspill,
+ arg,
+ src,
+ dst,
+ } => self.regspill(arg, src, dst),
+ InstructionData::RegFill {
+ opcode: Opcode::Regfill,
+ arg,
+ src,
+ dst,
+ } => self.regfill(arg, src, dst),
+ _ => {}
+ }
+ }
+
+ /// Drop any recorded move for `value`.
+ ///
+ /// Returns the `to` location of the removed diversion.
+ pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
+ self.current.remove(&value).map(|d| d.to)
+ }
+
+ /// Resets the state of the current diversions to the diversions recorded at the entry of the
+ /// given `block`. The recorded diversions are available after coloring in the
+ /// `func.entry_diversions` field.
+ pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) {
+ self.clear();
+ if let Some(entry_divert) = entry_diversions.map.get(block) {
+ let iter = entry_divert.divert.current.iter();
+ self.current.extend(iter);
+ }
+ }
+
+ /// Copy the current state of the diversions, and save it for the entry of the `block` given as
+ /// argument.
+ ///
+ /// Note: This function can only be called once on a `Block` with a given `entry_diversions`
+ /// argument, otherwise it would panic.
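+ ///
+ /// # Example (illustrative sketch)
+ ///
+ /// A minimal sketch of the save/restore pairing used around a branch with a
+ /// single-predecessor target; `value`, the register units, and `target_block` are
+ /// placeholders for values produced by the surrounding pass.
+ ///
+ /// ```ignore
+ /// let mut entry_diversions = EntryRegDiversions::new();
+ /// let mut divert = RegDiversions::new();
+ /// divert.regmove(value, 3, 7);
+ /// // Record the active diversions for the branch target...
+ /// divert.save_for_block(&mut entry_diversions, target_block);
+ /// // ...and replay them when coloring starts at the top of `target_block`.
+ /// divert.at_block(&entry_diversions, target_block);
+ /// assert!(divert.check_block_entry(&entry_diversions, target_block));
+ /// ```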
+ pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) {
+ // No need to save anything if there are no diversions to be recorded.
+ if self.is_empty() {
+ return;
+ }
+ debug_assert!(!entry_diversions.map.contains_key(target));
+ let iter = self.current.iter();
+ let mut entry_divert = Self::new();
+ entry_divert.current.extend(iter);
+ entry_diversions.map.insert(EntryRegDiversionsValue {
+ key: target,
+ divert: entry_divert,
+ });
+ }
+
+ /// Check that the recorded entry for a given `block` matches what is recorded in the
+ /// `entry_diversions`.
+ pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool {
+ let entry_divert = match entry_diversions.map.get(target) {
+ Some(entry_divert) => entry_divert,
+ None => return self.is_empty(),
+ };
+
+ if entry_divert.divert.current.len() != self.current.len() {
+ return false;
+ }
+
+ for (val, _) in entry_divert.divert.current.iter() {
+ if !self.current.contains_key(val) {
+ return false;
+ }
+ }
+ true
+ }
+
+ /// Return an object that can display the diversions.
+ pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
+ DisplayDiversions(&self, regs.into())
+ }
+}
+
+impl EntryRegDiversions {
+ /// Create a new empty entry diversion, to associate diversions to each block entry.
+ pub fn new() -> Self {
+ Self {
+ map: SparseMap::new(),
+ }
+ }
+
+ pub fn clear(&mut self) {
+ self.map.clear();
+ }
+}
+
+impl Clone for EntryRegDiversions {
+ /// The Clone trait is required by `ir::Function`.
+ fn clone(&self) -> Self {
+ let mut tmp = Self::new();
+ for v in self.map.values() {
+ tmp.map.insert(v.clone());
+ }
+ tmp
+ }
+}
+
+/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry
+/// diversions for each block.
+impl SparseMapValue<Block> for EntryRegDiversionsValue {
+ fn key(&self) -> Block {
+ self.key
+ }
+}
+
+/// Object that displays register diversions.
+pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayDiversions<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{{")?;
+ for (value, div) in self.0.current.iter() {
+ write!(
+ f,
+ " {}: {} -> {}",
+ value,
+ div.from.display(self.1),
+ div.to.display(self.1)
+ )?
+ }
+ write!(f, " }}")
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::entity::EntityRef;
+ use crate::ir::Value;
+
+ #[test]
+ fn inserts() {
+ let mut divs = RegDiversions::new();
+ let v1 = Value::new(1);
+ let v2 = Value::new(2);
+
+ divs.regmove(v1, 10, 12);
+ assert_eq!(
+ divs.diversion(v1),
+ Some(&Diversion {
+ from: ValueLoc::Reg(10),
+ to: ValueLoc::Reg(12),
+ })
+ );
+ assert_eq!(divs.diversion(v2), None);
+
+ divs.regmove(v1, 12, 11);
+ assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
+ divs.regmove(v1, 11, 10);
+ assert_eq!(divs.diversion(v1), None);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs b/third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs
new file mode 100644
index 0000000000..ae33a15f4d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs
@@ -0,0 +1,344 @@
+//! Track which values are live in a block with instruction granularity.
+//!
+//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a block.
+//! The sets of live values are computed on the fly as the tracker is moved from instruction to
+//! instruction, starting at the block header.
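+//!
+//! # Typical use (illustrative sketch)
+//!
+//! A coloring-style pass walks a block roughly as follows. The `dfg`, `liveness`, `layout`,
+//! `domtree`, and `block` values are assumed to come from the surrounding pass, and
+//! `block_insts` stands for the block's instructions in layout order.
+//!
+//! ```ignore
+//! let mut tracker = LiveValueTracker::new();
+//! let (_liveins, _params) = tracker.block_top(block, dfg, liveness, layout, domtree);
+//! tracker.drop_dead_params();
+//! for inst in block_insts {
+//!     let (_throughs, _kills, _defs) = tracker.process_inst(inst, dfg, liveness);
+//!     // ... program constraints using the slices, then drop the killed values:
+//!     tracker.drop_dead(inst);
+//! }
+//! ```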
+
+use crate::dominator_tree::DominatorTree;
+use crate::entity::{EntityList, ListPool};
+use crate::fx::FxHashMap;
+use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value};
+use crate::partition_slice::partition_slice;
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::liverange::LiveRange;
+use alloc::vec::Vec;
+
+type ValueList = EntityList<Value>;
+
+/// Compute and track live values throughout a block.
+pub struct LiveValueTracker {
+ /// The set of values that are live at the current program point.
+ live: LiveValueVec,
+
+ /// Saved set of live values for every jump and branch that can potentially be an immediate
+ /// dominator of a block.
+ ///
+ /// This is the set of values that are live *before* the branch.
+ idom_sets: FxHashMap<Inst, ValueList>,
+
+ /// Memory pool for the live sets.
+ idom_pool: ListPool<Value>,
+}
+
+/// Information about a value that is live at the current program point.
+#[derive(Debug)]
+pub struct LiveValue {
+ /// The live value.
+ pub value: Value,
+
+ /// The local ending point of the live range in the current block, as returned by
+ /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
+ pub endpoint: Inst,
+
+ /// The affinity of the value as represented in its `LiveRange`.
+ ///
+ /// This value is simply a copy of the affinity stored in the live range. We copy it because
+ /// almost all users of `LiveValue` need to look at it.
+ pub affinity: Affinity,
+
+ /// The live range for this value never leaves its block.
+ pub is_local: bool,
+
+ /// This value is dead - the live range ends immediately.
+ pub is_dead: bool,
+}
+
+struct LiveValueVec {
+ /// The set of values that are live at the current program point.
+ values: Vec<LiveValue>,
+
+ /// How many values at the front of `values` are known to be live after `inst`?
+ ///
+ /// This is used to pass a much smaller slice to `partition_slice` when it's called a second
+ /// time for the same instruction.
+ live_prefix: Option<(Inst, usize)>,
+}
+
+impl LiveValueVec {
+ fn new() -> Self {
+ Self {
+ values: Vec::new(),
+ live_prefix: None,
+ }
+ }
+
+ /// Add a new live value to `values`. Copy some properties from `lr`.
+ fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
+ self.values.push(LiveValue {
+ value,
+ endpoint,
+ affinity: lr.affinity,
+ is_local: lr.is_local(),
+ is_dead: lr.is_dead(),
+ });
+ }
+
+ /// Remove all elements.
+ fn clear(&mut self) {
+ self.values.clear();
+ self.live_prefix = None;
+ }
+
+ /// Make sure that the values killed by `next_inst` are moved to the end of the `values`
+ /// vector.
+ ///
+ /// Returns the number of values that will be live after `next_inst`.
+ fn live_after(&mut self, next_inst: Inst) -> usize {
+ // How many values at the front of the vector are already known to survive `next_inst`?
+ // We don't need to pass this prefix to `partition_slice()`.
+ let keep = match self.live_prefix {
+ Some((i, prefix)) if i == next_inst => prefix,
+ _ => 0,
+ };
+
+ // Move the remaining surviving values to the front partition of the vector.
+ let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
+
+ // Remember the new prefix length in case we get called again for the same `next_inst`.
+ self.live_prefix = Some((next_inst, prefix));
+ prefix
+ }
+
+ /// Remove the values killed by `next_inst`.
+ fn remove_kill_values(&mut self, next_inst: Inst) {
+ let keep = self.live_after(next_inst);
+ self.values.truncate(keep);
+ }
+
+ /// Remove any dead values.
+ fn remove_dead_values(&mut self) {
+ self.values.retain(|v| !v.is_dead);
+ self.live_prefix = None;
+ }
+}
+
+impl LiveValueTracker {
+ /// Create a new blank tracker.
+ pub fn new() -> Self {
+ Self {
+ live: LiveValueVec::new(),
+ idom_sets: FxHashMap(),
+ idom_pool: ListPool::new(),
+ }
+ }
+
+ /// Clear all cached information.
+ pub fn clear(&mut self) {
+ self.live.clear();
+ self.idom_sets.clear();
+ self.idom_pool.clear();
+ }
+
+ /// Get the set of currently live values.
+ ///
+ /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
+ /// defined by the current instruction.
+ pub fn live(&self) -> &[LiveValue] {
+ &self.live.values
+ }
+
+ /// Get a mutable set of currently live values.
+ ///
+ /// Use with care and don't move entries around.
+ pub fn live_mut(&mut self) -> &mut [LiveValue] {
+ &mut self.live.values
+ }
+
+ /// Move the current position to the top of `block`.
+ ///
+ /// This depends on the stored live value set at `block`'s immediate dominator, so that must have
+ /// been visited first.
+ ///
+ /// Returns `(liveins, params)` as a pair of slices. The first slice is the set of live-in values
+ /// from the immediate dominator. The second slice is the set of `block` parameters.
+ ///
+ /// Dead parameters with no uses are included in `params`. Call `drop_dead_params()` to remove them.
+ pub fn block_top(
+ &mut self,
+ block: Block,
+ dfg: &DataFlowGraph,
+ liveness: &Liveness,
+ layout: &Layout,
+ domtree: &DominatorTree,
+ ) -> (&[LiveValue], &[LiveValue]) {
+ // Start over, compute the set of live values at the top of the block from two sources:
+ //
+ // 1. Values that were live before `block`'s immediate dominator, filtered for those that are
+ // actually live-in.
+ // 2. Arguments to `block` that are not dead.
+ //
+ self.live.clear();
+
+ // Compute the live-in values. Start by filtering the set of values that were live before
+ // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
+ // the entry block or an unreachable block).
+ if let Some(idom) = domtree.idom(block) {
+ // If the immediate dominator exists, we must have a stored list for it. This is a
+ // requirement on the order in which blocks are visited: all dominators must have been
+ // processed before the current block.
+ let idom_live_list = self
+ .idom_sets
+ .get(&idom)
+ .expect("No stored live set for dominator");
+ // Get just the values that are live-in to `block`.
+ for &value in idom_live_list.as_slice(&self.idom_pool) {
+ let lr = liveness
+ .get(value)
+ .expect("Immediate dominator value has no live range");
+
+ // Check if this value is live-in here.
+ if let Some(endpoint) = lr.livein_local_end(block, layout) {
+ self.live.push(value, endpoint, lr);
+ }
+ }
+ }
+
+ // Now add all the live parameters to `block`.
+ let first_arg = self.live.values.len();
+ for &value in dfg.block_params(block) {
+ let lr = &liveness[value];
+ debug_assert_eq!(lr.def(), block.into());
+ match lr.def_local_end().into() {
+ ExpandedProgramPoint::Inst(endpoint) => {
+ self.live.push(value, endpoint, lr);
+ }
+ ExpandedProgramPoint::Block(local_block) => {
+ // This is a dead block parameter which is not even live into the first
+ // instruction in the block.
+ debug_assert_eq!(
+ local_block, block,
+ "block parameter live range ends at wrong block header"
+ );
+ // Give this value a fake endpoint that is the first instruction in the block.
+ // We expect it to be removed by calling `drop_dead_params()`.
+ self.live
+ .push(value, layout.first_inst(block).expect("Empty block"), lr);
+ }
+ }
+ }
+
+ self.live.values.split_at(first_arg)
+ }
+
+ /// Prepare to move past `inst`.
+ ///
+ /// Determine the set of already live values that are killed by `inst`, and add the new defined
+ /// values to the tracked set.
+ ///
+ /// Returns `(throughs, kills, defs)` as a tuple of slices:
+ ///
+ /// 1. The `throughs` slice is the set of live-through values that are neither defined nor
+ /// killed by the instruction.
+ /// 2. The `kills` slice is the set of values that were live before the instruction and are
+ /// killed at the instruction. This does not include dead defs.
+ /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
+ /// dead defines.
+ ///
+ /// The order of `throughs` and `kills` is arbitrary.
+ ///
+ /// The `drop_dead()` method must be called next to actually remove the dead values from the
+ /// tracked set after the two returned slices are no longer needed.
+ pub fn process_inst(
+ &mut self,
+ inst: Inst,
+ dfg: &DataFlowGraph,
+ liveness: &Liveness,
+ ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
+ // Save a copy of the live values before any branches or jumps that could be somebody's
+ // immediate dominator.
+ if dfg[inst].opcode().is_branch() {
+ self.save_idom_live_set(inst);
+ }
+
+ // Move killed values to the end of the vector.
+ // Don't remove them yet, `drop_dead()` will do that.
+ let first_kill = self.live.live_after(inst);
+
+ // Add the values defined by `inst`.
+ let first_def = self.live.values.len();
+ for &value in dfg.inst_results(inst) {
+ let lr = &liveness[value];
+ debug_assert_eq!(lr.def(), inst.into());
+ match lr.def_local_end().into() {
+ ExpandedProgramPoint::Inst(endpoint) => {
+ self.live.push(value, endpoint, lr);
+ }
+ ExpandedProgramPoint::Block(block) => {
+ panic!("Instruction result live range can't end at {}", block);
+ }
+ }
+ }
+
+ (
+ &self.live.values[0..first_kill],
+ &self.live.values[first_kill..first_def],
+ &self.live.values[first_def..],
+ )
+ }
+
+ /// Prepare to move past a ghost instruction.
+ ///
+ /// This is like `process_inst`, except any defs are ignored.
+ ///
+ /// Returns `(throughs, kills)`.
+ pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
+ let first_kill = self.live.live_after(inst);
+ self.live.values.as_slice().split_at(first_kill)
+ }
+
+ /// Drop the values that are now dead after moving past `inst`.
+ ///
+ /// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
+ ///
+ /// This must be called after `process_inst(inst)` and before proceeding to the next
+ /// instruction.
+ pub fn drop_dead(&mut self, inst: Inst) {
+ // Remove both live values that were killed by `inst` and dead defines from `inst`.
+ self.live.remove_kill_values(inst);
+ }
+
+ /// Drop any values that are marked as `is_dead`.
+ ///
+ /// Use this after calling `block_top` to clean out dead block parameters.
+ pub fn drop_dead_params(&mut self) {
+ self.live.remove_dead_values();
+ }
+
+ /// Process new spills.
+ ///
+ /// Any values where `f` returns true are spilled and will be treated as if their affinity was
+ /// `Stack`.
+ pub fn process_spills<F>(&mut self, mut f: F)
+ where
+ F: FnMut(Value) -> bool,
+ {
+ for lv in &mut self.live.values {
+ if f(lv.value) {
+ lv.affinity = Affinity::Stack;
+ }
+ }
+ }
+
+ /// Save the current set of live values so it is associated with `idom`.
+ fn save_idom_live_set(&mut self, idom: Inst) {
+ let values = self.live.values.iter().map(|lv| lv.value);
+ let pool = &mut self.idom_pool;
+ // If there already is a set saved for `idom`, just keep it.
+ self.idom_sets.entry(idom).or_insert_with(|| {
+ let mut list = ValueList::default();
+ list.extend(values, pool);
+ list
+ });
+ }
+}
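+
+// A rough sketch of the intended driver protocol (not part of the tracker itself): a pass
+// visits blocks in a dominator-tree pre-order and steps the tracker through each instruction.
+// Here `tracker` is a `LiveValueTracker`, `func`, `liveness`, and `domtree` come from the
+// surrounding pass, and `blocks_in_domtree_preorder` / `visit_inst` are placeholders for
+// whatever iteration order and per-instruction logic the calling pass uses.
+//
+// ```ignore
+// for block in blocks_in_domtree_preorder {
+//     let (_liveins, _params) =
+//         tracker.block_top(block, &func.dfg, &liveness, &func.layout, &domtree);
+//     tracker.drop_dead_params();
+//     for inst in func.layout.block_insts(block) {
+//         let (_throughs, _kills, _defs) = tracker.process_inst(inst, &func.dfg, &liveness);
+//         visit_inst(inst);
+//         tracker.drop_dead(inst);
+//     }
+// }
+// ```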
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/liveness.rs b/third_party/rust/cranelift-codegen/src/regalloc/liveness.rs
new file mode 100644
index 0000000000..2e9c5015bd
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/liveness.rs
@@ -0,0 +1,443 @@
+//! Liveness analysis for SSA values.
+//!
+//! This module computes the live range of all the SSA values in a function and produces a
+//! `LiveRange` instance for each.
+//!
+//!
+//! # Liveness consumers
+//!
+//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
+//! block and assigns a register to the defined values. This algorithm needs to maintain a set of the
+//! currently live values as it is iterating down the instructions in the block. It asks the
+//! following questions:
+//!
+//! - What is the set of live values at the entry to the block?
+//! - When moving past a use of a value, is that value still alive in the block, or was that the last
+//! use?
+//! - When moving past a branch, which of the live values are still live below the branch?
+//!
+//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
+//! `livein_local_end` queries. The coloring algorithm visits blocks in a topological order of the
+//! dominator tree, so it can compute the set of live values at the beginning of a block by starting
+//! from the set of live values at the dominating branch instruction and filtering it with
+//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
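+//!
+//! As a rough sketch, a consumer holding the `LiveRange` `lr` for a value used by `inst` in
+//! `block` can answer these questions using the function layout as the `ProgramOrder` (the
+//! `func` binding is assumed to be the surrounding `Function`):
+//!
+//! ```ignore
+//! // Was `inst` the last use, i.e. is the value killed here?
+//! let killed = lr.killed_at(inst, block, &func.layout);
+//! // Is the value live-in to `block`, and if so, up to which instruction?
+//! let local_end = lr.livein_local_end(block, &func.layout);
+//! ```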
+//!
+//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
+//! number of live values at every program point and insert spill code until the number of
+//! registers needed is small enough.
+//!
+//!
+//! # Alternative algorithms
+//!
+//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
+//! alternatives.
+//!
+//! ## Data-flow equations
+//!
+//! The classic *live variables analysis* that you will find in all compiler books from the
+//! previous century does not depend on SSA form. It is typically implemented by iteratively
+//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
+//! variables for every basic block in the program.
+//!
+//! This algorithm has some disadvantages that makes us look elsewhere:
+//!
+//! - Quadratic memory use. We need a bit per variable per basic block in the function.
+//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
+//! their basic block, and those that do cross block boundaries rarely span a large number of basic
+//! blocks. This makes the data stored in the bit-vectors quite sparse.
+//! - Traditionally, the data-flow equations were solved for real program *variables* which does
+//! not include temporaries used in evaluating expressions. We have an SSA form program which
+//! blurs the distinction between temporaries and variables. This makes the quadratic memory
+//! problem worse because there are many more SSA values than there was variables in the original
+//! program, and we don't know a priori which SSA values leave their basic block.
+//! - Missing last-use information. For values that are not live-out of a basic block, we would
+//! need to store information about the last use in the block somewhere. LLVM stores this
+//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
+//! source of problems for LLVM's register allocator.
+//!
+//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
+//! multiple definitions of the same variable. We don't need this generality since we already have
+//! a program in SSA form.
+//!
+//! ## LLVM's liveness analysis
+//!
+//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
+//! a disjoint union of related SSA values that should be assigned to the same physical register.
+//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
+//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
+//! describes the live range of a virtual register *and* which one of the related SSA values is
+//! live at any given program point.
+//!
+//! LLVM computes the live range of each virtual register independently by using the use-def chains
+//! that are baked into its IR. The algorithm for a single virtual register is:
+//!
+//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
+//! the def-chain. This does not include any phi-values.
+//! 2. Go through the virtual register's use chain and perform the following steps at each use:
+//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
+//! that already contain some liveness and extend the last live SSA value in the block to be
+//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
+//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
+//! PHI values to be created when different SSA values can reach the same block.
+//!
+//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
+//! one SSA value.
+//!
+//! This algorithm has some advantages compared to the data-flow equations:
+//!
+//! - The live ranges of local virtual registers are computed very quickly without ever traversing
+//! the CFG. The memory needed to store these live ranges is independent of the number of basic
+//! blocks in the program.
+//! - The time to compute the live range of a global virtual register is proportional to the number
+//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
+//! functions.
+//! - A single live range can be recomputed after making modifications to the IR. No global
+//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
+//! which Cranelift doesn't have.
+//!
+//! Cranelift uses data structures and algorithms very similar to LLVM's, with the important
+//! difference that live ranges are computed per SSA value instead of per virtual register, and
+//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
+//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
+//!
+//! ## Fast Liveness Checking for SSA-Form Programs
+//!
+//! A liveness analysis that is often brought up in the context of SSA-based register allocation
+//! was presented at CGO 2008:
+//!
+//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
+//! Checking for SSA-Form Programs.* CGO.
+//!
+//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
+//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
+//! chain of the value and performs lookups in the precomputed bit-vectors.
+//!
+//! I did not seriously consider this analysis for Cranelift because:
+//!
+//! - It depends critically on use chains which Cranelift doesn't have.
+//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
+//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
+//! quadratic behavior in unfortunate cases.
+//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
+//! based approach, which isn't that impressive.
+//!
+//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
+//! gains use chains, this approach would be worth a proper evaluation.
+//!
+//!
+//! # Cranelift's liveness analysis
+//!
+//! The algorithm implemented in this module is similar to LLVM's with these differences:
+//!
+//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
+//! register.
+//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
+//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
+//! chains, so it is not possible to compute the live range for a single SSA value independently.
+//!
+//! The liveness computation visits all instructions in the program. The order is not important for
+//! the algorithm to be correct. At each instruction, the used values are examined.
+//!
+//! - The first time a value is encountered, its live range is constructed as a dead live range
+//! containing only the defining program point.
+//! - The local interval of the value's live range is extended so it reaches the use. This may
+//! require creating a new live-in local interval for the block.
+//! - If the live range became live-in to the block, add the block to a work-list.
+//! - While the work-list is non-empty, pop a live-in block and repeat the two steps above, using each
+//! of the live-in block's CFG predecessor instructions as a 'use'.
+//!
+//! The effect of this algorithm is to extend the live range of each value to reach its uses as they are
+//! visited. No data about each value beyond the live range is needed between visiting uses, so
+//! nothing is lost by computing the live range of all values simultaneously.
+//!
+//! ## Cache efficiency of Cranelift vs LLVM
+//!
+//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
+//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
+//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
+//! somewhat chaotically.
+//!
+//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
+//! instruction is brought into cache only once, and it is likely that the other instructions on
+//! the same cache line will be visited before the line is evicted.
+//!
+//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
+//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
+//! multiple related values can live on the same cache line.
+//!
+//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
+//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
+//! size to 32 bytes.
+//! - Related values should be stored on the same cache line. The current sparse set implementation
+//! does a decent job of that.
+//! - For global values, the list of live-in intervals is very likely to fit on a single cache
+//! line. These lists are very likely to be found in L2 cache at least.
+//!
+//! There is some room for improvement.
+
+use crate::entity::SparseMap;
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::dfg::ValueDef;
+use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value};
+use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liverange::LiveRange;
+use crate::timing;
+use alloc::vec::Vec;
+use core::mem;
+use core::ops::Index;
+
+/// A set of live ranges, indexed by value number.
+type LiveRangeSet = SparseMap<Value, LiveRange>;
+
+/// Get a mutable reference to the live range for `value`.
+/// Create it if necessary.
+fn get_or_create<'a>(
+ lrset: &'a mut LiveRangeSet,
+ value: Value,
+ isa: &dyn TargetIsa,
+ func: &Function,
+ encinfo: &EncInfo,
+) -> &'a mut LiveRange {
+ // It would be better to use `get_mut()` here, but that leads to borrow checker fighting
+ // which can probably only be resolved by non-lexical lifetimes.
+ // https://github.com/rust-lang/rfcs/issues/811
+ if lrset.get(value).is_none() {
+ // Create a live range for value. We need the program point that defines it.
+ let def;
+ let affinity;
+ match func.dfg.value_def(value) {
+ ValueDef::Result(inst, rnum) => {
+ def = inst.into();
+ // Initialize the affinity from the defining instruction's result constraints.
+ // Don't do this for call return values which are always tied to a single register.
+ affinity = encinfo
+ .operand_constraints(func.encodings[inst])
+ .and_then(|rc| rc.outs.get(rnum))
+ .map(Affinity::new)
+ .or_else(|| {
+ // If this is a call, get the return value affinity.
+ func.dfg
+ .call_signature(inst)
+ .map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
+ })
+ .unwrap_or_default();
+ }
+ ValueDef::Param(block, num) => {
+ def = block.into();
+ if func.layout.entry_block() == Some(block) {
+ // The affinity for entry block parameters can be inferred from the function
+ // signature.
+ affinity = Affinity::abi(&func.signature.params[num], isa);
+ } else {
+ // Give normal block parameters a register affinity matching their type.
+ let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
+ affinity = Affinity::Reg(rc.into());
+ }
+ }
+ };
+ lrset.insert(LiveRange::new(value, def, affinity));
+ }
+ lrset.get_mut(value).unwrap()
+}
+
+/// Extend the live range for `value` so it reaches `to` which must live in `block`.
+fn extend_to_use(
+ lr: &mut LiveRange,
+ block: Block,
+ to: Inst,
+ worklist: &mut Vec<Block>,
+ func: &Function,
+ cfg: &ControlFlowGraph,
+) {
+ // This is our scratch working space, and we'll leave it empty when we return.
+ debug_assert!(worklist.is_empty());
+
+ // Extend the range locally in `block`.
+ // If there already was a live interval in that block, we're done.
+ if lr.extend_in_block(block, to, &func.layout) {
+ worklist.push(block);
+ }
+
+ // The work list contains those blocks where we have learned that the value needs to be
+ // live-in.
+ //
+ // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
+ // CFG from the existing live range to `block`.
+ //
+ // Extend the live range as we go. The live range itself also serves as a visited set since
+ // `extend_in_block` will never return true twice for the same block.
+ //
+ while let Some(livein) = worklist.pop() {
+ // We've learned that the value needs to be live-in to the `livein` block.
+ // Make sure it is also live at all predecessor branches to `livein`.
+ for BlockPredecessor {
+ block: pred,
+ inst: branch,
+ } in cfg.pred_iter(livein)
+ {
+ if lr.extend_in_block(pred, branch, &func.layout) {
+ // This predecessor block also became live-in. We need to process it later.
+ worklist.push(pred);
+ }
+ }
+ }
+}
+
+/// Liveness analysis for a function.
+///
+/// Compute a live range for every SSA value used in the function.
+pub struct Liveness {
+ /// The live ranges that have been computed so far.
+ ranges: LiveRangeSet,
+
+ /// Working space for the `extend_to_use` algorithm.
+ /// This vector is always empty, except for inside that function.
+ /// It lives here to avoid repeated allocation of scratch memory.
+ worklist: Vec<Block>,
+}
+
+impl Liveness {
+ /// Create a new empty liveness analysis.
+ ///
+ /// The memory allocated for this analysis can be reused for multiple functions. Use the
+ /// `compute` method to actually run the analysis for a function.
+ pub fn new() -> Self {
+ Self {
+ ranges: LiveRangeSet::new(),
+ worklist: Vec::new(),
+ }
+ }
+
+ /// Current live ranges.
+ pub fn ranges(&self) -> &LiveRangeSet {
+ &self.ranges
+ }
+
+ /// Clear all data structures in this liveness analysis.
+ pub fn clear(&mut self) {
+ self.ranges.clear();
+ self.worklist.clear();
+ }
+
+ /// Get the live range for `value`, if it exists.
+ pub fn get(&self, value: Value) -> Option<&LiveRange> {
+ self.ranges.get(value)
+ }
+
+ /// Create a new live range for `value`.
+ ///
+ /// The new live range will be defined at `def` with no extent, like a dead value.
+ ///
+ /// This asserts that `value` does not have an existing live range.
+ pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
+ where
+ PP: Into<ProgramPoint>,
+ {
+ let old = self
+ .ranges
+ .insert(LiveRange::new(value, def.into(), affinity));
+ debug_assert!(old.is_none(), "{} already has a live range", value);
+ }
+
+ /// Move the definition of `value` to `def`.
+ ///
+ /// The old and new def points must be in the same block, and before the end of the live range.
+ pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
+ where
+ PP: Into<ProgramPoint>,
+ {
+ let lr = self.ranges.get_mut(value).expect("Value has no live range");
+ lr.move_def_locally(def.into());
+ }
+
+ /// Locally extend the live range for `value` to reach `user`.
+ ///
+ /// It is assumed that `value` is already live before `user` in `block`.
+ ///
+ /// Returns a mutable reference to the value's affinity in case that also needs to be updated.
+ pub fn extend_locally(
+ &mut self,
+ value: Value,
+ block: Block,
+ user: Inst,
+ layout: &Layout,
+ ) -> &mut Affinity {
+ debug_assert_eq!(Some(block), layout.inst_block(user));
+ let lr = self.ranges.get_mut(value).expect("Value has no live range");
+ let livein = lr.extend_in_block(block, user, layout);
+ debug_assert!(!livein, "{} should already be live in {}", value, block);
+ &mut lr.affinity
+ }
+
+ /// Change the affinity of `value` to `Stack` and return the previous affinity.
+ pub fn spill(&mut self, value: Value) -> Affinity {
+ let lr = self.ranges.get_mut(value).expect("Value has no live range");
+ mem::replace(&mut lr.affinity, Affinity::Stack)
+ }
+
+ /// Compute the live ranges of all SSA values used in `func`.
+ /// This clears out any existing analysis stored in this data structure.
+ pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
+ let _tt = timing::ra_liveness();
+ self.ranges.clear();
+
+ // Get ISA data structures used for computing live range affinities.
+ let encinfo = isa.encoding_info();
+ let reginfo = isa.register_info();
+
+ // The liveness computation needs to visit all uses, but the order doesn't matter.
+ // TODO: Perhaps this traversal of the function could be combined with a dead code
+ // elimination pass if we visit a post-order of the dominator tree?
+ for block in func.layout.blocks() {
+ // Make sure we have created live ranges for dead block parameters.
+ // TODO: If these parameters are really dead, we could remove them, except for the
+ // entry block which must match the function signature.
+ for &arg in func.dfg.block_params(block) {
+ get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
+ }
+
+ for inst in func.layout.block_insts(block) {
+ // Eliminate all value aliases, they would confuse the register allocator.
+ func.dfg.resolve_aliases_in_arguments(inst);
+
+ // Make sure we have created live ranges for dead defs.
+ // TODO: When we implement DCE, we can use the absence of a live range to indicate
+ // an unused value.
+ for &def in func.dfg.inst_results(inst) {
+ get_or_create(&mut self.ranges, def, isa, func, &encinfo);
+ }
+
+ // Iterator of constraints, one per value operand.
+ let encoding = func.encodings[inst];
+ let operand_constraint_slice: &[OperandConstraint] =
+ encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
+ let mut operand_constraints = operand_constraint_slice.iter();
+
+ for &arg in func.dfg.inst_args(inst) {
+ // Get the live range, create it as a dead range if necessary.
+ let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
+
+ // Extend the live range to reach this use.
+ extend_to_use(lr, block, inst, &mut self.worklist, func, cfg);
+
+ // Apply operand constraint, ignoring any variable arguments after the fixed
+ // operands described by `operand_constraints`. Variable arguments are either
+ // block arguments or call/return ABI arguments.
+ if let Some(constraint) = operand_constraints.next() {
+ lr.affinity.merge(constraint, &reginfo);
+ }
+ }
+ }
+ }
+ }
+}
+
+impl Index<Value> for Liveness {
+ type Output = LiveRange;
+ fn index(&self, index: Value) -> &LiveRange {
+ self.ranges
+ .get(index)
+ .unwrap_or_else(|| panic!("{} has no live range", index))
+ }
+}
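+
+// A minimal usage sketch (the `isa`, `func`, and `value` bindings are assumed to come from the
+// surrounding compilation context):
+//
+// ```ignore
+// let cfg = ControlFlowGraph::with_function(&func);
+// let mut liveness = Liveness::new();
+// liveness.compute(isa, &mut func, &cfg);
+// let lr = &liveness[value];          // Panics if `value` has no live range.
+// let maybe_lr = liveness.get(value); // Returns `None` instead of panicking.
+// ```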
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/liverange.rs b/third_party/rust/cranelift-codegen/src/regalloc/liverange.rs
new file mode 100644
index 0000000000..e9b3db5bb0
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/liverange.rs
@@ -0,0 +1,720 @@
+//! Data structure representing the live range of an SSA value.
+//!
+//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
+//! an SSA value begins where it is defined and extends to all program points where the value is
+//! still needed.
+//!
+//! # Local Live Ranges
+//!
+//! Inside a single basic block, the live range of a value is always an interval between
+//! two program points (if the value is live in the block at all). The starting point is either:
+//!
+//! 1. The instruction that defines the value, or
+//! 2. The block header, because the value is an argument to the block, or
+//! 3. The block header, because the value is defined in another block and live-in to this one.
+//!
+//! The ending point of the local live range is the last of the following program points in the
+//! block:
+//!
+//! 1. The last use in the block, where a *use* is an instruction that has the value as an argument.
+//! 2. The last branch or jump instruction in the block that can reach a use.
+//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
+//!
+//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined
+//! outside a loop and used inside the loop, it will be live in the entire loop.
+//!
+//! # Global Live Ranges
+//!
+//! Values that appear in more than one block have a *global live range* which can be seen as the
+//! disjoint union of the per-block local intervals for all of the blocks where the value is live.
+//! Together with a `ProgramOrder` which provides a linear ordering of the blocks, the global live
+//! range becomes a linear sequence of disjoint intervals, at most one per block.
+//!
+//! In the special case of a dead value, the global live range is a single interval where the start
+//! and end points are the same. The global live range of a value is never completely empty.
+//!
+//! # Register interference
+//!
+//! The register allocator uses live ranges to determine if values *interfere*, which means that
+//! they can't be stored in the same register. Two live ranges interfere if and only if any of
+//! their intervals overlap.
+//!
+//! If one live range ends at an instruction that defines another live range, those two live ranges
+//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
+//! register for an output value. If Cranelift gets support for inline assembly, we will need to
+//! handle *early clobbers* which are output registers that are not allowed to alias any input
+//! registers.
+//!
+//! If `i1 < i2 < i3` are program points, we have:
+//!
+//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
+//! - `i1-i2` and `i2-i3` don't interfere.
+//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
+//! - `i1-i2` and `i2-i2` don't interfere.
+//! - `i2-i3` and `i2-i2` do interfere.
+//!
+//! Because of this behavior around interval end points, live range interference is not completely
+//! equivalent to mathematical intersection of open or half-open intervals.
+//!
+//! # Implementation notes
+//!
+//! A few notes about the implementation of the live intervals field `liveins`. This should not
+//! concern someone only looking to use the public interface.
+//!
+//! ## Current representation
+//!
+//! Our current implementation uses a sorted array of compressed intervals, represented by their
+//! boundaries (Block, Inst), sorted by Block. This is a simple data structure that makes coalescing
+//! of intervals easy and shows nice performance behavior. See
+//! https://github.com/bytecodealliance/cranelift/issues/1084 for benchmarks against using a
+//! bforest::Map<Block, Inst>.
+//!
+//! ## block ordering
+//!
+//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to
+//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This doesn't
+//! depend on any property of the program order, so alternative orderings are possible:
+//!
+//! 1. The block layout order. This is what we currently use.
+//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
+//! def interval.
+//! 3. A numerical order by block number. Performant because it doesn't need to indirect through the
+//! `ProgramOrder` for comparisons.
+//!
+//! These orderings will cause small differences in coalescing opportunities, but all of them would
+//! do a decent job of compressing a long live range. The numerical order might be preferable
+//! because:
+//!
+//! - It has better performance because block numbers can be compared directly without any table
+//! lookups.
+//! - If block numbers are not reused, it is safe to allocate new blocks without getting spurious
+//! live-in intervals from any coalesced representations that happen to cross a new block.
+//!
+//! For comparing instructions, the layout order is always what we want.
+//!
+//! ## Alternative representation
+//!
+//! Since a local live-in interval always begins at its block header, it is uniquely described by its
+//! end point instruction alone. We can use the layout to look up the block containing the end point.
+//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
+//!
+//! Coalescing is an important compression technique because some live ranges can span thousands of
+//! blocks. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
+//! an `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
+//! `Block` entry represents a single live-in interval.
+//!
+//! This representation is more compact for a live range with many uncoalesced live-in intervals.
+//! It is more complicated to work with, though, so it is probably not worth it. The performance
+//! benefits of switching to a numerical block order only appears if the binary search is doing
+//! block-block comparisons.
+//!
+//! A `BTreeMap<Block, Inst>` could have been used for the live-in intervals, but it doesn't provide
+//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes.
+//!
+//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted
+//! array, see https://github.com/bytecodealliance/cranelift/issues/1084 for details.
+
+use crate::entity::SparseMapValue;
+use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
+use crate::regalloc::affinity::Affinity;
+use core::cmp::Ordering;
+use core::marker::PhantomData;
+use smallvec::SmallVec;
+
+/// Global live range of a single SSA value.
+///
+/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
+/// SSA value is the disjoint union of a set of intervals, each local to a single block, and with at
+/// most one interval per block. We further distinguish between:
+///
+/// 1. The *def interval* is the local interval in the block where the value is defined, and
+/// 2. The *live-in intervals* are the local intervals in the remaining blocks.
+///
+/// A live-in interval always begins at the block header, while the def interval can begin at the
+/// defining instruction, or at the block header for a block argument value.
+///
+/// All values have a def interval, but a large proportion of values don't have any live-in
+/// intervals. These are called *local live ranges*.
+///
+/// # Program order requirements
+///
+/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
+/// ordering instructions inside a block *and* for ordering blocks. The methods that depend on the
+/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
+/// ensure that the provided ordering is consistent between calls.
+///
+/// In particular, changing the order of blocks or inserting new blocks will invalidate live ranges.
+///
+/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
+/// instructions using or defining their value, `LiveRange` structs can contain references to
+/// branch and jump instructions.
+pub type LiveRange = GenericLiveRange<Layout>;
+
+// See comment of liveins below.
+pub struct Interval {
+ begin: Block,
+ end: Inst,
+}
+
+/// Generic live range implementation.
+///
+/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
+/// Use `LiveRange` instead of using this generic directly.
+pub struct GenericLiveRange<PO: ProgramOrder> {
+ /// The value described by this live range.
+ /// This member can't be modified in case the live range is stored in a `SparseMap`.
+ value: Value,
+
+ /// The preferred register allocation for this value.
+ pub affinity: Affinity,
+
+ /// The instruction or block header where this value is defined.
+ def_begin: ProgramPoint,
+
+ /// The end point of the def interval. This must always belong to the same block as `def_begin`.
+ ///
+ /// We always have `def_begin <= def_end` with equality implying a dead def live range with no
+ /// uses.
+ def_end: ProgramPoint,
+
+ /// Additional live-in intervals sorted in program order.
+ ///
+ /// This vector is empty for most values which are only used in one block.
+ ///
+ /// An entry `block -> inst` means that the live range is live-in to `block`, continuing up to
+ /// `inst` which may belong to a later block in the program order.
+ ///
+ /// The entries are non-overlapping, and none of them overlap the block where the value is
+ /// defined.
+ liveins: SmallVec<[Interval; 2]>,
+
+ po: PhantomData<*const PO>,
+}
+
+/// A simple helper macro to make comparisons more natural to read.
+macro_rules! cmp {
+ ($order:ident, $a:ident > $b:expr) => {
+ $order.cmp($a, $b) == Ordering::Greater
+ };
+ ($order:ident, $a:ident >= $b:expr) => {
+ $order.cmp($a, $b) != Ordering::Less
+ };
+ ($order:ident, $a:ident < $b:expr) => {
+ $order.cmp($a, $b) == Ordering::Less
+ };
+ ($order:ident, $a:ident <= $b:expr) => {
+ $order.cmp($a, $b) != Ordering::Greater
+ };
+}
+
+impl<PO: ProgramOrder> GenericLiveRange<PO> {
+ /// Create a new live range for `value` defined at `def`.
+ ///
+ /// The live range will be created as dead, but it can be extended with `extend_in_block()`.
+ pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
+ Self {
+ value,
+ affinity,
+ def_begin: def,
+ def_end: def,
+ liveins: SmallVec::new(),
+ po: PhantomData,
+ }
+ }
+
+ /// Finds an entry in the compressed set of live-in intervals that contains `block`, or returns
+ /// the position where such a new entry should be inserted.
+ fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result<usize, usize> {
+ self.liveins
+ .binary_search_by(|interval| order.cmp(interval.begin, block))
+ .or_else(|n| {
+ // The previous interval's end might cover the searched block.
+ if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) {
+ Ok(n - 1)
+ } else {
+ Err(n)
+ }
+ })
+ }
+
+ /// Extend the local interval for `block` so it reaches `to` which must belong to `block`.
+ /// Create a live-in interval if necessary.
+ ///
+ /// If the live range already has a local interval in `block`, extend its end point so it
+ /// includes `to`, and return false.
+ ///
+ /// If the live range did not previously have a local interval in `block`, add one so the value
+ /// is live-in to `block`, extending to `to`. Return true.
+ ///
+ /// The return value can be used to detect if we just learned that the value is live-in to
+ /// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks.
+ pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool {
+ // First check if we're extending the def interval.
+ //
+ // We're assuming here that `inst` never precedes `def_begin` in the same block, but we can't
+ // check it without a method for getting `inst`'s block.
+ if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) {
+ let inst_pp = inst.into();
+ debug_assert_ne!(
+ inst_pp, self.def_begin,
+ "Can't use value in the defining instruction."
+ );
+ if cmp!(order, inst > self.def_end) {
+ self.def_end = inst_pp;
+ }
+ return false;
+ }
+
+ // Now check if we're extending any of the existing live-in intervals.
+ match self.lookup_entry_containing_block(block, order) {
+ Ok(n) => {
+ // We found one interval and might need to extend it.
+ if cmp!(order, inst <= self.liveins[n].end) {
+ // Both interval parts are already included in a compressed interval.
+ return false;
+ }
+
+ // If the instruction at the end is the last instruction before the next block,
+ // coalesce the two intervals:
+ // [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end]
+ if let Some(next) = &self.liveins.get(n + 1) {
+ if order.is_block_gap(inst, next.begin) {
+ // At this point we can choose to remove the current interval or the next
+ // one; remove the next one to avoid one memory move.
+ let next_end = next.end;
+ debug_assert!(cmp!(order, next_end > self.liveins[n].end));
+ self.liveins[n].end = next_end;
+ self.liveins.remove(n + 1);
+ return false;
+ }
+ }
+
+ // We can't coalesce, just extend the interval.
+ self.liveins[n].end = inst;
+ false
+ }
+
+ Err(n) => {
+ // No interval was found containing the current block: we need to insert a new one,
+ // unless there's a coalescing opportunity with the previous or next one.
+ let coalesce_next = self
+ .liveins
+ .get(n)
+ .filter(|next| order.is_block_gap(inst, next.begin))
+ .is_some();
+ let coalesce_prev = self
+ .liveins
+ .get(n.wrapping_sub(1))
+ .filter(|prev| order.is_block_gap(prev.end, block))
+ .is_some();
+
+ match (coalesce_prev, coalesce_next) {
+ // The new interval is the missing hole between prev and next: we can merge
+ // them all together.
+ (true, true) => {
+ let prev_end = self.liveins[n - 1].end;
+ debug_assert!(cmp!(order, prev_end <= self.liveins[n].end));
+ self.liveins[n - 1].end = self.liveins[n].end;
+ self.liveins.remove(n);
+ }
+
+ // Coalesce only with the previous or next one.
+ (true, false) => {
+ debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end));
+ self.liveins[n - 1].end = inst;
+ }
+ (false, true) => {
+ debug_assert!(cmp!(order, block <= self.liveins[n].begin));
+ self.liveins[n].begin = block;
+ }
+
+ (false, false) => {
+ // No coalescing opportunity, we have to insert.
+ self.liveins.insert(
+ n,
+ Interval {
+ begin: block,
+ end: inst,
+ },
+ );
+ }
+ }
+
+ true
+ }
+ }
+ }
+
+ /// Is this the live range of a dead value?
+ ///
+ /// A dead value has no uses, and its live range ends at the same program point where it is
+ /// defined.
+ pub fn is_dead(&self) -> bool {
+ self.def_begin == self.def_end
+ }
+
+ /// Is this a local live range?
+ ///
+ /// A local live range is only used in the block where it was defined; it has no live-in
+ /// intervals in other blocks.
+ pub fn is_local(&self) -> bool {
+ self.liveins.is_empty()
+ }
+
+ /// Get the program point where this live range is defined.
+ ///
+ /// This will be a block header when the value is a block argument, otherwise it is the defining
+ /// instruction.
+ pub fn def(&self) -> ProgramPoint {
+ self.def_begin
+ }
+
+ /// Move the definition of this value to a new program point.
+ ///
+ /// It is only valid to move the definition within the same block, and it can't be moved beyond
+ /// `def_local_end()`.
+ pub fn move_def_locally(&mut self, def: ProgramPoint) {
+ self.def_begin = def;
+ }
+
+ /// Get the local end-point of this live range in the block where it is defined.
+ ///
+ /// This can be the block header itself in the case of a dead block argument.
+ /// Otherwise, it will be the last local use or branch/jump that can reach a use.
+ pub fn def_local_end(&self) -> ProgramPoint {
+ self.def_end
+ }
+
+ /// Get the local end-point of this live range in a block where it is live-in.
+ ///
+ /// If this live range is not live-in to `block`, return `None`. Otherwise, return the end-point
+ /// of this live range's local interval in `block`.
+ ///
+ /// If the live range is live through all of `block`, the terminator of `block` is a correct
+ /// answer, but it is also possible that an even later program point is returned. So don't
+ /// depend on the returned `Inst` to belong to `block`.
+ pub fn livein_local_end(&self, block: Block, order: &PO) -> Option<Inst> {
+ self.lookup_entry_containing_block(block, order)
+ .and_then(|i| {
+ let inst = self.liveins[i].end;
+ if cmp!(order, block < inst) {
+ Ok(inst)
+ } else {
+ // Can be any error type, really, since it's discarded by ok().
+ Err(i)
+ }
+ })
+ .ok()
+ }
+
+ /// Is this value live-in to `block`?
+ ///
+ /// A block argument is not considered to be live in.
+ pub fn is_livein(&self, block: Block, order: &PO) -> bool {
+ self.livein_local_end(block, order).is_some()
+ }
+
+ /// Get all the live-in intervals.
+ ///
+ /// Note that the intervals are stored in a compressed form so each entry may span multiple
+ /// blocks where the value is live in.
+ pub fn liveins<'a>(&'a self) -> impl Iterator<Item = (Block, Inst)> + 'a {
+ self.liveins
+ .iter()
+ .map(|interval| (interval.begin, interval.end))
+ }
+
+ /// Check if this live range overlaps a definition in `block`.
+ pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool {
+ // Two defs at the same program point always overlap, even if one is dead.
+ if def == self.def_begin.into() {
+ return true;
+ }
+
+ // Check for an overlap with the local range.
+ if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) {
+ return true;
+ }
+
+ // Check for an overlap with a live-in range.
+ match self.livein_local_end(block, order) {
+ Some(inst) => cmp!(order, def < inst),
+ None => false,
+ }
+ }
+
+ /// Check if this live range reaches a use at `user` in `block`.
+ pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool {
+ // Check for an overlap with the local range.
+ if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) {
+ return true;
+ }
+
+ // Check for an overlap with a live-in range.
+ match self.livein_local_end(block, order) {
+ Some(inst) => cmp!(order, user <= inst),
+ None => false,
+ }
+ }
+
+ /// Check if this live range is killed at `user` in `block`.
+ pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool {
+ self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user)
+ }
+}
+
+/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
+impl<PO: ProgramOrder> SparseMapValue<Value> for GenericLiveRange<PO> {
+ fn key(&self) -> Value {
+ self.value
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{GenericLiveRange, Interval};
+ use crate::entity::EntityRef;
+ use crate::ir::{Block, Inst, Value};
+ use crate::ir::{ExpandedProgramPoint, ProgramOrder};
+ use alloc::vec::Vec;
+ use core::cmp::Ordering;
+
+ // Dummy program order which simply compares indexes.
+ // It is assumed that blocks have indexes that are multiples of 10, and instructions have indexes
+ // in between. `is_block_gap` assumes that terminator instructions have indexes of the form
+ // block * 10 + 1. This is used in the coalesce test.
+ struct ProgOrder {}
+
+ impl ProgramOrder for ProgOrder {
+ fn cmp<A, B>(&self, a: A, b: B) -> Ordering
+ where
+ A: Into<ExpandedProgramPoint>,
+ B: Into<ExpandedProgramPoint>,
+ {
+ fn idx(pp: ExpandedProgramPoint) -> usize {
+ match pp {
+ ExpandedProgramPoint::Inst(i) => i.index(),
+ ExpandedProgramPoint::Block(e) => e.index(),
+ }
+ }
+
+ let ia = idx(a.into());
+ let ib = idx(b.into());
+ ia.cmp(&ib)
+ }
+
+ fn is_block_gap(&self, inst: Inst, block: Block) -> bool {
+ inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1
+ }
+ }
+
+ impl ProgOrder {
+ // Get the block corresponding to `inst`.
+ fn inst_block(&self, inst: Inst) -> Block {
+ let i = inst.index();
+ Block::new(i - i % 10)
+ }
+
+ // Get the block of a program point.
+ fn pp_block<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Block {
+ match pp.into() {
+ ExpandedProgramPoint::Inst(i) => self.inst_block(i),
+ ExpandedProgramPoint::Block(e) => e,
+ }
+ }
+
+ // Validate the live range invariants.
+ fn validate(&self, lr: &GenericLiveRange<Self>) {
+ // The def interval must cover a single block.
+ let def_block = self.pp_block(lr.def_begin);
+ assert_eq!(def_block, self.pp_block(lr.def_end));
+
+ // Check that the def interval isn't backwards.
+ match self.cmp(lr.def_begin, lr.def_end) {
+ Ordering::Equal => assert!(lr.liveins.is_empty()),
+ Ordering::Greater => {
+ panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
+ }
+ Ordering::Less => {}
+ }
+
+ // Check the live-in intervals.
+ let mut prev_end = None;
+ for Interval { begin, end } in lr.liveins.iter() {
+ let begin = *begin;
+ let end = *end;
+
+ assert_eq!(self.cmp(begin, end), Ordering::Less);
+ if let Some(e) = prev_end {
+ assert_eq!(self.cmp(e, begin), Ordering::Less);
+ }
+
+ assert!(
+ self.cmp(lr.def_end, begin) == Ordering::Less
+ || self.cmp(lr.def_begin, end) == Ordering::Greater,
+ "Interval can't overlap the def block"
+ );
+
+ // Save for next round.
+ prev_end = Some(end);
+ }
+ }
+ }
+
+ // Singleton `ProgramOrder` for tests below.
+ const PO: &'static ProgOrder = &ProgOrder {};
+
+ #[test]
+ fn dead_def_range() {
+ let v0 = Value::new(0);
+ let e0 = Block::new(0);
+ let i1 = Inst::new(1);
+ let i2 = Inst::new(2);
+ let e2 = Block::new(2);
+ let lr = GenericLiveRange::new(v0, i1.into(), Default::default());
+ assert!(lr.is_dead());
+ assert!(lr.is_local());
+ assert_eq!(lr.def(), i1.into());
+ assert_eq!(lr.def_local_end(), i1.into());
+ assert_eq!(lr.livein_local_end(e2, PO), None);
+ PO.validate(&lr);
+
+ // A dead live range overlaps its own def program point.
+ assert!(lr.overlaps_def(i1.into(), e0, PO));
+ assert!(!lr.overlaps_def(i2.into(), e0, PO));
+ assert!(!lr.overlaps_def(e0.into(), e0, PO));
+ }
+
+ #[test]
+ fn dead_arg_range() {
+ let v0 = Value::new(0);
+ let e2 = Block::new(2);
+ let lr = GenericLiveRange::new(v0, e2.into(), Default::default());
+ assert!(lr.is_dead());
+ assert!(lr.is_local());
+ assert_eq!(lr.def(), e2.into());
+ assert_eq!(lr.def_local_end(), e2.into());
+ // The def interval of a block argument does not count as live-in.
+ assert_eq!(lr.livein_local_end(e2, PO), None);
+ PO.validate(&lr);
+ }
+
+ #[test]
+ fn local_def() {
+ let v0 = Value::new(0);
+ let e10 = Block::new(10);
+ let i11 = Inst::new(11);
+ let i12 = Inst::new(12);
+ let i13 = Inst::new(13);
+ let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+ assert_eq!(lr.extend_in_block(e10, i13, PO), false);
+ PO.validate(&lr);
+ assert!(!lr.is_dead());
+ assert!(lr.is_local());
+ assert_eq!(lr.def(), i11.into());
+ assert_eq!(lr.def_local_end(), i13.into());
+
+ // Extending to an already covered inst should not change anything.
+ assert_eq!(lr.extend_in_block(e10, i12, PO), false);
+ PO.validate(&lr);
+ assert_eq!(lr.def(), i11.into());
+ assert_eq!(lr.def_local_end(), i13.into());
+ }
+
+ #[test]
+ fn local_arg() {
+ let v0 = Value::new(0);
+ let e10 = Block::new(10);
+ let i11 = Inst::new(11);
+ let i12 = Inst::new(12);
+ let i13 = Inst::new(13);
+ let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default());
+
+ // Extending a dead block argument in its own block should not indicate that a live-in
+ // interval was created.
+ assert_eq!(lr.extend_in_block(e10, i12, PO), false);
+ PO.validate(&lr);
+ assert!(!lr.is_dead());
+ assert!(lr.is_local());
+ assert_eq!(lr.def(), e10.into());
+ assert_eq!(lr.def_local_end(), i12.into());
+
+ // Extending to an already covered inst should not change anything.
+ assert_eq!(lr.extend_in_block(e10, i11, PO), false);
+ PO.validate(&lr);
+ assert_eq!(lr.def(), e10.into());
+ assert_eq!(lr.def_local_end(), i12.into());
+
+ // Extending further.
+ assert_eq!(lr.extend_in_block(e10, i13, PO), false);
+ PO.validate(&lr);
+ assert_eq!(lr.def(), e10.into());
+ assert_eq!(lr.def_local_end(), i13.into());
+ }
+
+ #[test]
+ fn global_def() {
+ let v0 = Value::new(0);
+ let e10 = Block::new(10);
+ let i11 = Inst::new(11);
+ let i12 = Inst::new(12);
+ let e20 = Block::new(20);
+ let i21 = Inst::new(21);
+ let i22 = Inst::new(22);
+ let i23 = Inst::new(23);
+ let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+ assert_eq!(lr.extend_in_block(e10, i12, PO), false);
+
+ // Adding a live-in interval.
+ assert_eq!(lr.extend_in_block(e20, i22, PO), true);
+ PO.validate(&lr);
+ assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
+
+ // Non-extending the live-in.
+ assert_eq!(lr.extend_in_block(e20, i21, PO), false);
+ assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
+
+ // Extending the existing live-in.
+ assert_eq!(lr.extend_in_block(e20, i23, PO), false);
+ PO.validate(&lr);
+ assert_eq!(lr.livein_local_end(e20, PO), Some(i23));
+ }
+
+ #[test]
+ fn coalesce() {
+ let v0 = Value::new(0);
+ let i11 = Inst::new(11);
+ let e20 = Block::new(20);
+ let i21 = Inst::new(21);
+ let e30 = Block::new(30);
+ let i31 = Inst::new(31);
+ let e40 = Block::new(40);
+ let i41 = Inst::new(41);
+ let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+ assert_eq!(lr.extend_in_block(e30, i31, PO,), true);
+ assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i31)]);
+
+ // Coalesce to previous
+ assert_eq!(lr.extend_in_block(e40, i41, PO,), true);
+ assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i41)]);
+
+ // Coalesce to next
+ assert_eq!(lr.extend_in_block(e20, i21, PO,), true);
+ assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
+
+ let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+ assert_eq!(lr.extend_in_block(e40, i41, PO,), true);
+ assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e40, i41)]);
+
+ assert_eq!(lr.extend_in_block(e20, i21, PO,), true);
+ assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i21), (e40, i41)]);
+
+ // Coalesce to previous and next
+ assert_eq!(lr.extend_in_block(e30, i31, PO,), true);
+ assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/mod.rs b/third_party/rust/cranelift-codegen/src/regalloc/mod.rs
new file mode 100644
index 0000000000..581acc408e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/mod.rs
@@ -0,0 +1,26 @@
+//! Register allocation.
+//!
+//! This module contains data structures and algorithms used for register allocation.
+
+pub mod coloring;
+pub mod live_value_tracker;
+pub mod liveness;
+pub mod liverange;
+pub mod register_set;
+pub mod virtregs;
+
+mod affinity;
+mod branch_splitting;
+mod coalescing;
+mod context;
+mod diversion;
+mod pressure;
+mod reload;
+mod safepoint;
+mod solver;
+mod spilling;
+
+pub use self::context::Context;
+pub use self::diversion::{EntryRegDiversions, RegDiversions};
+pub use self::register_set::RegisterSet;
+pub use self::safepoint::emit_stack_maps;
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/pressure.rs b/third_party/rust/cranelift-codegen/src/regalloc/pressure.rs
new file mode 100644
index 0000000000..aa83037041
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/pressure.rs
@@ -0,0 +1,371 @@
+//! Register pressure tracking.
+//!
+//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
+//! sufficiently". This module defines the data structures needed to measure register pressure
+//! accurately enough to guarantee that the coloring phase will not run out of registers.
+//!
+//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
+//! any given program point. This simplistic method has two problems:
+//!
+//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
+//! register banks, so we need to at least count the number of live registers in each register
+//! bank separately.
+//!
+//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
+//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
+//! This makes it difficult to accurately measure register pressure.
+//!
+//! This module deals with the problems via *register banks* and *top-level register classes*.
+//! Register classes in different register banks are completely independent, so we can count
+//! registers in one bank without worrying about the other bank at all.
+//!
+//! All register classes have a unique top-level register class, and we will count registers for
+//! each top-level register class individually. However, a register bank can have multiple
+//! top-level register classes that interfere with each other, so all top-level counts need to
+//! be considered when determining how many more registers can be allocated.
+//!
+//! Currently, the only register bank with multiple top-level register classes is the `arm32`
+//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
+//!
+//! # Base and transient counts
+//!
+//! We maintain two separate register counts per top-level register class: base counts and
+//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
+//! transient counts are adjusted with `take_transient` and `free_transient`.
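+//!
+//! A rough usage sketch, showing only the base counts (the `reginfo`, the `usable` register set,
+//! and the register class `rc` are assumed to come from the target ISA):
+//!
+//! ```ignore
+//! let mut pressure = Pressure::new(&reginfo, &usable);
+//! pressure.take(rc); // Claim a register for a value that stays live.
+//! // ... run the spilling/coloring logic ...
+//! pressure.free(rc); // Release it when the value dies.
+//! ```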
+
+// Remove once we're using the pressure tracker.
+#![allow(dead_code)]
+
+use crate::isa::registers::{RegClass, RegClassMask, RegInfo};
+use crate::regalloc::RegisterSet;
+use core::cmp::min;
+use core::fmt;
+use core::iter::ExactSizeIterator;
+use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS;
+
+/// Information per top-level register class.
+///
+/// Everything but the counts is static information computed from the constructor arguments.
+#[derive(Default)]
+struct TopRC {
+ /// Number of registers currently used from this register class.
+ base_count: u32,
+ transient_count: u32,
+
+ /// Max number of registers that can be allocated.
+ limit: u32,
+
+ /// Register units per register.
+ width: u8,
+
+ /// The first aliasing top-level RC.
+ first_toprc: u8,
+
+ /// The number of aliasing top-level RCs.
+ num_toprcs: u8,
+}
+
+impl TopRC {
+ fn total_count(&self) -> u32 {
+ self.base_count + self.transient_count
+ }
+}
+
+pub struct Pressure {
+ /// Bit mask of top-level register classes that are aliased by other top-level register classes.
+ /// Unaliased register classes can use a simpler interference algorithm.
+ aliased: RegClassMask,
+
+ /// Current register counts per top-level register class.
+ toprc: [TopRC; MAX_TRACKED_TOP_RCS],
+}
+
+impl Pressure {
+ /// Create a new register pressure tracker.
+ pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
+ let mut p = Self {
+ aliased: 0,
+ toprc: Default::default(),
+ };
+
+ // Get the layout of aliasing top-level register classes from the register banks.
+ for bank in reginfo.banks {
+ let first = bank.first_toprc;
+ let num = bank.num_toprcs;
+
+ if bank.pressure_tracking {
+ for rc in &mut p.toprc[first..first + num] {
+ rc.first_toprc = first as u8;
+ rc.num_toprcs = num as u8;
+ }
+
+ // Flag the top-level register classes with aliases.
+ if num > 1 {
+ p.aliased |= ((1 << num) - 1) << first;
+ }
+ } else {
+ // This bank has no pressure tracking, so the indices of its top-level register
+ // classes may exceed `MAX_TRACKED_TOP_RCS`. Fill in dummy entries.
+ for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] {
+ // These aren't used if we don't set the `aliased` bit.
+ rc.first_toprc = !0;
+ rc.limit = !0;
+ }
+ }
+ }
+
+ // Compute per-class limits from `usable`.
+ for (toprc, rc) in p
+ .toprc
+ .iter_mut()
+ .take_while(|t| t.num_toprcs > 0)
+ .zip(reginfo.classes)
+ {
+ toprc.limit = usable.iter(rc).len() as u32;
+ toprc.width = rc.width;
+ }
+
+ p
+ }
+
+ /// Check for an available register in the register class `rc`.
+ ///
+ /// If it is possible to allocate one more register from `rc`'s top-level register class,
+ /// returns 0.
+ ///
+ /// If not, returns a bit-mask of top-level register classes that are interfering. Register
+ /// pressure should be eased in one of the returned top-level register classes before calling
+ /// `check_avail()` again.
+ fn check_avail(&self, rc: RegClass) -> RegClassMask {
+ let entry = match self.toprc.get(rc.toprc as usize) {
+ None => return 0, // Not a pressure tracked bank.
+ Some(e) => e,
+ };
+ let mask = 1 << rc.toprc;
+ if (self.aliased & mask) == 0 {
+ // This is a simple unaliased top-level register class.
+ if entry.total_count() < entry.limit {
+ 0
+ } else {
+ mask
+ }
+ } else {
+ // This is the more complicated case. The top-level register class has aliases.
+ self.check_avail_aliased(entry)
+ }
+ }
+
+ /// Check for an available register in a top-level register class that may have aliases.
+ ///
+ /// This is the out-of-line slow path for `check_avail()`.
+ fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
+ let first = usize::from(entry.first_toprc);
+ let num = usize::from(entry.num_toprcs);
+ let width = u32::from(entry.width);
+ let ulimit = entry.limit * width;
+
+ // Count up the number of available register units.
+ let mut units = 0;
+ for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
+ let rcw = u32::from(rc.width);
+ // If `rc.width` is smaller than `width`, each register in `rc` could potentially block
+ // one of ours. This is assuming that none of the smaller registers are straddling the
+ // bigger ones.
+ //
+ // If `rc.width` is larger than `width`, we are also assuming that the registers are
+ // aligned and `rc.width` is a multiple of `width`.
+ let u = if rcw < width {
+ // We can't take more than the total number of register units in the class.
+ // This matters for arm32 S-registers which can only ever lock out 16 D-registers.
+ min(rc.total_count() * width, rc.limit * rcw)
+ } else {
+ rc.total_count() * rcw
+ };
+
+ // If this top-level RC on its own is responsible for exceeding our limit, return it
+ // early to guarantee that registers here are spilled before spilling other registers
+ // unnecessarily.
+ if u >= ulimit {
+ return 1 << rci;
+ }
+
+ units += u;
+ }
+
+ // We've counted up the worst-case number of register units claimed by all aliasing
+ // classes. Compare to the unit limit in this class.
+ if units < ulimit {
+ 0
+ } else {
+ // Registers need to be spilled from any one of the aliasing classes.
+ ((1 << num) - 1) << first
+ }
+ }
+
+ /// Take a register from `rc`.
+ ///
+ /// This does not check if there are enough registers available.
+ pub fn take(&mut self, rc: RegClass) {
+ if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
+ t.base_count += 1;
+ }
+ }
+
+ /// Free a register in `rc`.
+ pub fn free(&mut self, rc: RegClass) {
+ if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
+ t.base_count -= 1;
+ }
+ }
+
+ /// Reset all counts to 0, both base and transient.
+ pub fn reset(&mut self) {
+ for e in &mut self.toprc {
+ e.base_count = 0;
+ e.transient_count = 0;
+ }
+ }
+
+ /// Try to increment a transient counter.
+ ///
+ /// This will fail if there are not enough registers available.
+ pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
+ let mask = self.check_avail(rc);
+ if mask == 0 {
+ if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
+ t.transient_count += 1;
+ }
+
+ Ok(())
+ } else {
+ Err(mask)
+ }
+ }
+
+ /// Reset all transient counts to 0.
+ pub fn reset_transient(&mut self) {
+ for e in &mut self.toprc {
+ e.transient_count = 0;
+ }
+ }
+
+ /// Preserve the transient counts by transferring them to the base counts.
+ pub fn preserve_transient(&mut self) {
+ for e in &mut self.toprc {
+ e.base_count += e.transient_count;
+ e.transient_count = 0;
+ }
+ }
+}
+
+impl fmt::Display for Pressure {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "Pressure[")?;
+ for rc in &self.toprc {
+ if rc.limit > 0 && rc.limit < !0 {
+ write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
+ }
+ }
+ write!(f, " ]")
+ }
+}
+
+#[cfg(test)]
+#[cfg(feature = "arm32")]
+mod tests {
+ use super::Pressure;
+ use crate::isa::registers::{RegBank, RegClassData};
+ use crate::isa::{RegClass, RegInfo, RegUnit};
+ use crate::regalloc::RegisterSet;
+ use core::borrow::Borrow;
+
+ // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info
+ // about registers, so we directly access `INFO` from registers-arm32.rs.
+ include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
+
+ // Get a register class by name.
+ fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
+ reginfo
+ .classes
+ .iter()
+ .find(|rc| rc.name == name)
+ .expect("Can't find named register class.")
+ }
+
+ #[test]
+ fn basic_counting() {
+ let reginfo = INFO.borrow();
+ let gpr = rc_by_name(&reginfo, "GPR");
+ let s = rc_by_name(&reginfo, "S");
+
+ let regs = RegisterSet::new();
+
+ let mut pressure = Pressure::new(&reginfo, &regs);
+ let mut count = 0;
+ while pressure.check_avail(gpr) == 0 {
+ pressure.take(gpr);
+ count += 1;
+ }
+ assert_eq!(count, 16);
+ assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
+ assert_eq!(pressure.check_avail(s), 0);
+ pressure.free(gpr);
+ assert_eq!(pressure.check_avail(gpr), 0);
+ pressure.take(gpr);
+ assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
+ assert_eq!(pressure.check_avail(s), 0);
+ pressure.reset();
+ assert_eq!(pressure.check_avail(gpr), 0);
+ assert_eq!(pressure.check_avail(s), 0);
+ }
+
+ #[test]
+ fn arm_float_bank() {
+ let reginfo = INFO.borrow();
+ let s = rc_by_name(&reginfo, "S");
+ let d = rc_by_name(&reginfo, "D");
+ let q = rc_by_name(&reginfo, "Q");
+ let regs = RegisterSet::new();
+
+ let mut pressure = Pressure::new(&reginfo, &regs);
+ assert_eq!(pressure.check_avail(s), 0);
+ assert_eq!(pressure.check_avail(d), 0);
+ assert_eq!(pressure.check_avail(q), 0);
+
+ // Allocating a single S-register should not affect availability.
+ pressure.take(s);
+ assert_eq!(pressure.check_avail(s), 0);
+ assert_eq!(pressure.check_avail(d), 0);
+ assert_eq!(pressure.check_avail(q), 0);
+
+ pressure.take(d);
+ assert_eq!(pressure.check_avail(s), 0);
+ assert_eq!(pressure.check_avail(d), 0);
+ assert_eq!(pressure.check_avail(q), 0);
+
+ pressure.take(q);
+ assert_eq!(pressure.check_avail(s), 0);
+ assert_eq!(pressure.check_avail(d), 0);
+ assert_eq!(pressure.check_avail(q), 0);
+
+ // Take a total of 16 S-regs.
+ for _ in 1..16 {
+ pressure.take(s);
+ }
+ assert_eq!(pressure.check_avail(s), 0);
+ assert_eq!(pressure.check_avail(d), 0);
+ assert_eq!(pressure.check_avail(q), 0);
+
+ // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
+ for _ in 0..6 {
+ assert_eq!(pressure.check_avail(d), 0);
+ assert_eq!(pressure.check_avail(q), 0);
+ pressure.take(q);
+ }
+
+ // We've taken 16 S, 1 D, and 7 Qs.
+ assert!(pressure.check_avail(s) != 0);
+ assert_eq!(pressure.check_avail(d), 0);
+ assert!(pressure.check_avail(q) != 0);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/register_set.rs b/third_party/rust/cranelift-codegen/src/regalloc/register_set.rs
new file mode 100644
index 0000000000..52b8a6fa0a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/register_set.rs
@@ -0,0 +1,391 @@
+//! Set of allocatable registers as a bit vector of register units.
+//!
+//! While allocating registers, we need to keep track of which registers are available and which
+//! registers are in use. Since registers can alias in different ways, we track this via the
+//! "register unit" abstraction. Every register contains one or more register units. Registers that
+//! share a register unit can't be in use at the same time.
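+//!
+//! A rough usage sketch (not a compiled doctest; `gpr` stands in for a `RegClass` obtained from
+//! the target ISA, and the unit numbers are arbitrary):
+//!
+//! ```ignore
+//! let mut regs = RegisterSet::new();   // Every register unit starts out available.
+//! assert!(regs.is_avail(gpr, 5));
+//!
+//! regs.take(gpr, 5);                   // Claim the register at unit 5 and the units it covers.
+//! assert!(!regs.is_avail(gpr, 5));
+//!
+//! // Iterate over the registers in `gpr` that are still fully available.
+//! for unit in regs.iter(gpr) {
+//!     // ...
+//! }
+//!
+//! regs.free(gpr, 5);                   // Return the register to the set.
+//! ```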
+
+use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
+use core::char;
+use core::fmt;
+use core::iter::ExactSizeIterator;
+use core::mem::size_of_val;
+
+/// Set of registers available for allocation.
+#[derive(Clone)]
+pub struct RegisterSet {
+ avail: RegUnitMask,
+}
+
+// Given a register class and a register unit in the class, compute a word index and a bit mask of
+// register units representing that register.
+//
+// Note that a register is not allowed to straddle words.
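+//
+// For example (illustrative numbers only), a two-unit register at `reg = 33` with `rc.width == 2`
+// gives `word_index = 33 / 32 = 1` and `reg_bits = 0b11 << (33 % 32) = 0b110`.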
+fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
+ // Bit mask representing the register. It is `rc.width` consecutive units.
+ let width_bits = (1 << rc.width) - 1;
+ // Index into avail[] of the word containing `reg`.
+ let word_index = (reg / 32) as usize;
+ // The actual bits in the word that cover `reg`.
+ let reg_bits = width_bits << (reg % 32);
+
+ (word_index, reg_bits)
+}
+
+impl RegisterSet {
+ /// Create a new register set with all registers available.
+ ///
+ /// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
+ /// allocatable registers where reserved registers have been filtered out.
+ pub fn new() -> Self {
+ Self { avail: [!0; 3] }
+ }
+
+ /// Create a new register set with no registers available.
+ pub fn empty() -> Self {
+ Self { avail: [0; 3] }
+ }
+
+ /// Returns `true` if the specified register is available.
+ pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
+ let (idx, bits) = bitmask(rc, reg);
+ (self.avail[idx] & bits) == bits
+ }
+
+ /// Allocate `reg` from `rc` so it is no longer available.
+ ///
+ /// It is an error to take a register that doesn't have all of its register units available.
+ pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
+ let (idx, bits) = bitmask(rc, reg);
+ debug_assert!(
+ (self.avail[idx] & bits) == bits,
+ "{}:{} not available in {}",
+ rc,
+ rc.info.display_regunit(reg),
+ self.display(rc.info)
+ );
+ self.avail[idx] &= !bits;
+ }
+
+ /// Return `reg` and all of its register units to the set of available registers.
+ pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
+ let (idx, bits) = bitmask(rc, reg);
+ debug_assert!(
+ (self.avail[idx] & bits) == 0,
+ "{}:{} is already free in {}",
+ rc,
+ rc.info.display_regunit(reg),
+ self.display(rc.info)
+ );
+ self.avail[idx] |= bits;
+ }
+
+ /// Return an iterator over all available registers belonging to the register class `rc`.
+ ///
+ /// This doesn't allocate anything from the set; use `take()` for that.
+ pub fn iter(&self, rc: RegClass) -> RegSetIter {
+ // Start by copying the RC mask. It is a single set bit for each register in the class.
+ let mut rsi = RegSetIter { regs: rc.mask };
+
+ // Mask out the unavailable units.
+ for idx in 0..self.avail.len() {
+ // If a single unit in a register is unavailable, the whole register can't be used. If
+ // a register straddles a word boundary, it will be marked as unavailable. There's an
+ // assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that.
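+ // For example, the mask bit of a width-2 register at unit 28 survives the loop below
+ // only if `avail` has both bits 28 and 29 set.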
+ for i in 0..rc.width {
+ rsi.regs[idx] &= self.avail[idx] >> i;
+ }
+ }
+ rsi
+ }
+
+ /// Check if any register units allocated out of this set interferes with units allocated out
+ /// of `other`.
+ ///
+ /// This assumes that unused bits are 1.
+ pub fn interferes_with(&self, other: &Self) -> bool {
+ self.avail
+ .iter()
+ .zip(&other.avail)
+ .any(|(&x, &y)| (x | y) != !0)
+ }
+
+ /// Intersect this set of registers with `other`. This has the effect of removing any register
+ /// units from this set that are not in `other`.
+ pub fn intersect(&mut self, other: &Self) {
+ for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
+ *x &= y;
+ }
+ }
+
+ /// Return an object that can display this register set, using the register info from the
+ /// target ISA.
+ pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
+ DisplayRegisterSet(self.clone(), regs.into())
+ }
+}
+
+/// Iterator over available registers in a register class.
+#[derive(Clone)]
+pub struct RegSetIter {
+ regs: RegUnitMask,
+}
+
+impl Iterator for RegSetIter {
+ type Item = RegUnit;
+
+ fn next(&mut self) -> Option<RegUnit> {
+ let mut unit_offset = 0;
+
+ // Find the first set bit in `self.regs`.
+ for word in &mut self.regs {
+ if *word != 0 {
+ // Compute the register unit number from the lowest set bit in the word.
+ let unit = unit_offset + word.trailing_zeros() as RegUnit;
+
+ // Clear that lowest bit so we won't find it again.
+ *word &= *word - 1;
+
+ return Some(unit);
+ }
+ // How many register units were there in the word? This is a constant 32 for `u32`, etc.
+ unit_offset += 8 * size_of_val(word) as RegUnit;
+ }
+
+ // All of `self.regs` is 0.
+ None
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
+ (bits, Some(bits))
+ }
+}
+
+impl RegSetIter {
+ pub fn rnext(&mut self) -> Option<RegUnit> {
+ let num_words = self.regs.len();
+ let bits_per_word = 8 * size_of_val(&self.regs[0]);
+
+ // Find the last set bit in `self.regs`.
+ for i in 0..num_words {
+ let word_ix = num_words - 1 - i;
+
+ let word = &mut self.regs[word_ix];
+ if *word != 0 {
+ let lzeroes = word.leading_zeros() as usize;
+
+ // Clear that highest bit so we won't find it again.
+ *word &= !(1 << (bits_per_word - 1 - lzeroes));
+
+ return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit);
+ }
+ }
+
+ // All of `self.regs` is 0.
+ None
+ }
+}
+
+impl ExactSizeIterator for RegSetIter {}
+
+/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
+pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayRegisterSet<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "[")?;
+ match self.1 {
+ None => {
+ for w in &self.0.avail {
+ write!(f, " #{:08x}", w)?;
+ }
+ }
+ Some(reginfo) => {
+ let toprcs = reginfo
+ .banks
+ .iter()
+ .map(|b| b.first_toprc + b.num_toprcs)
+ .max()
+ .expect("No register banks");
+ for rc in &reginfo.classes[0..toprcs] {
+ if rc.width == 1 {
+ let bank = &reginfo.banks[rc.bank as usize];
+ write!(f, " {}: ", rc)?;
+ for offset in 0..bank.units {
+ let reg = bank.first_unit + offset;
+ if !rc.contains(reg) {
+ continue;
+ }
+ if !self.0.is_avail(rc, reg) {
+ write!(f, "-")?;
+ continue;
+ }
+ // Display individual registers as either the second letter of their
+ // name or the last digit of their number.
+ // This works for x86 (rax, rbx, ...) and for numbered regs.
+ write!(
+ f,
+ "{}",
+ bank.names
+ .get(offset as usize)
+ .and_then(|name| name.chars().nth(1))
+ .unwrap_or_else(|| char::from_digit(
+ u32::from(offset % 10),
+ 10
+ )
+ .unwrap())
+ )?;
+ }
+ }
+ }
+ }
+ }
+ write!(f, " ]")
+ }
+}
+
+impl fmt::Display for RegisterSet {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.display(None).fmt(f)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::isa::registers::{RegClass, RegClassData};
+ use alloc::vec::Vec;
+
+ // Register classes for testing.
+ const GPR: RegClass = &RegClassData {
+ name: "GPR",
+ index: 0,
+ width: 1,
+ bank: 0,
+ toprc: 0,
+ first: 28,
+ subclasses: 0,
+ mask: [0xf0000000, 0x0000000f, 0],
+ info: &INFO,
+ pinned_reg: None,
+ };
+
+ const DPR: RegClass = &RegClassData {
+ name: "DPR",
+ index: 0,
+ width: 2,
+ bank: 0,
+ toprc: 0,
+ first: 28,
+ subclasses: 0,
+ mask: [0x50000000, 0x0000000a, 0],
+ info: &INFO,
+ pinned_reg: None,
+ };
+
+ const INFO: RegInfo = RegInfo {
+ banks: &[],
+ classes: &[],
+ };
+
+ const RSI_1: RegSetIter = RegSetIter {
+ regs: [0x31415927, 0x27182818, 0x14141356],
+ };
+
+ const RSI_2: RegSetIter = RegSetIter {
+ regs: [0x00000000, 0x00000000, 0x00000000],
+ };
+
+ const RSI_3: RegSetIter = RegSetIter {
+ regs: [0xffffffff, 0xffffffff, 0xffffffff],
+ };
+
+ fn reverse_regset_iteration_work(rsi: &RegSetIter) {
+ // Check the reverse iterator by comparing its output with the forward iterator.
+ let rsi_f = (*rsi).clone();
+ let results_f = rsi_f.collect::<Vec<_>>();
+
+ let mut rsi_r = (*rsi).clone();
+ let mut results_r = Vec::<RegUnit>::new();
+ while let Some(r) = rsi_r.rnext() {
+ results_r.push(r);
+ }
+
+ let len_f = results_f.len();
+ let len_r = results_r.len();
+ assert_eq!(len_f, len_r);
+
+ for i in 0..len_f {
+ assert_eq!(results_f[i], results_r[len_f - 1 - i]);
+ }
+ }
+
+ #[test]
+ fn reverse_regset_iteration() {
+ reverse_regset_iteration_work(&RSI_1);
+ reverse_regset_iteration_work(&RSI_2);
+ reverse_regset_iteration_work(&RSI_3);
+ }
+
+ #[test]
+ fn put_and_take() {
+ let mut regs = RegisterSet::new();
+
+ // `GPR` covers the 8 register units 28-35.
+ assert_eq!(regs.iter(GPR).len(), 8);
+ assert_eq!(regs.iter(GPR).count(), 8);
+ assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
+
+ assert!(regs.is_avail(GPR, 29));
+ regs.take(&GPR, 29);
+ assert!(!regs.is_avail(GPR, 29));
+
+ assert_eq!(regs.iter(GPR).count(), 7);
+ assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
+
+ assert!(regs.is_avail(GPR, 30));
+ regs.take(&GPR, 30);
+ assert!(!regs.is_avail(GPR, 30));
+
+ assert_eq!(regs.iter(GPR).count(), 6);
+ assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
+
+ assert!(regs.is_avail(GPR, 32));
+ regs.take(&GPR, 32);
+ assert!(!regs.is_avail(GPR, 32));
+
+ assert_eq!(regs.iter(GPR).count(), 5);
+ assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
+
+ regs.free(&GPR, 30);
+ assert!(regs.is_avail(GPR, 30));
+ assert!(!regs.is_avail(GPR, 29));
+ assert!(!regs.is_avail(GPR, 32));
+
+ assert_eq!(regs.iter(GPR).count(), 6);
+ assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
+
+ regs.free(&GPR, 32);
+ assert!(regs.is_avail(GPR, 31));
+ assert!(!regs.is_avail(GPR, 29));
+ assert!(regs.is_avail(GPR, 32));
+
+ assert_eq!(regs.iter(GPR).count(), 7);
+ assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
+ }
+
+ #[test]
+ fn interference() {
+ let mut regs1 = RegisterSet::new();
+ let mut regs2 = RegisterSet::new();
+
+ assert!(!regs1.interferes_with(&regs2));
+ regs1.take(&GPR, 32);
+ assert!(!regs1.interferes_with(&regs2));
+ regs2.take(&GPR, 31);
+ assert!(!regs1.interferes_with(&regs2));
+ regs1.intersect(&regs2);
+ assert!(regs1.interferes_with(&regs2));
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/reload.rs b/third_party/rust/cranelift-codegen/src/regalloc/reload.rs
new file mode 100644
index 0000000000..cdafb68af8
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/reload.rs
@@ -0,0 +1,485 @@
+//! Reload pass
+//!
+//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
+//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
+//! get a value with register affinity, and operands expecting a stack slot will get a value with
+//! stack affinity.
+//!
+//! The secondary responsibility of the reload pass is to reuse values in registers as much as
+//! possible to minimize the number of `fill` instructions needed. This must not cause the register
+//! pressure limits to be exceeded.
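+//!
+//! As a purely illustrative sketch (the value numbers are invented here), an instruction that
+//! needs a register operand but is handed a spilled value, such as
+//!
+//! ```text
+//! v2 = iadd v1, v3        ; v1 currently has stack affinity
+//! ```
+//!
+//! is rewritten so that the operand is first filled into a register:
+//!
+//! ```text
+//! v9 = fill v1
+//! v2 = iadd v9, v3
+//! ```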
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::entity::{SparseMap, SparseMapValue};
+use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
+use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc};
+use crate::isa::RegClass;
+use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
+use crate::regalloc::liveness::Liveness;
+use crate::timing;
+use crate::topo_order::TopoOrder;
+use alloc::vec::Vec;
+use log::debug;
+
+/// Reusable data structures for the reload pass.
+pub struct Reload {
+ candidates: Vec<ReloadCandidate>,
+ reloads: SparseMap<Value, ReloadedValue>,
+}
+
+/// Context data structure that gets instantiated once per pass.
+struct Context<'a> {
+ cur: EncCursor<'a>,
+
+ // Cached ISA information.
+ // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
+ encinfo: EncInfo,
+
+ // References to contextual data structures we need.
+ domtree: &'a DominatorTree,
+ liveness: &'a mut Liveness,
+ topo: &'a mut TopoOrder,
+
+ candidates: &'a mut Vec<ReloadCandidate>,
+ reloads: &'a mut SparseMap<Value, ReloadedValue>,
+}
+
+impl Reload {
+ /// Create a new blank reload pass.
+ pub fn new() -> Self {
+ Self {
+ candidates: Vec::new(),
+ reloads: SparseMap::new(),
+ }
+ }
+
+ /// Clear all data structures in this reload pass.
+ pub fn clear(&mut self) {
+ self.candidates.clear();
+ self.reloads.clear();
+ }
+
+ /// Run the reload algorithm over `func`.
+ pub fn run(
+ &mut self,
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ domtree: &DominatorTree,
+ liveness: &mut Liveness,
+ topo: &mut TopoOrder,
+ tracker: &mut LiveValueTracker,
+ ) {
+ let _tt = timing::ra_reload();
+ debug!("Reload for:\n{}", func.display(isa));
+ let mut ctx = Context {
+ cur: EncCursor::new(func, isa),
+ encinfo: isa.encoding_info(),
+ domtree,
+ liveness,
+ topo,
+ candidates: &mut self.candidates,
+ reloads: &mut self.reloads,
+ };
+ ctx.run(tracker)
+ }
+}
+
+/// A reload candidate.
+///
+/// This represents a stack value that is used by the current instruction where a register is
+/// needed.
+struct ReloadCandidate {
+ argidx: usize,
+ value: Value,
+ regclass: RegClass,
+}
+
+/// A reloaded value.
+///
+/// This represents a stack value that has been reloaded into a register value.
+struct ReloadedValue {
+ stack: Value,
+ reg: Value,
+}
+
+impl SparseMapValue<Value> for ReloadedValue {
+ fn key(&self) -> Value {
+ self.stack
+ }
+}
+
+impl<'a> Context<'a> {
+ fn run(&mut self, tracker: &mut LiveValueTracker) {
+ self.topo.reset(self.cur.func.layout.blocks());
+ while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
+ self.visit_block(block, tracker);
+ }
+ }
+
+ fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+ debug!("Reloading {}:", block);
+ self.visit_block_header(block, tracker);
+ tracker.drop_dead_params();
+
+ // visit_block_header() places us at the first interesting instruction in the block.
+ while let Some(inst) = self.cur.current_inst() {
+ if !self.cur.func.dfg[inst].opcode().is_ghost() {
+ // This instruction either has an encoding or has ABI constraints, so visit it to
+ // insert spills and fills as needed.
+ let encoding = self.cur.func.encodings[inst];
+ self.visit_inst(block, inst, encoding, tracker);
+ tracker.drop_dead(inst);
+ } else {
+ // This is a ghost instruction with no encoding and no extra constraints, so we can
+ // just skip over it.
+ self.cur.next_inst();
+ }
+ }
+ }
+
+ /// Process the block parameters, then move to the next instruction in the block to be processed.
+ fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+ let (liveins, args) = tracker.block_top(
+ block,
+ &self.cur.func.dfg,
+ self.liveness,
+ &self.cur.func.layout,
+ self.domtree,
+ );
+
+ if self.cur.func.layout.entry_block() == Some(block) {
+ debug_assert_eq!(liveins.len(), 0);
+ self.visit_entry_params(block, args);
+ } else {
+ self.visit_block_params(block, args);
+ }
+ }
+
+ /// Visit the parameters on the entry block.
+ /// These values have ABI constraints from the function signature.
+ fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) {
+ debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
+ self.cur.goto_first_inst(block);
+
+ for (arg_idx, arg) in args.iter().enumerate() {
+ let abi = self.cur.func.signature.params[arg_idx];
+ match abi.location {
+ ArgumentLoc::Reg(_) => {
+ if arg.affinity.is_stack() {
+ // An incoming register parameter was spilled. Replace the parameter value
+ // with a temporary register value that is immediately spilled.
+ let reg = self
+ .cur
+ .func
+ .dfg
+ .replace_block_param(arg.value, abi.value_type);
+ let affinity = Affinity::abi(&abi, self.cur.isa);
+ self.liveness.create_dead(reg, block, affinity);
+ self.insert_spill(block, arg.value, reg);
+ }
+ }
+ ArgumentLoc::Stack(_) => {
+ debug_assert!(arg.affinity.is_stack());
+ }
+ ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
+ }
+ }
+ }
+
+ fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) {
+ self.cur.goto_first_inst(block);
+ }
+
+ /// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
+ /// that needs processing.
+ fn visit_inst(
+ &mut self,
+ block: Block,
+ inst: Inst,
+ encoding: Encoding,
+ tracker: &mut LiveValueTracker,
+ ) {
+ self.cur.use_srcloc(inst);
+
+ // Get the operand constraints for `inst` that we are trying to satisfy.
+ let constraints = self.encinfo.operand_constraints(encoding);
+
+ // Identify reload candidates.
+ debug_assert!(self.candidates.is_empty());
+ self.find_candidates(inst, constraints);
+
+ // If we find a copy from a stack slot to the same stack slot, replace
+ // it with a `copy_nop` but otherwise ignore it. In particular, don't
+ // generate a reload immediately followed by a spill. The `copy_nop`
+ // has a zero-length encoding, so will disappear at emission time.
+ if let InstructionData::Unary {
+ opcode: Opcode::Copy,
+ arg,
+ } = self.cur.func.dfg[inst]
+ {
+ let dst_vals = self.cur.func.dfg.inst_results(inst);
+ if dst_vals.len() == 1 {
+ let dst_val = dst_vals[0];
+ let can_transform = match (
+ self.cur.func.locations[arg],
+ self.cur.func.locations[dst_val],
+ ) {
+ (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => {
+ src_slot == dst_slot && {
+ let src_ty = self.cur.func.dfg.value_type(arg);
+ let dst_ty = self.cur.func.dfg.value_type(dst_val);
+ debug_assert!(src_ty == dst_ty);
+ // This limits the transformation to copies of the
+ // types: I128 I64 I32 I16 I8 F64 and F32, since that's
+ // the set of `copy_nop` encodings available.
+ src_ty.is_int() || src_ty.is_float()
+ }
+ }
+ _ => false,
+ };
+ if can_transform {
+ // Convert the instruction into a `copy_nop`.
+ self.cur.func.dfg.replace(inst).copy_nop(arg);
+ let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
+ debug_assert!(ok, "copy_nop encoding missing for this type");
+
+ // And move on to the next insn.
+ self.reloads.clear();
+ let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
+ self.cur.next_inst();
+ self.candidates.clear();
+ return;
+ }
+ }
+ }
+
+ // Deal with all instructions not special-cased by the immediately
+ // preceding fragment.
+ if let InstructionData::Unary {
+ opcode: Opcode::Copy,
+ ..
+ } = self.cur.func.dfg[inst]
+ {
+ self.reload_copy_candidates(inst);
+ } else {
+ self.reload_inst_candidates(block, inst);
+ }
+
+ // TODO: Reuse reloads for future instructions.
+ self.reloads.clear();
+
+ let (_throughs, _kills, defs) =
+ tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
+
+ // Advance to the next instruction so we can insert any spills after the instruction.
+ self.cur.next_inst();
+
+ // Rewrite register defs that need to be spilled.
+ //
+ // Change:
+ //
+ // v2 = inst ...
+ //
+ // Into:
+ //
+ // v7 = inst ...
+ // v2 = spill v7
+ //
+ // That way, we don't need to rewrite all future uses of v2.
+ if let Some(constraints) = constraints {
+ for (lv, op) in defs.iter().zip(constraints.outs) {
+ if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
+ if let InstructionData::Unary {
+ opcode: Opcode::Copy,
+ arg,
+ } = self.cur.func.dfg[inst]
+ {
+ self.cur.func.dfg.replace(inst).spill(arg);
+ let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
+ debug_assert!(ok);
+ } else {
+ let value_type = self.cur.func.dfg.value_type(lv.value);
+ let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
+ self.liveness.create_dead(reg, inst, Affinity::new(op));
+ self.insert_spill(block, lv.value, reg);
+ }
+ }
+ }
+ }
+
+ // Same thing for spilled call return values.
+ let retvals = &defs[self.cur.func.dfg[inst]
+ .opcode()
+ .constraints()
+ .num_fixed_results()..];
+ if !retvals.is_empty() {
+ let sig = self
+ .cur
+ .func
+ .dfg
+ .call_signature(inst)
+ .expect("Extra results on non-call instruction");
+ for (i, lv) in retvals.iter().enumerate() {
+ let abi = self.cur.func.dfg.signatures[sig].returns[i];
+ debug_assert!(
+ abi.location.is_reg(),
+ "expected reg; got {:?}",
+ abi.location
+ );
+ if lv.affinity.is_stack() {
+ let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
+ self.liveness
+ .create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa));
+ self.insert_spill(block, lv.value, reg);
+ }
+ }
+ }
+ }
+
+ // Reload the current candidates for the given `inst`.
+ fn reload_inst_candidates(&mut self, block: Block, inst: Inst) {
+ // Insert fill instructions before `inst` and replace `cand.value` with the filled value.
+ for cand in self.candidates.iter_mut() {
+ if let Some(reload) = self.reloads.get(cand.value) {
+ cand.value = reload.reg;
+ continue;
+ }
+
+ let reg = self.cur.ins().fill(cand.value);
+ let fill = self.cur.built_inst();
+
+ self.reloads.insert(ReloadedValue {
+ stack: cand.value,
+ reg,
+ });
+ cand.value = reg;
+
+ // Create a live range for the new reload.
+ let affinity = Affinity::Reg(cand.regclass.into());
+ self.liveness.create_dead(reg, fill, affinity);
+ self.liveness
+ .extend_locally(reg, block, inst, &self.cur.func.layout);
+ }
+
+ // Rewrite instruction arguments.
+ //
+ // Only rewrite those arguments that were identified as candidates. This leaves block
+ // arguments on branches as-is without rewriting them. A spilled block argument needs to stay
+ // spilled because the matching block parameter is going to be in the same virtual register
+ // and therefore the same stack slot as the block argument value.
+ if !self.candidates.is_empty() {
+ let args = self.cur.func.dfg.inst_args_mut(inst);
+ while let Some(cand) = self.candidates.pop() {
+ args[cand.argidx] = cand.value;
+ }
+ }
+ }
+
+ // Reload the current candidates for the given copy `inst`.
+ //
+ // As an optimization, replace a copy instruction where the argument has been spilled with
+ // a fill instruction.
+ fn reload_copy_candidates(&mut self, inst: Inst) {
+ // Copy instructions can only have one argument.
+ debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);
+
+ if let Some(cand) = self.candidates.pop() {
+ self.cur.func.dfg.replace(inst).fill(cand.value);
+ let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
+ debug_assert!(ok);
+ }
+ }
+
+ // Find reload candidates for `inst` and add them to `self.candidates`.
+ //
+ // These are uses of spilled values where the operand constraint requires a register.
+ fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
+ let args = self.cur.func.dfg.inst_args(inst);
+
+ if let Some(constraints) = constraints {
+ for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
+ if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
+ self.candidates.push(ReloadCandidate {
+ argidx,
+ value: arg,
+ regclass: op.regclass,
+ })
+ }
+ }
+ }
+
+ // If we only have the fixed arguments, we're done now.
+ let offset = self.cur.func.dfg[inst]
+ .opcode()
+ .constraints()
+ .num_fixed_value_arguments();
+ if args.len() == offset {
+ return;
+ }
+ let var_args = &args[offset..];
+
+ // Handle ABI arguments.
+ if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
+ handle_abi_args(
+ self.candidates,
+ &self.cur.func.dfg.signatures[sig].params,
+ var_args,
+ offset,
+ self.cur.isa,
+ self.liveness,
+ );
+ } else if self.cur.func.dfg[inst].opcode().is_return() {
+ handle_abi_args(
+ self.candidates,
+ &self.cur.func.signature.returns,
+ var_args,
+ offset,
+ self.cur.isa,
+ self.liveness,
+ );
+ }
+ }
+
+ /// Insert a spill at `pos` and update data structures.
+ ///
+ /// - Insert `stack = spill reg` at `pos`, and assign an encoding.
+ /// - Move the `stack` live range starting point to the new instruction.
+ /// - Extend the `reg` live range to reach the new instruction.
+ fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) {
+ self.cur.ins().with_result(stack).spill(reg);
+ let inst = self.cur.built_inst();
+
+ // Update live ranges.
+ self.liveness.move_def_locally(stack, inst);
+ self.liveness
+ .extend_locally(reg, block, inst, &self.cur.func.layout);
+ }
+}
+
+/// Find reload candidates in the instruction's ABI variable arguments. This handles both
+/// return values and call arguments.
+fn handle_abi_args(
+ candidates: &mut Vec<ReloadCandidate>,
+ abi_types: &[AbiParam],
+ var_args: &[Value],
+ offset: usize,
+ isa: &dyn TargetIsa,
+ liveness: &Liveness,
+) {
+ debug_assert_eq!(abi_types.len(), var_args.len());
+ for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
+ if abi.location.is_reg() {
+ let lv = liveness.get(arg).expect("Missing live range for ABI arg");
+ if lv.affinity.is_stack() {
+ candidates.push(ReloadCandidate {
+ argidx,
+ value: arg,
+ regclass: isa.regclass_for_abi_type(abi.value_type),
+ });
+ }
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs b/third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs
new file mode 100644
index 0000000000..2686c57277
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs
@@ -0,0 +1,65 @@
+use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::inst_predicates::is_safepoint;
+use crate::ir::{Function, InstBuilder};
+use crate::isa::TargetIsa;
+use crate::regalloc::live_value_tracker::LiveValueTracker;
+use crate::regalloc::liveness::Liveness;
+use alloc::vec::Vec;
+
+fn insert_and_encode_safepoint<'f>(
+ pos: &mut FuncCursor<'f>,
+ tracker: &LiveValueTracker,
+ isa: &dyn TargetIsa,
+) {
+ // Iterate through all live values, collect only the references.
+ let live_ref_values = tracker
+ .live()
+ .iter()
+ .filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref())
+ .map(|live_val| live_val.value)
+ .collect::<Vec<_>>();
+
+ if !live_ref_values.is_empty() {
+ pos.ins().safepoint(&live_ref_values);
+ // Move cursor to the new safepoint instruction to encode it.
+ if let Some(inst) = pos.prev_inst() {
+ let ok = pos.func.update_encoding(inst, isa).is_ok();
+ debug_assert!(ok);
+ }
+ // Restore cursor position.
+ pos.next_inst();
+ }
+}
+
+// The emit_stack_maps() function traverses the function's blocks in layout order, analyzing each
+// instruction to track the liveness of its defs and operands.
+pub fn emit_stack_maps(
+ func: &mut Function,
+ domtree: &DominatorTree,
+ liveness: &Liveness,
+ tracker: &mut LiveValueTracker,
+ isa: &dyn TargetIsa,
+) {
+ let mut curr = func.layout.entry_block();
+
+ while let Some(block) = curr {
+ tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree);
+ tracker.drop_dead_params();
+ let mut pos = FuncCursor::new(func);
+
+ // From the top of the block, step through the instructions.
+ pos.goto_top(block);
+
+ while let Some(inst) = pos.next_inst() {
+ if is_safepoint(&pos.func, inst) {
+ insert_and_encode_safepoint(&mut pos, tracker, isa);
+ }
+
+ // Process the instruction and get rid of dead values.
+ tracker.process_inst(inst, &pos.func.dfg, liveness);
+ tracker.drop_dead(inst);
+ }
+ curr = func.layout.next_block(block);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/solver.rs b/third_party/rust/cranelift-codegen/src/regalloc/solver.rs
new file mode 100644
index 0000000000..07b3aba9b9
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/solver.rs
@@ -0,0 +1,1383 @@
+//! Constraint solver for register coloring.
+//!
+//! The coloring phase of SSA-based register allocation is very simple in theory, but in practice
+//! it is complicated by the various constraints imposed by individual instructions:
+//!
+//! - Call and return instructions have to satisfy ABI requirements for arguments and return
+//! values.
+//! - Values live across a call must be in a callee-saved register.
+//! - Some instructions have operand constraints such as register sub-classes, fixed registers, or
+//! tied operands.
+//!
+//! # The instruction register coloring problem
+//!
+//! The constraint solver addresses the problem of satisfying the constraints of a single
+//! instruction. We have:
+//!
+//! - A set of values that are live in registers before the instruction, with current register
+//! assignments. Some are used by the instruction, some are not.
+//! - A subset of the live register values that are killed by the instruction.
+//! - A set of new register values that are defined by the instruction.
+//!
+//! We are not concerned with stack values at all. The reload pass ensures that all values required
+//! to be in a register by the instruction are already in a register.
+//!
+//! A solution to the register coloring problem consists of:
+//!
+//! - Register reassignment prescriptions for a subset of the live register values.
+//! - Register assignments for the instruction's defined values.
+//!
+//! The solution ensures that when live registers are reassigned as prescribed before the
+//! instruction, all its operand constraints are satisfied, and the definition assignments won't
+//! conflict.
+//!
+//! # Register diversions and global interference
+//!
+//! We can divert register values temporarily to satisfy constraints, but we need to put the
+//! values back into their originally assigned register locations before leaving the block.
+//! Otherwise, values won't be in the right register at the entry point of other blocks.
+//!
+//! Some values are *local*, and we don't need to worry about putting those values back since they
+//! are not used in any other blocks.
+//!
+//! When we assign register locations to defines, we are assigning both the register used locally
+//! immediately after the instruction and the register used globally when the defined value is used
+//! in a different block. We need to avoid interference both locally at the instruction and globally.
+//!
+//! We have multiple mappings of values to registers:
+//!
+//! 1. The initial local mapping before the instruction. This includes any diversions from previous
+//! instructions in the block, but not diversions for the current instruction.
+//! 2. The local mapping after applying the additional reassignments required to satisfy the
+//! constraints of the current instruction.
+//! 3. The local mapping after the instruction. This excludes values killed by the instruction and
+//! includes values defined by the instruction.
+//! 4. The global mapping after the instruction. This mapping only contains values with global live
+//! ranges, and it does not include any diversions.
+//!
+//! All four mappings must be kept free of interference.
+//!
+//! # Problems handled by previous passes
+//!
+//! The constraint solver can only reassign registers, it can't create spill code, so some
+//! constraints are handled by earlier passes:
+//!
+//! - There will be enough free registers available for the defines. Ensuring this is the primary
+//! purpose of the spilling phase.
+//! - When the same value is used for multiple operands, the intersection of operand constraints is
+//! non-empty. The spilling phase will insert copies to handle mutually incompatible constraints,
+//! such as when the same value is bound to two different function arguments.
+//! - Values bound to tied operands must be killed by the instruction. Also enforced by the
+//! spiller.
+//! - Values used by register operands are in registers, and values used by stack operands are in
+//! stack slots. This is enforced by the reload pass.
+//!
+//! # Solver algorithm
+//!
+//! The goal of the solver is to satisfy the instruction constraints with a minimal number of
+//! register assignments before the instruction.
+//!
+//! 1. Compute the set of values used by operands with a fixed register constraint that isn't
+//! already satisfied. These are mandatory predetermined reassignments.
+//! 2. Compute the set of values that don't satisfy their register class constraint. These are
+//! mandatory reassignments that we need to solve.
+//! 3. Add the set of defines to the set of variables computed in 2. Exclude defines tied to an
+//! input operand since their value is pre-determined.
+//!
+//! The set of values computed in 2. and 3. are the *variables* for the solver. Given a set of
+//! variables, we can also compute a set of allocatable registers by removing the variables from
+//! the set of assigned registers before the instruction.
+//!
+//! 1. For each variable, compute its domain as the intersection of the allocatable registers and
+//! its register class constraint.
+//! 2. Sort the variables in order of increasing domain size.
+//! 3. Search for a solution that assigns each variable a register from its domain without
+//! interference between variables.
+//!
+//! If the search fails to find a solution, we may need to reassign more registers. Find an
+//! appropriate candidate among the set of live register values, add it as a variable and start
+//! over.
+
+use super::RegisterSet;
+use crate::dbg::DisplayList;
+use crate::entity::{SparseMap, SparseMapValue};
+use crate::ir::Value;
+use crate::isa::{RegClass, RegUnit};
+use crate::regalloc::register_set::RegSetIter;
+use alloc::vec::Vec;
+use core::cmp;
+use core::fmt;
+use core::mem;
+use core::u16;
+use log::debug;
+
+/// A variable in the constraint problem.
+///
+/// Variables represent register values that can be assigned to any register unit within the
+/// constraint register class. This includes live register values that can be reassigned to a new
+/// register and values defined by the instruction which must be assigned to a register.
+///
+/// Besides satisfying the register class constraint, variables must also be mutually
+/// non-interfering in up to three contexts:
+///
+/// 1. Input side live registers, after applying all the reassignments.
+/// 2. Output side live registers, considering all the local register diversions.
+/// 3. Global live register, not considering any local diversions.
+///
+pub struct Variable {
+ /// The value whose register assignment we're looking for.
+ pub value: Value,
+
+ /// Original register unit holding this live value before the instruction, or `None` for a
+ /// value that is defined by the instruction.
+ from: Option<RegUnit>,
+
+ /// Avoid interference on the input side.
+ is_input: bool,
+
+ /// Avoid interference on the output side.
+ is_output: bool,
+
+ /// Avoid interference with the global registers.
+ is_global: bool,
+
+ /// Number of registers available in the domain of this variable.
+ domain: u16,
+
+ /// The assigned register unit after a full solution was found.
+ pub solution: RegUnit,
+
+ /// Any solution must belong to the constraint register class.
+ constraint: RegClass,
+}
+
+impl Variable {
+ fn new_live(value: Value, constraint: RegClass, from: RegUnit, is_output: bool) -> Self {
+ Self {
+ value,
+ constraint,
+ from: Some(from),
+ is_input: true,
+ is_output,
+ is_global: false,
+ domain: 0,
+ solution: !0,
+ }
+ }
+
+ fn new_def(value: Value, constraint: RegClass, is_global: bool) -> Self {
+ Self {
+ value,
+ constraint,
+ from: None,
+ is_input: false,
+ is_output: true,
+ is_global,
+ domain: 0,
+ solution: !0,
+ }
+ }
+
+ /// Does this variable represent a value defined by the current instruction?
+ pub fn is_define(&self) -> bool {
+ self.from.is_none()
+ }
+
+ /// Get an iterator over possible register choices, given the available registers on the input
+ /// and output sides as well as the available global register set.
+ fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter {
+ if !self.is_output {
+ debug_assert!(!self.is_global, "Global implies output");
+ debug_assert!(self.is_input, "Missing interference set");
+ return iregs.iter(self.constraint);
+ }
+
+ let mut r = oregs.clone();
+ if self.is_input {
+ r.intersect(iregs);
+ }
+ if self.is_global {
+ r.intersect(gregs);
+ }
+ r.iter(self.constraint)
+ }
+}
+
+impl fmt::Display for Variable {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}({}", self.value, self.constraint)?;
+ if let Some(reg) = self.from {
+ write!(f, ", from {}", self.constraint.info.display_regunit(reg))?;
+ }
+ if self.is_input {
+ write!(f, ", in")?;
+ }
+ if self.is_output {
+ write!(f, ", out")?;
+ }
+ if self.is_global {
+ write!(f, ", global")?;
+ }
+ if self.is_define() {
+ write!(f, ", def")?;
+ }
+ if self.domain > 0 {
+ write!(f, ", {}", self.domain)?;
+ }
+ write!(f, ")")
+ }
+}
+
+#[derive(Clone, Debug)]
+pub struct Assignment {
+ pub value: Value,
+ pub from: RegUnit,
+ pub to: RegUnit,
+ pub rc: RegClass,
+}
+
+impl SparseMapValue<Value> for Assignment {
+ fn key(&self) -> Value {
+ self.value
+ }
+}
+
+impl fmt::Display for Assignment {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let ri = self.rc.info;
+ write!(
+ f,
+ "{}:{}({} -> {})",
+ self.value,
+ self.rc,
+ ri.display_regunit(self.from),
+ ri.display_regunit(self.to)
+ )
+ }
+}
+
+/// A move operation between two registers or between a register and an emergency spill slot.
+#[derive(Clone, PartialEq)]
+pub enum Move {
+ Reg {
+ value: Value,
+ rc: RegClass,
+ from: RegUnit,
+ to: RegUnit,
+ },
+ #[allow(dead_code)] // rustc doesn't see it isn't dead.
+ Spill {
+ value: Value,
+ rc: RegClass,
+ from: RegUnit,
+ to_slot: usize,
+ },
+ Fill {
+ value: Value,
+ rc: RegClass,
+ from_slot: usize,
+ to: RegUnit,
+ },
+}
+
+impl Move {
+ /// Create a register move from an assignment, but not for identity assignments.
+ fn with_assignment(a: &Assignment) -> Option<Self> {
+ if a.from != a.to {
+ Some(Self::Reg {
+ value: a.value,
+ from: a.from,
+ to: a.to,
+ rc: a.rc,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Get the "from" register and register class, if possible.
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::wrong_self_convention))]
+ fn from_reg(&self) -> Option<(RegClass, RegUnit)> {
+ match *self {
+ Self::Reg { rc, from, .. } | Self::Spill { rc, from, .. } => Some((rc, from)),
+ Self::Fill { .. } => None,
+ }
+ }
+
+ /// Get the "to" register and register class, if possible.
+ fn to_reg(&self) -> Option<(RegClass, RegUnit)> {
+ match *self {
+ Self::Reg { rc, to, .. } | Self::Fill { rc, to, .. } => Some((rc, to)),
+ Self::Spill { .. } => None,
+ }
+ }
+
+ /// Replace the "to" register with `new` and return the old value.
+ fn replace_to_reg(&mut self, new: RegUnit) -> RegUnit {
+ mem::replace(
+ match *self {
+ Self::Reg { ref mut to, .. } | Self::Fill { ref mut to, .. } => to,
+ Self::Spill { .. } => panic!("No to register in a spill {}", self),
+ },
+ new,
+ )
+ }
+
+ /// Convert this `Reg` move to a spill to `slot` and return the old "to" register.
+ fn change_to_spill(&mut self, slot: usize) -> RegUnit {
+ match self.clone() {
+ Self::Reg {
+ value,
+ rc,
+ from,
+ to,
+ } => {
+ *self = Self::Spill {
+ value,
+ rc,
+ from,
+ to_slot: slot,
+ };
+ to
+ }
+ _ => panic!("Expected reg move: {}", self),
+ }
+ }
+
+ /// Get the value being moved.
+ fn value(&self) -> Value {
+ match *self {
+ Self::Reg { value, .. } | Self::Fill { value, .. } | Self::Spill { value, .. } => value,
+ }
+ }
+
+ /// Get the associated register class.
+ fn rc(&self) -> RegClass {
+ match *self {
+ Self::Reg { rc, .. } | Self::Fill { rc, .. } | Self::Spill { rc, .. } => rc,
+ }
+ }
+}
+
+impl fmt::Display for Move {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ Self::Reg {
+ value,
+ from,
+ to,
+ rc,
+ } => write!(
+ f,
+ "{}:{}({} -> {})",
+ value,
+ rc,
+ rc.info.display_regunit(from),
+ rc.info.display_regunit(to)
+ ),
+ Self::Spill {
+ value,
+ from,
+ to_slot,
+ rc,
+ } => write!(
+ f,
+ "{}:{}({} -> slot {})",
+ value,
+ rc,
+ rc.info.display_regunit(from),
+ to_slot
+ ),
+ Self::Fill {
+ value,
+ from_slot,
+ to,
+ rc,
+ } => write!(
+ f,
+ "{}:{}(slot {} -> {})",
+ value,
+ rc,
+ from_slot,
+ rc.info.display_regunit(to)
+ ),
+ }
+ }
+}
+
+impl fmt::Debug for Move {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let as_display: &dyn fmt::Display = self;
+ as_display.fmt(f)
+ }
+}
+
+/// Constraint solver for register allocation around a single instruction.
+///
+/// Start by programming in the instruction constraints.
+///
+/// 1. Initialize the solver by calling `reset()` with the set of allocatable registers before the
+/// instruction.
+/// 2. Program the input side constraints: Call `reassign_in()` for all fixed register constraints,
+/// and `add_var()` for any input operands whose constraints are not already satisfied.
+/// 3. Check for conflicts between fixed input assignments and existing live values by calling
+/// `has_fixed_input_conflicts()`. Resolve any conflicts by calling `add_var()` with the
+/// conflicting values.
+/// 4. Prepare for adding output side constraints by calling `inputs_done()`.
+/// 5. Add any killed register values that no longer cause interference on the output side by
+/// calling `add_kill()`.
+/// 6. Program the output side constraints: Call `add_fixed_output()` for all fixed register
+/// constraints and `add_def()` for free defines. Resolve fixed output conflicts by calling
+/// `add_through_var()`.
+///
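+/// A condensed sketch of that sequence (the values, register classes, and register units below
+/// are placeholders, and the output-side programming and the final search are elided):
+///
+/// ```ignore
+/// let mut solver = Solver::new();
+/// solver.reset(&allocatable_regs);
+///
+/// // Input side: fixed reassignments first, then variables for unsatisfied operands.
+/// solver.reassign_in(v1, gpr, r3, r0);
+/// solver.add_var(v2, gpr, r5);
+/// debug_assert!(!solver.has_fixed_input_conflicts());
+/// solver.inputs_done();
+///
+/// // Killed input values make their registers available on the output side.
+/// solver.add_kill(v2, gpr, r5);
+///
+/// // Then program the output side with `add_fixed_output()` / `add_def()` and solve.
+/// ```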
+pub struct Solver {
+ /// Register reassignments that are required or decided as part of a full solution.
+ /// This includes identity assignments for values that are already in the correct fixed
+ /// register.
+ assignments: SparseMap<Value, Assignment>,
+
+ /// Variables are the values that should be reassigned as part of a solution.
+ /// Values with fixed register constraints are not considered variables. They are represented
+ /// in the `assignments` vector if necessary.
+ vars: Vec<Variable>,
+
+ /// Are we finished adding input-side constraints? This changes the meaning of the `regs_in`
+ /// and `regs_out` register sets.
+ inputs_done: bool,
+
+ /// Available registers on the input side of the instruction.
+ ///
+ /// While we're adding input constraints (`!inputs_done`):
+ ///
+ /// - Live values on the input side are marked as unavailable.
+ /// - The 'from' registers of fixed input reassignments are marked as available as they are
+ /// added.
+ /// - Input-side variables are marked as available.
+ ///
+ /// After finishing input constraints (`inputs_done`):
+ ///
+ /// - Live values on the input side are marked as unavailable.
+ /// - The 'to' registers of fixed input reassignments are marked as unavailable.
+ /// - Input-side variables are marked as available.
+ ///
+ regs_in: RegisterSet,
+
+ /// Available registers on the output side of the instruction / fixed input scratch space.
+ ///
+ /// While we're adding input constraints (`!inputs_done`):
+ ///
+ /// - The 'to' registers of fixed input reassignments are marked as unavailable.
+ ///
+ /// After finishing input constraints (`inputs_done`):
+ ///
+ /// - Live-through values are marked as unavailable.
+ /// - Fixed output assignments are marked as unavailable.
+ /// - Live-through variables are marked as available.
+ ///
+ regs_out: RegisterSet,
+
+ /// List of register moves scheduled to avoid conflicts.
+ ///
+ /// This is used as working space by the `schedule_moves()` function.
+ moves: Vec<Move>,
+
+ /// List of pending fill moves. This is only used during `schedule_moves()`.
+ fills: Vec<Move>,
+}
+
+/// Interface for programming the constraints into the solver.
+impl Solver {
+ /// Create a new empty solver.
+ pub fn new() -> Self {
+ Self {
+ assignments: SparseMap::new(),
+ vars: Vec::new(),
+ inputs_done: false,
+ regs_in: RegisterSet::new(),
+ regs_out: RegisterSet::new(),
+ moves: Vec::new(),
+ fills: Vec::new(),
+ }
+ }
+
+ /// Clear all data structures in this coloring pass.
+ pub fn clear(&mut self) {
+ self.assignments.clear();
+ self.vars.clear();
+ self.inputs_done = false;
+ self.regs_in = RegisterSet::new();
+ self.regs_out = RegisterSet::new();
+ self.moves.clear();
+ self.fills.clear();
+ }
+
+ /// Reset the solver state and prepare solving for a new instruction with an initial set of
+ /// allocatable registers.
+ ///
+ /// The `regs` set is the allocatable registers before any reassignments are applied.
+ pub fn reset(&mut self, regs: &RegisterSet) {
+ self.assignments.clear();
+ self.vars.clear();
+ self.inputs_done = false;
+ self.regs_in = regs.clone();
+ // Used for tracking fixed input assignments while `!inputs_done`:
+ self.regs_out = RegisterSet::new();
+ self.moves.clear();
+ self.fills.clear();
+ }
+
+ /// Add a fixed input reassignment of `value`.
+ ///
+ /// This means that `value` must be assigned to `to` and can't become a variable. Call with
+ /// `from == to` to ensure that `value` is not reassigned from its existing register location.
+ ///
+ /// In either case, `to` will not be available for variables on the input side of the
+ /// instruction.
+ pub fn reassign_in(&mut self, value: Value, rc: RegClass, from: RegUnit, to: RegUnit) {
+ debug!(
+ "reassign_in({}:{}, {} -> {})",
+ value,
+ rc,
+ rc.info.display_regunit(from),
+ rc.info.display_regunit(to)
+ );
+ debug_assert!(!self.inputs_done);
+ if self.regs_in.is_avail(rc, from) {
+ // It looks like `value` was already removed from the register set. It must have been
+ // added as a variable previously. A fixed constraint beats a variable, so convert it.
+ if let Some(idx) = self.vars.iter().position(|v| v.value == value) {
+ let v = self.vars.remove(idx);
+ debug!("-> converting variable {} to a fixed constraint", v);
+ // The spiller is responsible for ensuring that all constraints on the uses of a
+ // value are compatible.
+ debug_assert!(
+ v.constraint.contains(to),
+ "Incompatible constraints for {}",
+ value
+ );
+ } else {
+ panic!("Invalid from register for fixed {} constraint", value);
+ }
+ }
+ self.regs_in.free(rc, from);
+ self.regs_out.take(rc, to);
+ self.assignments.insert(Assignment {
+ value,
+ rc,
+ from,
+ to,
+ });
+ }
+
+ /// Add a variable representing an input side value with an existing register assignment.
+ ///
+ /// A variable is a value that should be reassigned to something in the `constraint` register
+ /// class.
+ ///
+ /// It is assumed initially that the value is also live on the output side of the instruction.
+ /// This can be changed by calling to `add_kill()`.
+ ///
+ /// This function can only be used before calling `inputs_done()`. Afterwards, more input-side
+ /// variables can be added by calling `add_killed_var()` and `add_through_var()`.
+ pub fn add_var(&mut self, value: Value, constraint: RegClass, from: RegUnit) {
+ debug!(
+ "add_var({}:{}, from={})",
+ value,
+ constraint,
+ constraint.info.display_regunit(from)
+ );
+ debug_assert!(!self.inputs_done);
+ self.add_live_var(value, constraint, from, true);
+ }
+
+ /// Add an extra input-side variable representing a value that is killed by the current
+ /// instruction.
+ ///
+ /// This function should be called after `inputs_done()` only. Use `add_var()` before.
+ pub fn add_killed_var(&mut self, value: Value, rc: RegClass, from: RegUnit) {
+ debug!(
+ "add_killed_var({}:{}, from={})",
+ value,
+ rc,
+ rc.info.display_regunit(from)
+ );
+ debug_assert!(self.inputs_done);
+ self.add_live_var(value, rc, from, false);
+ }
+
+ /// Add an extra input-side variable representing a value that is live through the current
+ /// instruction.
+ ///
+ /// This function should be called after `inputs_done()` only. Use `add_var()` before.
+ pub fn add_through_var(&mut self, value: Value, rc: RegClass, from: RegUnit) {
+ debug!(
+ "add_through_var({}:{}, from={})",
+ value,
+ rc,
+ rc.info.display_regunit(from)
+ );
+ debug_assert!(self.inputs_done);
+ self.add_live_var(value, rc, from, true);
+ }
+
+ /// Shared code for `add_var`, `add_killed_var`, and `add_through_var`.
+ ///
+ /// Add a variable that is live before the instruction, and possibly live through. Merge
+ /// constraints if the value has already been added as a variable or fixed assignment.
+ fn add_live_var(&mut self, value: Value, rc: RegClass, from: RegUnit, live_through: bool) {
+ // Check for existing entries for this value.
+ if !self.can_add_var(rc, from) {
+ // There could be an existing variable entry.
+ if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
+ // We have an existing variable entry for `value`. Combine the constraints.
+ if let Some(rc) = v.constraint.intersect(rc) {
+ debug!("-> combining constraint with {} yields {}", v, rc);
+ v.constraint = rc;
+ return;
+ } else {
+ // The spiller should have made sure the same value is not used with disjoint
+ // constraints.
+ panic!("Incompatible constraints: {} + {}", rc, v)
+ }
+ }
+
+ // There is no variable, so it must be a fixed reassignment.
+ if let Some(a) = self.assignments.get(value) {
+ debug!("-> already fixed assignment {}", a);
+ debug_assert!(rc.contains(a.to), "Incompatible constraints for {}", value);
+ return;
+ }
+
+ debug!("{}", self);
+ panic!("Wrong from register for {}", value);
+ }
+
+ let new_var = Variable::new_live(value, rc, from, live_through);
+ debug!("-> new var: {}", new_var);
+
+ self.regs_in.free(rc, from);
+ if self.inputs_done && live_through {
+ self.regs_out.free(rc, from);
+ }
+ self.vars.push(new_var);
+ }
+
+ /// Check for conflicts between fixed input assignments and existing live values.
+ ///
+ /// Returns true if one of the live values conflicts with a fixed input assignment. Such a
+ /// conflicting value must be turned into a variable.
+ pub fn has_fixed_input_conflicts(&self) -> bool {
+ debug_assert!(!self.inputs_done);
+ // The `to` side of the fixed input reassignments has been taken from `regs_out`, so any
+ // interference with the live values still recorded in `regs_in` indicates a conflict.
+ self.regs_out.interferes_with(&self.regs_in)
+ }
+
+ /// Check if `rc, reg` specifically conflicts with the fixed input assignments.
+ pub fn is_fixed_input_conflict(&self, rc: RegClass, reg: RegUnit) -> bool {
+ debug_assert!(!self.inputs_done);
+ !self.regs_out.is_avail(rc, reg)
+ }
+
+ /// Finish adding input side constraints.
+ ///
+ /// Call this method to indicate that there will be no more fixed input reassignments added
+ /// and prepare for the output side constraints.
+ pub fn inputs_done(&mut self) {
+ debug_assert!(!self.has_fixed_input_conflicts());
+
+ // At this point, `regs_out` contains the `to` side of the input reassignments, and the
+ // `from` side has already been marked as available in `regs_in`.
+ //
+ // Remove the `to` assignments from `regs_in` so it now indicates the registers available
+ // to variables at the input side.
+ self.regs_in.intersect(&self.regs_out);
+
+ // The meaning of `regs_out` now changes completely to indicate the registers available to
+ // variables on the output side.
+ // The initial mask will be modified by `add_kill()` and `add_fixed_output()`.
+ self.regs_out = self.regs_in.clone();
+
+ // Now we can't add more fixed input assignments, but `add_var()` is still allowed.
+ self.inputs_done = true;
+ }
+
+ /// Record that an input register value is killed by the instruction.
+ ///
+ /// Even if a fixed reassignment has been added for the value, the `reg` argument should be the
+ /// original location before the reassignments.
+ ///
+ /// This means that the register is available on the output side.
+ pub fn add_kill(&mut self, value: Value, rc: RegClass, reg: RegUnit) {
+ debug_assert!(self.inputs_done);
+
+ // If a fixed assignment is killed, the `to` register becomes available on the output side.
+ if let Some(a) = self.assignments.get(value) {
+ debug_assert_eq!(a.from, reg);
+ self.regs_out.free(a.rc, a.to);
+ return;
+ }
+
+ // It's also possible that a variable is killed. That means it doesn't need to satisfy
+ // interference constraints on the output side.
+ // Variables representing tied operands will get their `is_output` flag set again later.
+ if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
+ debug_assert!(v.is_input);
+ v.is_output = false;
+ return;
+ }
+
+ // Alright, this is just a boring value being killed by the instruction. Just reclaim
+ // the assigned register.
+ self.regs_out.free(rc, reg);
+ }
+
+ /// Record that an input register is tied to an output register.
+ ///
+ /// It is assumed that `add_kill` was called previously with the same arguments.
+ ///
+ /// The output value that must have the same register as the input value is not recorded in the
+ /// solver.
+ ///
+ /// If the value has already been assigned to a fixed register, return that.
+ pub fn add_tied_input(
+ &mut self,
+ value: Value,
+ rc: RegClass,
+ reg: RegUnit,
+ is_global: bool,
+ ) -> Option<RegUnit> {
+ debug_assert!(self.inputs_done);
+
+ // If a fixed assignment is tied, the `to` register is not available on the output side.
+ if let Some(a) = self.assignments.get(value) {
+ debug_assert_eq!(a.from, reg);
+ self.regs_out.take(a.rc, a.to);
+ return Some(a.to);
+ }
+
+ // Check if a variable was created.
+ if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
+ debug_assert!(v.is_input);
+ v.is_output = true;
+ v.is_global = is_global;
+ return None;
+ }
+
+ // No variable exists for `value` because its constraints are already satisfied.
+ // However, if the tied output value has a global live range, we must create a variable to
+ // avoid global interference too.
+ if is_global {
+ let mut new_var = Variable::new_live(value, rc, reg, true);
+ new_var.is_global = true;
+ debug!("add_tied_input: new tied-global value: {}", new_var);
+ self.vars.push(new_var);
+ self.regs_in.free(rc, reg);
+ } else {
+ self.regs_out.take(rc, reg);
+ }
+
+ None
+ }
+
+ /// Add a fixed output assignment.
+ ///
+ /// This means that `to` will not be available for variables on the output side of the
+ /// instruction.
+ ///
+ /// Returns `false` if a live value conflicts with `to`, so it couldn't be added. Find the
+ /// conflicting live-through value and turn it into a variable before calling this method
+ /// again.
+ #[allow(dead_code)]
+ pub fn add_fixed_output(&mut self, rc: RegClass, reg: RegUnit) -> bool {
+ debug_assert!(self.inputs_done);
+ if self.regs_out.is_avail(rc, reg) {
+ self.regs_out.take(rc, reg);
+ true
+ } else {
+ false
+ }
+ }
+
+ /// Add a defined output value.
+ ///
+ /// This is similar to `add_var`, except the value doesn't have a prior register assignment.
+ pub fn add_def(&mut self, value: Value, constraint: RegClass, is_global: bool) {
+ debug_assert!(self.inputs_done);
+ self.vars
+ .push(Variable::new_def(value, constraint, is_global));
+ }
+
+ /// Clear the `is_global` flag on all solver variables.
+ ///
+ /// This is used when there are not enough global registers available, and global defines have
+ /// to be replaced with local defines followed by a copy.
+ pub fn clear_all_global_flags(&mut self) {
+ for v in &mut self.vars {
+ v.is_global = false;
+ }
+ }
+}
+
+/// Error reported when the solver fails to find a solution with the current constraints.
+///
+/// When no solution can be found, the error indicates how constraints could be loosened to help.
+pub enum SolverError {
+ /// There are no available registers in the given register class.
+ ///
+ /// This should be resolved by turning live-through values into variables so they can be moved
+ /// out of the way.
+ Divert(RegClass),
+
+ /// There are insufficient available registers in the global set to assign an `is_global`
+ /// variable with the given value.
+ ///
+ /// This should be resolved by converting the variable to a local one.
+ Global(Value),
+}
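+
+// A rough sketch of how a caller might react to these errors (the actual recovery logic
+// lives in the coloring pass, not in this file); the variable names are illustrative only:
+//
+//     match solver.quick_solve(&global_regs, false) {
+//         Ok(output_regs) => { /* proceed with the registers available after the inst */ }
+//         Err(SolverError::Divert(rc)) => {
+//             // Turn live-through values of class `rc` into variables (for example with
+//             // `add_through_var()`) so they can be moved out of the way, then retry
+//             // with `real_solve()`.
+//         }
+//         Err(SolverError::Global(value)) => {
+//             // Replace the global def of `value` with a local def followed by a copy
+//             // (see `clear_all_global_flags()`), then retry.
+//         }
+//     }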
+
+/// Interface for searching for a solution.
+impl Solver {
+ /// Try a quick-and-dirty solution.
+ ///
+ /// This is expected to succeed for most instructions since the constraint problem is almost
+ /// always trivial.
+ ///
+ /// Returns `Ok(regs)` if a solution was found.
+ pub fn quick_solve(
+ &mut self,
+ global_regs: &RegisterSet,
+ is_reload: bool,
+ ) -> Result<RegisterSet, SolverError> {
+ self.find_solution(global_regs, is_reload)
+ }
+
+ /// Try harder to find a solution.
+ ///
+ /// Call this method after `quick_solve()` fails.
+ ///
+ /// This may return an error with a register class that has run out of registers. If registers
+ /// can be freed up in the starving class, this method can be called again after adding
+ /// variables for the freed registers.
+ pub fn real_solve(
+ &mut self,
+ global_regs: &RegisterSet,
+ is_reload: bool,
+ ) -> Result<RegisterSet, SolverError> {
+ // Compute domain sizes for all the variables given the current register sets.
+ for v in &mut self.vars {
+ let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len();
+ v.domain = cmp::min(d, u16::MAX as usize) as u16;
+ }
+
+ // Solve for vars with small domains first to increase the chance of finding a solution.
+ //
+ // Also consider this case:
+ //
+ // v0: out, global
+ // v1: in
+ // v2: in+out
+ //
+ // If only %r0 and %r1 are available, the global constraint may cause us to assign:
+ //
+ // v0 -> %r1
+ // v1 -> %r0
+ // v2 -> !
+ //
+ // Usually in+out variables will have a smaller domain, but in the above case the domain
+ // size is the same, so we also prioritize in+out variables.
+ //
+ // Include the reversed previous solution for this variable partly as a stable tie breaker,
+ // partly to shake things up on a second attempt.
+ //
+ // Use the `from` register and value number as a tie breaker to get a stable sort.
+ self.vars.sort_unstable_by_key(|v| {
+ (
+ v.domain,
+ !(v.is_input && v.is_output),
+ !v.solution,
+ v.from.unwrap_or(0),
+ v.value,
+ )
+ });
+
+ debug!("real_solve for {}", self);
+ self.find_solution(global_regs, is_reload)
+ }
+
+ /// Search for a solution with the current list of variables.
+ ///
+ /// If a solution was found, returns `Ok(regs)` with the set of available registers on the
+ /// output side after the solution. If no solution could be found, returns
+ /// `Err(SolverError::Divert(rc))` with a register class that needs more available registers,
+ /// or `Err(SolverError::Global(value))` for a global value that could not be assigned.
+ fn find_solution(
+ &mut self,
+ global_regs: &RegisterSet,
+ is_reload: bool,
+ ) -> Result<RegisterSet, SolverError> {
+ // Available registers on the input and output sides respectively.
+ let mut iregs = self.regs_in.clone();
+ let mut oregs = self.regs_out.clone();
+ let mut gregs = global_regs.clone();
+
+ for v in &mut self.vars {
+ let rc = v.constraint;
+
+ // Decide which register to assign. In order to try and keep registers holding
+ // reloaded values separate from all other registers to the extent possible, we choose
+ // the first available register in the normal case, but the last available one in the
+ // case of a reload. See "A side note on register choice heuristics" in
+ // src/redundant_reload_remover.rs for further details.
+ let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs);
+ let maybe_reg = if is_reload {
+ reg_set_iter.rnext()
+ } else {
+ reg_set_iter.next()
+ };
+
+ let reg = match maybe_reg {
+ Some(reg) => reg,
+ None => {
+ // If `v` must avoid global interference, there is no point in requesting
+ // that live registers be diverted. We need to make it a non-global value.
+ if v.is_global && gregs.iter(rc).next().is_none() {
+ return Err(SolverError::Global(v.value));
+ }
+ return Err(SolverError::Divert(rc));
+ }
+ };
+
+ v.solution = reg;
+ if v.is_input {
+ iregs.take(rc, reg);
+ }
+ if v.is_output {
+ oregs.take(rc, reg);
+ }
+ if v.is_global {
+ gregs.take(rc, reg);
+ }
+ }
+
+ Ok(oregs)
+ }
+
+ /// Get all the variables.
+ pub fn vars(&self) -> &[Variable] {
+ &self.vars
+ }
+
+ /// Check if `value` can be added as a variable to help find a solution.
+ pub fn can_add_var(&mut self, constraint: RegClass, from: RegUnit) -> bool {
+ !self.regs_in.is_avail(constraint, from)
+ && !self.vars.iter().any(|var| var.from == Some(from))
+ }
+}
+
+/// Interface for working with parallel copies once a solution has been found.
+impl Solver {
+ /// Collect all the register moves we need to execute.
+ fn collect_moves(&mut self) {
+ self.moves.clear();
+
+ // Collect moves from the chosen solution for all non-define variables.
+ for v in &self.vars {
+ if let Some(from) = v.from {
+ // Omit variable solutions that don't require the value to be moved.
+ if from != v.solution {
+ self.moves.push(Move::Reg {
+ value: v.value,
+ from,
+ to: v.solution,
+ rc: v.constraint,
+ });
+ }
+ }
+ }
+
+ // Convert all of the fixed register assignments into moves, but omit the ones that are
+ // already in the right register.
+ self.moves
+ .extend(self.assignments.values().filter_map(Move::with_assignment));
+
+ if !self.moves.is_empty() {
+ debug!("collect_moves: {}", DisplayList(&self.moves));
+ }
+ }
+
+ /// Try to schedule a sequence of `regmove` instructions that will shuffle registers into
+ /// place.
+ ///
+ /// This may require the use of additional available registers, and it can fail if no
+ /// additional registers are available.
+ ///
+ /// TODO: Handle failure by generating a sequence of register swaps, or by temporarily spilling
+ /// a register.
+ ///
+ /// Returns the number of spills that had to be emitted.
+ pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize {
+ self.collect_moves();
+ debug_assert!(self.fills.is_empty());
+
+ let mut num_spill_slots = 0;
+ let mut avail = regs.clone();
+ let mut i = 0;
+ while i < self.moves.len() + self.fills.len() {
+ // Don't even look at the fills until we've spent all the moves. Deferring these lets
+ // us potentially reuse the claimed registers to resolve multiple cycles.
+ if i >= self.moves.len() {
+ self.moves.append(&mut self.fills);
+ }
+
+ // Find the first move that can be executed now.
+ if let Some(j) = self.moves[i..].iter().position(|m| match m.to_reg() {
+ Some((rc, reg)) => avail.is_avail(rc, reg),
+ None => true,
+ }) {
+ // This move can be executed now.
+ self.moves.swap(i, i + j);
+ let m = &self.moves[i];
+ if let Some((rc, reg)) = m.to_reg() {
+ avail.take(rc, reg);
+ }
+ if let Some((rc, reg)) = m.from_reg() {
+ avail.free(rc, reg);
+ }
+ debug!("move #{}: {}", i, m);
+ i += 1;
+ continue;
+ }
+
+ // When we get here, none of the `moves[i..]` can be executed. This means there are
+ // only cycles remaining. The cycles can be broken in a few ways:
+ //
+ // 1. Grab an available register and use it to break a cycle.
+ // 2. Move a value temporarily into a stack slot instead of a register.
+ // 3. Use swap instructions.
+ //
+ // TODO: So far we only implement 1 and 2.
+
+ // Pick an assignment with the largest possible width. This is more likely to break up
+ // a cycle than an assignment with fewer register units. For example, it may be
+ // necessary to move two arm32 S-registers out of the way before a D-register can move
+ // into place.
+ //
+ // We use `min_by_key` and `!` instead of `max_by_key` because it preserves the
+ // existing order of moves with the same width.
+ let j = self.moves[i..]
+ .iter()
+ .enumerate()
+ .min_by_key(|&(_, m)| !m.rc().width)
+ .unwrap()
+ .0;
+ self.moves.swap(i, i + j);
+
+ // Check the top-level register class for an available register. It is an axiom of the
+ // register allocator that we can move between all registers in the top-level RC.
+ let m = self.moves[i].clone();
+ let toprc = m.rc().toprc();
+ if let Some(reg) = avail.iter(toprc).next() {
+ debug!(
+ "breaking cycle at {} with available {} register {}",
+ m,
+ toprc,
+ toprc.info.display_regunit(reg)
+ );
+
+ // Alter the move so it is guaranteed to be picked up when we loop. It is important
+ // that this move is scheduled immediately, otherwise we would have multiple moves
+ // of the same value, and they would not be commutable.
+ let old_to_reg = self.moves[i].replace_to_reg(reg);
+ // Append a fixup move so we end up in the right place. This move will be scheduled
+ // later. That's ok because it is the single remaining move of `m.value` after the
+ // next iteration.
+ self.moves.push(Move::Reg {
+ value: m.value(),
+ rc: toprc,
+ from: reg,
+ to: old_to_reg,
+ });
+ // TODO: What if allocating an extra register is not enough to break a cycle? This
+ // can happen when there are registers of different widths in a cycle. For ARM, we
+ // may have to move two S-registers out of the way before we can resolve a cycle
+ // involving a D-register.
+ continue;
+ }
+
+ // It was impossible to free up a register in toprc, so use an emergency spill slot as
+ // a last resort.
+ let slot = num_spill_slots;
+ num_spill_slots += 1;
+ debug!("breaking cycle at {} with slot {}", m, slot);
+ let old_to_reg = self.moves[i].change_to_spill(slot);
+ self.fills.push(Move::Fill {
+ value: m.value(),
+ rc: toprc,
+ from_slot: slot,
+ to: old_to_reg,
+ });
+ }
+
+ num_spill_slots
+ }
+
+ /// Borrow the scheduled set of register moves that was computed by `schedule_moves()`.
+ pub fn moves(&self) -> &[Move] {
+ &self.moves
+ }
+}
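+
+// A rough sketch of the expected call sequence, pieced together from the methods above and
+// the tests below (the register and value names are illustrative only):
+//
+//     solver.reset(&regs);                    // registers available before the instruction
+//     solver.reassign_in(v0, gpr, r1, r0);    // fixed input constraints first
+//     solver.add_var(v1, gpr, r2);            // then constrained input variables
+//     solver.inputs_done();
+//     solver.add_kill(v1, gpr, r2);           // killed inputs free their register
+//     solver.add_def(v2, gpr, false);         // output definitions
+//     let output_regs = solver.quick_solve(&global_regs, false)?;
+//     let spill_slots = solver.schedule_moves(&regs);
+//     for m in solver.moves() { /* emit the corresponding regmove/regspill/regfill */ }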
+
+impl fmt::Display for Solver {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let reginfo = self.vars.first().map(|v| v.constraint.info);
+ writeln!(f, "Solver {{ inputs_done: {},", self.inputs_done)?;
+ writeln!(f, " in: {}", self.regs_in.display(reginfo))?;
+ writeln!(f, " out: {}", self.regs_out.display(reginfo))?;
+ writeln!(
+ f,
+ " assignments: {}",
+ DisplayList(self.assignments.as_slice())
+ )?;
+ writeln!(f, " vars: {}", DisplayList(&self.vars))?;
+ writeln!(f, " moves: {}", DisplayList(&self.moves))?;
+ writeln!(f, "}}")
+ }
+}
+
+#[cfg(test)]
+#[cfg(feature = "arm32")]
+mod tests {
+ use super::{Move, Solver};
+ use crate::entity::EntityRef;
+ use crate::ir::Value;
+ use crate::isa::registers::{RegBank, RegClassData};
+ use crate::isa::{RegClass, RegInfo, RegUnit};
+ use crate::regalloc::RegisterSet;
+ use core::borrow::Borrow;
+
+ // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info
+ // about registers, so we directly access `INFO` from registers-arm32.rs.
+ include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
+
+ // Get a register class by name.
+ fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
+ reginfo
+ .classes
+ .iter()
+ .find(|rc| rc.name == name)
+ .expect("Can't find named register class.")
+ }
+
+ // Construct a register move.
+ fn mov(value: Value, rc: RegClass, from: RegUnit, to: RegUnit) -> Move {
+ Move::Reg {
+ value,
+ rc,
+ from,
+ to,
+ }
+ }
+
+ fn spill(value: Value, rc: RegClass, from: RegUnit, to_slot: usize) -> Move {
+ Move::Spill {
+ value,
+ rc,
+ from,
+ to_slot,
+ }
+ }
+
+ fn fill(value: Value, rc: RegClass, from_slot: usize, to: RegUnit) -> Move {
+ Move::Fill {
+ value,
+ rc,
+ from_slot,
+ to,
+ }
+ }
+
+ #[test]
+ fn simple_moves() {
+ let reginfo = INFO.borrow();
+ let gpr = rc_by_name(&reginfo, "GPR");
+ let r0 = gpr.unit(0);
+ let r1 = gpr.unit(1);
+ let r2 = gpr.unit(2);
+ let gregs = RegisterSet::new();
+ let mut regs = RegisterSet::new();
+ let mut solver = Solver::new();
+ let v10 = Value::new(10);
+ let v11 = Value::new(11);
+
+ // As simple as it gets: Value is in r1, we want r0.
+ regs.take(gpr, r1);
+ solver.reset(&regs);
+ solver.reassign_in(v10, gpr, r1, r0);
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ assert_eq!(solver.schedule_moves(&regs), 0);
+ assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]);
+
+ // A bit harder: r0, r1 need to go in r1, r2.
+ regs.take(gpr, r0);
+ solver.reset(&regs);
+ solver.reassign_in(v10, gpr, r0, r1);
+ solver.reassign_in(v11, gpr, r1, r2);
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ assert_eq!(solver.schedule_moves(&regs), 0);
+ assert_eq!(
+ solver.moves(),
+ &[mov(v11, gpr, r1, r2), mov(v10, gpr, r0, r1)]
+ );
+
+ // Swap r0 and r1 in three moves using r2 as a scratch.
+ solver.reset(&regs);
+ solver.reassign_in(v10, gpr, r0, r1);
+ solver.reassign_in(v11, gpr, r1, r0);
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ assert_eq!(solver.schedule_moves(&regs), 0);
+ assert_eq!(
+ solver.moves(),
+ &[
+ mov(v10, gpr, r0, r2),
+ mov(v11, gpr, r1, r0),
+ mov(v10, gpr, r2, r1),
+ ]
+ );
+ }
+
+ #[test]
+ fn harder_move_cycles() {
+ let reginfo = INFO.borrow();
+ let s = rc_by_name(&reginfo, "S");
+ let d = rc_by_name(&reginfo, "D");
+ let d0 = d.unit(0);
+ let d1 = d.unit(1);
+ let d2 = d.unit(2);
+ let s0 = s.unit(0);
+ let s1 = s.unit(1);
+ let s2 = s.unit(2);
+ let s3 = s.unit(3);
+ let gregs = RegisterSet::new();
+ let mut regs = RegisterSet::new();
+ let mut solver = Solver::new();
+ let v10 = Value::new(10);
+ let v11 = Value::new(11);
+ let v12 = Value::new(12);
+
+ // Not a simple cycle: Swap d0 <-> (s2, s3)
+ regs.take(d, d0);
+ regs.take(d, d1);
+ solver.reset(&regs);
+ solver.reassign_in(v10, d, d0, d1);
+ solver.reassign_in(v11, s, s2, s0);
+ solver.reassign_in(v12, s, s3, s1);
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ assert_eq!(solver.schedule_moves(&regs), 0);
+ assert_eq!(
+ solver.moves(),
+ &[
+ mov(v10, d, d0, d2),
+ mov(v11, s, s2, s0),
+ mov(v12, s, s3, s1),
+ mov(v10, d, d2, d1),
+ ]
+ );
+
+ // Same problem in the other direction: Swap (s0, s1) <-> d1.
+ //
+ // If we divert the moves in order, we will need to allocate *two* temporary S registers. A
+ // trivial algorithm might assume that allocating a single temp is enough.
+ solver.reset(&regs);
+ solver.reassign_in(v11, s, s0, s2);
+ solver.reassign_in(v12, s, s1, s3);
+ solver.reassign_in(v10, d, d1, d0);
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ assert_eq!(solver.schedule_moves(&regs), 0);
+ assert_eq!(
+ solver.moves(),
+ &[
+ mov(v10, d, d1, d2),
+ mov(v12, s, s1, s3),
+ mov(v11, s, s0, s2),
+ mov(v10, d, d2, d0),
+ ]
+ );
+ }
+
+ #[test]
+ fn emergency_spill() {
+ let reginfo = INFO.borrow();
+ let gpr = rc_by_name(&reginfo, "GPR");
+ let r0 = gpr.unit(0);
+ let r1 = gpr.unit(1);
+ let r2 = gpr.unit(2);
+ let r3 = gpr.unit(3);
+ let r4 = gpr.unit(4);
+ let r5 = gpr.unit(5);
+ let gregs = RegisterSet::new();
+ let mut regs = RegisterSet::new();
+ let mut solver = Solver::new();
+ let v10 = Value::new(10);
+ let v11 = Value::new(11);
+ let v12 = Value::new(12);
+ let v13 = Value::new(13);
+ let v14 = Value::new(14);
+ let v15 = Value::new(15);
+
+ // Claim all of r0--r15 for other values, leaving no GPR free.
+ for i in 0..16 {
+ regs.take(gpr, gpr.unit(i));
+ }
+
+ // Request a permutation cycle.
+ solver.reset(&regs);
+ solver.reassign_in(v10, gpr, r0, r1);
+ solver.reassign_in(v11, gpr, r1, r2);
+ solver.reassign_in(v12, gpr, r2, r0);
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ assert_eq!(solver.schedule_moves(&regs), 1);
+ assert_eq!(
+ solver.moves(),
+ &[
+ spill(v10, gpr, r0, 0),
+ mov(v12, gpr, r2, r0),
+ mov(v11, gpr, r1, r2),
+ fill(v10, gpr, 0, r1),
+ ]
+ );
+
+ // Two cycles should only require a single spill.
+ solver.reset(&regs);
+ // Cycle 1.
+ solver.reassign_in(v10, gpr, r0, r1);
+ solver.reassign_in(v11, gpr, r1, r2);
+ solver.reassign_in(v12, gpr, r2, r0);
+ // Cycle 2.
+ solver.reassign_in(v13, gpr, r3, r4);
+ solver.reassign_in(v14, gpr, r4, r5);
+ solver.reassign_in(v15, gpr, r5, r3);
+
+ solver.inputs_done();
+ assert!(solver.quick_solve(&gregs, false).is_ok());
+ // We resolve two cycles with one spill.
+ assert_eq!(solver.schedule_moves(&regs), 1);
+ assert_eq!(
+ solver.moves(),
+ &[
+ spill(v10, gpr, r0, 0),
+ mov(v12, gpr, r2, r0),
+ mov(v11, gpr, r1, r2),
+ mov(v13, gpr, r3, r1), // Use available r1 to break cycle 2.
+ mov(v15, gpr, r5, r3),
+ mov(v14, gpr, r4, r5),
+ mov(v13, gpr, r1, r4),
+ fill(v10, gpr, 0, r1), // Finally complete cycle 1.
+ ]
+ );
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/spilling.rs b/third_party/rust/cranelift-codegen/src/regalloc/spilling.rs
new file mode 100644
index 0000000000..33d0fe9bd6
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/spilling.rs
@@ -0,0 +1,637 @@
+//! Spilling pass.
+//!
+//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
+//! ensure that the register pressure never exceeds the number of available registers by moving
+//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
+//! live range.
+//!
+//! Some instruction operand constraints may require additional registers to resolve. Since this
+//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
+//! inserting copies. The extra constraints are:
+//!
+//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
+//! inserting a copy to a temporary value when necessary (see the sketch below).
+//! 2. When the same value is used more than once by an instruction, the operand constraints must
+//! be compatible. Otherwise, the value must be copied into a new register for some of the
+//! operands.
+
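+// For example (an illustrative sketch, not a quote from a real function): if `v1` is still
+// live after an instruction whose encoding ties its first operand to the result, constraint 1
+// above is resolved by rewriting
+//
+//     v3 = iadd v1, v2        ; v1 live below -> tied operand is not killed
+//
+// into
+//
+//     v4 = copy v1
+//     v3 = iadd v4, v2        ; v4 is killed here, v1 keeps its register
+//
+// where `v4` is the temporary value mentioned in constraint 1.
+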
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
+use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit};
+use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::pressure::Pressure;
+use crate::regalloc::virtregs::VirtRegs;
+use crate::timing;
+use crate::topo_order::TopoOrder;
+use alloc::vec::Vec;
+use core::fmt;
+use log::debug;
+
+/// Return a top-level register class which contains `unit`.
+fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass {
+ let bank = reginfo.bank_containing_regunit(unit).unwrap();
+ reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)]
+ .iter()
+ .find(|&rc| rc.contains(unit))
+ .expect("reg unit should be in a toprc")
+}
+
+/// Persistent data structures for the spilling pass.
+pub struct Spilling {
+ spills: Vec<Value>,
+ reg_uses: Vec<RegUse>,
+}
+
+/// Context data structure that gets instantiated once per pass.
+struct Context<'a> {
+ // Current instruction as well as reference to function and ISA.
+ cur: EncCursor<'a>,
+
+ // Cached ISA information.
+ reginfo: RegInfo,
+ encinfo: EncInfo,
+
+ // References to contextual data structures we need.
+ domtree: &'a DominatorTree,
+ liveness: &'a mut Liveness,
+ virtregs: &'a VirtRegs,
+ topo: &'a mut TopoOrder,
+
+ // Current register pressure.
+ pressure: Pressure,
+
+ // Values spilled for the current instruction. These values have already been removed from the
+ // pressure tracker, but they are still present in the live value tracker and their affinity
+ // hasn't been changed yet.
+ spills: &'a mut Vec<Value>,
+
+ // Uses of register values in the current instruction.
+ reg_uses: &'a mut Vec<RegUse>,
+}
+
+impl Spilling {
+ /// Create a new spilling data structure.
+ pub fn new() -> Self {
+ Self {
+ spills: Vec::new(),
+ reg_uses: Vec::new(),
+ }
+ }
+
+ /// Clear all data structures in this spilling pass.
+ pub fn clear(&mut self) {
+ self.spills.clear();
+ self.reg_uses.clear();
+ }
+
+ /// Run the spilling algorithm over `func`.
+ pub fn run(
+ &mut self,
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ domtree: &DominatorTree,
+ liveness: &mut Liveness,
+ virtregs: &VirtRegs,
+ topo: &mut TopoOrder,
+ tracker: &mut LiveValueTracker,
+ ) {
+ let _tt = timing::ra_spilling();
+ debug!("Spilling for:\n{}", func.display(isa));
+ let reginfo = isa.register_info();
+ let usable_regs = isa.allocatable_registers(func);
+ let mut ctx = Context {
+ cur: EncCursor::new(func, isa),
+ reginfo: isa.register_info(),
+ encinfo: isa.encoding_info(),
+ domtree,
+ liveness,
+ virtregs,
+ topo,
+ pressure: Pressure::new(&reginfo, &usable_regs),
+ spills: &mut self.spills,
+ reg_uses: &mut self.reg_uses,
+ };
+ ctx.run(tracker)
+ }
+}
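+
+// A minimal usage sketch, assuming the caller (normally the register allocator driver) has
+// already computed liveness, the dominator tree, the virtual registers and a topological
+// order:
+//
+//     let mut spilling = Spilling::new();
+//     spilling.run(isa, func, domtree, liveness, virtregs, topo, tracker);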
+
+impl<'a> Context<'a> {
+ fn run(&mut self, tracker: &mut LiveValueTracker) {
+ self.topo.reset(self.cur.func.layout.blocks());
+ while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
+ self.visit_block(block, tracker);
+ }
+ }
+
+ fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+ debug!("Spilling {}:", block);
+ self.cur.goto_top(block);
+ self.visit_block_header(block, tracker);
+ tracker.drop_dead_params();
+ self.process_spills(tracker);
+
+ while let Some(inst) = self.cur.next_inst() {
+ if !self.cur.func.dfg[inst].opcode().is_ghost() {
+ self.visit_inst(inst, block, tracker);
+ } else {
+ let (_throughs, kills) = tracker.process_ghost(inst);
+ self.free_regs(kills);
+ }
+ tracker.drop_dead(inst);
+ self.process_spills(tracker);
+ }
+ }
+
+ // Take all live registers in `regs` from the pressure set.
+ // This doesn't cause any spilling; it is assumed there are enough registers.
+ fn take_live_regs(&mut self, regs: &[LiveValue]) {
+ for lv in regs {
+ if !lv.is_dead {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ self.pressure.take(rc);
+ }
+ }
+ }
+ }
+
+ // Free all registers in `kills` from the pressure set.
+ fn free_regs(&mut self, kills: &[LiveValue]) {
+ for lv in kills {
+ if let Affinity::Reg(rci) = lv.affinity {
+ if !self.spills.contains(&lv.value) {
+ let rc = self.reginfo.rc(rci);
+ self.pressure.free(rc);
+ }
+ }
+ }
+ }
+
+ // Free all dead registers in `regs` from the pressure set.
+ fn free_dead_regs(&mut self, regs: &[LiveValue]) {
+ for lv in regs {
+ if lv.is_dead {
+ if let Affinity::Reg(rci) = lv.affinity {
+ if !self.spills.contains(&lv.value) {
+ let rc = self.reginfo.rc(rci);
+ self.pressure.free(rc);
+ }
+ }
+ }
+ }
+ }
+
+ fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+ let (liveins, params) = tracker.block_top(
+ block,
+ &self.cur.func.dfg,
+ self.liveness,
+ &self.cur.func.layout,
+ self.domtree,
+ );
+
+ // Count the live-in registers. These should already fit in registers; they did at the
+ // dominator.
+ self.pressure.reset();
+ self.take_live_regs(liveins);
+
+ // A block can have an arbitrary (up to 2^16...) number of parameters, so they are not
+ // guaranteed to fit in registers.
+ for lv in params {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ 'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
+ debug!("Need {} reg for block param {}", rc, lv.value);
+ match self.spill_candidate(mask, liveins) {
+ Some(cand) => {
+ debug!(
+ "Spilling live-in {} to make room for {} block param {}",
+ cand, rc, lv.value
+ );
+ self.spill_reg(cand);
+ }
+ None => {
+ // We can't spill any of the live-in registers, so we have to spill a
+ // block argument. Since the current spill metric would consider all the
+ // block arguments equal, just spill the one being processed.
+ debug!("Spilling {} block argument {}", rc, lv.value);
+
+ // Since `spill_reg` will free a register, add the current one here.
+ self.pressure.take(rc);
+ self.spill_reg(lv.value);
+ break 'try_take;
+ }
+ }
+ }
+ }
+ }
+
+ // The transient pressure counts for the block arguments are accurate. Just preserve them.
+ self.pressure.preserve_transient();
+ self.free_dead_regs(params);
+ }
+
+ fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) {
+ debug!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
+ debug_assert_eq!(self.cur.current_inst(), Some(inst));
+ debug_assert_eq!(self.cur.current_block(), Some(block));
+
+ let constraints = self
+ .encinfo
+ .operand_constraints(self.cur.func.encodings[inst]);
+
+ // We may need to resolve register constraints if there are any noteworthy uses.
+ debug_assert!(self.reg_uses.is_empty());
+ self.collect_reg_uses(inst, block, constraints);
+
+ // Calls usually have fixed register uses.
+ let call_sig = self.cur.func.dfg.call_signature(inst);
+ if let Some(sig) = call_sig {
+ self.collect_abi_reg_uses(inst, sig);
+ }
+
+ if !self.reg_uses.is_empty() {
+ self.process_reg_uses(inst, tracker);
+ }
+
+ // Update the live value tracker with this instruction.
+ let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
+
+ // Remove kills from the pressure tracker.
+ self.free_regs(kills);
+
+ // If inst is a call, spill all register values that are live across the call.
+ // This means that we don't currently take advantage of callee-saved registers.
+ // TODO: Be more sophisticated.
+ let opcode = self.cur.func.dfg[inst].opcode();
+ if call_sig.is_some() || opcode.clobbers_all_regs() {
+ for lv in throughs {
+ if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
+ self.spill_reg(lv.value);
+ }
+ }
+ }
+
+ // Make sure we have enough registers for the register defs.
+ // Dead defs are included here. They need a register too.
+ // No need to process call return values, they are in fixed registers.
+ if let Some(constraints) = constraints {
+ for op in constraints.outs {
+ if op.kind != ConstraintKind::Stack {
+ // Add register def to pressure, spill if needed.
+ while let Err(mask) = self.pressure.take_transient(op.regclass) {
+ debug!("Need {} reg from {} throughs", op.regclass, throughs.len());
+ match self.spill_candidate(mask, throughs) {
+ Some(cand) => self.spill_reg(cand),
+ None => panic!(
+ "Ran out of {} registers for {}",
+ op.regclass,
+ self.cur.display_inst(inst)
+ ),
+ }
+ }
+ }
+ }
+ self.pressure.reset_transient();
+ }
+
+ // Restore pressure state, compute pressure with affinities from `defs`.
+ // Exclude dead defs. Includes call return values.
+ // This won't cause spilling.
+ self.take_live_regs(defs);
+ }
+
+ // Collect register uses that are noteworthy in one of the following ways:
+ //
+ // 1. It's a fixed register constraint.
+ // 2. It's a use of a spilled value.
+ // 3. It's a tied register constraint and the value isn't killed.
+ //
+ // We are assuming here that if a value is used both by a fixed register operand and a register
+ // class operand, the two are compatible. We are also assuming that two register class
+ // operands are always compatible.
+ fn collect_reg_uses(
+ &mut self,
+ inst: Inst,
+ block: Block,
+ constraints: Option<&RecipeConstraints>,
+ ) {
+ let args = self.cur.func.dfg.inst_args(inst);
+ let num_fixed_ins = if let Some(constraints) = constraints {
+ for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
+ let mut reguse = RegUse::new(arg, idx, op.regclass.into());
+ let lr = &self.liveness[arg];
+ match op.kind {
+ ConstraintKind::Stack => continue,
+ ConstraintKind::FixedReg(_) => reguse.fixed = true,
+ ConstraintKind::Tied(_) => {
+ // A tied operand must kill the used value.
+ reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout);
+ }
+ ConstraintKind::FixedTied(_) => {
+ reguse.fixed = true;
+ reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout);
+ }
+ ConstraintKind::Reg => {}
+ }
+ if lr.affinity.is_stack() {
+ reguse.spilled = true;
+ }
+
+ // Only collect the interesting register uses.
+ if reguse.fixed || reguse.tied || reguse.spilled {
+ debug!(" reguse: {}", reguse);
+ self.reg_uses.push(reguse);
+ }
+ }
+ constraints.ins.len()
+ } else {
+ // A non-ghost instruction with no constraints can't have any
+ // fixed operands.
+ 0
+ };
+
+ // Similarly, for return instructions, collect uses of ABI-defined
+ // return values.
+ if self.cur.func.dfg[inst].opcode().is_return() {
+ debug_assert_eq!(
+ self.cur.func.dfg.inst_variable_args(inst).len(),
+ self.cur.func.signature.returns.len(),
+ "The non-fixed arguments in a return should follow the function's signature."
+ );
+ for (ret_idx, (ret, &arg)) in
+ self.cur.func.signature.returns.iter().zip(args).enumerate()
+ {
+ let idx = num_fixed_ins + ret_idx;
+ let unit = match ret.location {
+ ArgumentLoc::Unassigned => {
+ panic!("function return signature should be legalized")
+ }
+ ArgumentLoc::Reg(unit) => unit,
+ ArgumentLoc::Stack(_) => continue,
+ };
+ let toprc = toprc_containing_regunit(unit, &self.reginfo);
+ let mut reguse = RegUse::new(arg, idx, toprc.into());
+ reguse.fixed = true;
+
+ debug!(" reguse: {}", reguse);
+ self.reg_uses.push(reguse);
+ }
+ }
+ }
+
+ // Collect register uses from the ABI input constraints.
+ fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) {
+ let num_fixed_args = self.cur.func.dfg[inst]
+ .opcode()
+ .constraints()
+ .num_fixed_value_arguments();
+ let args = self.cur.func.dfg.inst_variable_args(inst);
+ for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig]
+ .params
+ .iter()
+ .zip(args)
+ .enumerate()
+ {
+ if abi.location.is_reg() {
+ let (rci, spilled) = match self.liveness[arg].affinity {
+ Affinity::Reg(rci) => (rci, false),
+ Affinity::Stack => (
+ self.cur.isa.regclass_for_abi_type(abi.value_type).into(),
+ true,
+ ),
+ Affinity::Unassigned => panic!("Missing affinity for {}", arg),
+ };
+ let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci);
+ reguse.fixed = true;
+ reguse.spilled = spilled;
+ self.reg_uses.push(reguse);
+ }
+ }
+ }
+
+ // Process multiple register uses to resolve potential conflicts.
+ //
+ // Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary.
+ // Trigger spilling if any of the temporaries cause the register pressure to become too high.
+ //
+ // Leave `self.reg_uses` empty.
+ fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) {
+ // We're looking for multiple uses of the same value, so start by sorting by value. The
+ // secondary `opidx` key makes it possible to use an unstable (non-allocating) sort.
+ self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));
+
+ self.cur.use_srcloc(inst);
+ for i in 0..self.reg_uses.len() {
+ let ru = self.reg_uses[i];
+
+ // Do we need to insert a copy for this use?
+ let need_copy = if ru.tied {
+ true
+ } else if ru.fixed {
+ // This is a fixed register use which doesn't necessarily require a copy.
+ // Make a copy only if this is not the first use of the value.
+ self.reg_uses
+ .get(i.wrapping_sub(1))
+ .map_or(false, |ru2| ru2.value == ru.value)
+ } else {
+ false
+ };
+
+ if need_copy {
+ let copy = self.insert_copy(ru.value, ru.rci);
+ self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
+ }
+
+ // Even if we don't insert a copy, we may need to account for register pressure for the
+ // reload pass.
+ if need_copy || ru.spilled {
+ let rc = self.reginfo.rc(ru.rci);
+ while let Err(mask) = self.pressure.take_transient(rc) {
+ debug!("Copy of {} reg causes spill", rc);
+ // Spill a live register that is *not* used by the current instruction.
+ // Spilling a use wouldn't help.
+ //
+ // Do allow spilling of block arguments on branches. This is safe since we spill
+ // the whole virtual register which includes the matching block parameter value
+ // at the branch destination. It is also necessary since there can be
+ // arbitrarily many block arguments.
+ match {
+ let args = if self.cur.func.dfg[inst].opcode().is_branch() {
+ self.cur.func.dfg.inst_fixed_args(inst)
+ } else {
+ self.cur.func.dfg.inst_args(inst)
+ };
+ self.spill_candidate(
+ mask,
+ tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
+ )
+ } {
+ Some(cand) => self.spill_reg(cand),
+ None => panic!(
+ "Ran out of {} registers when inserting copy before {}",
+ rc,
+ self.cur.display_inst(inst)
+ ),
+ }
+ }
+ }
+ }
+ self.pressure.reset_transient();
+ self.reg_uses.clear()
+ }
+
+ // Find a spill candidate from `candidates` whose top-level register class is in `mask`.
+ fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
+ where
+ II: IntoIterator<Item = &'ii LiveValue>,
+ {
+ // Find the best viable spill candidate.
+ //
+ // The very simple strategy implemented here is to spill the value with the earliest def in
+ // the reverse post-order. This strategy depends on a good reload pass to generate good
+ // code.
+ //
+ // We know that all candidate defs dominate the current instruction, so one of them will
+ // dominate the others. That is the earliest def.
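+ //
+ // For example (illustrative only): if the viable candidates are a value defined in the
+ // entry block and a value defined in a later block, the entry-block definition comes
+ // first in the reverse post-order, so it is the one picked for spilling.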
+ candidates
+ .into_iter()
+ .filter_map(|lv| {
+ // Viable candidates are registers in one of the `mask` classes, and not already in
+ // the spill set.
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
+ // Here, `lv` is a viable spill candidate.
+ return Some(lv.value);
+ }
+ }
+ None
+ })
+ .min_by(|&a, &b| {
+ // Find the minimum candidate according to the RPO of their defs.
+ self.domtree.rpo_cmp(
+ self.cur.func.dfg.value_def(a),
+ self.cur.func.dfg.value_def(b),
+ &self.cur.func.layout,
+ )
+ })
+ }
+
+ /// Spill `value` immediately by
+ ///
+ /// 1. Changing its affinity to `Stack` which marks the spill.
+ /// 2. Removing the value from the pressure tracker.
+ /// 3. Adding the value to `self.spills` for later reference by `process_spills`.
+ ///
+ /// Note that this does not update the cached affinity in the live value tracker. Call
+ /// `process_spills` to do that.
+ fn spill_reg(&mut self, value: Value) {
+ if let Affinity::Reg(rci) = self.liveness.spill(value) {
+ let rc = self.reginfo.rc(rci);
+ self.pressure.free(rc);
+ self.spills.push(value);
+ debug!("Spilled {}:{} -> {}", value, rc, self.pressure);
+ } else {
+ panic!("Cannot spill {} that was already on the stack", value);
+ }
+
+ // Assign a spill slot for the whole virtual register.
+ let ss = self
+ .cur
+ .func
+ .stack_slots
+ .make_spill_slot(self.cur.func.dfg.value_type(value));
+ for &v in self.virtregs.congruence_class(&value) {
+ self.liveness.spill(v);
+ self.cur.func.locations[v] = ValueLoc::Stack(ss);
+ }
+ }
+
+ /// Process any pending spills in the `self.spills` vector.
+ ///
+ /// It is assumed that spills are removed from the pressure tracker immediately, see
+ /// `spill_reg` above.
+ ///
+ /// We also need to update the live range affinity and remove spilled values from the live
+ /// value tracker.
+ fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
+ if !self.spills.is_empty() {
+ tracker.process_spills(|v| self.spills.contains(&v));
+ self.spills.clear()
+ }
+ }
+
+ /// Insert a `copy value` before the current instruction and give it a live range extending to
+ /// the current instruction.
+ ///
+ /// Returns the new local value created.
+ fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
+ let copy = self.cur.ins().copy(value);
+ let inst = self.cur.built_inst();
+
+ // Update live ranges.
+ self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
+ self.liveness.extend_locally(
+ copy,
+ self.cur.func.layout.pp_block(inst),
+ self.cur.current_inst().expect("must be at an instruction"),
+ &self.cur.func.layout,
+ );
+
+ copy
+ }
+}
+
+/// Struct representing a register use of a value.
+/// Used to detect multiple uses of the same value with incompatible register constraints.
+#[derive(Clone, Copy)]
+struct RegUse {
+ value: Value,
+ opidx: u16,
+
+ // Register class required by the use.
+ rci: RegClassIndex,
+
+ // A use with a fixed register constraint.
+ fixed: bool,
+
+ // A register use of a spilled value.
+ spilled: bool,
+
+ // A use with a tied register constraint *and* the used value is not killed.
+ tied: bool,
+}
+
+impl RegUse {
+ fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self {
+ Self {
+ value,
+ opidx: idx as u16,
+ rci,
+ fixed: false,
+ spilled: false,
+ tied: false,
+ }
+ }
+}
+
+impl fmt::Display for RegUse {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}@op{}", self.value, self.opidx)?;
+ if self.fixed {
+ write!(f, "/fixed")?;
+ }
+ if self.spilled {
+ write!(f, "/spilled")?;
+ }
+ if self.tied {
+ write!(f, "/tied")?;
+ }
+ Ok(())
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs b/third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs
new file mode 100644
index 0000000000..ee2ec9bcd9
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs
@@ -0,0 +1,505 @@
+//! Virtual registers.
+//!
+//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the
+//! values in a virtual register are assigned to the same location, the generated code will
+//! contain fewer copies.
+//!
+//! A virtual register is typically built by merging together SSA values that are "phi-related" -
+//! that is, one value is passed as a block argument to a branch and the other is the block parameter
+//! value itself.
+//!
+//! If any values in a virtual register are spilled, they will use the same stack slot. This avoids
+//! memory-to-memory copies when a spilled value is passed as a block argument.
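+
+// For instance (illustrative CLIF, not taken from this crate): in
+//
+//     block0:
+//         v0 = iconst.i32 1
+//         jump block1(v0)
+//
+//     block1(v1: i32):
+//         ...
+//
+// `v0` and `v1` are phi-related, so they are typically merged into the same virtual register,
+// and a spill of either one reuses the single shared stack slot.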
+
+use crate::dbg::DisplayList;
+use crate::dominator_tree::DominatorTreePreorder;
+use crate::entity::entity_impl;
+use crate::entity::{EntityList, ListPool};
+use crate::entity::{Keys, PrimaryMap, SecondaryMap};
+use crate::ir::{Function, Value};
+use crate::packed_option::PackedOption;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::fmt;
+use core::slice;
+use smallvec::SmallVec;
+
+/// A virtual register reference.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct VirtReg(u32);
+entity_impl!(VirtReg, "vreg");
+
+type ValueList = EntityList<Value>;
+
+/// Collection of virtual registers.
+///
+/// Each virtual register is a list of values. Also maintain a map from values to their unique
+/// virtual register, if any.
+pub struct VirtRegs {
+ /// Memory pool for the value lists.
+ pool: ListPool<Value>,
+
+ /// The primary table of virtual registers.
+ vregs: PrimaryMap<VirtReg, ValueList>,
+
+ /// Allocated virtual register numbers that are no longer in use.
+ unused_vregs: Vec<VirtReg>,
+
+ /// Each value belongs to at most one virtual register.
+ value_vregs: SecondaryMap<Value, PackedOption<VirtReg>>,
+
+ /// Table used during the union-find phase while `vregs` is empty.
+ union_find: SecondaryMap<Value, i32>,
+
+ /// Values that have been activated in the `union_find` table, but not yet added to any virtual
+ /// registers by the `finish_union_find()` function.
+ pending_values: Vec<Value>,
+}
+
+impl VirtRegs {
+ /// Create a new virtual register collection.
+ pub fn new() -> Self {
+ Self {
+ pool: ListPool::new(),
+ vregs: PrimaryMap::new(),
+ unused_vregs: Vec::new(),
+ value_vregs: SecondaryMap::new(),
+ union_find: SecondaryMap::new(),
+ pending_values: Vec::new(),
+ }
+ }
+
+ /// Clear all virtual registers.
+ pub fn clear(&mut self) {
+ self.vregs.clear();
+ self.unused_vregs.clear();
+ self.value_vregs.clear();
+ self.pool.clear();
+ self.union_find.clear();
+ self.pending_values.clear();
+ }
+
+ /// Get the virtual register containing `value`, if any.
+ pub fn get(&self, value: Value) -> Option<VirtReg> {
+ self.value_vregs[value].into()
+ }
+
+ /// Get the list of values in `vreg`.
+ pub fn values(&self, vreg: VirtReg) -> &[Value] {
+ self.vregs[vreg].as_slice(&self.pool)
+ }
+
+ /// Get an iterator over all virtual registers.
+ pub fn all_virtregs(&self) -> Keys<VirtReg> {
+ self.vregs.keys()
+ }
+
+ /// Get the congruence class of `value`.
+ ///
+ /// If `value` belongs to a virtual register, the congruence class consists of the values of the virtual
+ /// register. Otherwise it is just the value itself.
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))]
+ pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value]
+ where
+ 'a: 'b,
+ {
+ self.get(*value)
+ .map_or_else(|| slice::from_ref(value), |vr| self.values(vr))
+ }
+
+ /// Check if `a` and `b` belong to the same congruence class.
+ pub fn same_class(&self, a: Value, b: Value) -> bool {
+ match (self.get(a), self.get(b)) {
+ (Some(va), Some(vb)) => va == vb,
+ _ => a == b,
+ }
+ }
+
+ /// Sort the values in `vreg` according to the dominator tree pre-order.
+ ///
+ /// Returns the slice of sorted values which `values(vreg)` will also return from now on.
+ pub fn sort_values(
+ &mut self,
+ vreg: VirtReg,
+ func: &Function,
+ preorder: &DominatorTreePreorder,
+ ) -> &[Value] {
+ let s = self.vregs[vreg].as_mut_slice(&mut self.pool);
+ s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func));
+ s
+ }
+
+ /// Insert a single value into a sorted virtual register.
+ ///
+ /// It is assumed that the virtual register containing `big` is already sorted by
+ /// `sort_values()`, and that `single` does not already belong to a virtual register.
+ ///
+ /// If `big` is not part of a virtual register, one will be created.
+ pub fn insert_single(
+ &mut self,
+ big: Value,
+ single: Value,
+ func: &Function,
+ preorder: &DominatorTreePreorder,
+ ) -> VirtReg {
+ debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);
+
+ // Make sure `big` has a vreg.
+ let vreg = self.get(big).unwrap_or_else(|| {
+ let vr = self.alloc();
+ self.vregs[vr].push(big, &mut self.pool);
+ self.value_vregs[big] = vr.into();
+ vr
+ });
+
+ // Determine the insertion position for `single`.
+ let index = match self
+ .values(vreg)
+ .binary_search_by(|&v| preorder.pre_cmp_def(v, single, func))
+ {
+ Ok(_) => panic!("{} already in {}", single, vreg),
+ Err(i) => i,
+ };
+ self.vregs[vreg].insert(index, single, &mut self.pool);
+ self.value_vregs[single] = vreg.into();
+ vreg
+ }
+
+ /// Remove a virtual register.
+ ///
+ /// The values in `vreg` become singletons, and the virtual register number may be reused in
+ /// the future.
+ pub fn remove(&mut self, vreg: VirtReg) {
+ // Start by reassigning all the values.
+ for &v in self.vregs[vreg].as_slice(&self.pool) {
+ let old = self.value_vregs[v].take();
+ debug_assert_eq!(old, Some(vreg));
+ }
+
+ self.vregs[vreg].clear(&mut self.pool);
+ self.unused_vregs.push(vreg);
+ }
+
+ /// Allocate a new empty virtual register.
+ fn alloc(&mut self) -> VirtReg {
+ self.unused_vregs
+ .pop()
+ .unwrap_or_else(|| self.vregs.push(Default::default()))
+ }
+
+ /// Unify `values` into a single virtual register.
+ ///
+ /// The values in the slice can be singletons or they can belong to a virtual register already.
+ /// If a value belongs to a virtual register, all of the values in that register must be
+ /// present.
+ ///
+ /// The values are assumed to already be in topological order.
+ pub fn unify(&mut self, values: &[Value]) -> VirtReg {
+ // Start by clearing all virtual registers involved.
+ let mut singletons = 0;
+ let mut cleared = 0;
+ for &val in values {
+ match self.get(val) {
+ None => singletons += 1,
+ Some(vreg) => {
+ if !self.vregs[vreg].is_empty() {
+ cleared += self.vregs[vreg].len(&self.pool);
+ self.vregs[vreg].clear(&mut self.pool);
+ self.unused_vregs.push(vreg);
+ }
+ }
+ }
+ }
+
+ debug_assert_eq!(
+ values.len(),
+ singletons + cleared,
+ "Can't unify partial virtual registers"
+ );
+
+ let vreg = self.alloc();
+ self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool);
+ for &v in values {
+ self.value_vregs[v] = vreg.into();
+ }
+
+ vreg
+ }
+}
+
+impl fmt::Display for VirtRegs {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ for vreg in self.all_virtregs() {
+ write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?;
+ }
+ Ok(())
+ }
+}
+
+/// Expanded version of a union-find table entry.
+enum UFEntry {
+ /// This value is a set leader. The embedded number is the set's rank.
+ Rank(u32),
+
+ /// This value belongs to the same set as the linked value.
+ Link(Value),
+}
+
+/// The `union_find` table contains `i32` entries that are interpreted as follows:
+///
+/// x = 0: The value belongs to its own singleton set.
+/// x > 0: The value is the leader of a set with rank x.
+/// x < 0: The value belongs to the same set as the value numbered !x.
+///
+/// The rank of a set is an upper bound on the number of links that must be followed from a member
+/// of the set to the set leader.
+///
+/// A singleton set is the same as a set with rank 0. It contains only the leader value.
+impl UFEntry {
+ /// Decode a table entry.
+ fn decode(x: i32) -> Self {
+ if x < 0 {
+ Self::Link(Value::from_u32((!x) as u32))
+ } else {
+ Self::Rank(x as u32)
+ }
+ }
+
+ /// Encode a link entry.
+ fn encode_link(v: Value) -> i32 {
+ !(v.as_u32() as i32)
+ }
+}
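+
+// A small worked example of the encoding described above (the value number is illustrative):
+// for a value `v5` with index 5, `UFEntry::encode_link(v5)` yields `!5 == -6`, and
+// `UFEntry::decode(-6)` recovers `Link(v5)`; `decode(0)` is `Rank(0)`, i.e. a singleton set,
+// and `decode(3)` is a set leader of rank 3.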
+
+/// Union-find algorithm for building virtual registers.
+///
+/// Before values are added to virtual registers, it is possible to use a union-find algorithm to
+/// construct virtual registers efficiently. The support implemented here is used as follows:
+///
+/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` are placed in the same
+/// virtual register.
+/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the
+/// `union()` calls.
+///
+/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers
+/// by the time `finish_union_find()` is called.
+///
+/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for
+/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to algorithms", 3rd Ed.
+///
+/// The [Wikipedia entry on disjoint-set data
+/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good.
+impl VirtRegs {
+ /// Find the leader value and rank of the set containing `v`.
+ /// Compress the path if needed.
+ fn find(&mut self, mut val: Value) -> (Value, u32) {
+ let mut val_stack = SmallVec::<[Value; 8]>::new();
+ let found = loop {
+ match UFEntry::decode(self.union_find[val]) {
+ UFEntry::Rank(rank) => break (val, rank),
+ UFEntry::Link(parent) => {
+ val_stack.push(val);
+ val = parent;
+ }
+ }
+ };
+ // Compress the path
+ while let Some(val) = val_stack.pop() {
+ self.union_find[val] = UFEntry::encode_link(found.0);
+ }
+ found
+ }
+
+ /// Union the two sets containing `a` and `b`.
+ ///
+ /// This ensures that `a` and `b` will belong to the same virtual register after calling
+ /// `finish_union_find()`.
+ pub fn union(&mut self, a: Value, b: Value) {
+ let (leader_a, rank_a) = self.find(a);
+ let (leader_b, rank_b) = self.find(b);
+
+ if leader_a == leader_b {
+ return;
+ }
+
+ // The first time we see a value, its rank will be 0. Add it to the list of pending values.
+ if rank_a == 0 {
+ debug_assert_eq!(a, leader_a);
+ self.pending_values.push(a);
+ }
+ if rank_b == 0 {
+ debug_assert_eq!(b, leader_b);
+ self.pending_values.push(b);
+ }
+
+ // Merge into the set with the greater rank. This preserves the invariant that the rank is
+ // an upper bound on the number of links to the leader.
+ match rank_a.cmp(&rank_b) {
+ Ordering::Less => {
+ self.union_find[leader_a] = UFEntry::encode_link(leader_b);
+ }
+ Ordering::Greater => {
+ self.union_find[leader_b] = UFEntry::encode_link(leader_a);
+ }
+ Ordering::Equal => {
+ // When the two sets have the same rank, we arbitrarily pick the a-set to preserve.
+ // We need to increase the rank by one since the elements in the b-set are now one
+ // link further away from the leader.
+ self.union_find[leader_a] += 1;
+ self.union_find[leader_b] = UFEntry::encode_link(leader_a);
+ }
+ }
+ }
+
+ /// Compute virtual registers based on previous calls to `union(a, b)`.
+ ///
+ /// This terminates the union-find algorithm, so the next time `union()` is called, it is for a
+ /// new independent batch of values.
+ ///
+ /// The values in each virtual register will be ordered according to when they were first
+ /// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to
+ /// create a more sensible value order.
+ ///
+ /// The new virtual registers will be appended to `new_vregs`, if present.
+ pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec<VirtReg>>) {
+ debug_assert_eq!(
+ self.pending_values.iter().find(|&&v| self.get(v).is_some()),
+ None,
+ "Values participating in union-find must not belong to existing virtual registers"
+ );
+
+ while let Some(val) = self.pending_values.pop() {
+ let (leader, _) = self.find(val);
+
+ // Get the vreg for `leader`, or create it.
+ let vreg = self.get(leader).unwrap_or_else(|| {
+ // Allocate a vreg for `leader`, but leave it empty.
+ let vr = self.alloc();
+ if let Some(ref mut vec) = new_vregs {
+ vec.push(vr);
+ }
+ self.value_vregs[leader] = vr.into();
+ vr
+ });
+
+ // Push values in `pending_values` order, including when `val == leader`.
+ self.vregs[vreg].push(val, &mut self.pool);
+ self.value_vregs[val] = vreg.into();
+
+ // Clear the entry in the union-find table. The `find(val)` call may still look at this
+ // entry in a future iteration, but that is ok. It will return a rank 0 leader that has
+ // already been assigned to the correct virtual register.
+ self.union_find[val] = 0;
+ }
+
+ // We do *not* call `union_find.clear()` here because re-initializing the table for
+ // sparse use takes time linear in the number of values in the function. Instead we reset
+ // the entries that are known to be non-zero in the loop above.
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::entity::EntityRef;
+ use crate::ir::Value;
+
+ #[test]
+ fn empty_union_find() {
+ let mut vregs = VirtRegs::new();
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.all_virtregs().count(), 0);
+ }
+
+ #[test]
+ fn union_self() {
+ let mut vregs = VirtRegs::new();
+ let v1 = Value::new(1);
+ vregs.union(v1, v1);
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.get(v1), None);
+ assert_eq!(vregs.all_virtregs().count(), 0);
+ }
+
+ #[test]
+ fn union_pair() {
+ let mut vregs = VirtRegs::new();
+ let v1 = Value::new(1);
+ let v2 = Value::new(2);
+ vregs.union(v1, v2);
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.congruence_class(&v1), &[v2, v1]);
+ assert_eq!(vregs.congruence_class(&v2), &[v2, v1]);
+ assert_eq!(vregs.all_virtregs().count(), 1);
+ }
+
+ #[test]
+ fn union_pair_backwards() {
+ let mut vregs = VirtRegs::new();
+ let v1 = Value::new(1);
+ let v2 = Value::new(2);
+ vregs.union(v2, v1);
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.congruence_class(&v1), &[v1, v2]);
+ assert_eq!(vregs.congruence_class(&v2), &[v1, v2]);
+ assert_eq!(vregs.all_virtregs().count(), 1);
+ }
+
+ #[test]
+ fn union_tree() {
+ let mut vregs = VirtRegs::new();
+ let v1 = Value::new(1);
+ let v2 = Value::new(2);
+ let v3 = Value::new(3);
+ let v4 = Value::new(4);
+
+ vregs.union(v2, v4);
+ vregs.union(v3, v1);
+ // Leaders: v2, v3
+ vregs.union(v4, v1);
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.all_virtregs().count(), 1);
+ }
+
+ #[test]
+ fn union_two() {
+ let mut vregs = VirtRegs::new();
+ let v1 = Value::new(1);
+ let v2 = Value::new(2);
+ let v3 = Value::new(3);
+ let v4 = Value::new(4);
+
+ vregs.union(v2, v4);
+ vregs.union(v3, v1);
+ // Leaders: v2, v3
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.congruence_class(&v1), &[v1, v3]);
+ assert_eq!(vregs.congruence_class(&v2), &[v4, v2]);
+ assert_eq!(vregs.congruence_class(&v3), &[v1, v3]);
+ assert_eq!(vregs.congruence_class(&v4), &[v4, v2]);
+ assert_eq!(vregs.all_virtregs().count(), 2);
+ }
+
+ #[test]
+ fn union_uneven() {
+ let mut vregs = VirtRegs::new();
+ let v1 = Value::new(1);
+ let v2 = Value::new(2);
+ let v3 = Value::new(3);
+ let v4 = Value::new(4);
+
+ vregs.union(v2, v4); // Rank 0-0
+ vregs.union(v3, v2); // Rank 0-1
+ vregs.union(v2, v1); // Rank 1-0
+ vregs.finish_union_find(None);
+ assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
+ assert_eq!(vregs.all_virtregs().count(), 1);
+ }
+}