author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-28 14:29:10 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-28 14:29:10 +0000
commit     2aa4a82499d4becd2284cdb482213d541b8804dd
tree       b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/cranelift-codegen/src/regalloc
parent     Initial commit.
Adding upstream version 86.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/cranelift-codegen/src/regalloc')
17 files changed, 8663 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/regalloc/affinity.rs b/third_party/rust/cranelift-codegen/src/regalloc/affinity.rs
new file mode 100644
index 0000000000..efcc4dabfa
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/regalloc/affinity.rs
@@ -0,0 +1,126 @@
+//! Value affinity for register allocation.
+//!
+//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
+//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
+//! instruction operand constraints.
+//!
+//! For values that want to be in registers, the affinity hint includes a register class or
+//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
+//! larger register class instead.
+
+use crate::ir::{AbiParam, ArgumentLoc};
+use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
+use core::fmt;
+
+/// Preferred register allocation for an SSA value.
+#[derive(Clone, Copy, Debug)]
+pub enum Affinity {
+    /// No affinity.
+    ///
+    /// This indicates a value that is not defined or used by any real instructions. It is a ghost
+    /// value that won't appear in the final program.
+    Unassigned,
+
+    /// This value should be placed in a spill slot on the stack.
+    Stack,
+
+    /// This value prefers a register from the given register class.
+    Reg(RegClassIndex),
+}
+
+impl Default for Affinity {
+    fn default() -> Self {
+        Self::Unassigned
+    }
+}
+
+impl Affinity {
+    /// Create an affinity that satisfies a single constraint.
+    ///
+    /// This will never create an `Affinity::Unassigned`.
+    /// Use the `Default` implementation for that.
+    pub fn new(constraint: &OperandConstraint) -> Self {
+        if constraint.kind == ConstraintKind::Stack {
+            Self::Stack
+        } else {
+            Self::Reg(constraint.regclass.into())
+        }
+    }
+
+    /// Create an affinity that matches an ABI argument for `isa`.
+    pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self {
+        match arg.location {
+            ArgumentLoc::Unassigned => Self::Unassigned,
+            ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
+            ArgumentLoc::Stack(_) => Self::Stack,
+        }
+    }
+
+    /// Is this the `Unassigned` affinity?
+    pub fn is_unassigned(self) -> bool {
+        match self {
+            Self::Unassigned => true,
+            _ => false,
+        }
+    }
+
+    /// Is this the `Reg` affinity?
+    pub fn is_reg(self) -> bool {
+        match self {
+            Self::Reg(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Is this the `Stack` affinity?
+    pub fn is_stack(self) -> bool {
+        match self {
+            Self::Stack => true,
+            _ => false,
+        }
+    }
+
+    /// Merge an operand constraint into this affinity.
+    ///
+    /// Note that this does not guarantee that the register allocator will pick a register that
+    /// satisfies the constraint.
+    pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
+        match *self {
+            Self::Unassigned => *self = Self::new(constraint),
+            Self::Reg(rc) => {
+                // If the preferred register class is a subclass of the constraint, there's no need
+                // to change anything.
+                if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
+                {
+                    // If the register classes overlap, try to shrink our preferred register class.
+                    if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
+                        *self = Self::Reg(subclass);
+                    }
+                }
+            }
+            Self::Stack => {}
+        }
+    }
+
+    /// Return an object that can display this value affinity, using the register info from the
+    /// target ISA.
+ pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> { + DisplayAffinity(self, regs.into()) + } +} + +/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA. +pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>); + +impl<'a> fmt::Display for DisplayAffinity<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + Affinity::Unassigned => write!(f, "unassigned"), + Affinity::Stack => write!(f, "stack"), + Affinity::Reg(rci) => match self.1 { + Some(regs) => write!(f, "{}", regs.rc(rci)), + None => write!(f, "{}", rci), + }, + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs b/third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs new file mode 100644 index 0000000000..4e9a159f3e --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/branch_splitting.rs @@ -0,0 +1,169 @@ +//! Split the outgoing edges of conditional branches that pass parameters. +//! +//! One of the reason for splitting edges is to be able to insert `copy` and `regmove` instructions +//! between a conditional branch and the following terminator. +use alloc::vec::Vec; + +use crate::cursor::{Cursor, EncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList}; +use crate::isa::TargetIsa; +use crate::topo_order::TopoOrder; + +pub fn run( + isa: &dyn TargetIsa, + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, + topo: &mut TopoOrder, +) { + let mut ctx = Context { + has_new_blocks: false, + cur: EncCursor::new(func, isa), + domtree, + topo, + cfg, + }; + ctx.run() +} + +struct Context<'a> { + /// True if new blocks were inserted. + has_new_blocks: bool, + + /// Current instruction as well as reference to function and ISA. + cur: EncCursor<'a>, + + /// References to contextual data structures we need. + domtree: &'a mut DominatorTree, + topo: &'a mut TopoOrder, + cfg: &'a mut ControlFlowGraph, +} + +impl<'a> Context<'a> { + fn run(&mut self) { + // Any block order will do. + self.topo.reset(self.cur.func.layout.blocks()); + while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { + // Branches can only be at the last or second to last position in an extended basic + // block. + self.cur.goto_last_inst(block); + let terminator_inst = self.cur.current_inst().expect("terminator"); + if let Some(inst) = self.cur.prev_inst() { + let opcode = self.cur.func.dfg[inst].opcode(); + if opcode.is_branch() { + self.visit_conditional_branch(inst, opcode); + self.cur.goto_inst(terminator_inst); + self.visit_terminator_branch(terminator_inst); + } + } + } + + // If blocks were added the cfg and domtree are inconsistent and must be recomputed. + if self.has_new_blocks { + self.cfg.compute(&self.cur.func); + self.domtree.compute(&self.cur.func, self.cfg); + } + } + + fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) { + // TODO: target = dfg[branch].branch_destination().expect("conditional branch"); + let target = match self.cur.func.dfg[branch] { + InstructionData::Branch { destination, .. } + | InstructionData::BranchIcmp { destination, .. } + | InstructionData::BranchInt { destination, .. } + | InstructionData::BranchFloat { destination, .. 
} => destination, + _ => panic!("Unexpected instruction in visit_conditional_branch"), + }; + + // If there are any parameters, split the edge. + if self.should_split_edge(target) { + // Create the block the branch will jump to. + let new_block = self.cur.func.dfg.make_block(); + + // Insert the new block before the destination, such that it can fallthrough in the + // target block. + assert_ne!(Some(target), self.cur.layout().entry_block()); + self.cur.layout_mut().insert_block(new_block, target); + self.has_new_blocks = true; + + // Extract the arguments of the branch instruction, split the Block parameters and the + // branch arguments + let num_fixed = opcode.constraints().num_fixed_value_arguments(); + let dfg = &mut self.cur.func.dfg; + let old_args: Vec<_> = { + let args = dfg[branch].take_value_list().expect("block parameters"); + args.as_slice(&dfg.value_lists).iter().copied().collect() + }; + let (branch_args, block_params) = old_args.split_at(num_fixed); + + // Replace the branch destination by the new Block created with no parameters, and restore + // the branch arguments, without the original Block parameters. + { + let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists); + let data = &mut dfg[branch]; + *data.branch_destination_mut().expect("branch") = new_block; + data.put_value_list(branch_args); + } + let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok(); + debug_assert!(ok); + + // Insert a jump to the original target with its arguments into the new block. + self.cur.goto_first_insertion_point(new_block); + self.cur.ins().jump(target, block_params); + + // Reset the cursor to point to the branch. + self.cur.goto_inst(branch); + } + } + + fn visit_terminator_branch(&mut self, inst: Inst) { + let inst_data = &self.cur.func.dfg[inst]; + let opcode = inst_data.opcode(); + if opcode != Opcode::Jump && opcode != Opcode::Fallthrough { + // This opcode is ignored as it does not have any block parameters. + if opcode != Opcode::IndirectJumpTableBr { + debug_assert!(!opcode.is_branch()) + } + return; + } + + let target = match inst_data { + InstructionData::Jump { destination, .. } => destination, + _ => panic!( + "Unexpected instruction {} in visit_terminator_branch", + self.cur.display_inst(inst) + ), + }; + debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator()); + + // If there are any parameters, split the edge. + if self.should_split_edge(*target) { + // Create the block the branch will jump to. + let new_block = self.cur.func.dfg.make_block(); + self.has_new_blocks = true; + + // Split the current block before its terminator, and insert a new jump instruction to + // jump to it. + let jump = self.cur.ins().jump(new_block, &[]); + self.cur.insert_block(new_block); + + // Reset the cursor to point to new terminator of the old block. + self.cur.goto_inst(jump); + } + } + + /// Returns whether we should introduce a new branch. + fn should_split_edge(&self, target: Block) -> bool { + // We should split the edge if the target has any parameters. + if !self.cur.func.dfg.block_params(target).is_empty() { + return true; + }; + + // Or, if the target has more than one block reaching it. 
+ debug_assert!(self.cfg.pred_iter(target).next() != None); + + self.cfg.pred_iter(target).nth(1).is_some() + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs b/third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs new file mode 100644 index 0000000000..23c7bb14b4 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/coalescing.rs @@ -0,0 +1,1107 @@ +//! Constructing Conventional SSA form. +//! +//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related +//! values do not interfere. We construct CSSA by building virtual registers that are as large as +//! possible and inserting copies where necessary such that all argument values passed to a block +//! parameter will belong to the same virtual register as the block parameter value itself. + +use crate::cursor::{Cursor, EncCursor}; +use crate::dbg::DisplayList; +use crate::dominator_tree::{DominatorTree, DominatorTreePreorder}; +use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; +use crate::fx::FxHashMap; +use crate::ir::{self, InstBuilder, ProgramOrder}; +use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value}; +use crate::isa::{EncInfo, TargetIsa}; +use crate::regalloc::affinity::Affinity; +use crate::regalloc::liveness::Liveness; +use crate::regalloc::virtregs::{VirtReg, VirtRegs}; +use crate::timing; +use alloc::vec::Vec; +use core::cmp; +use core::fmt; +use core::iter; +use core::slice; +use log::debug; + +// # Implementation +// +// The coalescing algorithm implemented follows this paper fairly closely: +// +// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and +// live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534 +// +// We use a more efficient dominator forest representation (a linear stack) described here: +// +// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for +// correctness, code quality and efficiency. +// +// The algorithm has two main phases: +// +// Phase 1: Union-find. +// +// We use the union-find support in `VirtRegs` to build virtual registers such that block parameter +// values always belong to the same virtual register as their corresponding block arguments at the +// predecessor branches. Trivial interferences between parameter and argument value live ranges are +// detected and resolved before unioning congruence classes, but non-trivial interferences between +// values that end up in the same congruence class are possible. +// +// Phase 2: Dominator forests. +// +// The virtual registers formed in phase 1 can contain interferences that we need to detect and +// eliminate. By ordering the values in a virtual register according to a dominator tree pre-order, +// we can identify all interferences in the virtual register in linear time. +// +// Interfering values are isolated and virtual registers rebuilt. + +/// Data structures to be used by the coalescing pass. +pub struct Coalescing { + preorder: DominatorTreePreorder, + forest: DomForest, + vcopies: VirtualCopies, + values: Vec<Value>, + predecessors: Vec<Inst>, + backedges: Vec<Inst>, +} + +/// One-shot context created once per invocation. 
+struct Context<'a> { + isa: &'a dyn TargetIsa, + encinfo: EncInfo, + + func: &'a mut Function, + cfg: &'a ControlFlowGraph, + domtree: &'a DominatorTree, + preorder: &'a DominatorTreePreorder, + liveness: &'a mut Liveness, + virtregs: &'a mut VirtRegs, + + forest: &'a mut DomForest, + vcopies: &'a mut VirtualCopies, + values: &'a mut Vec<Value>, + predecessors: &'a mut Vec<Inst>, + backedges: &'a mut Vec<Inst>, +} + +impl Coalescing { + /// Create a new coalescing pass. + pub fn new() -> Self { + Self { + forest: DomForest::new(), + preorder: DominatorTreePreorder::new(), + vcopies: VirtualCopies::new(), + values: Vec::new(), + predecessors: Vec::new(), + backedges: Vec::new(), + } + } + + /// Clear all data structures in this coalescing pass. + pub fn clear(&mut self) { + self.forest.clear(); + self.vcopies.clear(); + self.values.clear(); + self.predecessors.clear(); + self.backedges.clear(); + } + + /// Convert `func` to Conventional SSA form and build virtual registers in the process. + pub fn conventional_ssa( + &mut self, + isa: &dyn TargetIsa, + func: &mut Function, + cfg: &ControlFlowGraph, + domtree: &DominatorTree, + liveness: &mut Liveness, + virtregs: &mut VirtRegs, + ) { + let _tt = timing::ra_cssa(); + debug!("Coalescing for:\n{}", func.display(isa)); + self.preorder.compute(domtree, &func.layout); + let mut context = Context { + isa, + encinfo: isa.encoding_info(), + func, + cfg, + domtree, + preorder: &self.preorder, + liveness, + virtregs, + forest: &mut self.forest, + vcopies: &mut self.vcopies, + values: &mut self.values, + predecessors: &mut self.predecessors, + backedges: &mut self.backedges, + }; + + // Run phase 1 (union-find) of the coalescing algorithm on the current function. + for &block in domtree.cfg_postorder() { + context.union_find_block(block); + } + context.finish_union_find(); + + // Run phase 2 (dominator forests) on the current function. + context.process_vregs(); + } +} + +/// Phase 1: Union-find. +/// +/// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`. +impl<'a> Context<'a> { + /// Run the union-find algorithm on the parameter values on `block`. + /// + /// This ensure that all block parameters will belong to the same virtual register as their + /// corresponding arguments at all predecessor branches. + pub fn union_find_block(&mut self, block: Block) { + let num_params = self.func.dfg.num_block_params(block); + if num_params == 0 { + return; + } + + self.isolate_conflicting_params(block, num_params); + + for i in 0..num_params { + self.union_pred_args(block, i); + } + } + + // Identify block parameter values that are live at one of the predecessor branches. + // + // Such a parameter value will conflict with any argument value at the predecessor branch, so + // it must be isolated by inserting a copy. + fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) { + debug_assert_eq!(num_params, self.func.dfg.num_block_params(block)); + // The only way a parameter value can interfere with a predecessor branch is if the block is + // dominating the predecessor branch. That is, we are looking for loop back-edges. + for BlockPredecessor { + block: pred_block, + inst: pred_inst, + } in self.cfg.pred_iter(block) + { + // The quick pre-order dominance check is accurate because the block parameter is defined + // at the top of the block before any branches. 
+ if !self.preorder.dominates(block, pred_block) { + continue; + } + + debug!( + " - checking {} params at back-edge {}: {}", + num_params, + pred_block, + self.func.dfg.display_inst(pred_inst, self.isa) + ); + + // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values + // to be live at the use. + for i in 0..num_params { + let param = self.func.dfg.block_params(block)[i]; + if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) { + self.isolate_param(block, param); + } + } + } + } + + // Union block parameter value `num` with the corresponding block arguments on the predecessor + // branches. + // + // Detect cases where the argument value is live-in to `block` so it conflicts with any block + // parameter. Isolate the argument in those cases before unioning it with the parameter value. + fn union_pred_args(&mut self, block: Block, argnum: usize) { + let param = self.func.dfg.block_params(block)[argnum]; + + for BlockPredecessor { + block: pred_block, + inst: pred_inst, + } in self.cfg.pred_iter(block) + { + let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + + // Never coalesce incoming function parameters on the stack. These parameters are + // pre-spilled, and the rest of the virtual register would be forced to spill to the + // `incoming_arg` stack slot too. + if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) { + if Some(def_block) == self.func.layout.entry_block() + && self.func.signature.params[def_num].location.is_stack() + { + debug!("-> isolating function stack parameter {}", arg); + let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); + self.virtregs.union(param, new_arg); + continue; + } + } + + // Check for basic interference: If `arg` overlaps a value defined at the entry to + // `block`, it can never be used as a block argument. + let interference = { + let lr = &self.liveness[arg]; + + // There are two ways the argument value can interfere with `block`: + // + // 1. It is defined in a dominating block and live-in to `block`. + // 2. If is itself a parameter value for `block`. This case should already have been + // eliminated by `isolate_conflicting_params()`. + debug_assert!( + lr.def() != block.into(), + "{} parameter {} was missed by isolate_conflicting_params()", + block, + arg + ); + + // The only other possibility is that `arg` is live-in to `block`. + lr.is_livein(block, &self.func.layout) + }; + + if interference { + let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); + self.virtregs.union(param, new_arg); + } else { + self.virtregs.union(param, arg); + } + } + } + + // Isolate block parameter value `param` on `block`. + // + // When `param=v10`: + // + // block1(v10: i32): + // foo + // + // becomes: + // + // block1(v11: i32): + // v10 = copy v11 + // foo + // + // This function inserts the copy and updates the live ranges of the old and new parameter + // values. Returns the new parameter value. + fn isolate_param(&mut self, block: Block, param: Value) -> Value { + debug_assert_eq!( + self.func.dfg.value_def(param).pp(), + ExpandedProgramPoint::Block(block) + ); + let ty = self.func.dfg.value_type(param); + let new_val = self.func.dfg.replace_block_param(param, ty); + + // Insert a copy instruction at the top of `block`. 
+ let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block); + if let Some(inst) = pos.current_inst() { + pos.use_srcloc(inst); + } + pos.ins().with_result(param).copy(new_val); + let inst = pos.built_inst(); + self.liveness.move_def_locally(param, inst); + + debug!( + "-> inserted {}, following {}({}: {})", + pos.display_inst(inst), + block, + new_val, + ty + ); + + // Create a live range for the new value. + // TODO: Should we handle ghost values? + let affinity = Affinity::new( + &self + .encinfo + .operand_constraints(pos.func.encodings[inst]) + .expect("Bad copy encoding") + .outs[0], + ); + self.liveness.create_dead(new_val, block, affinity); + self.liveness + .extend_locally(new_val, block, inst, &pos.func.layout); + + new_val + } + + // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`. + // + // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th block + // argument is `pred_val`. Since the argument value interferes with the corresponding block + // parameter at the destination, a copy is used instead: + // + // brnz v1, block2(v10) + // + // Becomes: + // + // v11 = copy v10 + // brnz v1, block2(v11) + // + // This way the interference with the block parameter is avoided. + // + // A live range for the new value is created while the live range for `pred_val` is left + // unaltered. + // + // The new argument value is returned. + fn isolate_arg( + &mut self, + pred_block: Block, + pred_inst: Inst, + argnum: usize, + pred_val: Value, + ) -> Value { + let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst); + pos.use_srcloc(pred_inst); + let copy = pos.ins().copy(pred_val); + let inst = pos.built_inst(); + + // Create a live range for the new value. + // TODO: Handle affinity for ghost values. + let affinity = Affinity::new( + &self + .encinfo + .operand_constraints(pos.func.encodings[inst]) + .expect("Bad copy encoding") + .outs[0], + ); + self.liveness.create_dead(copy, inst, affinity); + self.liveness + .extend_locally(copy, pred_block, pred_inst, &pos.func.layout); + + pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy; + + debug!( + "-> inserted {}, before {}: {}", + pos.display_inst(inst), + pred_block, + pos.display_inst(pred_inst) + ); + + copy + } + + /// Finish the union-find part of the coalescing algorithm. + /// + /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric + /// closure of the relation formed by block parameter-argument pairs found by `union_find_block()`. + fn finish_union_find(&mut self) { + self.virtregs.finish_union_find(None); + debug!("After union-find phase:{}", self.virtregs); + } +} + +/// Phase 2: Dominator forests. +/// +/// The main entry point is `process_vregs()`. +impl<'a> Context<'a> { + /// Check al virtual registers for interference and fix conflicts. + pub fn process_vregs(&mut self) { + for vreg in self.virtregs.all_virtregs() { + self.process_vreg(vreg); + } + } + + // Check `vreg` for interferences and fix conflicts. + fn process_vreg(&mut self, vreg: VirtReg) { + if !self.check_vreg(vreg) { + self.synthesize_vreg(vreg); + } + } + + // Check `vreg` for interferences. + // + // We use a Budimlic dominator forest to check for interferences between the values in `vreg` + // and identify values that should be isolated. + // + // Returns true if `vreg` is free of interference. 
+ fn check_vreg(&mut self, vreg: VirtReg) -> bool { + // Order the values according to the dominator pre-order of their definition. + let values = self.virtregs.sort_values(vreg, self.func, self.preorder); + debug!("Checking {} = {}", vreg, DisplayList(values)); + + // Now push the values in order to the dominator forest. + // This gives us the closest dominating value def for each of the values. + self.forest.clear(); + for &value in values { + let node = Node::value(value, 0, self.func); + + // Push this value and get the nearest dominating def back. + let parent = match self + .forest + .push_node(node, self.func, self.domtree, self.preorder) + { + None => continue, + Some(n) => n, + }; + + // Check for interference between `parent` and `value`. Since `parent` dominates + // `value`, we only have to check if it overlaps the definition. + if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) { + // The two values are interfering, so they can't be in the same virtual register. + debug!("-> interference: {} overlaps def of {}", parent, value); + return false; + } + } + + // No interference found. + true + } + + /// Destroy and rebuild `vreg` by iterative coalescing. + /// + /// When detecting that a virtual register formed in phase 1 contains interference, we have to + /// start over in a more careful way. We'll split the vreg into individual values and then + /// reassemble virtual registers using an iterative algorithm of pairwise merging. + /// + /// It is possible to recover multiple large virtual registers this way while still avoiding + /// a lot of copies. + fn synthesize_vreg(&mut self, vreg: VirtReg) { + self.vcopies.initialize( + self.virtregs.values(vreg), + self.func, + self.cfg, + self.preorder, + ); + debug!( + "Synthesizing {} from {} branches and params {}", + vreg, + self.vcopies.branches.len(), + DisplayList(&self.vcopies.params) + ); + self.virtregs.remove(vreg); + + while let Some(param) = self.vcopies.next_param() { + self.merge_param(param); + self.vcopies.merged_param(param, self.func); + } + } + + /// Merge block parameter value `param` with virtual registers at its predecessors. + fn merge_param(&mut self, param: Value) { + let (block, argnum) = match self.func.dfg.value_def(param) { + ir::ValueDef::Param(e, n) => (e, n), + ir::ValueDef::Result(_, _) => panic!("Expected parameter"), + }; + + // Collect all the predecessors and rearrange them. + // + // The order we process the predecessors matters because once one predecessor's virtual + // register is merged, it can cause interference with following merges. This means that the + // first predecessors processed are more likely to be copy-free. We want an ordering that + // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses + // instruction numbers which is not great for reproducible test cases. + // + // First merge loop back-edges in layout order, on the theory that shorter back-edges are + // more sensitive to inserted copies. + // + // Second everything else in reverse layout order. Again, short forward branches get merged + // first. There can also be backwards branches mixed in here, though, as long as they are + // not loop backedges. 
+ debug_assert!(self.predecessors.is_empty()); + debug_assert!(self.backedges.is_empty()); + for BlockPredecessor { + block: pred_block, + inst: pred_inst, + } in self.cfg.pred_iter(block) + { + if self.preorder.dominates(block, pred_block) { + self.backedges.push(pred_inst); + } else { + self.predecessors.push(pred_inst); + } + } + // Order instructions in reverse order so we can pop them off the back. + { + let l = &self.func.layout; + self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a)); + self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b)); + self.predecessors.extend_from_slice(&self.backedges); + self.backedges.clear(); + } + + while let Some(pred_inst) = self.predecessors.pop() { + let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + + // We want to merge the vreg containing `param` with the vreg containing `arg`. + if self.try_merge_vregs(param, arg) { + continue; + } + + // Can't merge because of interference. Insert a copy instead. + let pred_block = self.func.layout.pp_block(pred_inst); + let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); + self.virtregs + .insert_single(param, new_arg, self.func, self.preorder); + } + } + + /// Merge the virtual registers containing `param` and `arg` if possible. + /// + /// Use self.vcopies to check for virtual copy interference too. + /// + /// Returns true if the virtual registers are successfully merged. + fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool { + if self.virtregs.same_class(param, arg) { + return true; + } + + if !self.can_merge_vregs(param, arg) { + return false; + } + + let _vreg = self.virtregs.unify(self.values); + debug!("-> merged into {} = {}", _vreg, DisplayList(self.values)); + true + } + + /// Check if it is possible to merge two virtual registers. + /// + /// Also leave `self.values` with the ordered list of values in the merged vreg. + fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool { + // We only need an immutable function reference. + let func = &*self.func; + let domtree = self.domtree; + let preorder = self.preorder; + + // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties + // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`. + self.vcopies + .set_filter([param, arg], func, self.virtregs, preorder); + + // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual + // registers and the filtered virtual copies. + let v0 = self.virtregs.congruence_class(¶m); + let v1 = self.virtregs.congruence_class(&arg); + debug!( + " - set 0: {}\n - set 1: {}", + DisplayList(v0), + DisplayList(v1) + ); + let nodes = MergeNodes::new( + func, + preorder, + MergeNodes::new( + func, + preorder, + v0.iter().map(|&value| Node::value(value, 0, func)), + v1.iter().map(|&value| Node::value(value, 1, func)), + ), + self.vcopies.iter(func), + ); + + // Now push the values in order to the dominator forest. + // This gives us the closest dominating value def for each of the values. + self.forest.clear(); + self.values.clear(); + for node in nodes { + // Accumulate ordered values for the new vreg. + if node.is_value() { + self.values.push(node.value); + } + + // Push this value and get the nearest dominating def back. 
+ let parent = match self.forest.push_node(node, func, domtree, preorder) { + None => { + if node.is_vcopy { + self.forest.pop_last(); + } + continue; + } + Some(n) => n, + }; + + if node.is_vcopy { + // Vcopy nodes don't represent interference if they are copies of the parent value. + // In that case, the node must be removed because the parent value can still be + // live belong the vcopy. + if parent.is_vcopy || node.value == parent.value { + self.forest.pop_last(); + continue; + } + + // Check if the parent value interferes with the virtual copy. + let inst = node.def.unwrap_inst(); + if node.set_id != parent.set_id + && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout) + { + debug!( + " - interference: {} overlaps vcopy at {}:{}", + parent, + node.block, + self.func.dfg.display_inst(inst, self.isa) + ); + return false; + } + + // Keep this vcopy on the stack. It will save us a few interference checks. + continue; + } + + // Parent vcopies never represent any interference. We only keep them on the stack to + // avoid an interference check against a value higher up. + if parent.is_vcopy { + continue; + } + + // Both node and parent are values, so check for interference. + debug_assert!(node.is_value() && parent.is_value()); + if node.set_id != parent.set_id + && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) + { + // The two values are interfering. + debug!(" - interference: {} overlaps def of {}", parent, node.value); + return false; + } + } + + // The values vector should receive all values. + debug_assert_eq!(v0.len() + v1.len(), self.values.len()); + + // No interference found. + true + } +} + +/// Dominator forest. +/// +/// This is a utility type used for detecting interference in virtual registers, where each virtual +/// register is a list of values ordered according to the dominator tree pre-order. +/// +/// The idea of a dominator forest was introduced on the Budimlic paper and the linear stack +/// representation in the Boissinot paper. Our version of the linear stack is slightly modified +/// because we have a pre-order of the dominator tree at the block granularity, not basic block +/// granularity. +/// +/// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed, +/// `push_node` will return the nearest previously pushed value that dominates the definition. +#[allow(dead_code)] +struct DomForest { + // Stack representing the rightmost edge of the dominator forest so far, ending in the last + // element of `values`. + // + // At all times, the block of each element in the stack dominates the block of the next one. + stack: Vec<Node>, +} + +/// A node in the dominator forest. +#[derive(Clone, Copy, Debug)] +#[allow(dead_code)] +struct Node { + /// The program point where the live range is defined. + def: ExpandedProgramPoint, + /// block containing `def`. + block: Block, + /// Is this a virtual copy or a value? + is_vcopy: bool, + /// Set identifier. + set_id: u8, + /// For a value node: The value defined at `def`. + /// For a vcopy node: The relevant branch argument at `def`. + value: Value, +} + +impl Node { + /// Create a node representing `value`. + pub fn value(value: Value, set_id: u8, func: &Function) -> Self { + let def = func.dfg.value_def(value).pp(); + let block = func.layout.pp_block(def); + Self { + def, + block, + is_vcopy: false, + set_id, + value, + } + } + + /// Create a node representing a virtual copy. 
+ pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self { + let def = branch.into(); + let block = func.layout.pp_block(def); + Self { + def, + block, + is_vcopy: true, + set_id, + value, + } + } + + /// IF this a value node? + pub fn is_value(&self) -> bool { + !self.is_vcopy + } +} + +impl fmt::Display for Node { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.is_vcopy { + write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block) + } else { + write!(f, "{}:{}@{}", self.set_id, self.value, self.block) + } + } +} + +impl DomForest { + /// Create a new empty dominator forest. + pub fn new() -> Self { + Self { stack: Vec::new() } + } + + /// Clear all data structures in this dominator forest. + pub fn clear(&mut self) { + self.stack.clear(); + } + + /// Add a single node to the forest. + /// + /// Update the stack so its dominance invariants are preserved. Detect a parent node on the + /// stack which is the closest one dominating the new node and return it. + fn push_node( + &mut self, + node: Node, + func: &Function, + domtree: &DominatorTree, + preorder: &DominatorTreePreorder, + ) -> Option<Node> { + // The stack contains the current sequence of dominating defs. Pop elements until we + // find one whose block dominates `node.block`. + while let Some(top) = self.stack.pop() { + if preorder.dominates(top.block, node.block) { + // This is the right insertion spot for `node`. + self.stack.push(top); + self.stack.push(node); + + // We know here that `top.block` dominates `node.block`, and thus `node.def`. This does + // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def` + // program point may be below the last branch in `top.block` that dominates + // `node.def`. + // + // We do know, though, that if there is a nearest value dominating `node.def`, it + // will be on the stack. We just need to find the last stack entry that actually + // dominates. + let mut last_dom = node.def; + for &n in self.stack.iter().rev().skip(1) { + // If the node is defined at the block header, it does in fact dominate + // everything else pushed on the stack. + let def_inst = match n.def { + ExpandedProgramPoint::Block(_) => return Some(n), + ExpandedProgramPoint::Inst(i) => i, + }; + + // We need to find the last program point in `n.block` to dominate `node.def`. + last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) { + None => n.block.into(), + Some(inst) => { + if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater { + return Some(n); + } + inst.into() + } + }; + } + + // No real dominator found on the stack. + return None; + } + } + + // No dominators, start a new tree in the forest. + self.stack.push(node); + None + } + + pub fn pop_last(&mut self) { + self.stack.pop().expect("Stack is empty"); + } +} + +/// Virtual copies. +/// +/// When building a full virtual register at once, like phase 1 does with union-find, it is good +/// enough to check for interference between the values in the full virtual register like +/// `check_vreg()` does. However, in phase 2 we are doing pairwise merges of partial virtual +/// registers that don't represent the full transitive closure of the block argument-parameter +/// relation. This means that just checking for interference between values is inadequate. 
+/// +/// Example: +/// +/// v1 = iconst.i32 1 +/// brnz v10, block1(v1) +/// v2 = iconst.i32 2 +/// brnz v11, block1(v2) +/// return v1 +/// +/// block1(v3: i32): +/// v4 = iadd v3, v1 +/// +/// With just value interference checking, we could build the virtual register [v3, v1] since those +/// two values don't interfere. We can't merge v2 into this virtual register because v1 and v2 +/// interfere. However, we can't resolve that interference either by inserting a copy: +/// +/// v1 = iconst.i32 1 +/// brnz v10, block1(v1) +/// v2 = iconst.i32 2 +/// v20 = copy v2 <-- new value +/// brnz v11, block1(v20) +/// return v1 +/// +/// block1(v3: i32): +/// v4 = iadd v3, v1 +/// +/// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We +/// shouldn't have placed v1 and v3 in the same virtual register to begin with. +/// +/// LLVM detects this form of interference by inserting copies in the predecessors of all phi +/// instructions, then attempting to delete the copies. This is quite expensive because it involves +/// creating a large number of copies and value. +/// +/// We'll detect this form of interference with *virtual copies*: Each block parameter value that +/// hasn't yet been fully merged with its block argument values is given a set of virtual copies at +/// the predecessors. Any candidate value to be merged is checked for interference against both the +/// virtual register and the virtual copies. +/// +/// In the general case, we're checking if two virtual registers can be merged, and both can +/// contain incomplete block parameter values with associated virtual copies. +/// +/// The `VirtualCopies` struct represents a set of incomplete parameters and their associated +/// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes +/// representing the virtual copies in both vregs. +struct VirtualCopies { + // Incomplete block parameters. These don't need to belong to the same virtual register. + params: Vec<Value>, + + // Set of `(branch, destination)` pairs. These are all the predecessor branches for the blocks + // whose parameters can be found in `params`. + // + // Ordered by dominator tree pre-order of the branch instructions. + branches: Vec<(Inst, Block)>, + + // Filter for the currently active node iterator. + // + // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with + // branch argument number `num`. + filter: FxHashMap<Block, (u8, usize)>, +} + +impl VirtualCopies { + /// Create an empty VirtualCopies struct. + pub fn new() -> Self { + Self { + params: Vec::new(), + branches: Vec::new(), + filter: FxHashMap(), + } + } + + /// Clear all state. + pub fn clear(&mut self) { + self.params.clear(); + self.branches.clear(); + self.filter.clear(); + } + + /// Initialize virtual copies from the (interfering) values in a union-find virtual register + /// that is going to be broken up and reassembled iteratively. + /// + /// The values are assumed to be in domtree pre-order. + /// + /// This will extract the block parameter values and associate virtual copies all of them. 
+ pub fn initialize( + &mut self, + values: &[Value], + func: &Function, + cfg: &ControlFlowGraph, + preorder: &DominatorTreePreorder, + ) { + self.clear(); + + let mut last_block = None; + for &val in values { + if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) { + self.params.push(val); + + // We may have multiple parameters from the same block, but we only need to collect + // predecessors once. Also verify the ordering of values. + if let Some(last) = last_block { + match preorder.pre_cmp_block(last, block) { + cmp::Ordering::Less => {} + cmp::Ordering::Equal => continue, + cmp::Ordering::Greater => panic!("values in wrong order"), + } + } + + // This block hasn't been seen before. + for BlockPredecessor { + inst: pred_inst, .. + } in cfg.pred_iter(block) + { + self.branches.push((pred_inst, block)); + } + last_block = Some(block); + } + } + + // Reorder the predecessor branches as required by the dominator forest. + self.branches + .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout)); + } + + /// Get the next unmerged parameter value. + pub fn next_param(&self) -> Option<Value> { + self.params.last().cloned() + } + + /// Indicate that `param` is now fully merged. + pub fn merged_param(&mut self, param: Value, func: &Function) { + let popped = self.params.pop(); + debug_assert_eq!(popped, Some(param)); + + // The domtree pre-order in `self.params` guarantees that all parameters defined at the + // same block will be adjacent. This means we can see when all parameters at a block have been + // merged. + // + // We don't care about the last parameter - when that is merged we are done. + let last = match self.params.last() { + None => return, + Some(x) => *x, + }; + let block = func.dfg.value_def(param).unwrap_block(); + if func.dfg.value_def(last).unwrap_block() == block { + // We're not done with `block` parameters yet. + return; + } + + // Alright, we know there are no remaining `block` parameters in `self.params`. This means we + // can get rid of the `block` predecessors in `self.branches`. We don't have to, the + // `VCopyIter` will just skip them, but this reduces its workload. + self.branches.retain(|&(_, dest)| dest != block); + } + + /// Set a filter for the virtual copy nodes we're generating. + /// + /// Only generate nodes for parameter values that are in the same congruence class as `reprs`. + /// Assign a set_id to each node corresponding to the index into `reprs` of the parameter's + /// congruence class. + pub fn set_filter( + &mut self, + reprs: [Value; 2], + func: &Function, + virtregs: &VirtRegs, + preorder: &DominatorTreePreorder, + ) { + self.filter.clear(); + + // Parameters in `self.params` are ordered according to the domtree per-order, and they are + // removed from the back once they are fully merged. This means we can stop looking for + // parameters once we're beyond the last one. + let last_param = *self.params.last().expect("No more parameters"); + let limit = func.dfg.value_def(last_param).unwrap_block(); + + for (set_id, repr) in reprs.iter().enumerate() { + let set_id = set_id as u8; + for &value in virtregs.congruence_class(repr) { + if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) { + if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater { + // Stop once we're outside the bounds of `self.params`. + break; + } + self.filter.insert(block, (set_id, num)); + } + } + } + } + + /// Look up the set_id and argument number for `block` in the current filter. 
+ /// + /// Returns `None` if none of the currently active parameters are defined at `block`. Otherwise + /// returns `(set_id, argnum)` for an active parameter defined at `block`. + fn lookup(&self, block: Block) -> Option<(u8, usize)> { + self.filter.get(&block).cloned() + } + + /// Get an iterator of dom-forest nodes corresponding to the current filter. + pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter { + VCopyIter { + func, + vcopies: self, + branches: self.branches.iter(), + } + } +} + +/// Virtual copy iterator. +/// +/// This iterator produces dom-forest nodes corresponding to the current filter in the virtual +/// copies container. +struct VCopyIter<'a> { + func: &'a Function, + vcopies: &'a VirtualCopies, + branches: slice::Iter<'a, (Inst, Block)>, +} + +impl<'a> Iterator for VCopyIter<'a> { + type Item = Node; + + fn next(&mut self) -> Option<Node> { + while let Some(&(branch, dest)) = self.branches.next() { + if let Some((set_id, argnum)) = self.vcopies.lookup(dest) { + let arg = self.func.dfg.inst_variable_args(branch)[argnum]; + return Some(Node::vcopy(branch, arg, set_id, self.func)); + } + } + None + } +} + +/// Node-merging iterator. +/// +/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them. +struct MergeNodes<'a, IA, IB> +where + IA: Iterator<Item = Node>, + IB: Iterator<Item = Node>, +{ + a: iter::Peekable<IA>, + b: iter::Peekable<IB>, + layout: &'a ir::Layout, + preorder: &'a DominatorTreePreorder, +} + +impl<'a, IA, IB> MergeNodes<'a, IA, IB> +where + IA: Iterator<Item = Node>, + IB: Iterator<Item = Node>, +{ + pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self { + MergeNodes { + a: a.peekable(), + b: b.peekable(), + layout: &func.layout, + preorder, + } + } +} + +impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB> +where + IA: Iterator<Item = Node>, + IB: Iterator<Item = Node>, +{ + type Item = Node; + + fn next(&mut self) -> Option<Node> { + let ord = match (self.a.peek(), self.b.peek()) { + (Some(a), Some(b)) => { + let layout = self.layout; + self.preorder + .pre_cmp_block(a.block, b.block) + .then_with(|| layout.cmp(a.def, b.def)) + } + (Some(_), None) => cmp::Ordering::Less, + (None, Some(_)) => cmp::Ordering::Greater, + (None, None) => return None, + }; + // When the nodes compare equal, prefer the `a` side. + if ord != cmp::Ordering::Greater { + self.a.next() + } else { + self.b.next() + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/coloring.rs b/third_party/rust/cranelift-codegen/src/regalloc/coloring.rs new file mode 100644 index 0000000000..f9d65b8fa1 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/coloring.rs @@ -0,0 +1,1324 @@ +//! Register allocator coloring pass. +//! +//! The coloring pass assigns a physical register to every SSA value with a register affinity, +//! under the assumption that the register pressure has been lowered sufficiently by spilling and +//! splitting. +//! +//! # Preconditions +//! +//! The coloring pass doesn't work on arbitrary code. Certain preconditions must be satisfied: +//! +//! 1. All instructions must be legalized and assigned an encoding. The encoding recipe guides the +//! register assignments and provides exact constraints. +//! +//! 2. Instructions with tied operands must be in a coloring-friendly state. Specifically, the +//! values used by the tied operands must be killed by the instruction. This can be achieved by +//! 
inserting a `copy` to a new value immediately before the two-address instruction. +//! +//! 3. If a value is bound to more than one operand on the same instruction, the operand +//! constraints must be compatible. This can also be achieved by inserting copies so the +//! incompatible operands get different values. +//! +//! 4. The register pressure must be lowered sufficiently by inserting spill code. Register +//! operands are allowed to read spilled values, but each such instance must be counted as using +//! a register. +//! +//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed +//! as arguments when branching to a block must belong to the same virtual register as the +//! corresponding block argument value. +//! +//! # Iteration order +//! +//! The SSA property guarantees that whenever the live range of two values overlap, one of the +//! values will be live at the definition point of the other value. If we visit the instructions in +//! a topological order relative to the dominance relation, we can assign colors to the values +//! defined by the instruction and only consider the colors of other values that are live at the +//! instruction. +//! +//! The first time we see a branch to a block, the block's argument values are colored to match the +//! registers currently holding branch argument values passed to the predecessor branch. By +//! visiting blocks in a CFG topological order, we guarantee that at least one predecessor branch has +//! been visited before the destination block. Therefore, the block's arguments are already colored. +//! +//! The exception is the entry block whose arguments are colored from the ABI requirements. + +use crate::cursor::{Cursor, EncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{ArgumentLoc, InstBuilder, ValueDef}; +use crate::ir::{Block, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc}; +use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit}; +use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa}; +use crate::packed_option::PackedOption; +use crate::regalloc::affinity::Affinity; +use crate::regalloc::diversion::RegDiversions; +use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; +use crate::regalloc::liveness::Liveness; +use crate::regalloc::liverange::LiveRange; +use crate::regalloc::register_set::RegisterSet; +use crate::regalloc::solver::{Solver, SolverError}; +use crate::timing; +use core::mem; +use log::debug; + +/// Data structures for the coloring pass. +/// +/// These are scratch space data structures that can be reused between invocations. +pub struct Coloring { + divert: RegDiversions, + solver: Solver, +} + +/// Kinds of ABI parameters. +enum AbiParams { + Parameters(SigRef), + Returns, +} + +/// Bundle of references that the coloring algorithm needs. +/// +/// Some of the needed mutable references are passed around as explicit function arguments so we +/// can avoid many fights with the borrow checker over mutable borrows of `self`. This includes the +/// `Function` and `LiveValueTracker` references. +/// +/// Immutable context information and mutable references that don't need to be borrowed across +/// method calls should go in this struct. +struct Context<'a> { + // Current instruction as well as reference to function and ISA. + cur: EncCursor<'a>, + + // Cached ISA information. 
+ // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object. + reginfo: RegInfo, + encinfo: EncInfo, + + // References to contextual data structures we need. + cfg: &'a ControlFlowGraph, + domtree: &'a DominatorTree, + liveness: &'a mut Liveness, + + // References to working set data structures. + // If we need to borrow out of a data structure across a method call, it must be passed as a + // function argument instead, see the `LiveValueTracker` arguments. + divert: &'a mut RegDiversions, + solver: &'a mut Solver, + + // Pristine set of registers that the allocator can use. + // This set remains immutable, we make clones. + usable_regs: RegisterSet, + + uses_pinned_reg: bool, +} + +impl Coloring { + /// Allocate scratch space data structures for the coloring pass. + pub fn new() -> Self { + Self { + divert: RegDiversions::new(), + solver: Solver::new(), + } + } + + /// Clear all data structures in this coloring pass. + pub fn clear(&mut self) { + self.divert.clear(); + self.solver.clear(); + } + + /// Run the coloring algorithm over `func`. + pub fn run( + &mut self, + isa: &dyn TargetIsa, + func: &mut Function, + cfg: &ControlFlowGraph, + domtree: &DominatorTree, + liveness: &mut Liveness, + tracker: &mut LiveValueTracker, + ) { + let _tt = timing::ra_coloring(); + debug!("Coloring for:\n{}", func.display(isa)); + let mut ctx = Context { + usable_regs: isa.allocatable_registers(func), + uses_pinned_reg: isa.flags().enable_pinned_reg(), + cur: EncCursor::new(func, isa), + reginfo: isa.register_info(), + encinfo: isa.encoding_info(), + cfg, + domtree, + liveness, + divert: &mut self.divert, + solver: &mut self.solver, + }; + ctx.run(tracker) + } +} + +impl<'a> Context<'a> { + /// Is the pinned register usage enabled, and is this register the pinned register? + #[inline] + fn is_pinned_reg(&self, rc: RegClass, reg: RegUnit) -> bool { + rc.is_pinned_reg(self.uses_pinned_reg, reg) + } + + /// Run the coloring algorithm. + fn run(&mut self, tracker: &mut LiveValueTracker) { + self.cur + .func + .locations + .resize(self.cur.func.dfg.num_values()); + + // Visit blocks in reverse post-order. We need to ensure that at least one predecessor has + // been visited before each block. That guarantees that the block arguments have been colored. + for &block in self.domtree.cfg_postorder().iter().rev() { + self.visit_block(block, tracker); + } + } + + /// Visit `block`, assuming that the immediate dominator has already been visited. + fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { + debug!("Coloring {}:", block); + let mut regs = self.visit_block_header(block, tracker); + tracker.drop_dead_params(); + + // Now go through the instructions in `block` and color the values they define. + self.cur.goto_top(block); + while let Some(inst) = self.cur.next_inst() { + self.cur.use_srcloc(inst); + let opcode = self.cur.func.dfg[inst].opcode(); + if !opcode.is_ghost() { + // This is an instruction which either has an encoding or carries ABI-related + // register allocation constraints. + let enc = self.cur.func.encodings[inst]; + let constraints = self.encinfo.operand_constraints(enc); + if self.visit_inst(inst, constraints, tracker, &mut regs) { + self.replace_global_defines(inst, tracker); + // Restore cursor location after `replace_global_defines` moves it. + // We want to revisit the copy instructions it inserted. + self.cur.goto_inst(inst); + } + } else { + // This is a ghost instruction with no encoding and no extra constraints. 
+ let (_throughs, kills) = tracker.process_ghost(inst); + self.process_ghost_kills(kills, &mut regs); + } + tracker.drop_dead(inst); + + // We are not able to insert any regmove for diversion or un-diversion after the first + // branch. Instead, we record the diversion to be restored at the entry of the next block, + // which should have a single predecessor. + if opcode.is_branch() { + // The next instruction is necessarily an unconditional branch. + if let Some(branch) = self.cur.next_inst() { + debug!( + "Skip coloring {}\n from {}\n with diversions {}", + self.cur.display_inst(branch), + regs.input.display(&self.reginfo), + self.divert.display(&self.reginfo) + ); + use crate::ir::instructions::BranchInfo::*; + let target = match self.cur.func.dfg.analyze_branch(branch) { + NotABranch | Table(_, _) => panic!( + "unexpected instruction {} after a conditional branch", + self.cur.display_inst(branch) + ), + SingleDest(block, _) => block, + }; + + // We have a single branch with a single target, and a block with a single + // predecessor. Thus we can forward the diversion set to the next block. + if self.cfg.pred_iter(target).count() == 1 { + // Transfer the diversion to the next block. + self.divert + .save_for_block(&mut self.cur.func.entry_diversions, target); + debug!( + "Set entry-diversion for {} to\n {}", + target, + self.divert.display(&self.reginfo) + ); + } else { + debug_assert!( + self.divert.is_empty(), + "Divert set is non-empty after the terminator." + ); + } + assert_eq!( + self.cur.next_inst(), + None, + "Unexpected instruction after a branch group." + ); + } else { + assert!(opcode.is_terminator()); + } + } + } + } + + /// Visit the `block` header. + /// + /// Initialize the set of live registers and color the arguments to `block`. + fn visit_block_header( + &mut self, + block: Block, + tracker: &mut LiveValueTracker, + ) -> AvailableRegs { + // Reposition the live value tracker and deal with the block arguments. + tracker.block_top( + block, + &self.cur.func.dfg, + self.liveness, + &self.cur.func.layout, + self.domtree, + ); + + // Copy the content of the registered diversions to be reused at the + // entry of this basic block. + self.divert.at_block(&self.cur.func.entry_diversions, block); + debug!( + "Start {} with entry-diversion set to\n {}", + block, + self.divert.display(&self.reginfo) + ); + + if self.cur.func.layout.entry_block() == Some(block) { + // Parameters on the entry block have ABI constraints. + self.color_entry_params(tracker.live()) + } else { + // The live-ins and parameters of a non-entry block have already been assigned a register. + // Reconstruct the allocatable set. + self.livein_regs(tracker.live()) + } + } + + /// Initialize a set of allocatable registers from the values that are live-in to a block. + /// These values must already be colored when the dominating blocks were processed. + /// + /// Also process the block arguments which were colored when the first predecessor branch was + /// encountered. + fn livein_regs(&self, live: &[LiveValue]) -> AvailableRegs { + // Start from the registers that are actually usable. We don't want to include any reserved + // registers in the set. 
+ let mut regs = AvailableRegs::new(&self.usable_regs); + + for lv in live.iter().filter(|lv| !lv.is_dead) { + debug!( + "Live-in: {}:{} in {}", + lv.value, + lv.affinity.display(&self.reginfo), + self.divert + .get(lv.value, &self.cur.func.locations) + .display(&self.reginfo) + ); + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + let loc = self.cur.func.locations[lv.value]; + let reg = match loc { + ValueLoc::Reg(reg) => reg, + ValueLoc::Unassigned => panic!("Live-in {} wasn't assigned", lv.value), + ValueLoc::Stack(ss) => { + panic!("Live-in {} is in {}, should be register", lv.value, ss) + } + }; + if lv.is_local { + regs.take(rc, reg, lv.is_local); + } else { + let loc = self.divert.get(lv.value, &self.cur.func.locations); + let reg_divert = match loc { + ValueLoc::Reg(reg) => reg, + ValueLoc::Unassigned => { + panic!("Diversion: Live-in {} wasn't assigned", lv.value) + } + ValueLoc::Stack(ss) => panic!( + "Diversion: Live-in {} is in {}, should be register", + lv.value, ss + ), + }; + regs.take_divert(rc, reg, reg_divert); + } + } + } + + regs + } + + /// Color the parameters on the entry block. + /// + /// These are function parameters that should already have assigned register units in the + /// function signature. + /// + /// Return the set of remaining allocatable registers after filtering out the dead arguments. + fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs { + let sig = &self.cur.func.signature; + debug_assert_eq!(sig.params.len(), args.len()); + + let mut regs = AvailableRegs::new(&self.usable_regs); + + for (lv, abi) in args.iter().zip(&sig.params) { + match lv.affinity { + Affinity::Reg(rci) => { + let rc = self.reginfo.rc(rci); + if let ArgumentLoc::Reg(reg) = abi.location { + if !lv.is_dead { + regs.take(rc, reg, lv.is_local); + } + self.cur.func.locations[lv.value] = ValueLoc::Reg(reg); + } else { + // This should have been fixed by the reload pass. + panic!( + "Entry arg {} has {} affinity, but ABI {}", + lv.value, + lv.affinity.display(&self.reginfo), + abi.display(&self.reginfo) + ); + } + } + // The spiller will have assigned an incoming stack slot already. + Affinity::Stack => debug_assert!(abi.location.is_stack()), + // This is a ghost value, unused in the function. Don't assign it to a location + // either. + Affinity::Unassigned => {} + } + } + + regs + } + + /// Program the input-side ABI constraints for `inst` into the constraint solver. + /// + /// ABI constraints are the fixed register assignments useds for calls and returns. + fn program_input_abi(&mut self, inst: Inst, abi_params: AbiParams) { + let abi_types = match abi_params { + AbiParams::Parameters(sig) => &self.cur.func.dfg.signatures[sig].params, + AbiParams::Returns => &self.cur.func.signature.returns, + }; + + for (abi, &value) in abi_types + .iter() + .zip(self.cur.func.dfg.inst_variable_args(inst)) + { + if let ArgumentLoc::Reg(reg) = abi.location { + if let Affinity::Reg(rci) = self + .liveness + .get(value) + .expect("ABI register must have live range") + .affinity + { + let rc = self.reginfo.rc(rci); + let cur_reg = self.divert.reg(value, &self.cur.func.locations); + self.solver.reassign_in(value, rc, cur_reg, reg); + } else { + panic!("ABI argument {} should be in a register", value); + } + } + } + } + + /// Color the values defined by `inst` and insert any necessary shuffle code to satisfy + /// instruction constraints. 
+ /// + /// Update `regs` to reflect the allocated registers after `inst`, including removing any dead + /// or killed values from the set. + /// + /// Returns true when the global values defined by `inst` must be replaced by local values. + fn visit_inst( + &mut self, + inst: Inst, + constraints: Option<&RecipeConstraints>, + tracker: &mut LiveValueTracker, + regs: &mut AvailableRegs, + ) -> bool { + debug!( + "Coloring {}\n from {}", + self.cur.display_inst(inst), + regs.input.display(&self.reginfo), + ); + + // block whose arguments should be colored to match the current branch instruction's + // arguments. + let mut color_dest_args = None; + + // Program the solver with register constraints for the input side. + self.solver.reset(®s.input); + + if let Some(constraints) = constraints { + self.program_input_constraints(inst, constraints.ins); + } + + let call_sig = self.cur.func.dfg.call_signature(inst); + if let Some(sig) = call_sig { + self.program_input_abi(inst, AbiParams::Parameters(sig)); + } else if self.cur.func.dfg[inst].opcode().is_return() { + self.program_input_abi(inst, AbiParams::Returns); + } else if self.cur.func.dfg[inst].opcode().is_branch() { + // This is a branch, so we need to make sure that globally live values are in their + // global registers. For blocks that take arguments, we also need to place the argument + // values in the expected registers. + if let Some(dest) = self.cur.func.dfg[inst].branch_destination() { + if self.program_block_arguments(inst, dest) { + color_dest_args = Some(dest); + } + } else { + // This is a multi-way branch like `br_table`. We only support arguments on + // single-destination branches. + debug_assert_eq!( + self.cur.func.dfg.inst_variable_args(inst).len(), + 0, + "Can't handle block arguments: {}", + self.cur.display_inst(inst) + ); + self.undivert_regs(|lr, _| !lr.is_local()); + } + } + + if self.solver.has_fixed_input_conflicts() { + self.divert_fixed_input_conflicts(tracker.live()); + } + + self.solver.inputs_done(); + + // Update the live value tracker with this instruction. + let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + + // Get rid of the killed values. + for lv in kills { + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + let reg = self.divert.reg(lv.value, &self.cur.func.locations); + + if self.is_pinned_reg(rc, reg) { + // Don't kill the pinned reg, either in the local or global register sets. + debug_assert!(lv.is_local, "pinned register SSA value can't be global"); + continue; + } + + debug!( + " kill {} in {} ({} {})", + lv.value, + self.reginfo.display_regunit(reg), + if lv.is_local { "local" } else { "global" }, + rc + ); + self.solver.add_kill(lv.value, rc, reg); + + // Update the global register set which has no diversions. + if !lv.is_local { + regs.global + .free(rc, self.cur.func.locations[lv.value].unwrap_reg()); + } + } + } + + // This aligns with the " from" line at the top of the function. + debug!(" glob {}", regs.global.display(&self.reginfo)); + + // This flag is set when the solver failed to find a solution for the global defines that + // doesn't interfere with `regs.global`. We need to rewrite all of `inst`s global defines + // as local defines followed by copies. + let mut replace_global_defines = false; + + // Program the fixed output constraints before the general defines. This allows us to + // detect conflicts between fixed outputs and tied operands where the input value hasn't + // been converted to a solver variable. 
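+ // Hypothetical example of such a conflict: an encoding fixes an output to a
+ // particular register unit while a tied input value currently sits in that
+ // same unit. Programming the fixed outputs first lets the solver turn the
+ // conflicting live value into a variable that can be diverted.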
+ if let Some(constraints) = constraints { + if constraints.fixed_outs { + self.program_fixed_outputs( + constraints.outs, + defs, + throughs, + &mut replace_global_defines, + ®s.global, + ); + } + } + + if let Some(sig) = call_sig { + self.program_output_abi( + sig, + defs, + throughs, + &mut replace_global_defines, + ®s.global, + ); + } + + if let Some(constraints) = constraints { + self.program_output_constraints( + inst, + constraints.outs, + defs, + &mut replace_global_defines, + ®s.global, + ); + } + + // Finally, we've fully programmed the constraint solver. + // We expect a quick solution in most cases. + let is_reload = match &self.cur.func.dfg[inst] { + InstructionData::Unary { + opcode: Opcode::Fill, + .. + } => true, + _ => false, + }; + + let output_regs = self + .solver + .quick_solve(®s.global, is_reload) + .unwrap_or_else(|_| { + debug!("quick_solve failed for {}", self.solver); + self.iterate_solution( + throughs, + ®s.global, + &mut replace_global_defines, + is_reload, + ) + }); + + // The solution and/or fixed input constraints may require us to shuffle the set of live + // registers around. + self.shuffle_inputs(&mut regs.input); + + // If this is the first time we branch to `dest`, color its arguments to match the current + // register state. + if let Some(dest) = color_dest_args { + self.color_block_params(inst, dest); + } + + // Apply the solution to the defs. + for v in self.solver.vars().iter().filter(|&v| v.is_define()) { + self.cur.func.locations[v.value] = ValueLoc::Reg(v.solution); + } + + // Tied defs are not part of the solution above. + // Copy register assignments from tied inputs to tied outputs. + if let Some(constraints) = constraints { + if constraints.tied_ops { + for (constraint, lv) in constraints.outs.iter().zip(defs) { + if let ConstraintKind::Tied(num) = constraint.kind { + let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; + let reg = self.divert.reg(arg, &self.cur.func.locations); + self.cur.func.locations[lv.value] = ValueLoc::Reg(reg); + } + } + } + } + + // Update `regs` for the next instruction. + regs.input = output_regs; + for lv in defs { + let loc = self.cur.func.locations[lv.value]; + debug!( + " color {} -> {}{}", + lv.value, + loc.display(&self.reginfo), + if lv.is_local { + "" + } else if replace_global_defines { + " (global to be replaced)" + } else { + " (global)" + } + ); + + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + let reg = loc.unwrap_reg(); + + debug_assert!( + !self.is_pinned_reg(rc, reg) + || self.cur.func.dfg[inst].opcode() == Opcode::GetPinnedReg, + "pinned register may not be part of outputs for '{}'.", + self.cur.func.dfg[inst].opcode() + ); + + if self.is_pinned_reg(rc, reg) { + continue; + } + + // Remove the dead defs. + if lv.endpoint == inst { + regs.input.free(rc, reg); + debug_assert!(lv.is_local); + } + + // Track globals in their undiverted locations. + if !lv.is_local && !replace_global_defines { + regs.global.take(rc, reg); + } + } + } + + self.forget_diverted(kills); + + replace_global_defines + } + + /// Program the input-side constraints for `inst` into the constraint solver. + fn program_input_constraints(&mut self, inst: Inst, constraints: &[OperandConstraint]) { + for (constraint, &arg_val) in constraints + .iter() + .zip(self.cur.func.dfg.inst_args(inst)) + .filter(|&(constraint, _)| constraint.kind != ConstraintKind::Stack) + { + // Reload pass is supposed to ensure that all arguments to register operands are + // already in a register. 
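+ // Note that `cur_reg` below is the value's *current* register, i.e. its home
+ // assignment adjusted for any active diversions, which is the state the
+ // solver must reason about right before `inst`.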
+ let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); + match constraint.kind { + ConstraintKind::FixedReg(regunit) => { + // Add the fixed constraint even if `cur_reg == regunit`. + // It is possible that we will want to convert the value to a variable later, + // and this identity assignment prevents that from happening. + self.solver + .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); + } + ConstraintKind::FixedTied(regunit) => { + // The pinned register may not be part of a fixed tied requirement. If this + // becomes the case, then it must be changed to a different register. + debug_assert!( + !self.is_pinned_reg(constraint.regclass, regunit), + "see comment above" + ); + // See comment right above. + self.solver + .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); + } + ConstraintKind::Tied(_) => { + if self.is_pinned_reg(constraint.regclass, cur_reg) { + // Divert the pinned register; it shouldn't be reused for a tied input. + if self.solver.can_add_var(constraint.regclass, cur_reg) { + self.solver.add_var(arg_val, constraint.regclass, cur_reg); + } + } else if !constraint.regclass.contains(cur_reg) { + self.solver.add_var(arg_val, constraint.regclass, cur_reg); + } + } + ConstraintKind::Reg => { + if !constraint.regclass.contains(cur_reg) { + self.solver.add_var(arg_val, constraint.regclass, cur_reg); + } + } + ConstraintKind::Stack => unreachable!(), + } + } + } + + /// Program the complete set of input constraints into the solver. + /// + /// The `program_input_constraints()` function above will not tell the solver about any values + /// that are already assigned to appropriate registers. This is normally fine, but if we want + /// to add additional variables to help the solver, we need to make sure that they are + /// constrained properly. + /// + /// This function completes the work of `program_input_constraints()` by calling `add_var` for + /// all values used by the instruction. + fn program_complete_input_constraints(&mut self) { + let inst = self.cur.current_inst().expect("Not on an instruction"); + let constraints = self + .encinfo + .operand_constraints(self.cur.func.encodings[inst]) + .expect("Current instruction not encoded") + .ins; + + for (constraint, &arg_val) in constraints.iter().zip(self.cur.func.dfg.inst_args(inst)) { + match constraint.kind { + ConstraintKind::Reg | ConstraintKind::Tied(_) => { + let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); + + // This is the opposite condition of `program_input_constraints()`. The pinned + // register mustn't be added back as a variable. + if constraint.regclass.contains(cur_reg) + && !self.is_pinned_reg(constraint.regclass, cur_reg) + { + // This code runs after calling `solver.inputs_done()` so we must identify + // the new variable as killed or live-through. + let layout = &self.cur.func.layout; + if self.liveness[arg_val].killed_at(inst, layout.pp_block(inst), layout) { + self.solver + .add_killed_var(arg_val, constraint.regclass, cur_reg); + } else { + self.solver + .add_through_var(arg_val, constraint.regclass, cur_reg); + } + } + } + ConstraintKind::FixedReg(_) + | ConstraintKind::FixedTied(_) + | ConstraintKind::Stack => {} + } + } + } + + /// Prepare for a branch to `dest`. + /// + /// 1. Any values that are live-in to `dest` must be un-diverted so they live in their globally + /// assigned register. + /// 2. If the `dest` block takes arguments, reassign the branch argument values to the matching + /// registers. 
+ /// + /// Returns true if this is the first time a branch to `dest` is seen, so the `dest` argument + /// values should be colored after `shuffle_inputs`. + fn program_block_arguments(&mut self, inst: Inst, dest: Block) -> bool { + // Find diverted registers that are live-in to `dest` and reassign them to their global + // home. + // + // Values with a global live range that are not live in to `dest` could appear as branch + // arguments, so they can't always be un-diverted. + self.undivert_regs(|lr, layout| lr.is_livein(dest, layout)); + + // Now handle the block arguments. + let br_args = self.cur.func.dfg.inst_variable_args(inst); + let dest_args = self.cur.func.dfg.block_params(dest); + debug_assert_eq!(br_args.len(), dest_args.len()); + for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { + // The first time we encounter a branch to `dest`, we get to pick the location. The + // following times we see a branch to `dest`, we must follow suit. + match self.cur.func.locations[dest_arg] { + ValueLoc::Unassigned => { + // This is the first branch to `dest`, so we should color `dest_arg` instead of + // `br_arg`. However, we don't know where `br_arg` will end up until + // after `shuffle_inputs`. See `color_block_params` below. + // + // It is possible for `dest_arg` to have no affinity, and then it should simply + // be ignored. + if self.liveness[dest_arg].affinity.is_reg() { + return true; + } + } + ValueLoc::Reg(dest_reg) => { + // We've branched to `dest` before. Make sure we use the correct argument + // registers by reassigning `br_arg`. + if let Affinity::Reg(rci) = self.liveness[br_arg].affinity { + let rc = self.reginfo.rc(rci); + let br_reg = self.divert.reg(br_arg, &self.cur.func.locations); + self.solver.reassign_in(br_arg, rc, br_reg, dest_reg); + } else { + panic!("Branch argument {} is not in a register", br_arg); + } + } + ValueLoc::Stack(ss) => { + // The spiller should already have given us identical stack slots. + debug_assert_eq!(ValueLoc::Stack(ss), self.cur.func.locations[br_arg]); + } + } + } + + // No `dest` arguments need coloring. + false + } + + /// Knowing that we've never seen a branch to `dest` before, color its parameters to match our + /// register state. + /// + /// This function is only called when `program_block_arguments()` returned `true`. + fn color_block_params(&mut self, inst: Inst, dest: Block) { + let br_args = self.cur.func.dfg.inst_variable_args(inst); + let dest_args = self.cur.func.dfg.block_params(dest); + debug_assert_eq!(br_args.len(), dest_args.len()); + for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { + match self.cur.func.locations[dest_arg] { + ValueLoc::Unassigned => { + if self.liveness[dest_arg].affinity.is_reg() { + let br_reg = self.divert.reg(br_arg, &self.cur.func.locations); + self.cur.func.locations[dest_arg] = ValueLoc::Reg(br_reg); + } + } + ValueLoc::Reg(_) => panic!("{} arg {} already colored", dest, dest_arg), + // Spilled value consistency is verified by `program_block_arguments()` above. + ValueLoc::Stack(_) => {} + } + } + } + + /// Find all diverted registers where `pred` returns `true` and undo their diversion so they + /// are reallocated to their global register assignments. 
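+ ///
+ /// For example, `program_block_arguments` above calls
+ /// `self.undivert_regs(|lr, layout| lr.is_livein(dest, layout))` so that every
+ /// value live-in to the branch target is moved back to its global register.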
+ fn undivert_regs<Pred>(&mut self, mut pred: Pred) + where + Pred: FnMut(&LiveRange, &Layout) -> bool, + { + for (&value, rdiv) in self.divert.iter() { + let lr = self + .liveness + .get(value) + .expect("Missing live range for diverted register"); + if pred(lr, &self.cur.func.layout) { + if let Affinity::Reg(rci) = lr.affinity { + let rc = self.reginfo.rc(rci); + // Stack diversions should not be possible here. They only live transiently + // during `shuffle_inputs()`. + self.solver.reassign_in( + value, + rc, + rdiv.to.unwrap_reg(), + rdiv.from.unwrap_reg(), + ); + } else { + panic!( + "Diverted register {} with {} affinity", + value, + lr.affinity.display(&self.reginfo) + ); + } + } + } + } + + /// Find existing live values that conflict with the fixed input register constraints programmed + /// into the constraint solver. Convert them to solver variables so they can be diverted. + fn divert_fixed_input_conflicts(&mut self, live: &[LiveValue]) { + for lv in live { + if let Affinity::Reg(rci) = lv.affinity { + let toprc = self.reginfo.toprc(rci); + let reg = self.divert.reg(lv.value, &self.cur.func.locations); + if self.solver.is_fixed_input_conflict(toprc, reg) { + debug!( + "adding var to divert fixed input conflict for {}", + toprc.info.display_regunit(reg) + ); + self.solver.add_var(lv.value, toprc, reg); + } + } + } + } + + /// Program any fixed-register output constraints into the solver. This may also detect + /// conflicts between live-through registers and fixed output registers. These live-through + /// values need to be turned into solver variables so they can be reassigned. + fn program_fixed_outputs( + &mut self, + constraints: &[OperandConstraint], + defs: &[LiveValue], + throughs: &[LiveValue], + replace_global_defines: &mut bool, + global_regs: &RegisterSet, + ) { + for (constraint, lv) in constraints.iter().zip(defs) { + match constraint.kind { + ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => { + self.add_fixed_output(lv.value, constraint.regclass, reg, throughs); + if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { + debug!( + "Fixed output {} in {}:{} is not available in global regs", + lv.value, + constraint.regclass, + self.reginfo.display_regunit(reg) + ); + *replace_global_defines = true; + } + } + ConstraintKind::Reg | ConstraintKind::Tied(_) | ConstraintKind::Stack => {} + } + } + } + + /// Program the output-side ABI constraints for `inst` into the constraint solver. + /// + /// That means return values for a call instruction. + fn program_output_abi( + &mut self, + sig: SigRef, + defs: &[LiveValue], + throughs: &[LiveValue], + replace_global_defines: &mut bool, + global_regs: &RegisterSet, + ) { + // It's technically possible for a call instruction to have fixed results before the + // variable list of results, but we have no known instances of that. + // Just assume all results are variable return values. 
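+ // Every return value whose ABI location is `ArgumentLoc::Reg` becomes a fixed
+ // output below; non-register return locations are simply skipped.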
+ debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len()); + for (i, lv) in defs.iter().enumerate() { + let abi = self.cur.func.dfg.signatures[sig].returns[i]; + if let ArgumentLoc::Reg(reg) = abi.location { + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + self.add_fixed_output(lv.value, rc, reg, throughs); + if !lv.is_local && !global_regs.is_avail(rc, reg) { + debug!( + "ABI output {} in {}:{} is not available in global regs", + lv.value, + rc, + self.reginfo.display_regunit(reg) + ); + *replace_global_defines = true; + } + } else { + panic!("ABI argument {} should be in a register", lv.value); + } + } + } + } + + /// Add a single fixed output value to the solver. + fn add_fixed_output( + &mut self, + value: Value, + rc: RegClass, + reg: RegUnit, + throughs: &[LiveValue], + ) { + // Pinned register is already unavailable in the solver, since it is copied in the + // available registers on entry. + if !self.is_pinned_reg(rc, reg) && !self.solver.add_fixed_output(rc, reg) { + // The fixed output conflicts with some of the live-through registers. + for lv in throughs { + if let Affinity::Reg(rci) = lv.affinity { + let toprc2 = self.reginfo.toprc(rci); + let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); + if regs_overlap(rc, reg, toprc2, reg2) { + // This live-through value is interfering with the fixed output assignment. + // Convert it to a solver variable. + self.solver.add_through_var(lv.value, toprc2, reg2); + } + } + } + + let ok = self.solver.add_fixed_output(rc, reg); + debug_assert!(ok, "Couldn't clear fixed output interference for {}", value); + } + self.cur.func.locations[value] = ValueLoc::Reg(reg); + } + + /// Program the output-side constraints for `inst` into the constraint solver. + /// + /// It is assumed that all fixed outputs have already been handled. + fn program_output_constraints( + &mut self, + inst: Inst, + constraints: &[OperandConstraint], + defs: &[LiveValue], + replace_global_defines: &mut bool, + global_regs: &RegisterSet, + ) { + for (constraint, lv) in constraints.iter().zip(defs) { + match constraint.kind { + ConstraintKind::FixedReg(_) + | ConstraintKind::FixedTied(_) + | ConstraintKind::Stack => continue, + ConstraintKind::Reg => { + self.solver + .add_def(lv.value, constraint.regclass, !lv.is_local); + } + ConstraintKind::Tied(num) => { + // Find the input operand we're tied to. + // The solver doesn't care about the output value. + let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; + let reg = self.divert.reg(arg, &self.cur.func.locations); + + if let Some(reg) = + self.solver + .add_tied_input(arg, constraint.regclass, reg, !lv.is_local) + { + // The value we're tied to has been assigned to a fixed register. + // We need to make sure that fixed output register is compatible with the + // global register set. + if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { + debug!( + "Tied output {} in {}:{} is not available in global regs", + lv.value, + constraint.regclass, + self.reginfo.display_regunit(reg) + ); + *replace_global_defines = true; + } + } + } + } + } + } + + /// Try harder to find a solution to the constraint problem since `quick_solve()` failed. + /// + /// We may need to move more registers around before a solution is possible. Use an iterative + /// algorithm that adds one more variable until a solution can be found. 
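+ ///
+ /// Roughly: keep calling `real_solve`; on `SolverError::Divert(rc)` promote one
+ /// more live-through value of class `rc` to a solver variable, and on
+ /// `SolverError::Global(_)` give up on global defines for this instruction and
+ /// retry with local defines plus copies (`replace_global_defines`).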
+ fn iterate_solution( + &mut self, + throughs: &[LiveValue], + global_regs: &RegisterSet, + replace_global_defines: &mut bool, + is_reload: bool, + ) -> RegisterSet { + // Make sure `try_add_var()` below doesn't create a variable with too loose constraints. + self.program_complete_input_constraints(); + + loop { + match self.solver.real_solve(global_regs, is_reload) { + Ok(regs) => return regs, + Err(SolverError::Divert(rc)) => { + // Do we have any live-through `rc` registers that are not already variables? + let added = self.try_add_var(rc, throughs); + debug_assert!(added, "Ran out of registers in {}", rc); + } + Err(SolverError::Global(_value)) => { + debug!( + "Not enough global registers for {}, trying as local", + _value + ); + // We'll clear the `is_global` flag on all solver variables and instead make a + // note to replace all global defines with local defines followed by a copy. + *replace_global_defines = true; + self.solver.clear_all_global_flags(); + } + }; + } + } + + /// Try to add an `rc` variable to the solver from the `throughs` set. + fn try_add_var(&mut self, rc: RegClass, throughs: &[LiveValue]) -> bool { + debug!("Trying to add a {} reg from {} values", rc, throughs.len()); + + for lv in throughs { + if let Affinity::Reg(rci) = lv.affinity { + // The new variable gets to roam the whole top-level register class because it is + // not actually constrained by the instruction. We just want it out of the way. + let toprc2 = self.reginfo.toprc(rci); + let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); + if rc.contains(reg2) + && self.solver.can_add_var(toprc2, reg2) + && !self.is_live_on_outgoing_edge(lv.value) + { + self.solver.add_through_var(lv.value, toprc2, reg2); + return true; + } + } + } + + false + } + + /// Determine if `value` is live on a CFG edge from the current instruction. + /// + /// This means that the current instruction is a branch and `value` is live in to one of the + /// branch destinations. Branch arguments and block parameters are not considered live on the + /// edge. + fn is_live_on_outgoing_edge(&self, value: Value) -> bool { + use crate::ir::instructions::BranchInfo::*; + + let inst = self.cur.current_inst().expect("Not on an instruction"); + let layout = &self.cur.func.layout; + match self.cur.func.dfg.analyze_branch(inst) { + NotABranch => false, + SingleDest(block, _) => { + let lr = &self.liveness[value]; + lr.is_livein(block, layout) + } + Table(jt, block) => { + let lr = &self.liveness[value]; + !lr.is_local() + && (block.map_or(false, |block| lr.is_livein(block, layout)) + || self.cur.func.jump_tables[jt] + .iter() + .any(|block| lr.is_livein(*block, layout))) + } + } + } + + /// Emit `regmove` instructions as needed to move the live registers into place before the + /// instruction. Also update `self.divert` accordingly. + /// + /// The `self.cur` cursor is expected to point at the instruction. The register moves are + /// inserted before. + /// + /// The solver needs to be reminded of the available registers before any moves are inserted. + fn shuffle_inputs(&mut self, regs: &mut RegisterSet) { + use crate::regalloc::solver::Move::*; + + let spills = self.solver.schedule_moves(regs); + + // The move operations returned by `schedule_moves` refer to emergency spill slots by + // consecutive indexes starting from 0. Map these to real stack slots. + // It is very unlikely (impossible?) that we would need more than one spill per top-level + // register class, so avoid allocation by using a fixed array here. 
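+ // The `to_slot`/`from_slot` indexes in the move list map into this array: a
+ // `Spill` claims an entry and the matching `Fill` releases it again, and the
+ // `debug_assert!` below guards the fixed capacity.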
+ let mut slot = [PackedOption::default(); 8]; + debug_assert!(spills <= slot.len(), "Too many spills ({})", spills); + + for m in self.solver.moves() { + match *m { + Reg { + value, + from, + to, + rc, + } => { + debug_assert!( + !self.is_pinned_reg(rc, to), + "pinned register used in a regmove" + ); + self.divert.regmove(value, from, to); + self.cur.ins().regmove(value, from, to); + } + Spill { + value, + from, + to_slot, + .. + } => { + debug_assert_eq!(slot[to_slot].expand(), None, "Overwriting slot in use"); + let ss = self + .cur + .func + .stack_slots + .get_emergency_slot(self.cur.func.dfg.value_type(value), &slot[0..spills]); + slot[to_slot] = ss.into(); + self.divert.regspill(value, from, ss); + self.cur.ins().regspill(value, from, ss); + } + Fill { + value, + from_slot, + to, + rc, + } => { + debug_assert!( + !self.is_pinned_reg(rc, to), + "pinned register used in a regfill" + ); + // These slots are single use, so mark `ss` as available again. + let ss = slot[from_slot].take().expect("Using unallocated slot"); + self.divert.regfill(value, ss, to); + self.cur.ins().regfill(value, ss, to); + } + } + } + } + + /// Forget about any register diversions in `kills`. + fn forget_diverted(&mut self, kills: &[LiveValue]) { + if self.divert.is_empty() { + return; + } + + for lv in kills { + if lv.affinity.is_reg() { + self.divert.remove(lv.value); + } + } + } + + /// Replace all global values defined by `inst` with local values that are then copied into the + /// global value: + /// + /// v1 = foo + /// + /// becomes: + /// + /// v20 = foo + /// v1 = copy v20 + /// + /// This is sometimes necessary when there are no global registers available that can satisfy + /// the constraints on the instruction operands. + /// + fn replace_global_defines(&mut self, inst: Inst, tracker: &mut LiveValueTracker) { + debug!("Replacing global defs on {}", self.cur.display_inst(inst)); + + // We'll insert copies *after `inst`. Our caller will move the cursor back. + self.cur.next_inst(); + + // The tracker keeps the defs from `inst` at the end. Any dead defs have already been + // removed, so it's not obvious how many defs to process + for lv in tracker.live_mut().iter_mut().rev() { + // Keep going until we reach a value that is not defined by `inst`. + if match self.cur.func.dfg.value_def(lv.value) { + ValueDef::Result(i, _) => i != inst, + _ => true, + } { + break; + } + if lv.is_local || !lv.affinity.is_reg() { + continue; + } + + // Now `lv.value` is globally live and defined by `inst`. Replace it with a local live + // range that is copied after `inst`. + let ty = self.cur.func.dfg.value_type(lv.value); + let local = self.cur.func.dfg.replace_result(lv.value, ty); + self.cur.ins().with_result(lv.value).copy(local); + let copy = self.cur.built_inst(); + + // Create a live range for `local: inst -> copy`. + self.liveness.create_dead(local, inst, lv.affinity); + self.liveness.extend_locally( + local, + self.cur.func.layout.pp_block(inst), + copy, + &self.cur.func.layout, + ); + + // Move the definition of the global `lv.value`. + self.liveness.move_def_locally(lv.value, copy); + + // Transfer the register coloring to `local`. + let loc = mem::replace(&mut self.cur.func.locations[lv.value], ValueLoc::default()); + self.cur.func.locations[local] = loc; + + // Update `lv` to reflect the new `local` live range. 
+ lv.value = local; + lv.endpoint = copy; + lv.is_local = true; + + debug!( + " + {} with {} in {}", + self.cur.display_inst(copy), + local, + loc.display(&self.reginfo) + ); + } + debug!("Done: {}", self.cur.display_inst(inst)); + } + + /// Process kills on a ghost instruction. + /// - Forget diversions. + /// - Free killed registers. + fn process_ghost_kills(&mut self, kills: &[LiveValue], regs: &mut AvailableRegs) { + for lv in kills { + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + let loc = match self.divert.remove(lv.value) { + Some(loc) => loc, + None => self.cur.func.locations[lv.value], + }; + regs.input.free(rc, loc.unwrap_reg()); + if !lv.is_local { + regs.global + .free(rc, self.cur.func.locations[lv.value].unwrap_reg()); + } + } + } + } +} + +/// Keep track of the set of available registers in two interference domains: all registers +/// considering diversions and global registers not considering diversions. +struct AvailableRegs { + /// The exact set of registers available on the input side of the current instruction. This + /// takes into account register diversions, and it includes both local and global live ranges. + input: RegisterSet, + + /// Registers available for allocating globally live values. This set ignores any local values, + /// and it does not account for register diversions. + /// + /// Global values must be allocated out of this set because conflicts with other global values + /// can't be resolved with local diversions. + global: RegisterSet, +} + +impl AvailableRegs { + /// Initialize both the input and global sets from `regs`. + pub fn new(regs: &RegisterSet) -> Self { + Self { + input: regs.clone(), + global: regs.clone(), + } + } + + /// Take an un-diverted register from one or both sets. + pub fn take(&mut self, rc: RegClass, reg: RegUnit, is_local: bool) { + self.input.take(rc, reg); + if !is_local { + self.global.take(rc, reg); + } + } + + /// Take a diverted register from both sets for a non-local allocation. + pub fn take_divert(&mut self, rc: RegClass, reg: RegUnit, reg_divert: RegUnit) { + self.input.take(rc, reg_divert); + self.global.take(rc, reg); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/context.rs b/third_party/rust/cranelift-codegen/src/regalloc/context.rs new file mode 100644 index 0000000000..505b1d127a --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/context.rs @@ -0,0 +1,252 @@ +//! Register allocator context. +//! +//! The `Context` struct contains data structures that should be preserved across invocations of +//! the register allocator algorithm. This doesn't preserve any data between functions, but it +//! avoids allocating data structures independently for each function begin compiled. 
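+//!
+//! A rough usage sketch (the `isa`, `func`, `cfg`, and `domtree` values here stand
+//! in for state set up elsewhere in the compilation pipeline):
+//!
+//! ```ignore
+//! let mut ctx = Context::new();
+//! ctx.run(isa, &mut func, &mut cfg, &mut domtree)?;
+//! // Reuse `ctx` for the next function after recomputing its CFG and domtree.
+//! ```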
+ +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::Function; +use crate::isa::TargetIsa; +use crate::regalloc::branch_splitting; +use crate::regalloc::coalescing::Coalescing; +use crate::regalloc::coloring::Coloring; +use crate::regalloc::live_value_tracker::LiveValueTracker; +use crate::regalloc::liveness::Liveness; +use crate::regalloc::reload::Reload; +use crate::regalloc::safepoint::emit_stack_maps; +use crate::regalloc::spilling::Spilling; +use crate::regalloc::virtregs::VirtRegs; +use crate::result::CodegenResult; +use crate::timing; +use crate::topo_order::TopoOrder; +use crate::verifier::{ + verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors, +}; + +/// Persistent memory allocations for register allocation. +pub struct Context { + liveness: Liveness, + virtregs: VirtRegs, + coalescing: Coalescing, + topo: TopoOrder, + tracker: LiveValueTracker, + spilling: Spilling, + reload: Reload, + coloring: Coloring, +} + +impl Context { + /// Create a new context for register allocation. + /// + /// This context should be reused for multiple functions in order to avoid repeated memory + /// allocations. + pub fn new() -> Self { + Self { + liveness: Liveness::new(), + virtregs: VirtRegs::new(), + coalescing: Coalescing::new(), + topo: TopoOrder::new(), + tracker: LiveValueTracker::new(), + spilling: Spilling::new(), + reload: Reload::new(), + coloring: Coloring::new(), + } + } + + /// Clear all data structures in this context. + pub fn clear(&mut self) { + self.liveness.clear(); + self.virtregs.clear(); + self.coalescing.clear(); + self.topo.clear(); + self.tracker.clear(); + self.spilling.clear(); + self.reload.clear(); + self.coloring.clear(); + } + + /// Current values liveness state. + pub fn liveness(&self) -> &Liveness { + &self.liveness + } + + /// Allocate registers in `func`. + /// + /// After register allocation, all values in `func` have been assigned to a register or stack + /// location that is consistent with instruction encoding constraints. + pub fn run( + &mut self, + isa: &dyn TargetIsa, + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, + ) -> CodegenResult<()> { + let _tt = timing::regalloc(); + debug_assert!(domtree.is_valid()); + + let mut errors = VerifierErrors::default(); + + // `Liveness` and `Coloring` are self-clearing. + self.virtregs.clear(); + + // Tracker state (dominator live sets) is actually reused between the spilling and coloring + // phases. + self.tracker.clear(); + + // Pass: Split branches, add space where to add copy & regmove instructions. + branch_splitting::run(isa, func, cfg, domtree, &mut self.topo); + + // Pass: Liveness analysis. + self.liveness.compute(isa, func, cfg); + + if isa.flags().enable_verifier() { + let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok(); + + if !ok { + return Err(errors.into()); + } + } + + // Pass: Coalesce and create Conventional SSA form. + self.coalescing.conventional_ssa( + isa, + func, + cfg, + domtree, + &mut self.liveness, + &mut self.virtregs, + ); + + if isa.flags().enable_verifier() { + let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() + && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() + && verify_cssa( + func, + cfg, + domtree, + &self.liveness, + &self.virtregs, + &mut errors, + ) + .is_ok(); + + if !ok { + return Err(errors.into()); + } + } + + // Pass: Spilling. 
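+        // The spiller lowers register pressure: it counts the live values at each
+        // program point and inserts spill code until everything fits in the
+        // available registers (see the discussion in `liveness.rs`).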
+ self.spilling.run( + isa, + func, + domtree, + &mut self.liveness, + &self.virtregs, + &mut self.topo, + &mut self.tracker, + ); + + if isa.flags().enable_verifier() { + let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() + && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() + && verify_cssa( + func, + cfg, + domtree, + &self.liveness, + &self.virtregs, + &mut errors, + ) + .is_ok(); + + if !ok { + return Err(errors.into()); + } + } + + // Pass: Reload. + self.reload.run( + isa, + func, + domtree, + &mut self.liveness, + &mut self.topo, + &mut self.tracker, + ); + + if isa.flags().enable_verifier() { + let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() + && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() + && verify_cssa( + func, + cfg, + domtree, + &self.liveness, + &self.virtregs, + &mut errors, + ) + .is_ok(); + + if !ok { + return Err(errors.into()); + } + } + + // Pass: Coloring. + self.coloring.run( + isa, + func, + cfg, + domtree, + &mut self.liveness, + &mut self.tracker, + ); + + // If there are any reference types used, encode safepoints and emit + // stack maps. + // + // This function runs after register allocation has taken place, meaning + // values have locations assigned already, which is necessary for + // creating the stack maps. + let safepoints_enabled = isa.flags().enable_safepoints(); + for val in func.dfg.values() { + let ty = func.dfg.value_type(val); + if ty.lane_type().is_ref() { + assert!( + safepoints_enabled, + "reference types were found but safepoints were not enabled" + ); + emit_stack_maps(func, domtree, &self.liveness, &mut self.tracker, isa); + break; + } + } + + if isa.flags().enable_verifier() { + let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() + && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() + && verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok() + && verify_cssa( + func, + cfg, + domtree, + &self.liveness, + &self.virtregs, + &mut errors, + ) + .is_ok(); + + if !ok { + return Err(errors.into()); + } + } + + // Even if we arrive here, (non-fatal) errors might have been reported, so we + // must make sure absolutely nothing is wrong + if errors.is_empty() { + Ok(()) + } else { + Err(errors.into()) + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/diversion.rs b/third_party/rust/cranelift-codegen/src/regalloc/diversion.rs new file mode 100644 index 0000000000..be7b7fa1ee --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/diversion.rs @@ -0,0 +1,315 @@ +//! Register diversions. +//! +//! Normally, a value is assigned to a single register or stack location by the register allocator. +//! Sometimes, it is necessary to move register values to a different register in order to satisfy +//! instruction constraints. +//! +//! These register diversions are local to a block. No values can be diverted when entering a new +//! block. + +use crate::fx::FxHashMap; +use crate::hash_map::{Entry, Iter}; +use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations}; +use crate::ir::{InstructionData, Opcode}; +use crate::isa::{RegInfo, RegUnit}; +use core::fmt; +use cranelift_entity::{SparseMap, SparseMapValue}; + +/// A diversion of a value from its original location to a new register or stack location. +/// +/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the +/// same value. 
+/// +/// When tracking diversions, the `from` field is the original assigned value location, and `to` is +/// the current one. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Diversion { + /// The original value location. + pub from: ValueLoc, + /// The current value location. + pub to: ValueLoc, +} + +impl Diversion { + /// Make a new diversion. + pub fn new(from: ValueLoc, to: ValueLoc) -> Self { + debug_assert!(from.is_assigned() && to.is_assigned()); + Self { from, to } + } +} + +/// Keep track of diversions in a block. +#[derive(Clone)] +pub struct RegDiversions { + current: FxHashMap<Value, Diversion>, +} + +/// Keep track of diversions at the entry of block. +#[derive(Clone)] +struct EntryRegDiversionsValue { + key: Block, + divert: RegDiversions, +} + +/// Map block to their matching RegDiversions at basic blocks entry. +pub struct EntryRegDiversions { + map: SparseMap<Block, EntryRegDiversionsValue>, +} + +impl RegDiversions { + /// Create a new empty diversion tracker. + pub fn new() -> Self { + Self { + current: FxHashMap::default(), + } + } + + /// Clear the content of the diversions, to reset the state of the compiler. + pub fn clear(&mut self) { + self.current.clear() + } + + /// Are there any diversions? + pub fn is_empty(&self) -> bool { + self.current.is_empty() + } + + /// Get the current diversion of `value`, if any. + pub fn diversion(&self, value: Value) -> Option<&Diversion> { + self.current.get(&value) + } + + /// Get all current diversions. + pub fn iter(&self) -> Iter<'_, Value, Diversion> { + self.current.iter() + } + + /// Get the current location for `value`. Fall back to the assignment map for non-diverted + /// values + pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc { + match self.diversion(value) { + Some(d) => d.to, + None => locations[value], + } + } + + /// Get the current register location for `value`, or panic if `value` isn't in a register. + pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit { + self.get(value, locations).unwrap_reg() + } + + /// Get the current stack location for `value`, or panic if `value` isn't in a stack slot. + pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot { + self.get(value, locations).unwrap_stack() + } + + /// Record any kind of move. + /// + /// The `from` location must match an existing `to` location, if any. + fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) { + debug_assert!(from.is_assigned() && to.is_assigned()); + match self.current.entry(value) { + Entry::Occupied(mut e) => { + // TODO: non-lexical lifetimes should allow removal of the scope and early return. + { + let d = e.get_mut(); + debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value); + if d.from != to { + d.to = to; + return; + } + } + e.remove(); + } + Entry::Vacant(e) => { + e.insert(Diversion::new(from, to)); + } + } + } + + /// Record a register -> register move. + pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) { + self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to)); + } + + /// Record a register -> stack move. + pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) { + self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to)); + } + + /// Record a stack -> register move. + pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) { + self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to)); + } + + /// Apply the effect of `inst`. 
+ /// + /// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to + /// match. + pub fn apply(&mut self, inst: &InstructionData) { + match *inst { + InstructionData::RegMove { + opcode: Opcode::Regmove, + arg, + src, + dst, + } => self.regmove(arg, src, dst), + InstructionData::RegSpill { + opcode: Opcode::Regspill, + arg, + src, + dst, + } => self.regspill(arg, src, dst), + InstructionData::RegFill { + opcode: Opcode::Regfill, + arg, + src, + dst, + } => self.regfill(arg, src, dst), + _ => {} + } + } + + /// Drop any recorded move for `value`. + /// + /// Returns the `to` location of the removed diversion. + pub fn remove(&mut self, value: Value) -> Option<ValueLoc> { + self.current.remove(&value).map(|d| d.to) + } + + /// Resets the state of the current diversions to the recorded diversions at the entry of the + /// given `block`. The recoded diversions is available after coloring on `func.entry_diversions` + /// field. + pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) { + self.clear(); + if let Some(entry_divert) = entry_diversions.map.get(block) { + let iter = entry_divert.divert.current.iter(); + self.current.extend(iter); + } + } + + /// Copy the current state of the diversions, and save it for the entry of the `block` given as + /// argument. + /// + /// Note: This function can only be called once on a `Block` with a given `entry_diversions` + /// argument, otherwise it would panic. + pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) { + // No need to save anything if there is no diversions to be recorded. + if self.is_empty() { + return; + } + debug_assert!(!entry_diversions.map.contains_key(target)); + let iter = self.current.iter(); + let mut entry_divert = Self::new(); + entry_divert.current.extend(iter); + entry_diversions.map.insert(EntryRegDiversionsValue { + key: target, + divert: entry_divert, + }); + } + + /// Check that the recorded entry for a given `block` matches what is recorded in the + /// `entry_diversions`. + pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool { + let entry_divert = match entry_diversions.map.get(target) { + Some(entry_divert) => entry_divert, + None => return self.is_empty(), + }; + + if entry_divert.divert.current.len() != self.current.len() { + return false; + } + + for (val, _) in entry_divert.divert.current.iter() { + if !self.current.contains_key(val) { + return false; + } + } + true + } + + /// Return an object that can display the diversions. + pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> { + DisplayDiversions(&self, regs.into()) + } +} + +impl EntryRegDiversions { + /// Create a new empty entry diversion, to associate diversions to each block entry. + pub fn new() -> Self { + Self { + map: SparseMap::new(), + } + } + + pub fn clear(&mut self) { + self.map.clear(); + } +} + +impl Clone for EntryRegDiversions { + /// The Clone trait is required by `ir::Function`. + fn clone(&self) -> Self { + let mut tmp = Self::new(); + for v in self.map.values() { + tmp.map.insert(v.clone()); + } + tmp + } +} + +/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry +/// diversions for each block. +impl SparseMapValue<Block> for EntryRegDiversionsValue { + fn key(&self) -> Block { + self.key + } +} + +/// Object that displays register diversions. 
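+///
+/// The rendered form looks roughly like `{ v1: %rx -> %ry }`; the register names
+/// shown here are illustrative, since the actual spelling depends on the optional
+/// `RegInfo` passed to `RegDiversions::display`.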
+pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>); + +impl<'a> fmt::Display for DisplayDiversions<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{{")?; + for (value, div) in self.0.current.iter() { + write!( + f, + " {}: {} -> {}", + value, + div.from.display(self.1), + div.to.display(self.1) + )? + } + write!(f, " }}") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entity::EntityRef; + use crate::ir::Value; + + #[test] + fn inserts() { + let mut divs = RegDiversions::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + + divs.regmove(v1, 10, 12); + assert_eq!( + divs.diversion(v1), + Some(&Diversion { + from: ValueLoc::Reg(10), + to: ValueLoc::Reg(12), + }) + ); + assert_eq!(divs.diversion(v2), None); + + divs.regmove(v1, 12, 11); + assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11)); + divs.regmove(v1, 11, 10); + assert_eq!(divs.diversion(v1), None); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs b/third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs new file mode 100644 index 0000000000..ae33a15f4d --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/live_value_tracker.rs @@ -0,0 +1,344 @@ +//! Track which values are live in a block with instruction granularity. +//! +//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a block. +//! The sets of live values are computed on the fly as the tracker is moved from instruction to +//! instruction, starting at the block header. + +use crate::dominator_tree::DominatorTree; +use crate::entity::{EntityList, ListPool}; +use crate::fx::FxHashMap; +use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value}; +use crate::partition_slice::partition_slice; +use crate::regalloc::affinity::Affinity; +use crate::regalloc::liveness::Liveness; +use crate::regalloc::liverange::LiveRange; +use alloc::vec::Vec; + +type ValueList = EntityList<Value>; + +/// Compute and track live values throughout a block. +pub struct LiveValueTracker { + /// The set of values that are live at the current program point. + live: LiveValueVec, + + /// Saved set of live values for every jump and branch that can potentially be an immediate + /// dominator of a block. + /// + /// This is the set of values that are live *before* the branch. + idom_sets: FxHashMap<Inst, ValueList>, + + /// Memory pool for the live sets. + idom_pool: ListPool<Value>, +} + +/// Information about a value that is live at the current program point. +#[derive(Debug)] +pub struct LiveValue { + /// The live value. + pub value: Value, + + /// The local ending point of the live range in the current block, as returned by + /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`. + pub endpoint: Inst, + + /// The affinity of the value as represented in its `LiveRange`. + /// + /// This value is simply a copy of the affinity stored in the live range. We copy it because + /// almost all users of `LiveValue` need to look at it. + pub affinity: Affinity, + + /// The live range for this value never leaves its block. + pub is_local: bool, + + /// This value is dead - the live range ends immediately. + pub is_dead: bool, +} + +struct LiveValueVec { + /// The set of values that are live at the current program point. + values: Vec<LiveValue>, + + /// How many values at the front of `values` are known to be live after `inst`? 
+ /// + /// This is used to pass a much smaller slice to `partition_slice` when its called a second + /// time for the same instruction. + live_prefix: Option<(Inst, usize)>, +} + +impl LiveValueVec { + fn new() -> Self { + Self { + values: Vec::new(), + live_prefix: None, + } + } + + /// Add a new live value to `values`. Copy some properties from `lr`. + fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) { + self.values.push(LiveValue { + value, + endpoint, + affinity: lr.affinity, + is_local: lr.is_local(), + is_dead: lr.is_dead(), + }); + } + + /// Remove all elements. + fn clear(&mut self) { + self.values.clear(); + self.live_prefix = None; + } + + /// Make sure that the values killed by `next_inst` are moved to the end of the `values` + /// vector. + /// + /// Returns the number of values that will be live after `next_inst`. + fn live_after(&mut self, next_inst: Inst) -> usize { + // How many values at the front of the vector are already known to survive `next_inst`? + // We don't need to pass this prefix to `partition_slice()` + let keep = match self.live_prefix { + Some((i, prefix)) if i == next_inst => prefix, + _ => 0, + }; + + // Move the remaining surviving values to the front partition of the vector. + let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst); + + // Remember the new prefix length in case we get called again for the same `next_inst`. + self.live_prefix = Some((next_inst, prefix)); + prefix + } + + /// Remove the values killed by `next_inst`. + fn remove_kill_values(&mut self, next_inst: Inst) { + let keep = self.live_after(next_inst); + self.values.truncate(keep); + } + + /// Remove any dead values. + fn remove_dead_values(&mut self) { + self.values.retain(|v| !v.is_dead); + self.live_prefix = None; + } +} + +impl LiveValueTracker { + /// Create a new blank tracker. + pub fn new() -> Self { + Self { + live: LiveValueVec::new(), + idom_sets: FxHashMap(), + idom_pool: ListPool::new(), + } + } + + /// Clear all cached information. + pub fn clear(&mut self) { + self.live.clear(); + self.idom_sets.clear(); + self.idom_pool.clear(); + } + + /// Get the set of currently live values. + /// + /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and + /// defined by the current instruction. + pub fn live(&self) -> &[LiveValue] { + &self.live.values + } + + /// Get a mutable set of currently live values. + /// + /// Use with care and don't move entries around. + pub fn live_mut(&mut self) -> &mut [LiveValue] { + &mut self.live.values + } + + /// Move the current position to the top of `block`. + /// + /// This depends on the stored live value set at `block`'s immediate dominator, so that must have + /// been visited first. + /// + /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values + /// from the immediate dominator. The second slice is the set of `block` parameters. + /// + /// Dead parameters with no uses are included in `args`. Call `drop_dead_args()` to remove them. + pub fn block_top( + &mut self, + block: Block, + dfg: &DataFlowGraph, + liveness: &Liveness, + layout: &Layout, + domtree: &DominatorTree, + ) -> (&[LiveValue], &[LiveValue]) { + // Start over, compute the set of live values at the top of the block from two sources: + // + // 1. Values that were live before `block`'s immediate dominator, filtered for those that are + // actually live-in. + // 2. Arguments to `block` that are not dead. 
+ // + self.live.clear(); + + // Compute the live-in values. Start by filtering the set of values that were live before + // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e., + // the entry block or an unreachable block). + if let Some(idom) = domtree.idom(block) { + // If the immediate dominator exits, we must have a stored list for it. This is a + // requirement to the order blocks are visited: All dominators must have been processed + // before the current block. + let idom_live_list = self + .idom_sets + .get(&idom) + .expect("No stored live set for dominator"); + // Get just the values that are live-in to `block`. + for &value in idom_live_list.as_slice(&self.idom_pool) { + let lr = liveness + .get(value) + .expect("Immediate dominator value has no live range"); + + // Check if this value is live-in here. + if let Some(endpoint) = lr.livein_local_end(block, layout) { + self.live.push(value, endpoint, lr); + } + } + } + + // Now add all the live parameters to `block`. + let first_arg = self.live.values.len(); + for &value in dfg.block_params(block) { + let lr = &liveness[value]; + debug_assert_eq!(lr.def(), block.into()); + match lr.def_local_end().into() { + ExpandedProgramPoint::Inst(endpoint) => { + self.live.push(value, endpoint, lr); + } + ExpandedProgramPoint::Block(local_block) => { + // This is a dead block parameter which is not even live into the first + // instruction in the block. + debug_assert_eq!( + local_block, block, + "block parameter live range ends at wrong block header" + ); + // Give this value a fake endpoint that is the first instruction in the block. + // We expect it to be removed by calling `drop_dead_args()`. + self.live + .push(value, layout.first_inst(block).expect("Empty block"), lr); + } + } + } + + self.live.values.split_at(first_arg) + } + + /// Prepare to move past `inst`. + /// + /// Determine the set of already live values that are killed by `inst`, and add the new defined + /// values to the tracked set. + /// + /// Returns `(throughs, kills, defs)` as a tuple of slices: + /// + /// 1. The `throughs` slice is the set of live-through values that are neither defined nor + /// killed by the instruction. + /// 2. The `kills` slice is the set of values that were live before the instruction and are + /// killed at the instruction. This does not include dead defs. + /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes + /// dead defines. + /// + /// The order of `throughs` and `kills` is arbitrary. + /// + /// The `drop_dead()` method must be called next to actually remove the dead values from the + /// tracked set after the two returned slices are no longer needed. + pub fn process_inst( + &mut self, + inst: Inst, + dfg: &DataFlowGraph, + liveness: &Liveness, + ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) { + // Save a copy of the live values before any branches or jumps that could be somebody's + // immediate dominator. + if dfg[inst].opcode().is_branch() { + self.save_idom_live_set(inst); + } + + // Move killed values to the end of the vector. + // Don't remove them yet, `drop_dead()` will do that. + let first_kill = self.live.live_after(inst); + + // Add the values defined by `inst`. 
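+        // After this loop the vector is laid out as: `[0..first_kill]` live-through,
+        // `[first_kill..first_def]` killed, `[first_def..]` newly defined, which is
+        // exactly the `(throughs, kills, defs)` triple returned below.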
+ let first_def = self.live.values.len(); + for &value in dfg.inst_results(inst) { + let lr = &liveness[value]; + debug_assert_eq!(lr.def(), inst.into()); + match lr.def_local_end().into() { + ExpandedProgramPoint::Inst(endpoint) => { + self.live.push(value, endpoint, lr); + } + ExpandedProgramPoint::Block(block) => { + panic!("Instruction result live range can't end at {}", block); + } + } + } + + ( + &self.live.values[0..first_kill], + &self.live.values[first_kill..first_def], + &self.live.values[first_def..], + ) + } + + /// Prepare to move past a ghost instruction. + /// + /// This is like `process_inst`, except any defs are ignored. + /// + /// Returns `(throughs, kills)`. + pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) { + let first_kill = self.live.live_after(inst); + self.live.values.as_slice().split_at(first_kill) + } + + /// Drop the values that are now dead after moving past `inst`. + /// + /// This removes both live values that were killed by `inst` and dead defines on `inst` itself. + /// + /// This must be called after `process_inst(inst)` and before proceeding to the next + /// instruction. + pub fn drop_dead(&mut self, inst: Inst) { + // Remove both live values that were killed by `inst` and dead defines from `inst`. + self.live.remove_kill_values(inst); + } + + /// Drop any values that are marked as `is_dead`. + /// + /// Use this after calling `block_top` to clean out dead block parameters. + pub fn drop_dead_params(&mut self) { + self.live.remove_dead_values(); + } + + /// Process new spills. + /// + /// Any values where `f` returns true are spilled and will be treated as if their affinity was + /// `Stack`. + pub fn process_spills<F>(&mut self, mut f: F) + where + F: FnMut(Value) -> bool, + { + for lv in &mut self.live.values { + if f(lv.value) { + lv.affinity = Affinity::Stack; + } + } + } + + /// Save the current set of live values so it is associated with `idom`. + fn save_idom_live_set(&mut self, idom: Inst) { + let values = self.live.values.iter().map(|lv| lv.value); + let pool = &mut self.idom_pool; + // If there already is a set saved for `idom`, just keep it. + self.idom_sets.entry(idom).or_insert_with(|| { + let mut list = ValueList::default(); + list.extend(values, pool); + list + }); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/liveness.rs b/third_party/rust/cranelift-codegen/src/regalloc/liveness.rs new file mode 100644 index 0000000000..2e9c5015bd --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/liveness.rs @@ -0,0 +1,443 @@ +//! Liveness analysis for SSA values. +//! +//! This module computes the live range of all the SSA values in a function and produces a +//! `LiveRange` instance for each. +//! +//! +//! # Liveness consumers +//! +//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each +//! block and assigns a register to the defined values. This algorithm needs to maintain a set of the +//! currently live values as it is iterating down the instructions in the block. It asks the +//! following questions: +//! +//! - What is the set of live values at the entry to the block? +//! - When moving past a use of a value, is that value still alive in the block, or was that the last +//! use? +//! - When moving past a branch, which of the live values are still live below the branch? +//! +//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and +//! `livein_local_end` queries. 
The coloring algorithm visits blocks in a topological order of the
+//! dominator tree, so it can compute the set of live values at the beginning of a block by starting
+//! from the set of live values at the dominating branch instruction and filtering it with
+//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
+//!
+//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
+//! number of live values at every program point and insert spill code until the number of
+//! registers needed is small enough.
+//!
+//!
+//! # Alternative algorithms
+//!
+//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
+//! alternatives.
+//!
+//! ## Data-flow equations
+//!
+//! The classic *live variables analysis* that you will find in all compiler books from the
+//! previous century does not depend on SSA form. It is typically implemented by iteratively
+//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
+//! variables for every basic block in the program.
+//!
+//! This algorithm has some disadvantages that make us look elsewhere:
+//!
+//! - Quadratic memory use. We need a bit per variable per basic block in the function.
+//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
+//!   their basic block, and those that do span basic blocks rarely span a large number of basic
+//!   blocks. This makes the data stored in the bit-vectors quite sparse.
+//! - Traditionally, the data-flow equations were solved for real program *variables* which does
+//!   not include temporaries used in evaluating expressions. We have an SSA form program which
+//!   blurs the distinction between temporaries and variables. This makes the quadratic memory
+//!   problem worse because there are many more SSA values than there were variables in the original
+//!   program, and we don't know a priori which SSA values leave their basic block.
+//! - Missing last-use information. For values that are not live-out of a basic block, we would
+//!   need to store information about the last use in the block somewhere. LLVM stores this
+//!   information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
+//!   source of problems for LLVM's register allocator.
+//!
+//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
+//! multiple definitions of the same variable. We don't need this generality since we already have
+//! a program in SSA form.
+//!
+//! ## LLVM's liveness analysis
+//!
+//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
+//! a disjoint union of related SSA values that should be assigned to the same physical register.
+//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
+//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
+//! describes the live range of a virtual register *and* which one of the related SSA values is
+//! live at any given program point.
+//!
+//! LLVM computes the live range of each virtual register independently by using the use-def chains
+//! that are baked into its IR. The algorithm for a single virtual register is:
+//!
+//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
+//!    the def-chain. This does not include any phi-values.
+//! 2. Go through the virtual register's use chain and perform the following steps at each use:
+//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
+//!    that already contain some liveness and extend the last live SSA value in the block to be
+//!    live-out. Also build a list of new basic blocks where the register needs to be live-in.
+//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
+//!    PHI values to be created when different SSA values can reach the same block.
+//!
+//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
+//! one SSA value.
+//!
+//! This algorithm has some advantages compared to the data-flow equations:
+//!
+//! - The live ranges of local virtual registers are computed very quickly without ever traversing
+//!   the CFG. The memory needed to store these live ranges is independent of the number of basic
+//!   blocks in the program.
+//! - The time to compute the live range of a global virtual register is proportional to the number
+//!   of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
+//!   functions.
+//! - A single live range can be recomputed after making modifications to the IR. No global
+//!   algorithm is necessary. This feature depends on having use-def chains for virtual registers
+//!   which Cranelift doesn't.
+//!
+//! Cranelift uses data structures and algorithms very similar to LLVM's, with the important
+//! difference that live ranges are computed per SSA value instead of per virtual register, and the
+//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
+//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
+//!
+//! ## Fast Liveness Checking for SSA-Form Programs
+//!
+//! A liveness analysis that is often brought up in the context of SSA-based register allocation
+//! was presented at CGO 2008:
+//!
+//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
+//! Checking for SSA-Form Programs.* CGO.
+//!
+//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
+//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
+//! chain of the value and performs lookups in the precomputed bit-vectors.
+//!
+//! I did not seriously consider this analysis for Cranelift because:
+//!
+//! - It depends critically on use chains which Cranelift doesn't have.
+//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
+//!   Traversing such a long use chain on every liveness lookup has the potential for some nasty
+//!   quadratic behavior in unfortunate cases.
+//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
+//!   based approach, which isn't that impressive.
+//!
+//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
+//! gains use chains, this approach would be worth a proper evaluation.
+//!
+//!
+//! # Cranelift's liveness analysis
+//!
+//! The algorithm implemented in this module is similar to LLVM's with these differences:
+//!
+//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
+//!   register.
+//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
+//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
+//!   chains, so it is not possible to compute the live range for a single SSA value independently.
+//!
+//! The liveness computation visits all instructions in the program. The order is not important for
+//! the algorithm to be correct. At each instruction, the used values are examined.
+//!
+//! - The first time a value is encountered, its live range is constructed as a dead live range
+//!   containing only the defining program point.
+//! - The local interval of the value's live range is extended so it reaches the use. This may
+//!   require creating a new live-in local interval for the block.
+//! - If the live range became live-in to the block, add the block to a work-list.
+//! - While the work-list is non-empty, pop a live-in block and repeat the two steps above, using each
+//!   of the live-in block's CFG predecessor instructions as a 'use'.
+//!
+//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
+//! visited. No data about each value beyond the live range is needed between visiting uses, so
+//! nothing is lost by computing the live range of all values simultaneously.
+//!
+//! ## Cache efficiency of Cranelift vs LLVM
+//!
+//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
+//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
+//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
+//! somewhat chaotically.
+//!
+//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
+//! instruction is brought into cache only once, and it is likely that the other instructions on
+//! the same cache line will be visited before the line is evicted.
+//!
+//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
+//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
+//! multiple related values can live on the same cache line.
+//!
+//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
+//!   implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
+//!   size to 32 bytes.
+//! - Related values should be stored on the same cache line. The current sparse set implementation
+//!   does a decent job of that.
+//! - For global values, the list of live-in intervals is very likely to fit on a single cache
+//!   line. These lists are very likely to be found in L2 cache at least.
+//!
+//! There is some room for improvement.
+
+use crate::entity::SparseMap;
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::dfg::ValueDef;
+use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value};
+use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liverange::LiveRange;
+use crate::timing;
+use alloc::vec::Vec;
+use core::mem;
+use core::ops::Index;
+
+/// A set of live ranges, indexed by value number.
+type LiveRangeSet = SparseMap<Value, LiveRange>;
+
+/// Get a mutable reference to the live range for `value`.
+/// Create it if necessary.
+fn get_or_create<'a>( + lrset: &'a mut LiveRangeSet, + value: Value, + isa: &dyn TargetIsa, + func: &Function, + encinfo: &EncInfo, +) -> &'a mut LiveRange { + // It would be better to use `get_mut()` here, but that leads to borrow checker fighting + // which can probably only be resolved by non-lexical lifetimes. + // https://github.com/rust-lang/rfcs/issues/811 + if lrset.get(value).is_none() { + // Create a live range for value. We need the program point that defines it. + let def; + let affinity; + match func.dfg.value_def(value) { + ValueDef::Result(inst, rnum) => { + def = inst.into(); + // Initialize the affinity from the defining instruction's result constraints. + // Don't do this for call return values which are always tied to a single register. + affinity = encinfo + .operand_constraints(func.encodings[inst]) + .and_then(|rc| rc.outs.get(rnum)) + .map(Affinity::new) + .or_else(|| { + // If this is a call, get the return value affinity. + func.dfg + .call_signature(inst) + .map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa)) + }) + .unwrap_or_default(); + } + ValueDef::Param(block, num) => { + def = block.into(); + if func.layout.entry_block() == Some(block) { + // The affinity for entry block parameters can be inferred from the function + // signature. + affinity = Affinity::abi(&func.signature.params[num], isa); + } else { + // Give normal block parameters a register affinity matching their type. + let rc = isa.regclass_for_abi_type(func.dfg.value_type(value)); + affinity = Affinity::Reg(rc.into()); + } + } + }; + lrset.insert(LiveRange::new(value, def, affinity)); + } + lrset.get_mut(value).unwrap() +} + +/// Extend the live range for `value` so it reaches `to` which must live in `block`. +fn extend_to_use( + lr: &mut LiveRange, + block: Block, + to: Inst, + worklist: &mut Vec<Block>, + func: &Function, + cfg: &ControlFlowGraph, +) { + // This is our scratch working space, and we'll leave it empty when we return. + debug_assert!(worklist.is_empty()); + + // Extend the range locally in `block`. + // If there already was a live interval in that block, we're done. + if lr.extend_in_block(block, to, &func.layout) { + worklist.push(block); + } + + // The work list contains those blocks where we have learned that the value needs to be + // live-in. + // + // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the + // CFG from the existing live range to `block`. + // + // Extend the live range as we go. The live range itself also serves as a visited set since + // `extend_in_block` will never return true twice for the same block. + // + while let Some(livein) = worklist.pop() { + // We've learned that the value needs to be live-in to the `livein` block. + // Make sure it is also live at all predecessor branches to `livein`. + for BlockPredecessor { + block: pred, + inst: branch, + } in cfg.pred_iter(livein) + { + if lr.extend_in_block(pred, branch, &func.layout) { + // This predecessor block also became live-in. We need to process it later. + worklist.push(pred); + } + } + } +} + +/// Liveness analysis for a function. +/// +/// Compute a live range for every SSA value used in the function. +pub struct Liveness { + /// The live ranges that have been computed so far. + ranges: LiveRangeSet, + + /// Working space for the `extend_to_use` algorithm. + /// This vector is always empty, except for inside that function. + /// It lives here to avoid repeated allocation of scratch memory. 
+ worklist: Vec<Block>, +} + +impl Liveness { + /// Create a new empty liveness analysis. + /// + /// The memory allocated for this analysis can be reused for multiple functions. Use the + /// `compute` method to actually runs the analysis for a function. + pub fn new() -> Self { + Self { + ranges: LiveRangeSet::new(), + worklist: Vec::new(), + } + } + + /// Current live ranges. + pub fn ranges(&self) -> &LiveRangeSet { + &self.ranges + } + + /// Clear all data structures in this liveness analysis. + pub fn clear(&mut self) { + self.ranges.clear(); + self.worklist.clear(); + } + + /// Get the live range for `value`, if it exists. + pub fn get(&self, value: Value) -> Option<&LiveRange> { + self.ranges.get(value) + } + + /// Create a new live range for `value`. + /// + /// The new live range will be defined at `def` with no extent, like a dead value. + /// + /// This asserts that `value` does not have an existing live range. + pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity) + where + PP: Into<ProgramPoint>, + { + let old = self + .ranges + .insert(LiveRange::new(value, def.into(), affinity)); + debug_assert!(old.is_none(), "{} already has a live range", value); + } + + /// Move the definition of `value` to `def`. + /// + /// The old and new def points must be in the same block, and before the end of the live range. + pub fn move_def_locally<PP>(&mut self, value: Value, def: PP) + where + PP: Into<ProgramPoint>, + { + let lr = self.ranges.get_mut(value).expect("Value has no live range"); + lr.move_def_locally(def.into()); + } + + /// Locally extend the live range for `value` to reach `user`. + /// + /// It is assumed the `value` is already live before `user` in `block`. + /// + /// Returns a mutable reference to the value's affinity in case that also needs to be updated. + pub fn extend_locally( + &mut self, + value: Value, + block: Block, + user: Inst, + layout: &Layout, + ) -> &mut Affinity { + debug_assert_eq!(Some(block), layout.inst_block(user)); + let lr = self.ranges.get_mut(value).expect("Value has no live range"); + let livein = lr.extend_in_block(block, user, layout); + debug_assert!(!livein, "{} should already be live in {}", value, block); + &mut lr.affinity + } + + /// Change the affinity of `value` to `Stack` and return the previous affinity. + pub fn spill(&mut self, value: Value) -> Affinity { + let lr = self.ranges.get_mut(value).expect("Value has no live range"); + mem::replace(&mut lr.affinity, Affinity::Stack) + } + + /// Compute the live ranges of all SSA values used in `func`. + /// This clears out any existing analysis stored in this data structure. + pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) { + let _tt = timing::ra_liveness(); + self.ranges.clear(); + + // Get ISA data structures used for computing live range affinities. + let encinfo = isa.encoding_info(); + let reginfo = isa.register_info(); + + // The liveness computation needs to visit all uses, but the order doesn't matter. + // TODO: Perhaps this traversal of the function could be combined with a dead code + // elimination pass if we visit a post-order of the dominator tree? + for block in func.layout.blocks() { + // Make sure we have created live ranges for dead block parameters. + // TODO: If these parameters are really dead, we could remove them, except for the + // entry block which must match the function signature. 
+ for &arg in func.dfg.block_params(block) { + get_or_create(&mut self.ranges, arg, isa, func, &encinfo); + } + + for inst in func.layout.block_insts(block) { + // Eliminate all value aliases, they would confuse the register allocator. + func.dfg.resolve_aliases_in_arguments(inst); + + // Make sure we have created live ranges for dead defs. + // TODO: When we implement DCE, we can use the absence of a live range to indicate + // an unused value. + for &def in func.dfg.inst_results(inst) { + get_or_create(&mut self.ranges, def, isa, func, &encinfo); + } + + // Iterator of constraints, one per value operand. + let encoding = func.encodings[inst]; + let operand_constraint_slice: &[OperandConstraint] = + encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins); + let mut operand_constraints = operand_constraint_slice.iter(); + + for &arg in func.dfg.inst_args(inst) { + // Get the live range, create it as a dead range if necessary. + let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo); + + // Extend the live range to reach this use. + extend_to_use(lr, block, inst, &mut self.worklist, func, cfg); + + // Apply operand constraint, ignoring any variable arguments after the fixed + // operands described by `operand_constraints`. Variable arguments are either + // block arguments or call/return ABI arguments. + if let Some(constraint) = operand_constraints.next() { + lr.affinity.merge(constraint, ®info); + } + } + } + } + } +} + +impl Index<Value> for Liveness { + type Output = LiveRange; + fn index(&self, index: Value) -> &LiveRange { + self.ranges + .get(index) + .unwrap_or_else(|| panic!("{} has no live range", index)) + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/liverange.rs b/third_party/rust/cranelift-codegen/src/regalloc/liverange.rs new file mode 100644 index 0000000000..e9b3db5bb0 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/liverange.rs @@ -0,0 +1,720 @@ +//! Data structure representing the live range of an SSA value. +//! +//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of +//! an SSA value begins where it is defined and extends to all program points where the value is +//! still needed. +//! +//! # Local Live Ranges +//! +//! Inside a single basic block, the live range of a value is always an interval between +//! two program points (if the value is live in the block at all). The starting point is either: +//! +//! 1. The instruction that defines the value, or +//! 2. The block header, because the value is an argument to the block, or +//! 3. The block header, because the value is defined in another block and live-in to this one. +//! +//! The ending point of the local live range is the last of the following program points in the +//! block: +//! +//! 1. The last use in the block, where a *use* is an instruction that has the value as an argument. +//! 2. The last branch or jump instruction in the block that can reach a use. +//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it. +//! +//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined +//! outside a loop and used inside the loop, it will be live in the entire loop. +//! +//! # Global Live Ranges +//! +//! Values that appear in more than one block have a *global live range* which can be seen as the +//! disjoint union of the per-block local intervals for all of the blocks where the value is live. +//! 
Together with a `ProgramOrder` which provides a linear ordering of the blocks, the global live +//! range becomes a linear sequence of disjoint intervals, at most one per block. +//! +//! In the special case of a dead value, the global live range is a single interval where the start +//! and end points are the same. The global live range of a value is never completely empty. +//! +//! # Register interference +//! +//! The register allocator uses live ranges to determine if values *interfere*, which means that +//! they can't be stored in the same register. Two live ranges interfere if and only if any of +//! their intervals overlap. +//! +//! If one live range ends at an instruction that defines another live range, those two live ranges +//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input +//! register for an output value. If Cranelift gets support for inline assembly, we will need to +//! handle *early clobbers* which are output registers that are not allowed to alias any input +//! registers. +//! +//! If `i1 < i2 < i3` are program points, we have: +//! +//! - `i1-i2` and `i1-i3` interfere because the intervals overlap. +//! - `i1-i2` and `i2-i3` don't interfere. +//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register. +//! - `i1-i2` and `i2-i2` don't interfere. +//! - `i2-i3` and `i2-i2` do interfere. +//! +//! Because of this behavior around interval end points, live range interference is not completely +//! equivalent to mathematical intersection of open or half-open intervals. +//! +//! # Implementation notes +//! +//! A few notes about the implementation of the live intervals field `liveins`. This should not +//! concern someone only looking to use the public interface. +//! +//! ## Current representation +//! +//! Our current implementation uses a sorted array of compressed intervals, represented by their +//! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables coalescing of +//! intervals easily, and shows some nice performance behavior. See +//! https://github.com/bytecodealliance/cranelift/issues/1084 for benchmarks against using a +//! bforest::Map<Block, Inst>. +//! +//! ## block ordering +//! +//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to +//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This doesn't +//! depend on any property of the program order, so alternative orderings are possible: +//! +//! 1. The block layout order. This is what we currently use. +//! 2. A topological order of the dominator tree. All the live-in intervals would come after the +//! def interval. +//! 3. A numerical order by block number. Performant because it doesn't need to indirect through the +//! `ProgramOrder` for comparisons. +//! +//! These orderings will cause small differences in coalescing opportunities, but all of them would +//! do a decent job of compressing a long live range. The numerical order might be preferable +//! because: +//! +//! - It has better performance because block numbers can be compared directly without any table +//! lookups. +//! - If block numbers are not reused, it is safe to allocate new blocks without getting spurious +//! live-in intervals from any coalesced representations that happen to cross a new block. +//! +//! For comparing instructions, the layout order is always what we want. +//! +//! ## Alternative representation +//! +//! 
Since a local live-in interval always begins at its block header, it is uniquely described by its +//! end point instruction alone. We can use the layout to look up the block containing the end point. +//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals. +//! +//! Coalescing is an important compression technique because some live ranges can span thousands of +//! blocks. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where +//! an `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding +//! `Block` entry represents a single live-in interval. +//! +//! This representation is more compact for a live range with many uncoalesced live-in intervals. +//! It is more complicated to work with, though, so it is probably not worth it. The performance +//! benefits of switching to a numerical block order only appears if the binary search is doing +//! block-block comparisons. +//! +//! A `BTreeMap<Block, Inst>` could have been used for the live-in intervals, but it doesn't provide +//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes. +//! +//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted +//! array, see https://github.com/bytecodealliance/cranelift/issues/1084 for details. + +use crate::entity::SparseMapValue; +use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value}; +use crate::regalloc::affinity::Affinity; +use core::cmp::Ordering; +use core::marker::PhantomData; +use smallvec::SmallVec; + +/// Global live range of a single SSA value. +/// +/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an +/// SSA value is the disjoint union of a set of intervals, each local to a single block, and with at +/// most one interval per block. We further distinguish between: +/// +/// 1. The *def interval* is the local interval in the block where the value is defined, and +/// 2. The *live-in intervals* are the local intervals in the remaining blocks. +/// +/// A live-in interval always begins at the block header, while the def interval can begin at the +/// defining instruction, or at the block header for a block argument value. +/// +/// All values have a def interval, but a large proportion of values don't have any live-in +/// intervals. These are called *local live ranges*. +/// +/// # Program order requirements +/// +/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for +/// ordering instructions inside a block *and* for ordering blocks. The methods that depend on the +/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to +/// ensure that the provided ordering is consistent between calls. +/// +/// In particular, changing the order of blocks or inserting new blocks will invalidate live ranges. +/// +/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the +/// instructions using or defining their value, `LiveRange` structs can contain references to +/// branch and jump instructions. +pub type LiveRange = GenericLiveRange<Layout>; + +// See comment of liveins below. +pub struct Interval { + begin: Block, + end: Inst, +} + +/// Generic live range implementation. +/// +/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order. 
+/// Use `LiveRange` instead of using this generic directly. +pub struct GenericLiveRange<PO: ProgramOrder> { + /// The value described by this live range. + /// This member can't be modified in case the live range is stored in a `SparseMap`. + value: Value, + + /// The preferred register allocation for this value. + pub affinity: Affinity, + + /// The instruction or block header where this value is defined. + def_begin: ProgramPoint, + + /// The end point of the def interval. This must always belong to the same block as `def_begin`. + /// + /// We always have `def_begin <= def_end` with equality implying a dead def live range with no + /// uses. + def_end: ProgramPoint, + + /// Additional live-in intervals sorted in program order. + /// + /// This vector is empty for most values which are only used in one block. + /// + /// An entry `block -> inst` means that the live range is live-in to `block`, continuing up to + /// `inst` which may belong to a later block in the program order. + /// + /// The entries are non-overlapping, and none of them overlap the block where the value is + /// defined. + liveins: SmallVec<[Interval; 2]>, + + po: PhantomData<*const PO>, +} + +/// A simple helper macro to make comparisons more natural to read. +macro_rules! cmp { + ($order:ident, $a:ident > $b:expr) => { + $order.cmp($a, $b) == Ordering::Greater + }; + ($order:ident, $a:ident >= $b:expr) => { + $order.cmp($a, $b) != Ordering::Less + }; + ($order:ident, $a:ident < $b:expr) => { + $order.cmp($a, $b) == Ordering::Less + }; + ($order:ident, $a:ident <= $b:expr) => { + $order.cmp($a, $b) != Ordering::Greater + }; +} + +impl<PO: ProgramOrder> GenericLiveRange<PO> { + /// Create a new live range for `value` defined at `def`. + /// + /// The live range will be created as dead, but it can be extended with `extend_in_block()`. + pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self { + Self { + value, + affinity, + def_begin: def, + def_end: def, + liveins: SmallVec::new(), + po: PhantomData, + } + } + + /// Finds an entry in the compressed set of live-in intervals that contains `block`, or return + /// the position where to insert such a new entry. + fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result<usize, usize> { + self.liveins + .binary_search_by(|interval| order.cmp(interval.begin, block)) + .or_else(|n| { + // The previous interval's end might cover the searched block. + if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) { + Ok(n - 1) + } else { + Err(n) + } + }) + } + + /// Extend the local interval for `block` so it reaches `to` which must belong to `block`. + /// Create a live-in interval if necessary. + /// + /// If the live range already has a local interval in `block`, extend its end point so it + /// includes `to`, and return false. + /// + /// If the live range did not previously have a local interval in `block`, add one so the value + /// is live-in to `block`, extending to `to`. Return true. + /// + /// The return value can be used to detect if we just learned that the value is live-in to + /// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks. + pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool { + // First check if we're extending the def interval. + // + // We're assuming here that `inst` never precedes `def_begin` in the same block, but we can't + // check it without a method for getting `inst`'s block. 
+ if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) { + let inst_pp = inst.into(); + debug_assert_ne!( + inst_pp, self.def_begin, + "Can't use value in the defining instruction." + ); + if cmp!(order, inst > self.def_end) { + self.def_end = inst_pp; + } + return false; + } + + // Now check if we're extending any of the existing live-in intervals. + match self.lookup_entry_containing_block(block, order) { + Ok(n) => { + // We found one interval and might need to extend it. + if cmp!(order, inst <= self.liveins[n].end) { + // Both interval parts are already included in a compressed interval. + return false; + } + + // If the instruction at the end is the last instruction before the next block, + // coalesce the two intervals: + // [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end] + if let Some(next) = &self.liveins.get(n + 1) { + if order.is_block_gap(inst, next.begin) { + // At this point we can choose to remove the current interval or the next + // one; remove the next one to avoid one memory move. + let next_end = next.end; + debug_assert!(cmp!(order, next_end > self.liveins[n].end)); + self.liveins[n].end = next_end; + self.liveins.remove(n + 1); + return false; + } + } + + // We can't coalesce, just extend the interval. + self.liveins[n].end = inst; + false + } + + Err(n) => { + // No interval was found containing the current block: we need to insert a new one, + // unless there's a coalescing opportunity with the previous or next one. + let coalesce_next = self + .liveins + .get(n) + .filter(|next| order.is_block_gap(inst, next.begin)) + .is_some(); + let coalesce_prev = self + .liveins + .get(n.wrapping_sub(1)) + .filter(|prev| order.is_block_gap(prev.end, block)) + .is_some(); + + match (coalesce_prev, coalesce_next) { + // The new interval is the missing hole between prev and next: we can merge + // them all together. + (true, true) => { + let prev_end = self.liveins[n - 1].end; + debug_assert!(cmp!(order, prev_end <= self.liveins[n].end)); + self.liveins[n - 1].end = self.liveins[n].end; + self.liveins.remove(n); + } + + // Coalesce only with the previous or next one. + (true, false) => { + debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end)); + self.liveins[n - 1].end = inst; + } + (false, true) => { + debug_assert!(cmp!(order, block <= self.liveins[n].begin)); + self.liveins[n].begin = block; + } + + (false, false) => { + // No coalescing opportunity, we have to insert. + self.liveins.insert( + n, + Interval { + begin: block, + end: inst, + }, + ); + } + } + + true + } + } + } + + /// Is this the live range of a dead value? + /// + /// A dead value has no uses, and its live range ends at the same program point where it is + /// defined. + pub fn is_dead(&self) -> bool { + self.def_begin == self.def_end + } + + /// Is this a local live range? + /// + /// A local live range is only used in the same block where it was defined. It is allowed to span + /// multiple basic blocks within that block. + pub fn is_local(&self) -> bool { + self.liveins.is_empty() + } + + /// Get the program point where this live range is defined. + /// + /// This will be a block header when the value is a block argument, otherwise it is the defining + /// instruction. + pub fn def(&self) -> ProgramPoint { + self.def_begin + } + + /// Move the definition of this value to a new program point. + /// + /// It is only valid to move the definition within the same block, and it can't be moved beyond + /// `def_local_end()`. 
+ pub fn move_def_locally(&mut self, def: ProgramPoint) { + self.def_begin = def; + } + + /// Get the local end-point of this live range in the block where it is defined. + /// + /// This can be the block header itself in the case of a dead block argument. + /// Otherwise, it will be the last local use or branch/jump that can reach a use. + pub fn def_local_end(&self) -> ProgramPoint { + self.def_end + } + + /// Get the local end-point of this live range in a block where it is live-in. + /// + /// If this live range is not live-in to `block`, return `None`. Otherwise, return the end-point + /// of this live range's local interval in `block`. + /// + /// If the live range is live through all of `block`, the terminator of `block` is a correct + /// answer, but it is also possible that an even later program point is returned. So don't + /// depend on the returned `Inst` to belong to `block`. + pub fn livein_local_end(&self, block: Block, order: &PO) -> Option<Inst> { + self.lookup_entry_containing_block(block, order) + .and_then(|i| { + let inst = self.liveins[i].end; + if cmp!(order, block < inst) { + Ok(inst) + } else { + // Can be any error type, really, since it's discarded by ok(). + Err(i) + } + }) + .ok() + } + + /// Is this value live-in to `block`? + /// + /// A block argument is not considered to be live in. + pub fn is_livein(&self, block: Block, order: &PO) -> bool { + self.livein_local_end(block, order).is_some() + } + + /// Get all the live-in intervals. + /// + /// Note that the intervals are stored in a compressed form so each entry may span multiple + /// blocks where the value is live in. + pub fn liveins<'a>(&'a self) -> impl Iterator<Item = (Block, Inst)> + 'a { + self.liveins + .iter() + .map(|interval| (interval.begin, interval.end)) + } + + /// Check if this live range overlaps a definition in `block`. + pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool { + // Two defs at the same program point always overlap, even if one is dead. + if def == self.def_begin.into() { + return true; + } + + // Check for an overlap with the local range. + if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) { + return true; + } + + // Check for an overlap with a live-in range. + match self.livein_local_end(block, order) { + Some(inst) => cmp!(order, def < inst), + None => false, + } + } + + /// Check if this live range reaches a use at `user` in `block`. + pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool { + // Check for an overlap with the local range. + if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) { + return true; + } + + // Check for an overlap with a live-in range. + match self.livein_local_end(block, order) { + Some(inst) => cmp!(order, user <= inst), + None => false, + } + } + + /// Check if this live range is killed at `user` in `block`. + pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool { + self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user) + } +} + +/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values. 
+impl<PO: ProgramOrder> SparseMapValue<Value> for GenericLiveRange<PO> { + fn key(&self) -> Value { + self.value + } +} + +#[cfg(test)] +mod tests { + use super::{GenericLiveRange, Interval}; + use crate::entity::EntityRef; + use crate::ir::{Block, Inst, Value}; + use crate::ir::{ExpandedProgramPoint, ProgramOrder}; + use alloc::vec::Vec; + use core::cmp::Ordering; + + // Dummy program order which simply compares indexes. + // It is assumed that blocks have indexes that are multiples of 10, and instructions have indexes + // in between. `is_block_gap` assumes that terminator instructions have indexes of the form + // block * 10 + 1. This is used in the coalesce test. + struct ProgOrder {} + + impl ProgramOrder for ProgOrder { + fn cmp<A, B>(&self, a: A, b: B) -> Ordering + where + A: Into<ExpandedProgramPoint>, + B: Into<ExpandedProgramPoint>, + { + fn idx(pp: ExpandedProgramPoint) -> usize { + match pp { + ExpandedProgramPoint::Inst(i) => i.index(), + ExpandedProgramPoint::Block(e) => e.index(), + } + } + + let ia = idx(a.into()); + let ib = idx(b.into()); + ia.cmp(&ib) + } + + fn is_block_gap(&self, inst: Inst, block: Block) -> bool { + inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1 + } + } + + impl ProgOrder { + // Get the block corresponding to `inst`. + fn inst_block(&self, inst: Inst) -> Block { + let i = inst.index(); + Block::new(i - i % 10) + } + + // Get the block of a program point. + fn pp_block<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Block { + match pp.into() { + ExpandedProgramPoint::Inst(i) => self.inst_block(i), + ExpandedProgramPoint::Block(e) => e, + } + } + + // Validate the live range invariants. + fn validate(&self, lr: &GenericLiveRange<Self>) { + // The def interval must cover a single block. + let def_block = self.pp_block(lr.def_begin); + assert_eq!(def_block, self.pp_block(lr.def_end)); + + // Check that the def interval isn't backwards. + match self.cmp(lr.def_begin, lr.def_end) { + Ordering::Equal => assert!(lr.liveins.is_empty()), + Ordering::Greater => { + panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end) + } + Ordering::Less => {} + } + + // Check the live-in intervals. + let mut prev_end = None; + for Interval { begin, end } in lr.liveins.iter() { + let begin = *begin; + let end = *end; + + assert_eq!(self.cmp(begin, end), Ordering::Less); + if let Some(e) = prev_end { + assert_eq!(self.cmp(e, begin), Ordering::Less); + } + + assert!( + self.cmp(lr.def_end, begin) == Ordering::Less + || self.cmp(lr.def_begin, end) == Ordering::Greater, + "Interval can't overlap the def block" + ); + + // Save for next round. + prev_end = Some(end); + } + } + } + + // Singleton `ProgramOrder` for tests below. + const PO: &'static ProgOrder = &ProgOrder {}; + + #[test] + fn dead_def_range() { + let v0 = Value::new(0); + let e0 = Block::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let e2 = Block::new(2); + let lr = GenericLiveRange::new(v0, i1.into(), Default::default()); + assert!(lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), i1.into()); + assert_eq!(lr.def_local_end(), i1.into()); + assert_eq!(lr.livein_local_end(e2, PO), None); + PO.validate(&lr); + + // A dead live range overlaps its own def program point. 
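+        // (Per `overlaps_def` above, two defs at the same program point always overlap, even when
+        // one of them is dead, while a dead range does not reach any other program point.)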
+ assert!(lr.overlaps_def(i1.into(), e0, PO)); + assert!(!lr.overlaps_def(i2.into(), e0, PO)); + assert!(!lr.overlaps_def(e0.into(), e0, PO)); + } + + #[test] + fn dead_arg_range() { + let v0 = Value::new(0); + let e2 = Block::new(2); + let lr = GenericLiveRange::new(v0, e2.into(), Default::default()); + assert!(lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), e2.into()); + assert_eq!(lr.def_local_end(), e2.into()); + // The def interval of a block argument does not count as live-in. + assert_eq!(lr.livein_local_end(e2, PO), None); + PO.validate(&lr); + } + + #[test] + fn local_def() { + let v0 = Value::new(0); + let e10 = Block::new(10); + let i11 = Inst::new(11); + let i12 = Inst::new(12); + let i13 = Inst::new(13); + let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); + + assert_eq!(lr.extend_in_block(e10, i13, PO), false); + PO.validate(&lr); + assert!(!lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), i11.into()); + assert_eq!(lr.def_local_end(), i13.into()); + + // Extending to an already covered inst should not change anything. + assert_eq!(lr.extend_in_block(e10, i12, PO), false); + PO.validate(&lr); + assert_eq!(lr.def(), i11.into()); + assert_eq!(lr.def_local_end(), i13.into()); + } + + #[test] + fn local_arg() { + let v0 = Value::new(0); + let e10 = Block::new(10); + let i11 = Inst::new(11); + let i12 = Inst::new(12); + let i13 = Inst::new(13); + let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default()); + + // Extending a dead block argument in its own block should not indicate that a live-in + // interval was created. + assert_eq!(lr.extend_in_block(e10, i12, PO), false); + PO.validate(&lr); + assert!(!lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), e10.into()); + assert_eq!(lr.def_local_end(), i12.into()); + + // Extending to an already covered inst should not change anything. + assert_eq!(lr.extend_in_block(e10, i11, PO), false); + PO.validate(&lr); + assert_eq!(lr.def(), e10.into()); + assert_eq!(lr.def_local_end(), i12.into()); + + // Extending further. + assert_eq!(lr.extend_in_block(e10, i13, PO), false); + PO.validate(&lr); + assert_eq!(lr.def(), e10.into()); + assert_eq!(lr.def_local_end(), i13.into()); + } + + #[test] + fn global_def() { + let v0 = Value::new(0); + let e10 = Block::new(10); + let i11 = Inst::new(11); + let i12 = Inst::new(12); + let e20 = Block::new(20); + let i21 = Inst::new(21); + let i22 = Inst::new(22); + let i23 = Inst::new(23); + let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); + + assert_eq!(lr.extend_in_block(e10, i12, PO), false); + + // Adding a live-in interval. + assert_eq!(lr.extend_in_block(e20, i22, PO), true); + PO.validate(&lr); + assert_eq!(lr.livein_local_end(e20, PO), Some(i22)); + + // Non-extending the live-in. + assert_eq!(lr.extend_in_block(e20, i21, PO), false); + assert_eq!(lr.livein_local_end(e20, PO), Some(i22)); + + // Extending the existing live-in. 
+ assert_eq!(lr.extend_in_block(e20, i23, PO), false); + PO.validate(&lr); + assert_eq!(lr.livein_local_end(e20, PO), Some(i23)); + } + + #[test] + fn coalesce() { + let v0 = Value::new(0); + let i11 = Inst::new(11); + let e20 = Block::new(20); + let i21 = Inst::new(21); + let e30 = Block::new(30); + let i31 = Inst::new(31); + let e40 = Block::new(40); + let i41 = Inst::new(41); + let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); + + assert_eq!(lr.extend_in_block(e30, i31, PO,), true); + assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i31)]); + + // Coalesce to previous + assert_eq!(lr.extend_in_block(e40, i41, PO,), true); + assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i41)]); + + // Coalesce to next + assert_eq!(lr.extend_in_block(e20, i21, PO,), true); + assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]); + + let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); + + assert_eq!(lr.extend_in_block(e40, i41, PO,), true); + assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e40, i41)]); + + assert_eq!(lr.extend_in_block(e20, i21, PO,), true); + assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i21), (e40, i41)]); + + // Coalesce to previous and next + assert_eq!(lr.extend_in_block(e30, i31, PO,), true); + assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/mod.rs b/third_party/rust/cranelift-codegen/src/regalloc/mod.rs new file mode 100644 index 0000000000..581acc408e --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/mod.rs @@ -0,0 +1,26 @@ +//! Register allocation. +//! +//! This module contains data structures and algorithms used for register allocation. + +pub mod coloring; +pub mod live_value_tracker; +pub mod liveness; +pub mod liverange; +pub mod register_set; +pub mod virtregs; + +mod affinity; +mod branch_splitting; +mod coalescing; +mod context; +mod diversion; +mod pressure; +mod reload; +mod safepoint; +mod solver; +mod spilling; + +pub use self::context::Context; +pub use self::diversion::{EntryRegDiversions, RegDiversions}; +pub use self::register_set::RegisterSet; +pub use self::safepoint::emit_stack_maps; diff --git a/third_party/rust/cranelift-codegen/src/regalloc/pressure.rs b/third_party/rust/cranelift-codegen/src/regalloc/pressure.rs new file mode 100644 index 0000000000..aa83037041 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/pressure.rs @@ -0,0 +1,371 @@ +//! Register pressure tracking. +//! +//! SSA-based register allocation depends on a spilling phase that "lowers register pressure +//! sufficiently". This module defines the data structures needed to measure register pressure +//! accurately enough to guarantee that the coloring phase will not run out of registers. +//! +//! Ideally, measuring register pressure amounts to simply counting the number of live registers at +//! any given program point. This simplistic method has two problems: +//! +//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point +//! register banks, so we need to at least count the number of live registers in each register +//! bank separately. +//! +//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM +//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register. +//! This makes it difficult to accurately measure register pressure. +//! +//! 
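   For instance, `S0` and `S1` occupy the same register units as `D0`, and `D0` and `D1` together
+//!    make up `Q0`, so counting live values per top-level class in isolation would not notice that
+//!    allocating `Q0` blocks four `S` registers.
+//!
+//!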
This module deals with the problems via *register banks* and *top-level register classes*. +//! Register classes in different register banks are completely independent, so we can count +//! registers in one bank without worrying about the other bank at all. +//! +//! All register classes have a unique top-level register class, and we will count registers for +//! each top-level register class individually. However, a register bank can have multiple +//! top-level register classes that interfere with each other, so all top-level counts need to +//! be considered when determining how many more registers can be allocated. +//! +//! Currently, the only register bank with multiple top-level registers is the `arm32` +//! floating-point register bank which has `S`, `D`, and `Q` top-level classes. +//! +//! # Base and transient counts +//! +//! We maintain two separate register counts per top-level register class: base counts and +//! transient counts. The base counts are adjusted with the `take` and `free` functions. The +//! transient counts are adjusted with `take_transient` and `free_transient`. + +// Remove once we're using the pressure tracker. +#![allow(dead_code)] + +use crate::isa::registers::{RegClass, RegClassMask, RegInfo}; +use crate::regalloc::RegisterSet; +use core::cmp::min; +use core::fmt; +use core::iter::ExactSizeIterator; +use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS; + +/// Information per top-level register class. +/// +/// Everything but the counts is static information computed from the constructor arguments. +#[derive(Default)] +struct TopRC { + /// Number of registers currently used from this register class. + base_count: u32, + transient_count: u32, + + /// Max number of registers that can be allocated. + limit: u32, + + /// Register units per register. + width: u8, + + /// The first aliasing top-level RC. + first_toprc: u8, + + /// The number of aliasing top-level RCs. + num_toprcs: u8, +} + +impl TopRC { + fn total_count(&self) -> u32 { + self.base_count + self.transient_count + } +} + +pub struct Pressure { + /// Bit mask of top-level register classes that are aliased by other top-level register classes. + /// Unaliased register classes can use a simpler interference algorithm. + aliased: RegClassMask, + + /// Current register counts per top-level register class. + toprc: [TopRC; MAX_TRACKED_TOP_RCS], +} + +impl Pressure { + /// Create a new register pressure tracker. + pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self { + let mut p = Self { + aliased: 0, + toprc: Default::default(), + }; + + // Get the layout of aliasing top-level register classes from the register banks. + for bank in reginfo.banks { + let first = bank.first_toprc; + let num = bank.num_toprcs; + + if bank.pressure_tracking { + for rc in &mut p.toprc[first..first + num] { + rc.first_toprc = first as u8; + rc.num_toprcs = num as u8; + } + + // Flag the top-level register classes with aliases. + if num > 1 { + p.aliased |= ((1 << num) - 1) << first; + } + } else { + // This bank has no pressure tracking, so its top-level register classes may exceed + // `MAX_TRACKED_TOPRCS`. Fill in dummy entries. + for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] { + // These aren't used if we don't set the `aliased` bit. + rc.first_toprc = !0; + rc.limit = !0; + } + } + } + + // Compute per-class limits from `usable`. 
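+        // Note that this zip relies on the top-level register classes occupying the first entries
+        // of `reginfo.classes`, in toprc-index order, and on pressure-tracked banks preceding any
+        // untracked ones, since `take_while` stops at the first entry with `num_toprcs == 0`.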
+ for (toprc, rc) in p + .toprc + .iter_mut() + .take_while(|t| t.num_toprcs > 0) + .zip(reginfo.classes) + { + toprc.limit = usable.iter(rc).len() as u32; + toprc.width = rc.width; + } + + p + } + + /// Check for an available register in the register class `rc`. + /// + /// If it is possible to allocate one more register from `rc`'s top-level register class, + /// returns 0. + /// + /// If not, returns a bit-mask of top-level register classes that are interfering. Register + /// pressure should be eased in one of the returned top-level register classes before calling + /// `can_take()` to check again. + fn check_avail(&self, rc: RegClass) -> RegClassMask { + let entry = match self.toprc.get(rc.toprc as usize) { + None => return 0, // Not a pressure tracked bank. + Some(e) => e, + }; + let mask = 1 << rc.toprc; + if (self.aliased & mask) == 0 { + // This is a simple unaliased top-level register class. + if entry.total_count() < entry.limit { + 0 + } else { + mask + } + } else { + // This is the more complicated case. The top-level register class has aliases. + self.check_avail_aliased(entry) + } + } + + /// Check for an available register in a top-level register class that may have aliases. + /// + /// This is the out-of-line slow path for `check_avail()`. + fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask { + let first = usize::from(entry.first_toprc); + let num = usize::from(entry.num_toprcs); + let width = u32::from(entry.width); + let ulimit = entry.limit * width; + + // Count up the number of available register units. + let mut units = 0; + for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) { + let rcw = u32::from(rc.width); + // If `rc.width` is smaller than `width`, each register in `rc` could potentially block + // one of ours. This is assuming that none of the smaller registers are straddling the + // bigger ones. + // + // If `rc.width` is larger than `width`, we are also assuming that the registers are + // aligned and `rc.width` is a multiple of `width`. + let u = if rcw < width { + // We can't take more than the total number of register units in the class. + // This matters for arm32 S-registers which can only ever lock out 16 D-registers. + min(rc.total_count() * width, rc.limit * rcw) + } else { + rc.total_count() * rcw + }; + + // If this top-level RC on its own is responsible for exceeding our limit, return it + // early to guarantee that registers here are spilled before spilling other registers + // unnecessarily. + if u >= ulimit { + return 1 << rci; + } + + units += u; + } + + // We've counted up the worst-case number of register units claimed by all aliasing + // classes. Compare to the unit limit in this class. + if units < ulimit { + 0 + } else { + // Registers need to be spilled from any one of the aliasing classes. + ((1 << num) - 1) << first + } + } + + /// Take a register from `rc`. + /// + /// This does not check if there are enough registers available. + pub fn take(&mut self, rc: RegClass) { + if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { + t.base_count += 1; + } + } + + /// Free a register in `rc`. + pub fn free(&mut self, rc: RegClass) { + if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { + t.base_count -= 1; + } + } + + /// Reset all counts to 0, both base and transient. + pub fn reset(&mut self) { + for e in &mut self.toprc { + e.base_count = 0; + e.transient_count = 0; + } + } + + /// Try to increment a transient counter. + /// + /// This will fail if there are not enough registers available. 
+ pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> { + let mask = self.check_avail(rc); + if mask == 0 { + if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { + t.transient_count += 1; + } + + Ok(()) + } else { + Err(mask) + } + } + + /// Reset all transient counts to 0. + pub fn reset_transient(&mut self) { + for e in &mut self.toprc { + e.transient_count = 0; + } + } + + /// Preserve the transient counts by transferring them to the base counts. + pub fn preserve_transient(&mut self) { + for e in &mut self.toprc { + e.base_count += e.transient_count; + e.transient_count = 0; + } + } +} + +impl fmt::Display for Pressure { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Pressure[")?; + for rc in &self.toprc { + if rc.limit > 0 && rc.limit < !0 { + write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?; + } + } + write!(f, " ]") + } +} + +#[cfg(test)] +#[cfg(feature = "arm32")] +mod tests { + use super::Pressure; + use crate::isa::registers::{RegBank, RegClassData}; + use crate::isa::{RegClass, RegInfo, RegUnit}; + use crate::regalloc::RegisterSet; + use core::borrow::Borrow; + + // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info + // about registers, so we directly access `INFO` from registers-arm32.rs. + include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs")); + + // Get a register class by name. + fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass { + reginfo + .classes + .iter() + .find(|rc| rc.name == name) + .expect("Can't find named register class.") + } + + #[test] + fn basic_counting() { + let reginfo = INFO.borrow(); + let gpr = rc_by_name(®info, "GPR"); + let s = rc_by_name(®info, "S"); + + let regs = RegisterSet::new(); + + let mut pressure = Pressure::new(®info, ®s); + let mut count = 0; + while pressure.check_avail(gpr) == 0 { + pressure.take(gpr); + count += 1; + } + assert_eq!(count, 16); + assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc); + assert_eq!(pressure.check_avail(s), 0); + pressure.free(gpr); + assert_eq!(pressure.check_avail(gpr), 0); + pressure.take(gpr); + assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc); + assert_eq!(pressure.check_avail(s), 0); + pressure.reset(); + assert_eq!(pressure.check_avail(gpr), 0); + assert_eq!(pressure.check_avail(s), 0); + } + + #[test] + fn arm_float_bank() { + let reginfo = INFO.borrow(); + let s = rc_by_name(®info, "S"); + let d = rc_by_name(®info, "D"); + let q = rc_by_name(®info, "Q"); + let regs = RegisterSet::new(); + + let mut pressure = Pressure::new(®info, ®s); + assert_eq!(pressure.check_avail(s), 0); + assert_eq!(pressure.check_avail(d), 0); + assert_eq!(pressure.check_avail(q), 0); + + // Allocating a single S-register should not affect availability. + pressure.take(s); + assert_eq!(pressure.check_avail(s), 0); + assert_eq!(pressure.check_avail(d), 0); + assert_eq!(pressure.check_avail(q), 0); + + pressure.take(d); + assert_eq!(pressure.check_avail(s), 0); + assert_eq!(pressure.check_avail(d), 0); + assert_eq!(pressure.check_avail(q), 0); + + pressure.take(q); + assert_eq!(pressure.check_avail(s), 0); + assert_eq!(pressure.check_avail(d), 0); + assert_eq!(pressure.check_avail(q), 0); + + // Take a total of 16 S-regs. + for _ in 1..16 { + pressure.take(s); + } + assert_eq!(pressure.check_avail(s), 0); + assert_eq!(pressure.check_avail(d), 0); + assert_eq!(pressure.check_avail(q), 0); + + // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs. 
+ for _ in 0..6 { + assert_eq!(pressure.check_avail(d), 0); + assert_eq!(pressure.check_avail(q), 0); + pressure.take(q); + } + + // We've taken 16 S, 1 D, and 7 Qs. + assert!(pressure.check_avail(s) != 0); + assert_eq!(pressure.check_avail(d), 0); + assert!(pressure.check_avail(q) != 0); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/register_set.rs b/third_party/rust/cranelift-codegen/src/regalloc/register_set.rs new file mode 100644 index 0000000000..52b8a6fa0a --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/register_set.rs @@ -0,0 +1,391 @@ +//! Set of allocatable registers as a bit vector of register units. +//! +//! While allocating registers, we need to keep track of which registers are available and which +//! registers are in use. Since registers can alias in different ways, we track this via the +//! "register unit" abstraction. Every register contains one or more register units. Registers that +//! share a register unit can't be in use at the same time. + +use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask}; +use core::char; +use core::fmt; +use core::iter::ExactSizeIterator; +use core::mem::size_of_val; + +/// Set of registers available for allocation. +#[derive(Clone)] +pub struct RegisterSet { + avail: RegUnitMask, +} + +// Given a register class and a register unit in the class, compute a word index and a bit mask of +// register units representing that register. +// +// Note that a register is not allowed to straddle words. +fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) { + // Bit mask representing the register. It is `rc.width` consecutive units. + let width_bits = (1 << rc.width) - 1; + // Index into avail[] of the word containing `reg`. + let word_index = (reg / 32) as usize; + // The actual bits in the word that cover `reg`. + let reg_bits = width_bits << (reg % 32); + + (word_index, reg_bits) +} + +impl RegisterSet { + /// Create a new register set with all registers available. + /// + /// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of + /// allocatable registers where reserved registers have been filtered out. + pub fn new() -> Self { + Self { avail: [!0; 3] } + } + + /// Create a new register set with no registers available. + pub fn empty() -> Self { + Self { avail: [0; 3] } + } + + /// Returns `true` if the specified register is available. + pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool { + let (idx, bits) = bitmask(rc, reg); + (self.avail[idx] & bits) == bits + } + + /// Allocate `reg` from `rc` so it is no longer available. + /// + /// It is an error to take a register that doesn't have all of its register units available. + pub fn take(&mut self, rc: RegClass, reg: RegUnit) { + let (idx, bits) = bitmask(rc, reg); + debug_assert!( + (self.avail[idx] & bits) == bits, + "{}:{} not available in {}", + rc, + rc.info.display_regunit(reg), + self.display(rc.info) + ); + self.avail[idx] &= !bits; + } + + /// Return `reg` and all of its register units to the set of available registers. + pub fn free(&mut self, rc: RegClass, reg: RegUnit) { + let (idx, bits) = bitmask(rc, reg); + debug_assert!( + (self.avail[idx] & bits) == 0, + "{}:{} is already free in {}", + rc, + rc.info.display_regunit(reg), + self.display(rc.info) + ); + self.avail[idx] |= bits; + } + + /// Return an iterator over all available registers belonging to the register class `rc`. + /// + /// This doesn't allocate anything from the set; use `take()` for that. 
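+    /// The iterator yields the first register unit of each available register in `rc`, in
+    /// increasing order, so `regs.iter(rc).collect::<Vec<_>>()` lists the free registers of the
+    /// class.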
+ pub fn iter(&self, rc: RegClass) -> RegSetIter { + // Start by copying the RC mask. It is a single set bit for each register in the class. + let mut rsi = RegSetIter { regs: rc.mask }; + + // Mask out the unavailable units. + for idx in 0..self.avail.len() { + // If a single unit in a register is unavailable, the whole register can't be used. If + // a register straddles a word boundary, it will be marked as unavailable. There's an + // assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that. + for i in 0..rc.width { + rsi.regs[idx] &= self.avail[idx] >> i; + } + } + rsi + } + + /// Check if any register units allocated out of this set interferes with units allocated out + /// of `other`. + /// + /// This assumes that unused bits are 1. + pub fn interferes_with(&self, other: &Self) -> bool { + self.avail + .iter() + .zip(&other.avail) + .any(|(&x, &y)| (x | y) != !0) + } + + /// Intersect this set of registers with `other`. This has the effect of removing any register + /// units from this set that are not in `other`. + pub fn intersect(&mut self, other: &Self) { + for (x, &y) in self.avail.iter_mut().zip(&other.avail) { + *x &= y; + } + } + + /// Return an object that can display this register set, using the register info from the + /// target ISA. + pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> { + DisplayRegisterSet(self.clone(), regs.into()) + } +} + +/// Iterator over available registers in a register class. +#[derive(Clone)] +pub struct RegSetIter { + regs: RegUnitMask, +} + +impl Iterator for RegSetIter { + type Item = RegUnit; + + fn next(&mut self) -> Option<RegUnit> { + let mut unit_offset = 0; + + // Find the first set bit in `self.regs`. + for word in &mut self.regs { + if *word != 0 { + // Compute the register unit number from the lowest set bit in the word. + let unit = unit_offset + word.trailing_zeros() as RegUnit; + + // Clear that lowest bit so we won't find it again. + *word &= *word - 1; + + return Some(unit); + } + // How many register units was there in the word? This is a constant 32 for `u32` etc. + unit_offset += 8 * size_of_val(word) as RegUnit; + } + + // All of `self.regs` is 0. + None + } + + fn size_hint(&self) -> (usize, Option<usize>) { + let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum(); + (bits, Some(bits)) + } +} + +impl RegSetIter { + pub fn rnext(&mut self) -> Option<RegUnit> { + let num_words = self.regs.len(); + let bits_per_word = 8 * size_of_val(&self.regs[0]); + + // Find the last set bit in `self.regs`. + for i in 0..num_words { + let word_ix = num_words - 1 - i; + + let word = &mut self.regs[word_ix]; + if *word != 0 { + let lzeroes = word.leading_zeros() as usize; + + // Clear that highest bit so we won't find it again. + *word &= !(1 << (bits_per_word - 1 - lzeroes)); + + return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit); + } + } + + // All of `self.regs` is 0. + None + } +} + +impl ExactSizeIterator for RegSetIter {} + +/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA. 
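+/// Without it, the `Display` implementation falls back to printing the raw availability words in
+/// hexadecimal instead of one marker per register in each top-level class.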
+pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>); + +impl<'a> fmt::Display for DisplayRegisterSet<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "[")?; + match self.1 { + None => { + for w in &self.0.avail { + write!(f, " #{:08x}", w)?; + } + } + Some(reginfo) => { + let toprcs = reginfo + .banks + .iter() + .map(|b| b.first_toprc + b.num_toprcs) + .max() + .expect("No register banks"); + for rc in ®info.classes[0..toprcs] { + if rc.width == 1 { + let bank = ®info.banks[rc.bank as usize]; + write!(f, " {}: ", rc)?; + for offset in 0..bank.units { + let reg = bank.first_unit + offset; + if !rc.contains(reg) { + continue; + } + if !self.0.is_avail(rc, reg) { + write!(f, "-")?; + continue; + } + // Display individual registers as either the second letter of their + // name or the last digit of their number. + // This works for x86 (rax, rbx, ...) and for numbered regs. + write!( + f, + "{}", + bank.names + .get(offset as usize) + .and_then(|name| name.chars().nth(1)) + .unwrap_or_else(|| char::from_digit( + u32::from(offset % 10), + 10 + ) + .unwrap()) + )?; + } + } + } + } + } + write!(f, " ]") + } +} + +impl fmt::Display for RegisterSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.display(None).fmt(f) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::isa::registers::{RegClass, RegClassData}; + use alloc::vec::Vec; + + // Register classes for testing. + const GPR: RegClass = &RegClassData { + name: "GPR", + index: 0, + width: 1, + bank: 0, + toprc: 0, + first: 28, + subclasses: 0, + mask: [0xf0000000, 0x0000000f, 0], + info: &INFO, + pinned_reg: None, + }; + + const DPR: RegClass = &RegClassData { + name: "DPR", + index: 0, + width: 2, + bank: 0, + toprc: 0, + first: 28, + subclasses: 0, + mask: [0x50000000, 0x0000000a, 0], + info: &INFO, + pinned_reg: None, + }; + + const INFO: RegInfo = RegInfo { + banks: &[], + classes: &[], + }; + + const RSI_1: RegSetIter = RegSetIter { + regs: [0x31415927, 0x27182818, 0x14141356], + }; + + const RSI_2: RegSetIter = RegSetIter { + regs: [0x00000000, 0x00000000, 0x00000000], + }; + + const RSI_3: RegSetIter = RegSetIter { + regs: [0xffffffff, 0xffffffff, 0xffffffff], + }; + + fn reverse_regset_iteration_work(rsi: &RegSetIter) { + // Check the reverse iterator by comparing its output with the forward iterator. + let rsi_f = (*rsi).clone(); + let results_f = rsi_f.collect::<Vec<_>>(); + + let mut rsi_r = (*rsi).clone(); + let mut results_r = Vec::<RegUnit>::new(); + while let Some(r) = rsi_r.rnext() { + results_r.push(r); + } + + let len_f = results_f.len(); + let len_r = results_r.len(); + assert_eq!(len_f, len_r); + + for i in 0..len_f { + assert_eq!(results_f[i], results_r[len_f - 1 - i]); + } + } + + #[test] + fn reverse_regset_iteration() { + reverse_regset_iteration_work(&RSI_1); + reverse_regset_iteration_work(&RSI_2); + reverse_regset_iteration_work(&RSI_3); + } + + #[test] + fn put_and_take() { + let mut regs = RegisterSet::new(); + + // `GPR` has units 28-36. 
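+        // With `GPR.width == 1` that mask describes 8 single-unit registers at units 28..=35.
+        // `DPR.width == 2` and its mask select the two-unit registers starting at units 28, 30,
+        // 33 and 35.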
+ assert_eq!(regs.iter(GPR).len(), 8); + assert_eq!(regs.iter(GPR).count(), 8); + assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]); + + assert!(regs.is_avail(GPR, 29)); + regs.take(&GPR, 29); + assert!(!regs.is_avail(GPR, 29)); + + assert_eq!(regs.iter(GPR).count(), 7); + assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]); + + assert!(regs.is_avail(GPR, 30)); + regs.take(&GPR, 30); + assert!(!regs.is_avail(GPR, 30)); + + assert_eq!(regs.iter(GPR).count(), 6); + assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]); + + assert!(regs.is_avail(GPR, 32)); + regs.take(&GPR, 32); + assert!(!regs.is_avail(GPR, 32)); + + assert_eq!(regs.iter(GPR).count(), 5); + assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]); + + regs.free(&GPR, 30); + assert!(regs.is_avail(GPR, 30)); + assert!(!regs.is_avail(GPR, 29)); + assert!(!regs.is_avail(GPR, 32)); + + assert_eq!(regs.iter(GPR).count(), 6); + assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]); + + regs.free(&GPR, 32); + assert!(regs.is_avail(GPR, 31)); + assert!(!regs.is_avail(GPR, 29)); + assert!(regs.is_avail(GPR, 32)); + + assert_eq!(regs.iter(GPR).count(), 7); + assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]); + } + + #[test] + fn interference() { + let mut regs1 = RegisterSet::new(); + let mut regs2 = RegisterSet::new(); + + assert!(!regs1.interferes_with(®s2)); + regs1.take(&GPR, 32); + assert!(!regs1.interferes_with(®s2)); + regs2.take(&GPR, 31); + assert!(!regs1.interferes_with(®s2)); + regs1.intersect(®s2); + assert!(regs1.interferes_with(®s2)); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/reload.rs b/third_party/rust/cranelift-codegen/src/regalloc/reload.rs new file mode 100644 index 0000000000..cdafb68af8 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/reload.rs @@ -0,0 +1,485 @@ +//! Reload pass +//! +//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to +//! insert `spill` and `fill` instructions such that instruction operands expecting a register will +//! get a value with register affinity, and operands expecting a stack slot will get a value with +//! stack affinity. +//! +//! The secondary responsibility of the reload pass is to reuse values in registers as much as +//! possible to minimize the number of `fill` instructions needed. This must not cause the register +//! pressure limits to be exceeded. + +use crate::cursor::{Cursor, EncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::entity::{SparseMap, SparseMapValue}; +use crate::ir::{AbiParam, ArgumentLoc, InstBuilder}; +use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc}; +use crate::isa::RegClass; +use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa}; +use crate::regalloc::affinity::Affinity; +use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; +use crate::regalloc::liveness::Liveness; +use crate::timing; +use crate::topo_order::TopoOrder; +use alloc::vec::Vec; +use log::debug; + +/// Reusable data structures for the reload pass. +pub struct Reload { + candidates: Vec<ReloadCandidate>, + reloads: SparseMap<Value, ReloadedValue>, +} + +/// Context data structure that gets instantiated once per pass. +struct Context<'a> { + cur: EncCursor<'a>, + + // Cached ISA information. + // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object. + encinfo: EncInfo, + + // References to contextual data structures we need. 
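+    //
+    // The dominator tree and topological order drive the block visit order, while the liveness
+    // information is updated in place as fills and spills are inserted. `candidates` and
+    // `reloads` are scratch space borrowed from the `Reload` pass object so their allocations
+    // can be reused across runs.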
+ domtree: &'a DominatorTree, + liveness: &'a mut Liveness, + topo: &'a mut TopoOrder, + + candidates: &'a mut Vec<ReloadCandidate>, + reloads: &'a mut SparseMap<Value, ReloadedValue>, +} + +impl Reload { + /// Create a new blank reload pass. + pub fn new() -> Self { + Self { + candidates: Vec::new(), + reloads: SparseMap::new(), + } + } + + /// Clear all data structures in this reload pass. + pub fn clear(&mut self) { + self.candidates.clear(); + self.reloads.clear(); + } + + /// Run the reload algorithm over `func`. + pub fn run( + &mut self, + isa: &dyn TargetIsa, + func: &mut Function, + domtree: &DominatorTree, + liveness: &mut Liveness, + topo: &mut TopoOrder, + tracker: &mut LiveValueTracker, + ) { + let _tt = timing::ra_reload(); + debug!("Reload for:\n{}", func.display(isa)); + let mut ctx = Context { + cur: EncCursor::new(func, isa), + encinfo: isa.encoding_info(), + domtree, + liveness, + topo, + candidates: &mut self.candidates, + reloads: &mut self.reloads, + }; + ctx.run(tracker) + } +} + +/// A reload candidate. +/// +/// This represents a stack value that is used by the current instruction where a register is +/// needed. +struct ReloadCandidate { + argidx: usize, + value: Value, + regclass: RegClass, +} + +/// A Reloaded value. +/// +/// This represents a value that has been reloaded into a register value from the stack. +struct ReloadedValue { + stack: Value, + reg: Value, +} + +impl SparseMapValue<Value> for ReloadedValue { + fn key(&self) -> Value { + self.stack + } +} + +impl<'a> Context<'a> { + fn run(&mut self, tracker: &mut LiveValueTracker) { + self.topo.reset(self.cur.func.layout.blocks()); + while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { + self.visit_block(block, tracker); + } + } + + fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { + debug!("Reloading {}:", block); + self.visit_block_header(block, tracker); + tracker.drop_dead_params(); + + // visit_block_header() places us at the first interesting instruction in the block. + while let Some(inst) = self.cur.current_inst() { + if !self.cur.func.dfg[inst].opcode().is_ghost() { + // This instruction either has an encoding or has ABI constraints, so visit it to + // insert spills and fills as needed. + let encoding = self.cur.func.encodings[inst]; + self.visit_inst(block, inst, encoding, tracker); + tracker.drop_dead(inst); + } else { + // This is a ghost instruction with no encoding and no extra constraints, so we can + // just skip over it. + self.cur.next_inst(); + } + } + } + + /// Process the block parameters. Move to the next instruction in the block to be processed + fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) { + let (liveins, args) = tracker.block_top( + block, + &self.cur.func.dfg, + self.liveness, + &self.cur.func.layout, + self.domtree, + ); + + if self.cur.func.layout.entry_block() == Some(block) { + debug_assert_eq!(liveins.len(), 0); + self.visit_entry_params(block, args); + } else { + self.visit_block_params(block, args); + } + } + + /// Visit the parameters on the entry block. + /// These values have ABI constraints from the function signature. 
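+    /// An incoming register parameter whose value wants a stack slot is rewritten: the block
+    /// parameter is replaced by a fresh register value that is spilled immediately, so later
+    /// uses keep referring to the stack-affine value.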
+ fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) { + debug_assert_eq!(self.cur.func.signature.params.len(), args.len()); + self.cur.goto_first_inst(block); + + for (arg_idx, arg) in args.iter().enumerate() { + let abi = self.cur.func.signature.params[arg_idx]; + match abi.location { + ArgumentLoc::Reg(_) => { + if arg.affinity.is_stack() { + // An incoming register parameter was spilled. Replace the parameter value + // with a temporary register value that is immediately spilled. + let reg = self + .cur + .func + .dfg + .replace_block_param(arg.value, abi.value_type); + let affinity = Affinity::abi(&abi, self.cur.isa); + self.liveness.create_dead(reg, block, affinity); + self.insert_spill(block, arg.value, reg); + } + } + ArgumentLoc::Stack(_) => { + debug_assert!(arg.affinity.is_stack()); + } + ArgumentLoc::Unassigned => panic!("Unexpected ABI location"), + } + } + } + + fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) { + self.cur.goto_first_inst(block); + } + + /// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction + /// that needs processing. + fn visit_inst( + &mut self, + block: Block, + inst: Inst, + encoding: Encoding, + tracker: &mut LiveValueTracker, + ) { + self.cur.use_srcloc(inst); + + // Get the operand constraints for `inst` that we are trying to satisfy. + let constraints = self.encinfo.operand_constraints(encoding); + + // Identify reload candidates. + debug_assert!(self.candidates.is_empty()); + self.find_candidates(inst, constraints); + + // If we find a copy from a stack slot to the same stack slot, replace + // it with a `copy_nop` but otherwise ignore it. In particular, don't + // generate a reload immediately followed by a spill. The `copy_nop` + // has a zero-length encoding, so will disappear at emission time. + if let InstructionData::Unary { + opcode: Opcode::Copy, + arg, + } = self.cur.func.dfg[inst] + { + let dst_vals = self.cur.func.dfg.inst_results(inst); + if dst_vals.len() == 1 { + let dst_val = dst_vals[0]; + let can_transform = match ( + self.cur.func.locations[arg], + self.cur.func.locations[dst_val], + ) { + (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => { + src_slot == dst_slot && { + let src_ty = self.cur.func.dfg.value_type(arg); + let dst_ty = self.cur.func.dfg.value_type(dst_val); + debug_assert!(src_ty == dst_ty); + // This limits the transformation to copies of the + // types: I128 I64 I32 I16 I8 F64 and F32, since that's + // the set of `copy_nop` encodings available. + src_ty.is_int() || src_ty.is_float() + } + } + _ => false, + }; + if can_transform { + // Convert the instruction into a `copy_nop`. + self.cur.func.dfg.replace(inst).copy_nop(arg); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok, "copy_nop encoding missing for this type"); + + // And move on to the next insn. + self.reloads.clear(); + let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + self.cur.next_inst(); + self.candidates.clear(); + return; + } + } + } + + // Deal with all instructions not special-cased by the immediately + // preceding fragment. + if let InstructionData::Unary { + opcode: Opcode::Copy, + .. + } = self.cur.func.dfg[inst] + { + self.reload_copy_candidates(inst); + } else { + self.reload_inst_candidates(block, inst); + } + + // TODO: Reuse reloads for future instructions. 
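+        // For now, the reload map only lives for the duration of a single instruction: it lets
+        // an instruction that uses the same spilled value in several operands share one fill.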
+ self.reloads.clear(); + + let (_throughs, _kills, defs) = + tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + + // Advance to the next instruction so we can insert any spills after the instruction. + self.cur.next_inst(); + + // Rewrite register defs that need to be spilled. + // + // Change: + // + // v2 = inst ... + // + // Into: + // + // v7 = inst ... + // v2 = spill v7 + // + // That way, we don't need to rewrite all future uses of v2. + if let Some(constraints) = constraints { + for (lv, op) in defs.iter().zip(constraints.outs) { + if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack { + if let InstructionData::Unary { + opcode: Opcode::Copy, + arg, + } = self.cur.func.dfg[inst] + { + self.cur.func.dfg.replace(inst).spill(arg); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok); + } else { + let value_type = self.cur.func.dfg.value_type(lv.value); + let reg = self.cur.func.dfg.replace_result(lv.value, value_type); + self.liveness.create_dead(reg, inst, Affinity::new(op)); + self.insert_spill(block, lv.value, reg); + } + } + } + } + + // Same thing for spilled call return values. + let retvals = &defs[self.cur.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_results()..]; + if !retvals.is_empty() { + let sig = self + .cur + .func + .dfg + .call_signature(inst) + .expect("Extra results on non-call instruction"); + for (i, lv) in retvals.iter().enumerate() { + let abi = self.cur.func.dfg.signatures[sig].returns[i]; + debug_assert!( + abi.location.is_reg(), + "expected reg; got {:?}", + abi.location + ); + if lv.affinity.is_stack() { + let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type); + self.liveness + .create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa)); + self.insert_spill(block, lv.value, reg); + } + } + } + } + + // Reload the current candidates for the given `inst`. + fn reload_inst_candidates(&mut self, block: Block, inst: Inst) { + // Insert fill instructions before `inst` and replace `cand.value` with the filled value. + for cand in self.candidates.iter_mut() { + if let Some(reload) = self.reloads.get(cand.value) { + cand.value = reload.reg; + continue; + } + + let reg = self.cur.ins().fill(cand.value); + let fill = self.cur.built_inst(); + + self.reloads.insert(ReloadedValue { + stack: cand.value, + reg, + }); + cand.value = reg; + + // Create a live range for the new reload. + let affinity = Affinity::Reg(cand.regclass.into()); + self.liveness.create_dead(reg, fill, affinity); + self.liveness + .extend_locally(reg, block, inst, &self.cur.func.layout); + } + + // Rewrite instruction arguments. + // + // Only rewrite those arguments that were identified as candidates. This leaves block + // arguments on branches as-is without rewriting them. A spilled block argument needs to stay + // spilled because the matching block parameter is going to be in the same virtual register + // and therefore the same stack slot as the block argument value. + if !self.candidates.is_empty() { + let args = self.cur.func.dfg.inst_args_mut(inst); + while let Some(cand) = self.candidates.pop() { + args[cand.argidx] = cand.value; + } + } + } + + // Reload the current candidates for the given copy `inst`. + // + // As an optimization, replace a copy instruction where the argument has been spilled with + // a fill instruction. + fn reload_copy_candidates(&mut self, inst: Inst) { + // Copy instructions can only have one argument. 
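+        // If that argument was spilled, there is exactly one candidate and the copy can be
+        // rewritten in place as a `fill`, avoiding a fill immediately followed by a copy.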
+ debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1); + + if let Some(cand) = self.candidates.pop() { + self.cur.func.dfg.replace(inst).fill(cand.value); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok); + } + } + + // Find reload candidates for `inst` and add them to `self.candidates`. + // + // These are uses of spilled values where the operand constraint requires a register. + fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) { + let args = self.cur.func.dfg.inst_args(inst); + + if let Some(constraints) = constraints { + for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { + if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() { + self.candidates.push(ReloadCandidate { + argidx, + value: arg, + regclass: op.regclass, + }) + } + } + } + + // If we only have the fixed arguments, we're done now. + let offset = self.cur.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + if args.len() == offset { + return; + } + let var_args = &args[offset..]; + + // Handle ABI arguments. + if let Some(sig) = self.cur.func.dfg.call_signature(inst) { + handle_abi_args( + self.candidates, + &self.cur.func.dfg.signatures[sig].params, + var_args, + offset, + self.cur.isa, + self.liveness, + ); + } else if self.cur.func.dfg[inst].opcode().is_return() { + handle_abi_args( + self.candidates, + &self.cur.func.signature.returns, + var_args, + offset, + self.cur.isa, + self.liveness, + ); + } + } + + /// Insert a spill at `pos` and update data structures. + /// + /// - Insert `stack = spill reg` at `pos`, and assign an encoding. + /// - Move the `stack` live range starting point to the new instruction. + /// - Extend the `reg` live range to reach the new instruction. + fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) { + self.cur.ins().with_result(stack).spill(reg); + let inst = self.cur.built_inst(); + + // Update live ranges. + self.liveness.move_def_locally(stack, inst); + self.liveness + .extend_locally(reg, block, inst, &self.cur.func.layout); + } +} + +/// Find reload candidates in the instruction's ABI variable arguments. This handles both +/// return values and call arguments. +fn handle_abi_args( + candidates: &mut Vec<ReloadCandidate>, + abi_types: &[AbiParam], + var_args: &[Value], + offset: usize, + isa: &dyn TargetIsa, + liveness: &Liveness, +) { + debug_assert_eq!(abi_types.len(), var_args.len()); + for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) 
{ + if abi.location.is_reg() { + let lv = liveness.get(arg).expect("Missing live range for ABI arg"); + if lv.affinity.is_stack() { + candidates.push(ReloadCandidate { + argidx, + value: arg, + regclass: isa.regclass_for_abi_type(abi.value_type), + }); + } + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs b/third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs new file mode 100644 index 0000000000..2686c57277 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/safepoint.rs @@ -0,0 +1,65 @@ +use crate::cursor::{Cursor, FuncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::inst_predicates::is_safepoint; +use crate::ir::{Function, InstBuilder}; +use crate::isa::TargetIsa; +use crate::regalloc::live_value_tracker::LiveValueTracker; +use crate::regalloc::liveness::Liveness; +use alloc::vec::Vec; + +fn insert_and_encode_safepoint<'f>( + pos: &mut FuncCursor<'f>, + tracker: &LiveValueTracker, + isa: &dyn TargetIsa, +) { + // Iterate through all live values, collect only the references. + let live_ref_values = tracker + .live() + .iter() + .filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref()) + .map(|live_val| live_val.value) + .collect::<Vec<_>>(); + + if !live_ref_values.is_empty() { + pos.ins().safepoint(&live_ref_values); + // Move cursor to the new safepoint instruction to encode it. + if let Some(inst) = pos.prev_inst() { + let ok = pos.func.update_encoding(inst, isa).is_ok(); + debug_assert!(ok); + } + // Restore cursor position. + pos.next_inst(); + } +} + +// The emit_stack_maps() function analyzes each instruction to retrieve the liveness of +// the defs and operands by traversing a function's blocks in layout order. +pub fn emit_stack_maps( + func: &mut Function, + domtree: &DominatorTree, + liveness: &Liveness, + tracker: &mut LiveValueTracker, + isa: &dyn TargetIsa, +) { + let mut curr = func.layout.entry_block(); + + while let Some(block) = curr { + tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree); + tracker.drop_dead_params(); + let mut pos = FuncCursor::new(func); + + // From the top of the block, step through the instructions. + pos.goto_top(block); + + while let Some(inst) = pos.next_inst() { + if is_safepoint(&pos.func, inst) { + insert_and_encode_safepoint(&mut pos, tracker, isa); + } + + // Process the instruction and get rid of dead values. + tracker.process_inst(inst, &pos.func.dfg, liveness); + tracker.drop_dead(inst); + } + curr = func.layout.next_block(block); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/solver.rs b/third_party/rust/cranelift-codegen/src/regalloc/solver.rs new file mode 100644 index 0000000000..07b3aba9b9 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/solver.rs @@ -0,0 +1,1383 @@ +//! Constraint solver for register coloring. +//! +//! The coloring phase of SSA-based register allocation is very simple in theory, but in practice +//! it is complicated by the various constraints imposed by individual instructions: +//! +//! - Call and return instructions have to satisfy ABI requirements for arguments and return +//! values. +//! - Values live across a call must be in a callee-saved register. +//! - Some instructions have operand constraints such as register sub-classes, fixed registers, or +//! tied operands. +//! +//! # The instruction register coloring problem +//! +//! The constraint solver addresses the problem of satisfying the constraints of a single +//! instruction. We have: +//! +//! 
- A set of values that are live in registers before the instruction, with current register +//! assignments. Some are used by the instruction, some are not. +//! - A subset of the live register values that are killed by the instruction. +//! - A set of new register values that are defined by the instruction. +//! +//! We are not concerned with stack values at all. The reload pass ensures that all values required +//! to be in a register by the instruction are already in a register. +//! +//! A solution to the register coloring problem consists of: +//! +//! - Register reassignment prescriptions for a subset of the live register values. +//! - Register assignments for the instruction's defined values. +//! +//! The solution ensures that when live registers are reassigned as prescribed before the +//! instruction, all its operand constraints are satisfied, and the definition assignments won't +//! conflict. +//! +//! # Register diversions and global interference +//! +//! We can divert register values temporarily to satisfy constraints, but we need to put the +//! values back into their originally assigned register locations before leaving the block. +//! Otherwise, values won't be in the right register at the entry point of other blocks. +//! +//! Some values are *local*, and we don't need to worry about putting those values back since they +//! are not used in any other blocks. +//! +//! When we assign register locations to defines, we are assigning both the register used locally +//! immediately after the instruction and the register used globally when the defined value is used +//! in a different block. We need to avoid interference both locally at the instruction and globally. +//! +//! We have multiple mappings of values to registers: +//! +//! 1. The initial local mapping before the instruction. This includes any diversions from previous +//! instructions in the block, but not diversions for the current instruction. +//! 2. The local mapping after applying the additional reassignments required to satisfy the +//! constraints of the current instruction. +//! 3. The local mapping after the instruction. This excludes values killed by the instruction and +//! includes values defined by the instruction. +//! 4. The global mapping after the instruction. This mapping only contains values with global live +//! ranges, and it does not include any diversions. +//! +//! All four mappings must be kept free of interference. +//! +//! # Problems handled by previous passes. +//! +//! The constraint solver can only reassign registers, it can't create spill code, so some +//! constraints are handled by earlier passes: +//! +//! - There will be enough free registers available for the defines. Ensuring this is the primary +//! purpose of the spilling phase. +//! - When the same value is used for multiple operands, the intersection of operand constraints is +//! non-empty. The spilling phase will insert copies to handle mutually incompatible constraints, +//! such as when the same value is bound to two different function arguments. +//! - Values bound to tied operands must be killed by the instruction. Also enforced by the +//! spiller. +//! - Values used by register operands are in registers, and values used by stack operands are in +//! stack slots. This is enforced by the reload pass. +//! +//! # Solver algorithm +//! +//! The goal of the solver is to satisfy the instruction constraints with a minimal number of +//! register assignments before the instruction. +//! +//! 1. 
Compute the set of values used by operands with a fixed register constraint that isn't +//! already satisfied. These are mandatory predetermined reassignments. +//! 2. Compute the set of values that don't satisfy their register class constraint. These are +//! mandatory reassignments that we need to solve. +//! 3. Add the set of defines to the set of variables computed in 2. Exclude defines tied to an +//! input operand since their value is pre-determined. +//! +//! The set of values computed in 2. and 3. are the *variables* for the solver. Given a set of +//! variables, we can also compute a set of allocatable registers by removing the variables from +//! the set of assigned registers before the instruction. +//! +//! 1. For each variable, compute its domain as the intersection of the allocatable registers and +//! its register class constraint. +//! 2. Sort the variables in order of increasing domain size. +//! 3. Search for a solution that assigns each variable a register from its domain without +//! interference between variables. +//! +//! If the search fails to find a solution, we may need to reassign more registers. Find an +//! appropriate candidate among the set of live register values, add it as a variable and start +//! over. + +use super::RegisterSet; +use crate::dbg::DisplayList; +use crate::entity::{SparseMap, SparseMapValue}; +use crate::ir::Value; +use crate::isa::{RegClass, RegUnit}; +use crate::regalloc::register_set::RegSetIter; +use alloc::vec::Vec; +use core::cmp; +use core::fmt; +use core::mem; +use core::u16; +use log::debug; + +/// A variable in the constraint problem. +/// +/// Variables represent register values that can be assigned to any register unit within the +/// constraint register class. This includes live register values that can be reassigned to a new +/// register and values defined by the instruction which must be assigned to a register. +/// +/// Besides satisfying the register class constraint, variables must also be mutually +/// non-interfering in up to three contexts: +/// +/// 1. Input side live registers, after applying all the reassignments. +/// 2. Output side live registers, considering all the local register diversions. +/// 3. Global live register, not considering any local diversions. +/// +pub struct Variable { + /// The value whose register assignment we're looking for. + pub value: Value, + + /// Original register unit holding this live value before the instruction, or `None` for a + /// value that is defined by the instruction. + from: Option<RegUnit>, + + /// Avoid interference on the input side. + is_input: bool, + + /// Avoid interference on the output side. + is_output: bool, + + /// Avoid interference with the global registers. + is_global: bool, + + /// Number of registers available in the domain of this variable. + domain: u16, + + /// The assigned register unit after a full solution was found. + pub solution: RegUnit, + + /// Any solution must belong to the constraint register class. 
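+    /// When the same value is added with several constraints, this is narrowed to their
+    /// intersection.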
+ constraint: RegClass, +} + +impl Variable { + fn new_live(value: Value, constraint: RegClass, from: RegUnit, is_output: bool) -> Self { + Self { + value, + constraint, + from: Some(from), + is_input: true, + is_output, + is_global: false, + domain: 0, + solution: !0, + } + } + + fn new_def(value: Value, constraint: RegClass, is_global: bool) -> Self { + Self { + value, + constraint, + from: None, + is_input: false, + is_output: true, + is_global, + domain: 0, + solution: !0, + } + } + + /// Does this variable represent a value defined by the current instruction? + pub fn is_define(&self) -> bool { + self.from.is_none() + } + + /// Get an iterator over possible register choices, given the available registers on the input + /// and output sides as well as the available global register set. + fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter { + if !self.is_output { + debug_assert!(!self.is_global, "Global implies output"); + debug_assert!(self.is_input, "Missing interference set"); + return iregs.iter(self.constraint); + } + + let mut r = oregs.clone(); + if self.is_input { + r.intersect(iregs); + } + if self.is_global { + r.intersect(gregs); + } + r.iter(self.constraint) + } +} + +impl fmt::Display for Variable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}({}", self.value, self.constraint)?; + if let Some(reg) = self.from { + write!(f, ", from {}", self.constraint.info.display_regunit(reg))?; + } + if self.is_input { + write!(f, ", in")?; + } + if self.is_output { + write!(f, ", out")?; + } + if self.is_global { + write!(f, ", global")?; + } + if self.is_define() { + write!(f, ", def")?; + } + if self.domain > 0 { + write!(f, ", {}", self.domain)?; + } + write!(f, ")") + } +} + +#[derive(Clone, Debug)] +pub struct Assignment { + pub value: Value, + pub from: RegUnit, + pub to: RegUnit, + pub rc: RegClass, +} + +impl SparseMapValue<Value> for Assignment { + fn key(&self) -> Value { + self.value + } +} + +impl fmt::Display for Assignment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let ri = self.rc.info; + write!( + f, + "{}:{}({} -> {})", + self.value, + self.rc, + ri.display_regunit(self.from), + ri.display_regunit(self.to) + ) + } +} + +/// A move operation between two registers or between a register and an emergency spill slot. +#[derive(Clone, PartialEq)] +pub enum Move { + Reg { + value: Value, + rc: RegClass, + from: RegUnit, + to: RegUnit, + }, + #[allow(dead_code)] // rustc doesn't see it isn't dead. + Spill { + value: Value, + rc: RegClass, + from: RegUnit, + to_slot: usize, + }, + Fill { + value: Value, + rc: RegClass, + from_slot: usize, + to: RegUnit, + }, +} + +impl Move { + /// Create a register move from an assignment, but not for identity assignments. + fn with_assignment(a: &Assignment) -> Option<Self> { + if a.from != a.to { + Some(Self::Reg { + value: a.value, + from: a.from, + to: a.to, + rc: a.rc, + }) + } else { + None + } + } + + /// Get the "from" register and register class, if possible. + #[cfg_attr(feature = "cargo-clippy", allow(clippy::wrong_self_convention))] + fn from_reg(&self) -> Option<(RegClass, RegUnit)> { + match *self { + Self::Reg { rc, from, .. } | Self::Spill { rc, from, .. } => Some((rc, from)), + Self::Fill { .. } => None, + } + } + + /// Get the "to" register and register class, if possible. + fn to_reg(&self) -> Option<(RegClass, RegUnit)> { + match *self { + Self::Reg { rc, to, .. } | Self::Fill { rc, to, .. } => Some((rc, to)), + Self::Spill { .. 
} => None, + } + } + + /// Replace the "to" register with `new` and return the old value. + fn replace_to_reg(&mut self, new: RegUnit) -> RegUnit { + mem::replace( + match *self { + Self::Reg { ref mut to, .. } | Self::Fill { ref mut to, .. } => to, + Self::Spill { .. } => panic!("No to register in a spill {}", self), + }, + new, + ) + } + + /// Convert this `Reg` move to a spill to `slot` and return the old "to" register. + fn change_to_spill(&mut self, slot: usize) -> RegUnit { + match self.clone() { + Self::Reg { + value, + rc, + from, + to, + } => { + *self = Self::Spill { + value, + rc, + from, + to_slot: slot, + }; + to + } + _ => panic!("Expected reg move: {}", self), + } + } + + /// Get the value being moved. + fn value(&self) -> Value { + match *self { + Self::Reg { value, .. } | Self::Fill { value, .. } | Self::Spill { value, .. } => value, + } + } + + /// Get the associated register class. + fn rc(&self) -> RegClass { + match *self { + Self::Reg { rc, .. } | Self::Fill { rc, .. } | Self::Spill { rc, .. } => rc, + } + } +} + +impl fmt::Display for Move { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Reg { + value, + from, + to, + rc, + } => write!( + f, + "{}:{}({} -> {})", + value, + rc, + rc.info.display_regunit(from), + rc.info.display_regunit(to) + ), + Self::Spill { + value, + from, + to_slot, + rc, + } => write!( + f, + "{}:{}({} -> slot {})", + value, + rc, + rc.info.display_regunit(from), + to_slot + ), + Self::Fill { + value, + from_slot, + to, + rc, + } => write!( + f, + "{}:{}(slot {} -> {})", + value, + rc, + from_slot, + rc.info.display_regunit(to) + ), + } + } +} + +impl fmt::Debug for Move { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let as_display: &dyn fmt::Display = self; + as_display.fmt(f) + } +} + +/// Constraint solver for register allocation around a single instruction. +/// +/// Start by programming in the instruction constraints. +/// +/// 1. Initialize the solver by calling `reset()` with the set of allocatable registers before the +/// instruction. +/// 2. Program the input side constraints: Call `reassign_in()` for all fixed register constraints, +/// and `add_var()` for any input operands whose constraints are not already satisfied. +/// 3. Check for conflicts between fixed input assignments and existing live values by calling +/// `has_fixed_input_conflicts()`. Resolve any conflicts by calling `add_var()` with the +/// conflicting values. +/// 4. Prepare for adding output side constraints by calling `inputs_done()`. +/// 5. Add any killed register values that no longer cause interference on the output side by +/// calling `add_kill()`. +/// 6. Program the output side constraints: Call `add_fixed_output()` for all fixed register +/// constraints and `add_def()` for free defines. Resolve fixed output conflicts by calling +/// `add_through_var()`. +/// +pub struct Solver { + /// Register reassignments that are required or decided as part of a full solution. + /// This includes identity assignments for values that are already in the correct fixed + /// register. + assignments: SparseMap<Value, Assignment>, + + /// Variables are the values that should be reassigned as part of a solution. + /// Values with fixed register constraints are not considered variables. They are represented + /// in the `assignments` vector if necessary. + vars: Vec<Variable>, + + /// Are we finished adding input-side constraints? This changes the meaning of the `regs_in` + /// and `regs_out` register sets. 
+ inputs_done: bool, + + /// Available registers on the input side of the instruction. + /// + /// While we're adding input constraints (`!inputs_done`): + /// + /// - Live values on the input side are marked as unavailable. + /// - The 'from' registers of fixed input reassignments are marked as available as they are + /// added. + /// - Input-side variables are marked as available. + /// + /// After finishing input constraints (`inputs_done`): + /// + /// - Live values on the input side are marked as unavailable. + /// - The 'to' registers of fixed input reassignments are marked as unavailable. + /// - Input-side variables are marked as available. + /// + regs_in: RegisterSet, + + /// Available registers on the output side of the instruction / fixed input scratch space. + /// + /// While we're adding input constraints (`!inputs_done`): + /// + /// - The 'to' registers of fixed input reassignments are marked as unavailable. + /// + /// After finishing input constraints (`inputs_done`): + /// + /// - Live-through values are marked as unavailable. + /// - Fixed output assignments are marked as unavailable. + /// - Live-through variables are marked as available. + /// + regs_out: RegisterSet, + + /// List of register moves scheduled to avoid conflicts. + /// + /// This is used as working space by the `schedule_moves()` function. + moves: Vec<Move>, + + /// List of pending fill moves. This is only used during `schedule_moves()`. + fills: Vec<Move>, +} + +/// Interface for programming the constraints into the solver. +impl Solver { + /// Create a new empty solver. + pub fn new() -> Self { + Self { + assignments: SparseMap::new(), + vars: Vec::new(), + inputs_done: false, + regs_in: RegisterSet::new(), + regs_out: RegisterSet::new(), + moves: Vec::new(), + fills: Vec::new(), + } + } + + /// Clear all data structures in this coloring pass. + pub fn clear(&mut self) { + self.assignments.clear(); + self.vars.clear(); + self.inputs_done = false; + self.regs_in = RegisterSet::new(); + self.regs_out = RegisterSet::new(); + self.moves.clear(); + self.fills.clear(); + } + + /// Reset the solver state and prepare solving for a new instruction with an initial set of + /// allocatable registers. + /// + /// The `regs` set is the allocatable registers before any reassignments are applied. + pub fn reset(&mut self, regs: &RegisterSet) { + self.assignments.clear(); + self.vars.clear(); + self.inputs_done = false; + self.regs_in = regs.clone(); + // Used for tracking fixed input assignments while `!inputs_done`: + self.regs_out = RegisterSet::new(); + self.moves.clear(); + self.fills.clear(); + } + + /// Add a fixed input reassignment of `value`. + /// + /// This means that `value` must be assigned to `to` and can't become a variable. Call with + /// `from == to` to ensure that `value` is not reassigned from its existing register location. + /// + /// In either case, `to` will not be available for variables on the input side of the + /// instruction. + pub fn reassign_in(&mut self, value: Value, rc: RegClass, from: RegUnit, to: RegUnit) { + debug!( + "reassign_in({}:{}, {} -> {})", + value, + rc, + rc.info.display_regunit(from), + rc.info.display_regunit(to) + ); + debug_assert!(!self.inputs_done); + if self.regs_in.is_avail(rc, from) { + // It looks like `value` was already removed from the register set. It must have been + // added as a variable previously. A fixed constraint beats a variable, so convert it. 
+ if let Some(idx) = self.vars.iter().position(|v| v.value == value) { + let v = self.vars.remove(idx); + debug!("-> converting variable {} to a fixed constraint", v); + // The spiller is responsible for ensuring that all constraints on the uses of a + // value are compatible. + debug_assert!( + v.constraint.contains(to), + "Incompatible constraints for {}", + value + ); + } else { + panic!("Invalid from register for fixed {} constraint", value); + } + } + self.regs_in.free(rc, from); + self.regs_out.take(rc, to); + self.assignments.insert(Assignment { + value, + rc, + from, + to, + }); + } + + /// Add a variable representing an input side value with an existing register assignment. + /// + /// A variable is a value that should be reassigned to something in the `constraint` register + /// class. + /// + /// It is assumed initially that the value is also live on the output side of the instruction. + /// This can be changed by calling to `add_kill()`. + /// + /// This function can only be used before calling `inputs_done()`. Afterwards, more input-side + /// variables can be added by calling `add_killed_var()` and `add_through_var()` + pub fn add_var(&mut self, value: Value, constraint: RegClass, from: RegUnit) { + debug!( + "add_var({}:{}, from={})", + value, + constraint, + constraint.info.display_regunit(from) + ); + debug_assert!(!self.inputs_done); + self.add_live_var(value, constraint, from, true); + } + + /// Add an extra input-side variable representing a value that is killed by the current + /// instruction. + /// + /// This function should be called after `inputs_done()` only. Use `add_var()` before. + pub fn add_killed_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { + debug!( + "add_killed_var({}:{}, from={})", + value, + rc, + rc.info.display_regunit(from) + ); + debug_assert!(self.inputs_done); + self.add_live_var(value, rc, from, false); + } + + /// Add an extra input-side variable representing a value that is live through the current + /// instruction. + /// + /// This function should be called after `inputs_done()` only. Use `add_var()` before. + pub fn add_through_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { + debug!( + "add_through_var({}:{}, from={})", + value, + rc, + rc.info.display_regunit(from) + ); + debug_assert!(self.inputs_done); + self.add_live_var(value, rc, from, true); + } + + /// Shared code for `add_var`, `add_killed_var`, and `add_through_var`. + /// + /// Add a variable that is live before the instruction, and possibly live through. Merge + /// constraints if the value has already been added as a variable or fixed assignment. + fn add_live_var(&mut self, value: Value, rc: RegClass, from: RegUnit, live_through: bool) { + // Check for existing entries for this value. + if !self.can_add_var(rc, from) { + // There could be an existing variable entry. + if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { + // We have an existing variable entry for `value`. Combine the constraints. + if let Some(rc) = v.constraint.intersect(rc) { + debug!("-> combining constraint with {} yields {}", v, rc); + v.constraint = rc; + return; + } else { + // The spiller should have made sure the same value is not used with disjoint + // constraints. + panic!("Incompatible constraints: {} + {}", rc, v) + } + } + + // No variable, then it must be a fixed reassignment. 
+ if let Some(a) = self.assignments.get(value) { + debug!("-> already fixed assignment {}", a); + debug_assert!(rc.contains(a.to), "Incompatible constraints for {}", value); + return; + } + + debug!("{}", self); + panic!("Wrong from register for {}", value); + } + + let new_var = Variable::new_live(value, rc, from, live_through); + debug!("-> new var: {}", new_var); + + self.regs_in.free(rc, from); + if self.inputs_done && live_through { + self.regs_out.free(rc, from); + } + self.vars.push(new_var); + } + + /// Check for conflicts between fixed input assignments and existing live values. + /// + /// Returns true if one of the live values conflicts with a fixed input assignment. Such a + /// conflicting value must be turned into a variable. + pub fn has_fixed_input_conflicts(&self) -> bool { + debug_assert!(!self.inputs_done); + // The `from` side of the fixed input diversions are taken from `regs_out`. + self.regs_out.interferes_with(&self.regs_in) + } + + /// Check if `rc, reg` specifically conflicts with the fixed input assignments. + pub fn is_fixed_input_conflict(&self, rc: RegClass, reg: RegUnit) -> bool { + debug_assert!(!self.inputs_done); + !self.regs_out.is_avail(rc, reg) + } + + /// Finish adding input side constraints. + /// + /// Call this method to indicate that there will be no more fixed input reassignments added + /// and prepare for the output side constraints. + pub fn inputs_done(&mut self) { + debug_assert!(!self.has_fixed_input_conflicts()); + + // At this point, `regs_out` contains the `to` side of the input reassignments, and the + // `from` side has already been marked as available in `regs_in`. + // + // Remove the `to` assignments from `regs_in` so it now indicates the registers available + // to variables at the input side. + self.regs_in.intersect(&self.regs_out); + + // The meaning of `regs_out` now changes completely to indicate the registers available to + // variables on the output side. + // The initial mask will be modified by `add_kill()` and `add_fixed_output()`. + self.regs_out = self.regs_in.clone(); + + // Now we can't add more fixed input assignments, but `add_var()` is still allowed. + self.inputs_done = true; + } + + /// Record that an input register value is killed by the instruction. + /// + /// Even if a fixed reassignment has been added for the value, the `reg` argument should be the + /// original location before the reassignments. + /// + /// This means that the register is available on the output side. + pub fn add_kill(&mut self, value: Value, rc: RegClass, reg: RegUnit) { + debug_assert!(self.inputs_done); + + // If a fixed assignment is killed, the `to` register becomes available on the output side. + if let Some(a) = self.assignments.get(value) { + debug_assert_eq!(a.from, reg); + self.regs_out.free(a.rc, a.to); + return; + } + + // It's also possible that a variable is killed. That means it doesn't need to satisfy + // interference constraints on the output side. + // Variables representing tied operands will get their `is_output` flag set again later. + if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { + debug_assert!(v.is_input); + v.is_output = false; + return; + } + + // Alright, this is just a boring value being killed by the instruction. Just reclaim + // the assigned register. + self.regs_out.free(rc, reg); + } + + /// Record that an input register is tied to an output register. + /// + /// It is assumed that `add_kill` was called previously with the same arguments. 
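+    /// That call freed the register on the output side; this method claims it again for the
+    /// tied output.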
+ /// + /// The output value that must have the same register as the input value is not recorded in the + /// solver. + /// + /// If the value has already been assigned to a fixed register, return that. + pub fn add_tied_input( + &mut self, + value: Value, + rc: RegClass, + reg: RegUnit, + is_global: bool, + ) -> Option<RegUnit> { + debug_assert!(self.inputs_done); + + // If a fixed assignment is tied, the `to` register is not available on the output side. + if let Some(a) = self.assignments.get(value) { + debug_assert_eq!(a.from, reg); + self.regs_out.take(a.rc, a.to); + return Some(a.to); + } + + // Check if a variable was created. + if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { + debug_assert!(v.is_input); + v.is_output = true; + v.is_global = is_global; + return None; + } + + // No variable exists for `value` because its constraints are already satisfied. + // However, if the tied output value has a global live range, we must create a variable to + // avoid global interference too. + if is_global { + let mut new_var = Variable::new_live(value, rc, reg, true); + new_var.is_global = true; + debug!("add_tied_input: new tied-global value: {}", new_var); + self.vars.push(new_var); + self.regs_in.free(rc, reg); + } else { + self.regs_out.take(rc, reg); + } + + None + } + + /// Add a fixed output assignment. + /// + /// This means that `to` will not be available for variables on the output side of the + /// instruction. + /// + /// Returns `false` if a live value conflicts with `to`, so it couldn't be added. Find the + /// conflicting live-through value and turn it into a variable before calling this method + /// again. + #[allow(dead_code)] + pub fn add_fixed_output(&mut self, rc: RegClass, reg: RegUnit) -> bool { + debug_assert!(self.inputs_done); + if self.regs_out.is_avail(rc, reg) { + self.regs_out.take(rc, reg); + true + } else { + false + } + } + + /// Add a defined output value. + /// + /// This is similar to `add_var`, except the value doesn't have a prior register assignment. + pub fn add_def(&mut self, value: Value, constraint: RegClass, is_global: bool) { + debug_assert!(self.inputs_done); + self.vars + .push(Variable::new_def(value, constraint, is_global)); + } + + /// Clear the `is_global` flag on all solver variables. + /// + /// This is used when there are not enough global registers available, and global defines have + /// to be replaced with local defines followed by a copy. + pub fn clear_all_global_flags(&mut self) { + for v in &mut self.vars { + v.is_global = false; + } + } +} + +/// Error reported when the solver fails to find a solution with the current constraints. +/// +/// When no solution can be found, the error indicates how constraints could be loosened to help. +pub enum SolverError { + /// There are not available registers in the given register class. + /// + /// This should be resolved by turning live-through values into variables so they can be moved + /// out of the way. + Divert(RegClass), + + /// There are insufficient available registers in the global set to assign an `is_global` + /// variable with the given value. + /// + /// This should be resolved by converting the variable to a local one. + Global(Value), +} + +/// Interface for searching for a solution. +impl Solver { + /// Try a quick-and-dirty solution. + /// + /// This is expected to succeed for most instructions since the constraint problem is almost + /// always trivial. + /// + /// Returns `Ok(regs)` if a solution was found. 
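+    /// On failure, the `SolverError` describes how the constraints can be loosened; the caller
+    /// is expected to do so and then retry, typically via `real_solve()`.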
+ pub fn quick_solve( + &mut self, + global_regs: &RegisterSet, + is_reload: bool, + ) -> Result<RegisterSet, SolverError> { + self.find_solution(global_regs, is_reload) + } + + /// Try harder to find a solution. + /// + /// Call this method after `quick_solve()` fails. + /// + /// This may return an error with a register class that has run out of registers. If registers + /// can be freed up in the starving class, this method can be called again after adding + /// variables for the freed registers. + pub fn real_solve( + &mut self, + global_regs: &RegisterSet, + is_reload: bool, + ) -> Result<RegisterSet, SolverError> { + // Compute domain sizes for all the variables given the current register sets. + for v in &mut self.vars { + let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len(); + v.domain = cmp::min(d, u16::MAX as usize) as u16; + } + + // Solve for vars with small domains first to increase the chance of finding a solution. + // + // Also consider this case: + // + // v0: out, global + // v1: in + // v2: in+out + // + // If only %r0 and %r1 are available, the global constraint may cause us to assign: + // + // v0 -> %r1 + // v1 -> %r0 + // v2 -> ! + // + // Usually in+out variables will have a smaller domain, but in the above case the domain + // size is the same, so we also prioritize in+out variables. + // + // Include the reversed previous solution for this variable partly as a stable tie breaker, + // partly to shake things up on a second attempt. + // + // Use the `from` register and value number as a tie breaker to get a stable sort. + self.vars.sort_unstable_by_key(|v| { + ( + v.domain, + !(v.is_input && v.is_output), + !v.solution, + v.from.unwrap_or(0), + v.value, + ) + }); + + debug!("real_solve for {}", self); + self.find_solution(global_regs, is_reload) + } + + /// Search for a solution with the current list of variables. + /// + /// If a solution was found, returns `Ok(regs)` with the set of available registers on the + /// output side after the solution. If no solution could be found, returns `Err(rc)` with the + /// constraint register class that needs more available registers. + fn find_solution( + &mut self, + global_regs: &RegisterSet, + is_reload: bool, + ) -> Result<RegisterSet, SolverError> { + // Available registers on the input and output sides respectively. + let mut iregs = self.regs_in.clone(); + let mut oregs = self.regs_out.clone(); + let mut gregs = global_regs.clone(); + + for v in &mut self.vars { + let rc = v.constraint; + + // Decide which register to assign. In order to try and keep registers holding + // reloaded values separate from all other registers to the extent possible, we choose + // the first available register in the normal case, but the last available one in the + // case of a reload. See "A side note on register choice heuristics" in + // src/redundant_reload_remover.rs for further details. + let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs); + let maybe_reg = if is_reload { + reg_set_iter.rnext() + } else { + reg_set_iter.next() + }; + + let reg = match maybe_reg { + Some(reg) => reg, + None => { + // If `v` must avoid global interference, there is not point in requesting + // live registers be diverted. We need to make it a non-global value. 
+ if v.is_global && gregs.iter(rc).next().is_none() { + return Err(SolverError::Global(v.value)); + } + return Err(SolverError::Divert(rc)); + } + }; + + v.solution = reg; + if v.is_input { + iregs.take(rc, reg); + } + if v.is_output { + oregs.take(rc, reg); + } + if v.is_global { + gregs.take(rc, reg); + } + } + + Ok(oregs) + } + + /// Get all the variables. + pub fn vars(&self) -> &[Variable] { + &self.vars + } + + /// Check if `value` can be added as a variable to help find a solution. + pub fn can_add_var(&mut self, constraint: RegClass, from: RegUnit) -> bool { + !self.regs_in.is_avail(constraint, from) + && !self.vars.iter().any(|var| var.from == Some(from)) + } +} + +/// Interface for working with parallel copies once a solution has been found. +impl Solver { + /// Collect all the register moves we need to execute. + fn collect_moves(&mut self) { + self.moves.clear(); + + // Collect moves from the chosen solution for all non-define variables. + for v in &self.vars { + if let Some(from) = v.from { + // Omit variable solutions that don't require the value to be moved. + if from != v.solution { + self.moves.push(Move::Reg { + value: v.value, + from, + to: v.solution, + rc: v.constraint, + }); + } + } + } + + // Convert all of the fixed register assignments into moves, but omit the ones that are + // already in the right register. + self.moves + .extend(self.assignments.values().filter_map(Move::with_assignment)); + + if !self.moves.is_empty() { + debug!("collect_moves: {}", DisplayList(&self.moves)); + } + } + + /// Try to schedule a sequence of `regmove` instructions that will shuffle registers into + /// place. + /// + /// This may require the use of additional available registers, and it can fail if no + /// additional registers are available. + /// + /// TODO: Handle failure by generating a sequence of register swaps, or by temporarily spilling + /// a register. + /// + /// Returns the number of spills that had to be emitted. + pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize { + self.collect_moves(); + debug_assert!(self.fills.is_empty()); + + let mut num_spill_slots = 0; + let mut avail = regs.clone(); + let mut i = 0; + while i < self.moves.len() + self.fills.len() { + // Don't even look at the fills until we've spent all the moves. Deferring these lets + // us potentially reuse the claimed registers to resolve multiple cycles. + if i >= self.moves.len() { + self.moves.append(&mut self.fills); + } + + // Find the first move that can be executed now. + if let Some(j) = self.moves[i..].iter().position(|m| match m.to_reg() { + Some((rc, reg)) => avail.is_avail(rc, reg), + None => true, + }) { + // This move can be executed now. + self.moves.swap(i, i + j); + let m = &self.moves[i]; + if let Some((rc, reg)) = m.to_reg() { + avail.take(rc, reg); + } + if let Some((rc, reg)) = m.from_reg() { + avail.free(rc, reg); + } + debug!("move #{}: {}", i, m); + i += 1; + continue; + } + + // When we get here, none of the `moves[i..]` can be executed. This means there are + // only cycles remaining. The cycles can be broken in a few ways: + // + // 1. Grab an available register and use it to break a cycle. + // 2. Move a value temporarily into a stack slot instead of a register. + // 3. Use swap instructions. + // + // TODO: So far we only implement 1 and 2. + + // Pick an assignment with the largest possible width. This is more likely to break up + // a cycle than an assignment with fewer register units. 
For example, it may be + // necessary to move two arm32 S-registers out of the way before a D-register can move + // into place. + // + // We use `min_by_key` and `!` instead of `max_by_key` because it preserves the + // existing order of moves with the same width. + let j = self.moves[i..] + .iter() + .enumerate() + .min_by_key(|&(_, m)| !m.rc().width) + .unwrap() + .0; + self.moves.swap(i, i + j); + + // Check the top-level register class for an available register. It is an axiom of the + // register allocator that we can move between all registers in the top-level RC. + let m = self.moves[i].clone(); + let toprc = m.rc().toprc(); + if let Some(reg) = avail.iter(toprc).next() { + debug!( + "breaking cycle at {} with available {} register {}", + m, + toprc, + toprc.info.display_regunit(reg) + ); + + // Alter the move so it is guaranteed to be picked up when we loop. It is important + // that this move is scheduled immediately, otherwise we would have multiple moves + // of the same value, and they would not be commutable. + let old_to_reg = self.moves[i].replace_to_reg(reg); + // Append a fixup move so we end up in the right place. This move will be scheduled + // later. That's ok because it is the single remaining move of `m.value` after the + // next iteration. + self.moves.push(Move::Reg { + value: m.value(), + rc: toprc, + from: reg, + to: old_to_reg, + }); + // TODO: What if allocating an extra register is not enough to break a cycle? This + // can happen when there are registers of different widths in a cycle. For ARM, we + // may have to move two S-registers out of the way before we can resolve a cycle + // involving a D-register. + continue; + } + + // It was impossible to free up a register in toprc, so use an emergency spill slot as + // a last resort. + let slot = num_spill_slots; + num_spill_slots += 1; + debug!("breaking cycle at {} with slot {}", m, slot); + let old_to_reg = self.moves[i].change_to_spill(slot); + self.fills.push(Move::Fill { + value: m.value(), + rc: toprc, + from_slot: slot, + to: old_to_reg, + }); + } + + num_spill_slots + } + + /// Borrow the scheduled set of register moves that was computed by `schedule_moves()`. + pub fn moves(&self) -> &[Move] { + &self.moves + } +} + +impl fmt::Display for Solver { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let reginfo = self.vars.first().map(|v| v.constraint.info); + writeln!(f, "Solver {{ inputs_done: {},", self.inputs_done)?; + writeln!(f, " in: {}", self.regs_in.display(reginfo))?; + writeln!(f, " out: {}", self.regs_out.display(reginfo))?; + writeln!( + f, + " assignments: {}", + DisplayList(self.assignments.as_slice()) + )?; + writeln!(f, " vars: {}", DisplayList(&self.vars))?; + writeln!(f, " moves: {}", DisplayList(&self.moves))?; + writeln!(f, "}}") + } +} + +#[cfg(test)] +#[cfg(feature = "arm32")] +mod tests { + use super::{Move, Solver}; + use crate::entity::EntityRef; + use crate::ir::Value; + use crate::isa::registers::{RegBank, RegClassData}; + use crate::isa::{RegClass, RegInfo, RegUnit}; + use crate::regalloc::RegisterSet; + use core::borrow::Borrow; + + // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info + // about registers, so we directly access `INFO` from registers-arm32.rs. + include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs")); + + // Get a register class by name. 
+ fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass { + reginfo + .classes + .iter() + .find(|rc| rc.name == name) + .expect("Can't find named register class.") + } + + // Construct a register move. + fn mov(value: Value, rc: RegClass, from: RegUnit, to: RegUnit) -> Move { + Move::Reg { + value, + rc, + from, + to, + } + } + + fn spill(value: Value, rc: RegClass, from: RegUnit, to_slot: usize) -> Move { + Move::Spill { + value, + rc, + from, + to_slot, + } + } + + fn fill(value: Value, rc: RegClass, from_slot: usize, to: RegUnit) -> Move { + Move::Fill { + value, + rc, + from_slot, + to, + } + } + + #[test] + fn simple_moves() { + let reginfo = INFO.borrow(); + let gpr = rc_by_name(®info, "GPR"); + let r0 = gpr.unit(0); + let r1 = gpr.unit(1); + let r2 = gpr.unit(2); + let gregs = RegisterSet::new(); + let mut regs = RegisterSet::new(); + let mut solver = Solver::new(); + let v10 = Value::new(10); + let v11 = Value::new(11); + + // As simple as it gets: Value is in r1, we want r0. + regs.take(gpr, r1); + solver.reset(®s); + solver.reassign_in(v10, gpr, r1, r0); + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + assert_eq!(solver.schedule_moves(®s), 0); + assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]); + + // A bit harder: r0, r1 need to go in r1, r2. + regs.take(gpr, r0); + solver.reset(®s); + solver.reassign_in(v10, gpr, r0, r1); + solver.reassign_in(v11, gpr, r1, r2); + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + assert_eq!(solver.schedule_moves(®s), 0); + assert_eq!( + solver.moves(), + &[mov(v11, gpr, r1, r2), mov(v10, gpr, r0, r1)] + ); + + // Swap r0 and r1 in three moves using r2 as a scratch. + solver.reset(®s); + solver.reassign_in(v10, gpr, r0, r1); + solver.reassign_in(v11, gpr, r1, r0); + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + assert_eq!(solver.schedule_moves(®s), 0); + assert_eq!( + solver.moves(), + &[ + mov(v10, gpr, r0, r2), + mov(v11, gpr, r1, r0), + mov(v10, gpr, r2, r1), + ] + ); + } + + #[test] + fn harder_move_cycles() { + let reginfo = INFO.borrow(); + let s = rc_by_name(®info, "S"); + let d = rc_by_name(®info, "D"); + let d0 = d.unit(0); + let d1 = d.unit(1); + let d2 = d.unit(2); + let s0 = s.unit(0); + let s1 = s.unit(1); + let s2 = s.unit(2); + let s3 = s.unit(3); + let gregs = RegisterSet::new(); + let mut regs = RegisterSet::new(); + let mut solver = Solver::new(); + let v10 = Value::new(10); + let v11 = Value::new(11); + let v12 = Value::new(12); + + // Not a simple cycle: Swap d0 <-> (s2, s3) + regs.take(d, d0); + regs.take(d, d1); + solver.reset(®s); + solver.reassign_in(v10, d, d0, d1); + solver.reassign_in(v11, s, s2, s0); + solver.reassign_in(v12, s, s3, s1); + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + assert_eq!(solver.schedule_moves(®s), 0); + assert_eq!( + solver.moves(), + &[ + mov(v10, d, d0, d2), + mov(v11, s, s2, s0), + mov(v12, s, s3, s1), + mov(v10, d, d2, d1), + ] + ); + + // Same problem in the other direction: Swap (s0, s1) <-> d1. + // + // If we divert the moves in order, we will need to allocate *two* temporary S registers. A + // trivial algorithm might assume that allocating a single temp is enough. 
+ solver.reset(®s); + solver.reassign_in(v11, s, s0, s2); + solver.reassign_in(v12, s, s1, s3); + solver.reassign_in(v10, d, d1, d0); + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + assert_eq!(solver.schedule_moves(®s), 0); + assert_eq!( + solver.moves(), + &[ + mov(v10, d, d1, d2), + mov(v12, s, s1, s3), + mov(v11, s, s0, s2), + mov(v10, d, d2, d0), + ] + ); + } + + #[test] + fn emergency_spill() { + let reginfo = INFO.borrow(); + let gpr = rc_by_name(®info, "GPR"); + let r0 = gpr.unit(0); + let r1 = gpr.unit(1); + let r2 = gpr.unit(2); + let r3 = gpr.unit(3); + let r4 = gpr.unit(4); + let r5 = gpr.unit(5); + let gregs = RegisterSet::new(); + let mut regs = RegisterSet::new(); + let mut solver = Solver::new(); + let v10 = Value::new(10); + let v11 = Value::new(11); + let v12 = Value::new(12); + let v13 = Value::new(13); + let v14 = Value::new(14); + let v15 = Value::new(15); + + // Claim r0--r2 and r3--r15 for other values. + for i in 0..16 { + regs.take(gpr, gpr.unit(i)); + } + + // Request a permutation cycle. + solver.reset(®s); + solver.reassign_in(v10, gpr, r0, r1); + solver.reassign_in(v11, gpr, r1, r2); + solver.reassign_in(v12, gpr, r2, r0); + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + assert_eq!(solver.schedule_moves(®s), 1); + assert_eq!( + solver.moves(), + &[ + spill(v10, gpr, r0, 0), + mov(v12, gpr, r2, r0), + mov(v11, gpr, r1, r2), + fill(v10, gpr, 0, r1), + ] + ); + + // Two cycles should only require a single spill. + solver.reset(®s); + // Cycle 1. + solver.reassign_in(v10, gpr, r0, r1); + solver.reassign_in(v11, gpr, r1, r2); + solver.reassign_in(v12, gpr, r2, r0); + // Cycle 2. + solver.reassign_in(v13, gpr, r3, r4); + solver.reassign_in(v14, gpr, r4, r5); + solver.reassign_in(v15, gpr, r5, r3); + + solver.inputs_done(); + assert!(solver.quick_solve(&gregs, false).is_ok()); + // We resolve two cycles with one spill. + assert_eq!(solver.schedule_moves(®s), 1); + assert_eq!( + solver.moves(), + &[ + spill(v10, gpr, r0, 0), + mov(v12, gpr, r2, r0), + mov(v11, gpr, r1, r2), + mov(v13, gpr, r3, r1), // Use available r1 to break cycle 2. + mov(v15, gpr, r5, r3), + mov(v14, gpr, r4, r5), + mov(v13, gpr, r1, r4), + fill(v10, gpr, 0, r1), // Finally complete cycle 1. + ] + ); + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/spilling.rs b/third_party/rust/cranelift-codegen/src/regalloc/spilling.rs new file mode 100644 index 0000000000..33d0fe9bd6 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/spilling.rs @@ -0,0 +1,637 @@ +//! Spilling pass. +//! +//! The spilling pass is the first to run after the liveness analysis. Its primary function is to +//! ensure that the register pressure never exceeds the number of available registers by moving +//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's +//! live range. +//! +//! Some instruction operand constraints may require additional registers to resolve. Since this +//! can cause spilling, the spilling pass is also responsible for resolving those constraints by +//! inserting copies. The extra constraints are: +//! +//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by +//! inserting a copy to a temporary value when necessary. +//! 2. When the same value is used more than once by an instruction, the operand constraints must +//! be compatible. Otherwise, the value must be copied into a new register for some of the +//! operands. 
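The discipline this pass enforces — never let more values claim registers than the target provides, and when an instruction would push pressure over the limit, demote a live value to the stack — can be made concrete without any of the crate's own `Pressure`, `Liveness`, or `LiveValueTracker` machinery. The sketch below is a standalone toy, assuming a single register class and using a value's definition index as a stand-in for the reverse post-order comparison the real `spill_candidate` gets from the dominator tree; `ToyPressure` and everything in it is invented for illustration and is not part of this crate.

```rust
/// A deliberately simplified model of the spilling discipline: one register
/// class, `limit` physical registers, and values identified by the order in
/// which they were defined (a stand-in for the RPO of their defining
/// instructions).
struct ToyPressure {
    limit: usize,
    /// Live values currently occupying a register.
    in_regs: Vec<u32>,
    /// Values that have been demoted to the stack.
    spilled: Vec<u32>,
}

impl ToyPressure {
    fn new(limit: usize) -> Self {
        Self { limit, in_regs: Vec::new(), spilled: Vec::new() }
    }

    /// Record a new register definition. If that would exceed the register
    /// limit, first spill the live value with the earliest definition.
    fn define(&mut self, value: u32) {
        while self.in_regs.len() >= self.limit {
            // Earliest definition = smallest definition index in this toy model.
            let idx = self
                .in_regs
                .iter()
                .enumerate()
                .min_by_key(|&(_, &v)| v)
                .map(|(i, _)| i)
                .expect("limit is non-zero, so something must be live");
            let victim = self.in_regs.swap_remove(idx);
            self.spilled.push(victim);
        }
        self.in_regs.push(value);
    }

    /// Record that `value` is dead, freeing its register.
    fn kill(&mut self, value: u32) {
        self.in_regs.retain(|&v| v != value);
    }
}

fn main() {
    let mut p = ToyPressure::new(2);
    p.define(0); // v0 gets a register
    p.define(1); // v1 gets a register; we are now at the limit
    p.define(2); // v2 needs a register, so the earliest def (v0) is spilled
    p.kill(1);   // v1 dies and frees its register
    assert_eq!(p.spilled, vec![0]);
    assert_eq!(p.in_regs, vec![2]);
    println!("spilled: {:?}, still in registers: {:?}", p.spilled, p.in_regs);
}
```

Spilling the value with the earliest definition is the same heuristic `spill_candidate` uses further down in this file; as noted there, it depends on a good reload pass to bring the value back into a register close to its remaining uses.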
+ +use crate::cursor::{Cursor, EncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc}; +use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit}; +use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa}; +use crate::regalloc::affinity::Affinity; +use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; +use crate::regalloc::liveness::Liveness; +use crate::regalloc::pressure::Pressure; +use crate::regalloc::virtregs::VirtRegs; +use crate::timing; +use crate::topo_order::TopoOrder; +use alloc::vec::Vec; +use core::fmt; +use log::debug; + +/// Return a top-level register class which contains `unit`. +fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass { + let bank = reginfo.bank_containing_regunit(unit).unwrap(); + reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)] + .iter() + .find(|&rc| rc.contains(unit)) + .expect("reg unit should be in a toprc") +} + +/// Persistent data structures for the spilling pass. +pub struct Spilling { + spills: Vec<Value>, + reg_uses: Vec<RegUse>, +} + +/// Context data structure that gets instantiated once per pass. +struct Context<'a> { + // Current instruction as well as reference to function and ISA. + cur: EncCursor<'a>, + + // Cached ISA information. + reginfo: RegInfo, + encinfo: EncInfo, + + // References to contextual data structures we need. + domtree: &'a DominatorTree, + liveness: &'a mut Liveness, + virtregs: &'a VirtRegs, + topo: &'a mut TopoOrder, + + // Current register pressure. + pressure: Pressure, + + // Values spilled for the current instruction. These values have already been removed from the + // pressure tracker, but they are still present in the live value tracker and their affinity + // hasn't been changed yet. + spills: &'a mut Vec<Value>, + + // Uses of register values in the current instruction. + reg_uses: &'a mut Vec<RegUse>, +} + +impl Spilling { + /// Create a new spilling data structure. + pub fn new() -> Self { + Self { + spills: Vec::new(), + reg_uses: Vec::new(), + } + } + + /// Clear all data structures in this spilling pass. + pub fn clear(&mut self) { + self.spills.clear(); + self.reg_uses.clear(); + } + + /// Run the spilling algorithm over `func`. 
+ pub fn run( + &mut self, + isa: &dyn TargetIsa, + func: &mut Function, + domtree: &DominatorTree, + liveness: &mut Liveness, + virtregs: &VirtRegs, + topo: &mut TopoOrder, + tracker: &mut LiveValueTracker, + ) { + let _tt = timing::ra_spilling(); + debug!("Spilling for:\n{}", func.display(isa)); + let reginfo = isa.register_info(); + let usable_regs = isa.allocatable_registers(func); + let mut ctx = Context { + cur: EncCursor::new(func, isa), + reginfo: isa.register_info(), + encinfo: isa.encoding_info(), + domtree, + liveness, + virtregs, + topo, + pressure: Pressure::new(®info, &usable_regs), + spills: &mut self.spills, + reg_uses: &mut self.reg_uses, + }; + ctx.run(tracker) + } +} + +impl<'a> Context<'a> { + fn run(&mut self, tracker: &mut LiveValueTracker) { + self.topo.reset(self.cur.func.layout.blocks()); + while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { + self.visit_block(block, tracker); + } + } + + fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { + debug!("Spilling {}:", block); + self.cur.goto_top(block); + self.visit_block_header(block, tracker); + tracker.drop_dead_params(); + self.process_spills(tracker); + + while let Some(inst) = self.cur.next_inst() { + if !self.cur.func.dfg[inst].opcode().is_ghost() { + self.visit_inst(inst, block, tracker); + } else { + let (_throughs, kills) = tracker.process_ghost(inst); + self.free_regs(kills); + } + tracker.drop_dead(inst); + self.process_spills(tracker); + } + } + + // Take all live registers in `regs` from the pressure set. + // This doesn't cause any spilling, it is assumed there are enough registers. + fn take_live_regs(&mut self, regs: &[LiveValue]) { + for lv in regs { + if !lv.is_dead { + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + self.pressure.take(rc); + } + } + } + } + + // Free all registers in `kills` from the pressure set. + fn free_regs(&mut self, kills: &[LiveValue]) { + for lv in kills { + if let Affinity::Reg(rci) = lv.affinity { + if !self.spills.contains(&lv.value) { + let rc = self.reginfo.rc(rci); + self.pressure.free(rc); + } + } + } + } + + // Free all dead registers in `regs` from the pressure set. + fn free_dead_regs(&mut self, regs: &[LiveValue]) { + for lv in regs { + if lv.is_dead { + if let Affinity::Reg(rci) = lv.affinity { + if !self.spills.contains(&lv.value) { + let rc = self.reginfo.rc(rci); + self.pressure.free(rc); + } + } + } + } + } + + fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) { + let (liveins, params) = tracker.block_top( + block, + &self.cur.func.dfg, + self.liveness, + &self.cur.func.layout, + self.domtree, + ); + + // Count the live-in registers. These should already fit in registers; they did at the + // dominator. + self.pressure.reset(); + self.take_live_regs(liveins); + + // A block can have an arbitrary (up to 2^16...) number of parameters, so they are not + // guaranteed to fit in registers. + for lv in params { + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + 'try_take: while let Err(mask) = self.pressure.take_transient(rc) { + debug!("Need {} reg for block param {}", rc, lv.value); + match self.spill_candidate(mask, liveins) { + Some(cand) => { + debug!( + "Spilling live-in {} to make room for {} block param {}", + cand, rc, lv.value + ); + self.spill_reg(cand); + } + None => { + // We can't spill any of the live-in registers, so we have to spill an + // block argument. 
Since the current spill metric would consider all the + // block arguments equal, just spill the present register. + debug!("Spilling {} block argument {}", rc, lv.value); + + // Since `spill_reg` will free a register, add the current one here. + self.pressure.take(rc); + self.spill_reg(lv.value); + break 'try_take; + } + } + } + } + } + + // The transient pressure counts for the block arguments are accurate. Just preserve them. + self.pressure.preserve_transient(); + self.free_dead_regs(params); + } + + fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) { + debug!("Inst {}, {}", self.cur.display_inst(inst), self.pressure); + debug_assert_eq!(self.cur.current_inst(), Some(inst)); + debug_assert_eq!(self.cur.current_block(), Some(block)); + + let constraints = self + .encinfo + .operand_constraints(self.cur.func.encodings[inst]); + + // We may need to resolve register constraints if there are any noteworthy uses. + debug_assert!(self.reg_uses.is_empty()); + self.collect_reg_uses(inst, block, constraints); + + // Calls usually have fixed register uses. + let call_sig = self.cur.func.dfg.call_signature(inst); + if let Some(sig) = call_sig { + self.collect_abi_reg_uses(inst, sig); + } + + if !self.reg_uses.is_empty() { + self.process_reg_uses(inst, tracker); + } + + // Update the live value tracker with this instruction. + let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + + // Remove kills from the pressure tracker. + self.free_regs(kills); + + // If inst is a call, spill all register values that are live across the call. + // This means that we don't currently take advantage of callee-saved registers. + // TODO: Be more sophisticated. + let opcode = self.cur.func.dfg[inst].opcode(); + if call_sig.is_some() || opcode.clobbers_all_regs() { + for lv in throughs { + if lv.affinity.is_reg() && !self.spills.contains(&lv.value) { + self.spill_reg(lv.value); + } + } + } + + // Make sure we have enough registers for the register defs. + // Dead defs are included here. They need a register too. + // No need to process call return values, they are in fixed registers. + if let Some(constraints) = constraints { + for op in constraints.outs { + if op.kind != ConstraintKind::Stack { + // Add register def to pressure, spill if needed. + while let Err(mask) = self.pressure.take_transient(op.regclass) { + debug!("Need {} reg from {} throughs", op.regclass, throughs.len()); + match self.spill_candidate(mask, throughs) { + Some(cand) => self.spill_reg(cand), + None => panic!( + "Ran out of {} registers for {}", + op.regclass, + self.cur.display_inst(inst) + ), + } + } + } + } + self.pressure.reset_transient(); + } + + // Restore pressure state, compute pressure with affinities from `defs`. + // Exclude dead defs. Includes call return values. + // This won't cause spilling. + self.take_live_regs(defs); + } + + // Collect register uses that are noteworthy in one of the following ways: + // + // 1. It's a fixed register constraint. + // 2. It's a use of a spilled value. + // 3. It's a tied register constraint and the value isn't killed. + // + // We are assuming here that if a value is used both by a fixed register operand and a register + // class operand, they two are compatible. We are also assuming that two register class + // operands are always compatible. 
+ fn collect_reg_uses( + &mut self, + inst: Inst, + block: Block, + constraints: Option<&RecipeConstraints>, + ) { + let args = self.cur.func.dfg.inst_args(inst); + let num_fixed_ins = if let Some(constraints) = constraints { + for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { + let mut reguse = RegUse::new(arg, idx, op.regclass.into()); + let lr = &self.liveness[arg]; + match op.kind { + ConstraintKind::Stack => continue, + ConstraintKind::FixedReg(_) => reguse.fixed = true, + ConstraintKind::Tied(_) => { + // A tied operand must kill the used value. + reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); + } + ConstraintKind::FixedTied(_) => { + reguse.fixed = true; + reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); + } + ConstraintKind::Reg => {} + } + if lr.affinity.is_stack() { + reguse.spilled = true; + } + + // Only collect the interesting register uses. + if reguse.fixed || reguse.tied || reguse.spilled { + debug!(" reguse: {}", reguse); + self.reg_uses.push(reguse); + } + } + constraints.ins.len() + } else { + // A non-ghost instruction with no constraints can't have any + // fixed operands. + 0 + }; + + // Similarly, for return instructions, collect uses of ABI-defined + // return values. + if self.cur.func.dfg[inst].opcode().is_return() { + debug_assert_eq!( + self.cur.func.dfg.inst_variable_args(inst).len(), + self.cur.func.signature.returns.len(), + "The non-fixed arguments in a return should follow the function's signature." + ); + for (ret_idx, (ret, &arg)) in + self.cur.func.signature.returns.iter().zip(args).enumerate() + { + let idx = num_fixed_ins + ret_idx; + let unit = match ret.location { + ArgumentLoc::Unassigned => { + panic!("function return signature should be legalized") + } + ArgumentLoc::Reg(unit) => unit, + ArgumentLoc::Stack(_) => continue, + }; + let toprc = toprc_containing_regunit(unit, &self.reginfo); + let mut reguse = RegUse::new(arg, idx, toprc.into()); + reguse.fixed = true; + + debug!(" reguse: {}", reguse); + self.reg_uses.push(reguse); + } + } + } + + // Collect register uses from the ABI input constraints. + fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) { + let num_fixed_args = self.cur.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let args = self.cur.func.dfg.inst_variable_args(inst); + for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig] + .params + .iter() + .zip(args) + .enumerate() + { + if abi.location.is_reg() { + let (rci, spilled) = match self.liveness[arg].affinity { + Affinity::Reg(rci) => (rci, false), + Affinity::Stack => ( + self.cur.isa.regclass_for_abi_type(abi.value_type).into(), + true, + ), + Affinity::Unassigned => panic!("Missing affinity for {}", arg), + }; + let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci); + reguse.fixed = true; + reguse.spilled = spilled; + self.reg_uses.push(reguse); + } + } + } + + // Process multiple register uses to resolve potential conflicts. + // + // Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary. + // Trigger spilling if any of the temporaries cause the register pressure to become too high. + // + // Leave `self.reg_uses` empty. + fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) { + // We're looking for multiple uses of the same value, so start by sorting by value. The + // secondary `opidx` key makes it possible to use an unstable (non-allocating) sort. 
+ self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx)); + + self.cur.use_srcloc(inst); + for i in 0..self.reg_uses.len() { + let ru = self.reg_uses[i]; + + // Do we need to insert a copy for this use? + let need_copy = if ru.tied { + true + } else if ru.fixed { + // This is a fixed register use which doesn't necessarily require a copy. + // Make a copy only if this is not the first use of the value. + self.reg_uses + .get(i.wrapping_sub(1)) + .map_or(false, |ru2| ru2.value == ru.value) + } else { + false + }; + + if need_copy { + let copy = self.insert_copy(ru.value, ru.rci); + self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy; + } + + // Even if we don't insert a copy, we may need to account for register pressure for the + // reload pass. + if need_copy || ru.spilled { + let rc = self.reginfo.rc(ru.rci); + while let Err(mask) = self.pressure.take_transient(rc) { + debug!("Copy of {} reg causes spill", rc); + // Spill a live register that is *not* used by the current instruction. + // Spilling a use wouldn't help. + // + // Do allow spilling of block arguments on branches. This is safe since we spill + // the whole virtual register which includes the matching block parameter value + // at the branch destination. It is also necessary since there can be + // arbitrarily many block arguments. + match { + let args = if self.cur.func.dfg[inst].opcode().is_branch() { + self.cur.func.dfg.inst_fixed_args(inst) + } else { + self.cur.func.dfg.inst_args(inst) + }; + self.spill_candidate( + mask, + tracker.live().iter().filter(|lv| !args.contains(&lv.value)), + ) + } { + Some(cand) => self.spill_reg(cand), + None => panic!( + "Ran out of {} registers when inserting copy before {}", + rc, + self.cur.display_inst(inst) + ), + } + } + } + } + self.pressure.reset_transient(); + self.reg_uses.clear() + } + + // Find a spill candidate from `candidates` whose top-level register class is in `mask`. + fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value> + where + II: IntoIterator<Item = &'ii LiveValue>, + { + // Find the best viable spill candidate. + // + // The very simple strategy implemented here is to spill the value with the earliest def in + // the reverse post-order. This strategy depends on a good reload pass to generate good + // code. + // + // We know that all candidate defs dominate the current instruction, so one of them will + // dominate the others. That is the earliest def. + candidates + .into_iter() + .filter_map(|lv| { + // Viable candidates are registers in one of the `mask` classes, and not already in + // the spill set. + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) { + // Here, `lv` is a viable spill candidate. + return Some(lv.value); + } + } + None + }) + .min_by(|&a, &b| { + // Find the minimum candidate according to the RPO of their defs. + self.domtree.rpo_cmp( + self.cur.func.dfg.value_def(a), + self.cur.func.dfg.value_def(b), + &self.cur.func.layout, + ) + }) + } + + /// Spill `value` immediately by + /// + /// 1. Changing its affinity to `Stack` which marks the spill. + /// 2. Removing the value from the pressure tracker. + /// 3. Adding the value to `self.spills` for later reference by `process_spills`. + /// + /// Note that this does not update the cached affinity in the live value tracker. Call + /// `process_spills` to do that. 
+ fn spill_reg(&mut self, value: Value) { + if let Affinity::Reg(rci) = self.liveness.spill(value) { + let rc = self.reginfo.rc(rci); + self.pressure.free(rc); + self.spills.push(value); + debug!("Spilled {}:{} -> {}", value, rc, self.pressure); + } else { + panic!("Cannot spill {} that was already on the stack", value); + } + + // Assign a spill slot for the whole virtual register. + let ss = self + .cur + .func + .stack_slots + .make_spill_slot(self.cur.func.dfg.value_type(value)); + for &v in self.virtregs.congruence_class(&value) { + self.liveness.spill(v); + self.cur.func.locations[v] = ValueLoc::Stack(ss); + } + } + + /// Process any pending spills in the `self.spills` vector. + /// + /// It is assumed that spills are removed from the pressure tracker immediately, see + /// `spill_reg` above. + /// + /// We also need to update the live range affinity and remove spilled values from the live + /// value tracker. + fn process_spills(&mut self, tracker: &mut LiveValueTracker) { + if !self.spills.is_empty() { + tracker.process_spills(|v| self.spills.contains(&v)); + self.spills.clear() + } + } + + /// Insert a `copy value` before the current instruction and give it a live range extending to + /// the current instruction. + /// + /// Returns the new local value created. + fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value { + let copy = self.cur.ins().copy(value); + let inst = self.cur.built_inst(); + + // Update live ranges. + self.liveness.create_dead(copy, inst, Affinity::Reg(rci)); + self.liveness.extend_locally( + copy, + self.cur.func.layout.pp_block(inst), + self.cur.current_inst().expect("must be at an instruction"), + &self.cur.func.layout, + ); + + copy + } +} + +/// Struct representing a register use of a value. +/// Used to detect multiple uses of the same value with incompatible register constraints. +#[derive(Clone, Copy)] +struct RegUse { + value: Value, + opidx: u16, + + // Register class required by the use. + rci: RegClassIndex, + + // A use with a fixed register constraint. + fixed: bool, + + // A register use of a spilled value. + spilled: bool, + + // A use with a tied register constraint *and* the used value is not killed. + tied: bool, +} + +impl RegUse { + fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self { + Self { + value, + opidx: idx as u16, + rci, + fixed: false, + spilled: false, + tied: false, + } + } +} + +impl fmt::Display for RegUse { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}@op{}", self.value, self.opidx)?; + if self.fixed { + write!(f, "/fixed")?; + } + if self.spilled { + write!(f, "/spilled")?; + } + if self.tied { + write!(f, "/tied")?; + } + Ok(()) + } +} diff --git a/third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs b/third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs new file mode 100644 index 0000000000..ee2ec9bcd9 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/regalloc/virtregs.rs @@ -0,0 +1,505 @@ +//! Virtual registers. +//! +//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the +//! values in a virtual register are assigned to the same location, fewer copies will result in the +//! output. +//! +//! A virtual register is typically built by merging together SSA values that are "phi-related" - +//! that is, one value is passed as a block argument to a branch and the other is the block parameter +//! value itself. +//! +//! If any values in a virtual register are spilled, they will use the same stack slot. 
This avoids +//! memory-to-memory copies when a spilled value is passed as a block argument. + +use crate::dbg::DisplayList; +use crate::dominator_tree::DominatorTreePreorder; +use crate::entity::entity_impl; +use crate::entity::{EntityList, ListPool}; +use crate::entity::{Keys, PrimaryMap, SecondaryMap}; +use crate::ir::{Function, Value}; +use crate::packed_option::PackedOption; +use alloc::vec::Vec; +use core::cmp::Ordering; +use core::fmt; +use core::slice; +use smallvec::SmallVec; + +/// A virtual register reference. +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct VirtReg(u32); +entity_impl!(VirtReg, "vreg"); + +type ValueList = EntityList<Value>; + +/// Collection of virtual registers. +/// +/// Each virtual register is a list of values. Also maintain a map from values to their unique +/// virtual register, if any. +pub struct VirtRegs { + /// Memory pool for the value lists. + pool: ListPool<Value>, + + /// The primary table of virtual registers. + vregs: PrimaryMap<VirtReg, ValueList>, + + /// Allocated virtual register numbers that are no longer in use. + unused_vregs: Vec<VirtReg>, + + /// Each value belongs to at most one virtual register. + value_vregs: SecondaryMap<Value, PackedOption<VirtReg>>, + + /// Table used during the union-find phase while `vregs` is empty. + union_find: SecondaryMap<Value, i32>, + + /// Values that have been activated in the `union_find` table, but not yet added to any virtual + /// registers by the `finish_union_find()` function. + pending_values: Vec<Value>, +} + +impl VirtRegs { + /// Create a new virtual register collection. + pub fn new() -> Self { + Self { + pool: ListPool::new(), + vregs: PrimaryMap::new(), + unused_vregs: Vec::new(), + value_vregs: SecondaryMap::new(), + union_find: SecondaryMap::new(), + pending_values: Vec::new(), + } + } + + /// Clear all virtual registers. + pub fn clear(&mut self) { + self.vregs.clear(); + self.unused_vregs.clear(); + self.value_vregs.clear(); + self.pool.clear(); + self.union_find.clear(); + self.pending_values.clear(); + } + + /// Get the virtual register containing `value`, if any. + pub fn get(&self, value: Value) -> Option<VirtReg> { + self.value_vregs[value].into() + } + + /// Get the list of values in `vreg`. + pub fn values(&self, vreg: VirtReg) -> &[Value] { + self.vregs[vreg].as_slice(&self.pool) + } + + /// Get an iterator over all virtual registers. + pub fn all_virtregs(&self) -> Keys<VirtReg> { + self.vregs.keys() + } + + /// Get the congruence class of `value`. + /// + /// If `value` belongs to a virtual register, the congruence class is the values of the virtual + /// register. Otherwise it is just the value itself. + #[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] + pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value] + where + 'a: 'b, + { + self.get(*value) + .map_or_else(|| slice::from_ref(value), |vr| self.values(vr)) + } + + /// Check if `a` and `b` belong to the same congruence class. + pub fn same_class(&self, a: Value, b: Value) -> bool { + match (self.get(a), self.get(b)) { + (Some(va), Some(vb)) => va == vb, + _ => a == b, + } + } + + /// Sort the values in `vreg` according to the dominator tree pre-order. + /// + /// Returns the slice of sorted values which `values(vreg)` will also return from now on. 
+ pub fn sort_values( + &mut self, + vreg: VirtReg, + func: &Function, + preorder: &DominatorTreePreorder, + ) -> &[Value] { + let s = self.vregs[vreg].as_mut_slice(&mut self.pool); + s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func)); + s + } + + /// Insert a single value into a sorted virtual register. + /// + /// It is assumed that the virtual register containing `big` is already sorted by + /// `sort_values()`, and that `single` does not already belong to a virtual register. + /// + /// If `big` is not part of a virtual register, one will be created. + pub fn insert_single( + &mut self, + big: Value, + single: Value, + func: &Function, + preorder: &DominatorTreePreorder, + ) -> VirtReg { + debug_assert_eq!(self.get(single), None, "Expected singleton {}", single); + + // Make sure `big` has a vreg. + let vreg = self.get(big).unwrap_or_else(|| { + let vr = self.alloc(); + self.vregs[vr].push(big, &mut self.pool); + self.value_vregs[big] = vr.into(); + vr + }); + + // Determine the insertion position for `single`. + let index = match self + .values(vreg) + .binary_search_by(|&v| preorder.pre_cmp_def(v, single, func)) + { + Ok(_) => panic!("{} already in {}", single, vreg), + Err(i) => i, + }; + self.vregs[vreg].insert(index, single, &mut self.pool); + self.value_vregs[single] = vreg.into(); + vreg + } + + /// Remove a virtual register. + /// + /// The values in `vreg` become singletons, and the virtual register number may be reused in + /// the future. + pub fn remove(&mut self, vreg: VirtReg) { + // Start by reassigning all the values. + for &v in self.vregs[vreg].as_slice(&self.pool) { + let old = self.value_vregs[v].take(); + debug_assert_eq!(old, Some(vreg)); + } + + self.vregs[vreg].clear(&mut self.pool); + self.unused_vregs.push(vreg); + } + + /// Allocate a new empty virtual register. + fn alloc(&mut self) -> VirtReg { + self.unused_vregs + .pop() + .unwrap_or_else(|| self.vregs.push(Default::default())) + } + + /// Unify `values` into a single virtual register. + /// + /// The values in the slice can be singletons or they can belong to a virtual register already. + /// If a value belongs to a virtual register, all of the values in that register must be + /// present. + /// + /// The values are assumed to already be in topological order. + pub fn unify(&mut self, values: &[Value]) -> VirtReg { + // Start by clearing all virtual registers involved. + let mut singletons = 0; + let mut cleared = 0; + for &val in values { + match self.get(val) { + None => singletons += 1, + Some(vreg) => { + if !self.vregs[vreg].is_empty() { + cleared += self.vregs[vreg].len(&self.pool); + self.vregs[vreg].clear(&mut self.pool); + self.unused_vregs.push(vreg); + } + } + } + } + + debug_assert_eq!( + values.len(), + singletons + cleared, + "Can't unify partial virtual registers" + ); + + let vreg = self.alloc(); + self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool); + for &v in values { + self.value_vregs[v] = vreg.into(); + } + + vreg + } +} + +impl fmt::Display for VirtRegs { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for vreg in self.all_virtregs() { + write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?; + } + Ok(()) + } +} + +/// Expanded version of a union-find table entry. +enum UFEntry { + /// This value is a a set leader. The embedded number is the set's rank. + Rank(u32), + + /// This value belongs to the same set as the linked value. 
+ Link(Value), +} + +/// The `union_find` table contains `i32` entries that are interpreted as follows: +/// +/// x = 0: The value belongs to its own singleton set. +/// x > 0: The value is the leader of a set with rank x. +/// x < 0: The value belongs to the same set as the value numbered !x. +/// +/// The rank of a set is an upper bound on the number of links that must be followed from a member +/// of the set to the set leader. +/// +/// A singleton set is the same as a set with rank 0. It contains only the leader value. +impl UFEntry { + /// Decode a table entry. + fn decode(x: i32) -> Self { + if x < 0 { + Self::Link(Value::from_u32((!x) as u32)) + } else { + Self::Rank(x as u32) + } + } + + /// Encode a link entry. + fn encode_link(v: Value) -> i32 { + !(v.as_u32() as i32) + } +} + +/// Union-find algorithm for building virtual registers. +/// +/// Before values are added to virtual registers, it is possible to use a union-find algorithm to +/// construct virtual registers efficiently. This support implemented here is used as follows: +/// +/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` are placed in the same +/// virtual register. +/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the +/// `union()` calls. +/// +/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers +/// by the time `finish_union_find()` is called. +/// +/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for +/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to algorithms", 3rd Ed. +/// +/// The [Wikipedia entry on disjoint-set data +/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good. +impl VirtRegs { + /// Find the leader value and rank of the set containing `v`. + /// Compress the path if needed. + fn find(&mut self, mut val: Value) -> (Value, u32) { + let mut val_stack = SmallVec::<[Value; 8]>::new(); + let found = loop { + match UFEntry::decode(self.union_find[val]) { + UFEntry::Rank(rank) => break (val, rank), + UFEntry::Link(parent) => { + val_stack.push(val); + val = parent; + } + } + }; + // Compress the path + while let Some(val) = val_stack.pop() { + self.union_find[val] = UFEntry::encode_link(found.0); + } + found + } + + /// Union the two sets containing `a` and `b`. + /// + /// This ensures that `a` and `b` will belong to the same virtual register after calling + /// `finish_union_find()`. + pub fn union(&mut self, a: Value, b: Value) { + let (leader_a, rank_a) = self.find(a); + let (leader_b, rank_b) = self.find(b); + + if leader_a == leader_b { + return; + } + + // The first time we see a value, its rank will be 0. Add it to the list of pending values. + if rank_a == 0 { + debug_assert_eq!(a, leader_a); + self.pending_values.push(a); + } + if rank_b == 0 { + debug_assert_eq!(b, leader_b); + self.pending_values.push(b); + } + + // Merge into the set with the greater rank. This preserves the invariant that the rank is + // an upper bound on the number of links to the leader. + match rank_a.cmp(&rank_b) { + Ordering::Less => { + self.union_find[leader_a] = UFEntry::encode_link(leader_b); + } + Ordering::Greater => { + self.union_find[leader_b] = UFEntry::encode_link(leader_a); + } + Ordering::Equal => { + // When the two sets have the same rank, we arbitrarily pick the a-set to preserve. 
+ // We need to increase the rank by one since the elements in the b-set are now one
+ // link further away from the leader.
+ self.union_find[leader_a] += 1;
+ self.union_find[leader_b] = UFEntry::encode_link(leader_a);
+ }
+ }
+ }
+
+ /// Compute virtual registers based on previous calls to `union(a, b)`.
+ ///
+ /// This terminates the union-find algorithm, so the next time `union()` is called, it is for a
+ /// new independent batch of values.
+ ///
+ /// The values in each virtual register will be ordered according to when they were first
+ /// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to
+ /// create a more sensible value order.
+ ///
+ /// The new virtual registers will be appended to `new_vregs`, if present.
+ pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec<VirtReg>>) {
+ debug_assert_eq!(
+ self.pending_values.iter().find(|&&v| self.get(v).is_some()),
+ None,
+ "Values participating in union-find must not belong to existing virtual registers"
+ );
+
+ while let Some(val) = self.pending_values.pop() {
+ let (leader, _) = self.find(val);
+
+ // Get the vreg for `leader`, or create it.
+ let vreg = self.get(leader).unwrap_or_else(|| {
+ // Allocate a vreg for `leader`, but leave it empty.
+ let vr = self.alloc();
+ if let Some(ref mut vec) = new_vregs {
+ vec.push(vr);
+ }
+ self.value_vregs[leader] = vr.into();
+ vr
+ });
+
+ // Push values in `pending_values` order, including when `val == leader`.
+ self.vregs[vreg].push(val, &mut self.pool);
+ self.value_vregs[val] = vreg.into();
+
+ // Clear the entry in the union-find table. The `find(val)` call may still look at this
+ // entry in a future iteration, but that is ok. It will return a rank 0 leader that has
+ // already been assigned to the correct virtual register.
+ self.union_find[val] = 0;
+ }
+
+ // We do *not* call `union_find.clear()` here because re-initializing the table for
+ // sparse use takes time linear in the number of values in the function. Instead we reset
+ // the entries that are known to be non-zero in the loop above.
+ } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entity::EntityRef; + use crate::ir::Value; + + #[test] + fn empty_union_find() { + let mut vregs = VirtRegs::new(); + vregs.finish_union_find(None); + assert_eq!(vregs.all_virtregs().count(), 0); + } + + #[test] + fn union_self() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + vregs.union(v1, v1); + vregs.finish_union_find(None); + assert_eq!(vregs.get(v1), None); + assert_eq!(vregs.all_virtregs().count(), 0); + } + + #[test] + fn union_pair() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + vregs.union(v1, v2); + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v2, v1]); + assert_eq!(vregs.congruence_class(&v2), &[v2, v1]); + assert_eq!(vregs.all_virtregs().count(), 1); + } + + #[test] + fn union_pair_backwards() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + vregs.union(v2, v1); + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v2]); + assert_eq!(vregs.congruence_class(&v2), &[v1, v2]); + assert_eq!(vregs.all_virtregs().count(), 1); + } + + #[test] + fn union_tree() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + let v3 = Value::new(3); + let v4 = Value::new(4); + + vregs.union(v2, v4); + vregs.union(v3, v1); + // Leaders: v2, v3 + vregs.union(v4, v1); + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]); + assert_eq!(vregs.all_virtregs().count(), 1); + } + + #[test] + fn union_two() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + let v3 = Value::new(3); + let v4 = Value::new(4); + + vregs.union(v2, v4); + vregs.union(v3, v1); + // Leaders: v2, v3 + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v3]); + assert_eq!(vregs.congruence_class(&v2), &[v4, v2]); + assert_eq!(vregs.congruence_class(&v3), &[v1, v3]); + assert_eq!(vregs.congruence_class(&v4), &[v4, v2]); + assert_eq!(vregs.all_virtregs().count(), 2); + } + + #[test] + fn union_uneven() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + let v3 = Value::new(3); + let v4 = Value::new(4); + + vregs.union(v2, v4); // Rank 0-0 + vregs.union(v3, v2); // Rank 0-1 + vregs.union(v2, v1); // Rank 1-0 + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]); + assert_eq!(vregs.all_virtregs().count(), 1); + } +} |
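The packed `i32` table above can make it easy to lose sight of how small the underlying disjoint-set algorithm is. As a point of comparison, here is a minimal standalone sketch of the same technique with explicit parent and rank arrays: path compression in `find`, union by rank in `union`. The `UnionFind` type and the demo in `main` are illustrative only; they are not part of this crate, and the demo merely mirrors the shape of the `union_tree` test above.

```rust
/// Minimal disjoint-set (union-find) with union by rank and path compression,
/// spelled out with two explicit tables instead of one packed i32 per value.
struct UnionFind {
    parent: Vec<usize>,
    rank: Vec<u32>,
}

impl UnionFind {
    fn new(n: usize) -> Self {
        Self { parent: (0..n).collect(), rank: vec![0; n] }
    }

    /// Find the set leader of `x`, compressing the path as we go.
    fn find(&mut self, x: usize) -> usize {
        if self.parent[x] != x {
            let root = self.find(self.parent[x]);
            self.parent[x] = root; // path compression
        }
        self.parent[x]
    }

    /// Merge the sets containing `a` and `b`, attaching the lower-rank tree
    /// under the higher-rank one so chains to the leader stay short.
    fn union(&mut self, a: usize, b: usize) {
        let (ra, rb) = (self.find(a), self.find(b));
        if ra == rb {
            return;
        }
        match self.rank[ra].cmp(&self.rank[rb]) {
            std::cmp::Ordering::Less => self.parent[ra] = rb,
            std::cmp::Ordering::Greater => self.parent[rb] = ra,
            std::cmp::Ordering::Equal => {
                self.parent[rb] = ra;
                self.rank[ra] += 1; // the surviving leader is now one link taller
            }
        }
    }
}

fn main() {
    // Indices 1..=4 stand in for SSA values; 0 is left in its own set.
    let mut uf = UnionFind::new(5);
    uf.union(2, 4);
    uf.union(3, 1);
    uf.union(4, 1); // joins the two sets, as in the `union_tree` test above
    assert_eq!(uf.find(1), uf.find(2));
    assert_eq!(uf.find(3), uf.find(4));
    assert_ne!(uf.find(0), uf.find(1));
    println!("leader of the merged set: {}", uf.find(1));
}
```

Attaching the lower-rank tree beneath the higher-rank leader is what keeps every chain to a leader short, which is the invariant the rank entries in the `union_find` table above maintain; folding the rank and the parent link into one signed integer per value simply saves a second side table.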