path: root/third_party/rust/regalloc/src
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/regalloc/src
parentInitial commit. (diff)
Adding upstream version 86.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/regalloc/src')
-rw-r--r-- third_party/rust/regalloc/src/analysis_control_flow.rs      |  742
-rw-r--r-- third_party/rust/regalloc/src/analysis_data_flow.rs         | 1981
-rw-r--r-- third_party/rust/regalloc/src/analysis_main.rs              |  317
-rw-r--r-- third_party/rust/regalloc/src/analysis_reftypes.rs          |  137
-rw-r--r-- third_party/rust/regalloc/src/avl_tree.rs                   | 1281
-rw-r--r-- third_party/rust/regalloc/src/bt_coalescing_analysis.rs     |  672
-rw-r--r-- third_party/rust/regalloc/src/bt_commitment_map.rs          |  170
-rw-r--r-- third_party/rust/regalloc/src/bt_main.rs                    | 1844
-rw-r--r-- third_party/rust/regalloc/src/bt_spillslot_allocator.rs     |  522
-rw-r--r-- third_party/rust/regalloc/src/bt_vlr_priority_queue.rs      |  172
-rw-r--r-- third_party/rust/regalloc/src/checker.rs                    |  717
-rw-r--r-- third_party/rust/regalloc/src/data_structures.rs            | 2505
-rw-r--r-- third_party/rust/regalloc/src/inst_stream.rs                |  664
-rw-r--r-- third_party/rust/regalloc/src/lib.rs                        |  637
-rw-r--r-- third_party/rust/regalloc/src/linear_scan/analysis.rs       |  853
-rw-r--r-- third_party/rust/regalloc/src/linear_scan/assign_registers.rs | 1248
-rw-r--r-- third_party/rust/regalloc/src/linear_scan/mod.rs            |  807
-rw-r--r-- third_party/rust/regalloc/src/linear_scan/resolve_moves.rs  |  889
-rw-r--r-- third_party/rust/regalloc/src/pretty_print.rs               |   56
-rw-r--r-- third_party/rust/regalloc/src/reg_maps.rs                   |  347
-rw-r--r-- third_party/rust/regalloc/src/snapshot.rs                   |  320
-rw-r--r-- third_party/rust/regalloc/src/sparse_set.rs                 |  881
-rw-r--r-- third_party/rust/regalloc/src/union_find.rs                 |  749
23 files changed, 18511 insertions, 0 deletions
diff --git a/third_party/rust/regalloc/src/analysis_control_flow.rs b/third_party/rust/regalloc/src/analysis_control_flow.rs
new file mode 100644
index 0000000000..e28f630aa0
--- /dev/null
+++ b/third_party/rust/regalloc/src/analysis_control_flow.rs
@@ -0,0 +1,742 @@
+//! Performs control flow analysis.
+
+use log::{debug, info};
+use std::cmp::Ordering;
+
+use crate::analysis_main::AnalysisError;
+use crate::data_structures::{BlockIx, InstIx, Range, Set, TypedIxVec};
+use crate::sparse_set::{SparseSetU, SparseSetUIter};
+use crate::Function;
+
+use smallvec::SmallVec;
+
+//=============================================================================
+// Debugging config. Set all these to `false` for normal operation.
+
+// DEBUGGING: set to true to cross-check the dominator-tree computation.
+const CROSSCHECK_DOMS: bool = false;
+
+//===========================================================================//
+// //
+// CONTROL FLOW ANALYSIS //
+// //
+//===========================================================================//
+
+//=============================================================================
+// Control flow analysis: create the InstIx-to-BlockIx mapping
+
+// This is trivial, but it's sometimes useful to have.
+// Note: confusingly, the `Range` here is data_structures::Range, not
+// std::ops::Range.
+pub struct InstIxToBlockIxMap {
+ vek: TypedIxVec<BlockIx, Range<InstIx>>,
+}
+
+impl InstIxToBlockIxMap {
+ #[inline(never)]
+ pub fn new<F: Function>(func: &F) -> Self {
+ let mut vek = TypedIxVec::<BlockIx, Range<InstIx>>::new();
+ for bix in func.blocks() {
+ let r: Range<InstIx> = func.block_insns(bix);
+ assert!(r.start() <= r.last_plus1());
+ vek.push(r);
+ }
+
+ fn cmp_ranges(r1: &Range<InstIx>, r2: &Range<InstIx>) -> Ordering {
+ if r1.last_plus1() <= r2.first() {
+ return Ordering::Less;
+ }
+ if r2.last_plus1() <= r1.first() {
+ return Ordering::Greater;
+ }
+ if r1.first() == r2.first() && r1.last_plus1() == r2.last_plus1() {
+ return Ordering::Equal;
+ }
+ // If this happens, F::block_insns is telling us something that isn't right.
+ panic!("InstIxToBlockIxMap::cmp_ranges: overlapping InstIx ranges!");
+ }
+
+ vek.sort_unstable_by(|r1, r2| cmp_ranges(r1, r2));
+ // Sanity check: ascending, non-overlapping, no gaps. We need this in
+ // order to ensure that binary searching in `map` works properly.
+ for i in 1..vek.len() {
+ let r_m1 = &vek[BlockIx::new(i - 1)];
+ let r_m0 = &vek[BlockIx::new(i - 0)];
+ assert!(r_m1.last_plus1() == r_m0.first());
+ }
+
+ Self { vek }
+ }
+
+ #[inline(never)]
+ pub fn map(&self, iix: InstIx) -> BlockIx {
+ if self.vek.len() > 0 {
+ let mut lo = 0isize;
+ let mut hi = self.vek.len() as isize - 1;
+ loop {
+ if lo > hi {
+ break;
+ }
+ let mid = (lo + hi) / 2;
+ let midv = &self.vek[BlockIx::new(mid as u32)];
+ if iix < midv.start() {
+ hi = mid - 1;
+ continue;
+ }
+ if iix >= midv.last_plus1() {
+ lo = mid + 1;
+ continue;
+ }
+ assert!(midv.start() <= iix && iix < midv.last_plus1());
+ return BlockIx::new(mid as u32);
+ }
+ }
+ panic!("InstIxToBlockIxMap::map: can't map {:?}", iix);
+ }
+}
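The lookup above is a plain binary search over sorted, non-overlapping, gap-free half-open ranges, which is exactly the invariant the constructor asserts. A minimal standalone sketch of the same search, over hypothetical (start, end) pairs instead of the crate's Range<InstIx>:

    // Binary search for the half-open range [start, end) containing `x`.
    // Assumes `ranges` is sorted, non-overlapping and gap-free.
    fn find_range(ranges: &[(u32, u32)], x: u32) -> Option<usize> {
        let mut lo = 0isize;
        let mut hi = ranges.len() as isize - 1;
        while lo <= hi {
            let mid = (lo + hi) / 2;
            let (start, end) = ranges[mid as usize];
            if x < start {
                hi = mid - 1;
            } else if x >= end {
                lo = mid + 1;
            } else {
                return Some(mid as usize); // start <= x < end
            }
        }
        None
    }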
+
+//=============================================================================
+// Control flow analysis: calculation of block successor and predecessor maps
+
+// Returned TypedIxVecs contain one element per block
+#[inline(never)]
+fn calc_preds_and_succs<F: Function>(
+ func: &F,
+ num_blocks: u32,
+) -> (
+ TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+ TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+) {
+ info!(" calc_preds_and_succs: begin");
+
+ assert!(func.blocks().len() == num_blocks as usize);
+
+ // First calculate the succ map, since we can do that directly from the
+ // Func.
+ //
+ // Func::finish() ensures that all blocks are non-empty, and that only the
+ // last instruction is a control flow transfer. Hence the following won't
+ // miss any edges.
+ let mut succ_map = TypedIxVec::<BlockIx, SparseSetU<[BlockIx; 4]>>::new();
+ for b in func.blocks() {
+ let mut bix_set = SparseSetU::<[BlockIx; 4]>::empty();
+ for bix in func.block_succs(b).iter() {
+ bix_set.insert(*bix);
+ }
+ succ_map.push(bix_set);
+ }
+
+ // Now invert the mapping
+ let mut pred_map = TypedIxVec::<BlockIx, SparseSetU<[BlockIx; 4]>>::new();
+ pred_map.resize(num_blocks, SparseSetU::<[BlockIx; 4]>::empty());
+ for (src, dst_set) in (0..).zip(succ_map.iter()) {
+ for dst in dst_set.iter() {
+ pred_map[*dst].insert(BlockIx::new(src));
+ }
+ }
+
+ // Stay sane ..
+ assert!(pred_map.len() == num_blocks);
+ assert!(succ_map.len() == num_blocks);
+
+ let mut n = 0;
+ debug!("");
+ for (preds, succs) in pred_map.iter().zip(succ_map.iter()) {
+ debug!(
+ "{:<3?} preds {:<16?} succs {:?}",
+ BlockIx::new(n),
+ preds,
+ succs
+ );
+ n += 1;
+ }
+
+ info!(" calc_preds_and_succs: end");
+ (pred_map, succ_map)
+}
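Building `pred_map` is just edge reversal of `succ_map`. A minimal standalone sketch of the same inversion, assuming plain `std` collections and `usize` block indices in place of the crate's `TypedIxVec`/`SparseSetU`/`BlockIx` types (the function name is hypothetical):

    use std::collections::BTreeSet;

    // For every edge src -> dst in `succs`, record src as a predecessor of dst.
    fn invert_succ_map(succs: &[BTreeSet<usize>]) -> Vec<BTreeSet<usize>> {
        let mut preds = vec![BTreeSet::new(); succs.len()];
        for (src, dsts) in succs.iter().enumerate() {
            for &dst in dsts {
                preds[dst].insert(src);
            }
        }
        preds
    }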
+
+//=============================================================================
+// Control flow analysis: calculation of block preorder and postorder sequences
+
+// Returned Vecs contain one element per block. `None` is returned if the
+// sequences do not contain `num_blocks` elements, in which case the input
+// contains blocks not reachable from the entry point, and is invalid.
+#[inline(never)]
+fn calc_preord_and_postord<F: Function>(
+ func: &F,
+ num_blocks: u32,
+ succ_map: &TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+) -> Option<(Vec<BlockIx>, Vec<BlockIx>)> {
+ info!(" calc_preord_and_postord: begin");
+
+ let mut pre_ord = Vec::<BlockIx>::new();
+ let mut post_ord = Vec::<BlockIx>::new();
+
+ let mut visited = TypedIxVec::<BlockIx, bool>::new();
+ visited.resize(num_blocks, false);
+
+ // Set up initial state: entry block on the stack, marked as visited, and placed at the
+ // start of the pre-ord sequence.
+ let mut stack = SmallVec::<[(BlockIx, SparseSetUIter<[BlockIx; 4]>); 64]>::new();
+ let bix_entry = func.entry_block();
+ visited[bix_entry] = true;
+ pre_ord.push(bix_entry);
+ stack.push((bix_entry, succ_map[bix_entry].iter()));
+
+ 'outer: while let Some((bix, bix_succ_iter)) = stack.last_mut() {
+ // Consider the block on the top of the stack. Does it have any successors we
+ // haven't yet visited?
+ while let Some(bix_next_succ) = bix_succ_iter.next() {
+ if !visited[*bix_next_succ] {
+ // Yes. Push just one of them on the stack, along with a newly initialised
+ // iterator for it, and continue by considering the new stack top. Because
+ // blocks are only ever pushed onto the stack once, we must also add the
+ // block to the pre-ord sequence at this point.
+ visited[*bix_next_succ] = true;
+ pre_ord.push(*bix_next_succ);
+ stack.push((*bix_next_succ, succ_map[*bix_next_succ].iter()));
+ continue 'outer;
+ }
+ }
+ // No. This is the last time we'll ever hear of it. So add it to the post-ord
+ // sequence, remove the now-defunct stack-top item, and move on.
+ post_ord.push(*bix);
+ stack.pop();
+ }
+
+ assert!(pre_ord.len() == post_ord.len());
+ assert!(pre_ord.len() <= num_blocks as usize);
+ if pre_ord.len() < num_blocks as usize {
+ info!(
+ " calc_preord_and_postord: invalid: {} blocks, {} reachable",
+ num_blocks,
+ pre_ord.len()
+ );
+ return None;
+ }
+
+ assert!(pre_ord.len() == num_blocks as usize);
+ assert!(post_ord.len() == num_blocks as usize);
+ #[cfg(debug_assertions)]
+ {
+ let mut pre_ord_sorted: Vec<BlockIx> = pre_ord.clone();
+ let mut post_ord_sorted: Vec<BlockIx> = post_ord.clone();
+ pre_ord_sorted.sort_by(|bix1, bix2| bix1.get().partial_cmp(&bix2.get()).unwrap());
+ post_ord_sorted.sort_by(|bix1, bix2| bix1.get().partial_cmp(&bix2.get()).unwrap());
+ let expected: Vec<BlockIx> = (0..num_blocks).map(|u| BlockIx::new(u)).collect();
+ debug_assert!(pre_ord_sorted == expected);
+ debug_assert!(post_ord_sorted == expected);
+ }
+
+ info!(" calc_preord_and_postord: end. {} blocks", num_blocks);
+ Some((pre_ord, post_ord))
+}
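A minimal standalone sketch of the same explicit-stack DFS, recording preorder on push and postorder on pop. Plain `usize` node indices and adjacency lists stand in for the crate's `BlockIx`/`SparseSetU` types; the function name is hypothetical.

    // Iterative DFS from `entry`; nodes not reachable from `entry` simply never
    // appear in either sequence (the caller above checks the lengths afterwards).
    fn pre_and_post_order(succs: &[Vec<usize>], entry: usize) -> (Vec<usize>, Vec<usize>) {
        let mut visited = vec![false; succs.len()];
        let (mut pre, mut post) = (Vec::new(), Vec::new());
        // Each stack entry is (node, index of the next successor to examine).
        let mut stack: Vec<(usize, usize)> = vec![(entry, 0)];
        visited[entry] = true;
        pre.push(entry);
        while let Some(top) = stack.last_mut() {
            let (node, next) = *top;
            if next < succs[node].len() {
                top.1 += 1;
                let succ = succs[node][next];
                if !visited[succ] {
                    visited[succ] = true;
                    pre.push(succ);
                    stack.push((succ, 0));
                }
            } else {
                // All successors examined: `node` is finished.
                post.push(node);
                stack.pop();
            }
        }
        (pre, post)
    }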
+
+//=============================================================================
+// Computation of per-block dominator sets. Note, this is slow, and will be
+// removed at some point.
+
+// Calculate the dominance relationship, given `pred_map` and a start node
+// `start`. The resulting vector maps each block to the set of blocks that
+// dominate it. This algorithm is from Fig 7.14 of Muchnick 1997. The
+// algorithm is described as simple but not as performant as some others.
+#[inline(never)]
+fn calc_dom_sets_slow(
+ num_blocks: u32,
+ pred_map: &TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+ post_ord: &Vec<BlockIx>,
+ start: BlockIx,
+) -> TypedIxVec<BlockIx, Set<BlockIx>> {
+ info!(" calc_dom_sets_slow: begin");
+
+ let mut dom_map = TypedIxVec::<BlockIx, Set<BlockIx>>::new();
+
+ // FIXME find better names for n/d/t sets.
+ {
+ let root: BlockIx = start;
+ let n_set: Set<BlockIx> =
+ Set::from_vec((0..num_blocks).map(|bix| BlockIx::new(bix)).collect());
+ let mut d_set: Set<BlockIx>;
+ let mut t_set: Set<BlockIx>;
+
+ dom_map.resize(num_blocks, Set::<BlockIx>::empty());
+ dom_map[root] = Set::unit(root);
+ for block_i in 0..num_blocks {
+ let block_ix = BlockIx::new(block_i);
+ if block_ix != root {
+ dom_map[block_ix] = n_set.clone();
+ }
+ }
+
+ let mut num_iter = 0;
+ loop {
+ num_iter += 1;
+ info!(" calc_dom_sets_slow: outer loop {}", num_iter);
+ let mut change = false;
+ for i in 0..num_blocks {
+ // block_ix travels in "reverse postorder"
+ let block_ix = post_ord[(num_blocks - 1 - i) as usize];
+ if block_ix == root {
+ continue;
+ }
+ t_set = n_set.clone();
+ for pred_ix in pred_map[block_ix].iter() {
+ t_set.intersect(&dom_map[*pred_ix]);
+ }
+ d_set = t_set.clone();
+ d_set.insert(block_ix);
+ if !d_set.equals(&dom_map[block_ix]) {
+ change = true;
+ dom_map[block_ix] = d_set;
+ }
+ }
+ if !change {
+ break;
+ }
+ }
+ }
+
+ debug!("");
+ let mut block_ix = 0;
+ for dom_set in dom_map.iter() {
+ debug!("{:<3?} dom_set {:<16?}", BlockIx::new(block_ix), dom_set);
+ block_ix += 1;
+ }
+ info!(" calc_dom_sets_slow: end");
+ dom_map
+}
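A minimal standalone sketch of the same iterative equations (Muchnick, Fig 7.14): dom(root) = {root}, and for every other node n, dom(n) = {n} union the intersection of dom(p) over all predecessors p, iterated to a fixed point. Plain `usize` indices and `BTreeSet` stand in for the crate's `BlockIx`/`Set`; the real code above also visits blocks in reverse postorder to converge faster.

    use std::collections::BTreeSet;

    fn dom_sets(preds: &[Vec<usize>], root: usize) -> Vec<BTreeSet<usize>> {
        let n = preds.len();
        let all: BTreeSet<usize> = (0..n).collect();
        let mut dom: Vec<BTreeSet<usize>> = vec![all.clone(); n];
        let mut root_only = BTreeSet::new();
        root_only.insert(root);
        dom[root] = root_only;
        let mut changed = true;
        while changed {
            changed = false;
            for b in 0..n {
                if b == root {
                    continue;
                }
                // Intersect the dominator sets of all predecessors, then add `b` itself.
                let mut t = all.clone();
                for &p in &preds[b] {
                    let inter: BTreeSet<usize> = t.intersection(&dom[p]).cloned().collect();
                    t = inter;
                }
                t.insert(b);
                if t != dom[b] {
                    dom[b] = t;
                    changed = true;
                }
            }
        }
        dom
    }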
+
+//=============================================================================
+// Computation of per-block dominator sets by first computing trees.
+//
+// This is an implementation of the algorithm described in
+//
+// A Simple, Fast Dominance Algorithm
+// Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy
+// Department of Computer Science, Rice University, Houston, Texas, USA
+// TR-06-33870
+// https://www.cs.rice.edu/~keith/EMBED/dom.pdf
+//
+// which appears to be the de-facto standard scheme for computing dominance
+// quickly nowadays.
+
+// Unfortunately it seems like local consts are not allowed in Rust.
+const DT_INVALID_POSTORD: u32 = 0xFFFF_FFFF;
+const DT_INVALID_BLOCKIX: BlockIx = BlockIx::BlockIx(0xFFFF_FFFF);
+
+// Helper
+fn dt_merge_sets(
+ idom: &TypedIxVec<BlockIx, BlockIx>,
+ bix2rpostord: &TypedIxVec<BlockIx, u32>,
+ mut node1: BlockIx,
+ mut node2: BlockIx,
+) -> BlockIx {
+ while node1 != node2 {
+ if node1 == DT_INVALID_BLOCKIX || node2 == DT_INVALID_BLOCKIX {
+ return DT_INVALID_BLOCKIX;
+ }
+ let rpo1 = bix2rpostord[node1];
+ let rpo2 = bix2rpostord[node2];
+ if rpo1 > rpo2 {
+ node1 = idom[node1];
+ } else if rpo2 > rpo1 {
+ node2 = idom[node2];
+ }
+ }
+ assert!(node1 == node2);
+ node1
+}
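The walk above is the "intersect" step of the Cooper/Harvey/Kennedy algorithm. A minimal standalone sketch operating directly on reverse-postorder numbers (smaller means closer to the entry), assuming both arguments already have valid entries in the provisional `idom` array; the function name is hypothetical.

    // Walk the two candidates up the provisional dominator tree until they meet;
    // the meeting point is their nearest common (provisional) dominator.
    fn intersect(idom: &[usize], mut a: usize, mut b: usize) -> usize {
        while a != b {
            // Advance whichever finger is further from the entry (larger RPO number).
            while a > b {
                a = idom[a];
            }
            while b > a {
                b = idom[b];
            }
        }
        a
    }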
+
+#[inline(never)]
+fn calc_dom_tree(
+ num_blocks: u32,
+ pred_map: &TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+ post_ord: &Vec<BlockIx>,
+ start: BlockIx,
+) -> TypedIxVec<BlockIx, BlockIx> {
+ info!(" calc_dom_tree: begin");
+
+ // We use 2^32-1 as a marker for an invalid BlockIx or postorder number.
+ // Hence we need this:
+ assert!(num_blocks < DT_INVALID_POSTORD);
+
+ // We have post_ord, which is the postorder sequence.
+
+ // Compute bix2rpostord, which maps a BlockIx to its reverse postorder
+ // number. And rpostord2bix, which maps a reverse postorder number to its
+ // BlockIx.
+ let mut bix2rpostord = TypedIxVec::<BlockIx, u32>::new();
+ let mut rpostord2bix = Vec::<BlockIx>::new();
+ bix2rpostord.resize(num_blocks, DT_INVALID_POSTORD);
+ rpostord2bix.resize(num_blocks as usize, DT_INVALID_BLOCKIX);
+ for n in 0..num_blocks {
+ // bix visits the blocks in reverse postorder
+ let bix = post_ord[(num_blocks - 1 - n) as usize];
+ // Hence:
+ bix2rpostord[bix] = n;
+ // and
+ rpostord2bix[n as usize] = bix;
+ }
+ for n in 0..num_blocks {
+ debug_assert!(bix2rpostord[BlockIx::new(n)] < num_blocks);
+ }
+
+ let mut idom = TypedIxVec::<BlockIx, BlockIx>::new();
+ idom.resize(num_blocks, DT_INVALID_BLOCKIX);
+
+ // The start node must have itself as a parent.
+ idom[start] = start;
+
+ for i in 0..num_blocks {
+ let block_ix = BlockIx::new(i);
+ let preds_of_i = &pred_map[block_ix];
+ // All nodes must be reachable from the root. That means that all nodes
+ // that aren't `start` must have at least one predecessor. However, we
+ // can't assert the inverse case -- that the start node has no
+ // predecessors -- because the start node might be a self-loop, in which
+ // case it will have itself as a pred. See tests/domtree_fuzz1.rat.
+ if block_ix != start {
+ assert!(!preds_of_i.is_empty());
+ }
+ }
+
+ let mut changed = true;
+ while changed {
+ changed = false;
+ for n in 0..num_blocks {
+ // Consider blocks in reverse postorder.
+ let node = rpostord2bix[n as usize];
+ assert!(node != DT_INVALID_BLOCKIX);
+ let node_preds = &pred_map[node];
+ let rponum = bix2rpostord[node];
+
+ let mut parent = DT_INVALID_BLOCKIX;
+ if node_preds.is_empty() {
+ // No preds, `parent` remains invalid.
+ } else {
+ for pred in node_preds.iter() {
+ let pred_rpo = bix2rpostord[*pred];
+ if pred_rpo < rponum {
+ parent = *pred;
+ break;
+ }
+ }
+ }
+
+ if parent != DT_INVALID_BLOCKIX {
+ for pred in node_preds.iter() {
+ if *pred == parent {
+ continue;
+ }
+ if idom[*pred] == DT_INVALID_BLOCKIX {
+ continue;
+ }
+ parent = dt_merge_sets(&idom, &bix2rpostord, parent, *pred);
+ }
+ }
+
+ if parent != DT_INVALID_BLOCKIX && parent != idom[node] {
+ idom[node] = parent;
+ changed = true;
+ }
+ }
+ }
+
+ // Check what we can. The start node should be its own parent. All other
+ // nodes should not be their own parent, since we are assured that there are
+ // no dead blocks in the graph, and hence that there is only one dominator
+ // tree, that covers the whole graph.
+ assert!(idom[start] == start);
+ for i in 0..num_blocks {
+ let block_ix = BlockIx::new(i);
+ // All "parent pointers" are valid.
+ assert!(idom[block_ix] != DT_INVALID_BLOCKIX);
+ // The only node whose parent pointer points to itself is the start node.
+ assert!((idom[block_ix] == block_ix) == (block_ix == start));
+ }
+
+ if CROSSCHECK_DOMS {
+ // Crosscheck the dom tree, by computing dom sets using the simple
+ // iterative algorithm. Then, for each block, construct the dominator set
+ // by walking up the tree to the root, and check that it's the same as
+ // what the simple algorithm produced.
+
+ info!(" calc_dom_tree crosscheck: begin");
+ let slow_sets = calc_dom_sets_slow(num_blocks, pred_map, post_ord, start);
+ assert!(slow_sets.len() == idom.len());
+
+ for i in 0..num_blocks {
+ let mut block_ix = BlockIx::new(i);
+ let mut set = Set::<BlockIx>::empty();
+ loop {
+ set.insert(block_ix);
+ let other_block_ix = idom[block_ix];
+ if other_block_ix == block_ix {
+ break;
+ }
+ block_ix = other_block_ix;
+ }
+ assert!(set.to_vec() == slow_sets[BlockIx::new(i)].to_vec());
+ }
+ info!(" calc_dom_tree crosscheck: end");
+ }
+
+ info!(" calc_dom_tree: end");
+ idom
+}
+
+//=============================================================================
+// Computation of per-block loop-depths
+
+#[inline(never)]
+fn calc_loop_depths(
+ num_blocks: u32,
+ pred_map: &TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+ succ_map: &TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+ post_ord: &Vec<BlockIx>,
+ start: BlockIx,
+) -> TypedIxVec<BlockIx, u32> {
+ info!(" calc_loop_depths: begin");
+ let idom = calc_dom_tree(num_blocks, pred_map, post_ord, start);
+
+ // Find the loops. First, find the "loop header nodes", and from those,
+ // derive the loops.
+ //
+ // Loop headers:
+ // A "back edge" m->n is some edge m->n where n dominates m. 'n' is
+ // the loop header node.
+ //
+ // `back_edges` is a set rather than a vector so as to avoid complications
+ // that might later arise if the same loop is enumerated more than once.
+ //
+ // Iterate over all edges (m->n)
+ let mut back_edges = Set::<(BlockIx, BlockIx)>::empty();
+ for block_m_ix in BlockIx::new(0).dotdot(BlockIx::new(num_blocks)) {
+ for block_n_ix in succ_map[block_m_ix].iter() {
+ // Figure out if N dominates M. Do this by walking the dom tree from M
+ // back up to the root, and seeing if we encounter N on the way.
+ let mut n_dominates_m = false;
+ let mut block_ix = block_m_ix;
+ loop {
+ if block_ix == *block_n_ix {
+ n_dominates_m = true;
+ break;
+ }
+ let other_block_ix = idom[block_ix];
+ if other_block_ix == block_ix {
+ break;
+ }
+ block_ix = other_block_ix;
+ }
+ if n_dominates_m {
+ //println!("QQQQ back edge {} -> {}",
+ // block_m_ix.show(), block_n_ix.show());
+ back_edges.insert((block_m_ix, *block_n_ix));
+ }
+ }
+ }
+
+ // Now collect the sets of Blocks for each loop. For each back edge,
+ // collect up all the blocks in the natural loop defined by the back edge
+ // M->N. This algorithm is from Fig 7.21 of Muchnick 1997 (an excellent
+ // book). Order in `natural_loops` has no particular meaning.
+ let mut natural_loops = Vec::<Set<BlockIx>>::new();
+ for (block_m_ix, block_n_ix) in back_edges.iter() {
+ let mut loop_set: Set<BlockIx>;
+ let mut stack: Vec<BlockIx>;
+ stack = Vec::<BlockIx>::new();
+ loop_set = Set::<BlockIx>::two(*block_m_ix, *block_n_ix);
+ if block_m_ix != block_n_ix {
+ // The next line is missing in the Muchnick description. Without it the
+ // algorithm doesn't make any sense, though.
+ stack.push(*block_m_ix);
+ while let Some(block_p_ix) = stack.pop() {
+ for block_q_ix in pred_map[block_p_ix].iter() {
+ if !loop_set.contains(*block_q_ix) {
+ loop_set.insert(*block_q_ix);
+ stack.push(*block_q_ix);
+ }
+ }
+ }
+ }
+ natural_loops.push(loop_set);
+ }
+
+ // Here is a kludgey way to compute the depth of each loop. First, order
+ // `natural_loops` by increasing size, so the largest loops are at the end.
+ // Then, repeatedly scan forwards through the vector, in "upper triangular
+ // matrix" style. For each scan, remember the "current loop". Initially
+ // the "current loop is the start point of each scan. If, during the scan,
+ // we encounter a loop which is a superset of the "current loop", change the
+ // "current loop" to this new loop, and increment a counter associated with
+ // the start point of the scan. The effect is that the counter records the
+ // nesting depth of the loop at the start of the scan. For this to be
+ // completely accurate, I _think_ this requires the property that loops are
+ // either disjoint or nested, but are in no case intersecting.
+
+ natural_loops.sort_by(|left_block_set, right_block_set| {
+ left_block_set
+ .card()
+ .partial_cmp(&right_block_set.card())
+ .unwrap()
+ });
+
+ let num_loops = natural_loops.len();
+ let mut loop_depths = Vec::<u32>::new();
+ loop_depths.resize(num_loops, 0);
+
+ for i in 0..num_loops {
+ let mut curr = i;
+ let mut depth = 1;
+ for j in i + 1..num_loops {
+ debug_assert!(curr < j);
+ if natural_loops[curr].is_subset_of(&natural_loops[j]) {
+ depth += 1;
+ curr = j;
+ }
+ }
+ loop_depths[i] = depth;
+ }
+
+ // Now that we have a depth for each loop, we can finally compute the depth
+ // for each block.
+ let mut depth_map = TypedIxVec::<BlockIx, u32>::new();
+ depth_map.resize(num_blocks, 0);
+ for (loop_block_indexes, depth) in natural_loops.iter().zip(loop_depths) {
+ for loop_block_ix in loop_block_indexes.iter() {
+ if depth_map[*loop_block_ix] < depth {
+ depth_map[*loop_block_ix] = depth;
+ }
+ }
+ }
+
+ debug_assert!(depth_map.len() == num_blocks);
+
+ let mut n = 0;
+ debug!("");
+ for (depth, idom_by) in depth_map.iter().zip(idom.iter()) {
+ debug!(
+ "{:<3?} depth {} idom {:?}",
+ BlockIx::new(n),
+ depth,
+ idom_by
+ );
+ n += 1;
+ }
+
+ info!(" calc_loop_depths: end");
+ depth_map
+}
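A minimal standalone sketch of the depth computation just described: sort the natural loops by size, then for each loop count the chain of strictly larger loops that contain it. Loops are plain `BTreeSet<usize>` block-index sets here and the function name is hypothetical; like the code above, it assumes loops are either disjoint or nested.

    use std::collections::BTreeSet;

    // Returns each natural loop (sorted by size) paired with its nesting depth.
    fn loop_depths(mut loops: Vec<BTreeSet<usize>>) -> Vec<(BTreeSet<usize>, u32)> {
        loops.sort_by_key(|s| s.len());
        let mut out = Vec::with_capacity(loops.len());
        for i in 0..loops.len() {
            let mut curr = i;
            let mut depth = 1u32;
            for j in i + 1..loops.len() {
                // Each enclosing (superset) loop adds one level of nesting.
                if loops[curr].is_subset(&loops[j]) {
                    depth += 1;
                    curr = j;
                }
            }
            out.push((loops[i].clone(), depth));
        }
        out
    }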
+
+//=============================================================================
+// Control-flow analysis top level: For a Func: predecessors, successors,
+// preord and postord sequences, and loop depths.
+
+// CFGInfo contains CFG-related info computed from a Func.
+pub struct CFGInfo {
+ // All these TypedIxVecs and plain Vecs contain one element per Block in the
+ // Func.
+
+ // Predecessor and successor maps.
+ pub pred_map: TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+ pub succ_map: TypedIxVec<BlockIx, SparseSetU<[BlockIx; 4]>>,
+
+ // Pre- and post-order sequences. Iterating forwards through these
+ // vectors enumerates the blocks in preorder and postorder respectively.
+ pub pre_ord: Vec<BlockIx>,
+ pub _post_ord: Vec<BlockIx>,
+
+ // This maps from a Block to the loop depth that it is at
+ pub depth_map: TypedIxVec<BlockIx, u32>,
+}
+
+impl CFGInfo {
+ #[inline(never)]
+ pub fn create<F: Function>(func: &F) -> Result<Self, AnalysisError> {
+ info!(" CFGInfo::create: begin");
+
+ // Throw out insanely large inputs. They'll probably cause failure later
+ // on.
+ let num_blocks_usize = func.blocks().len();
+ if num_blocks_usize >= 1 * 1024 * 1024 {
+ // 1 million blocks should be enough for anyone. That will soak up 20
+ // index bits, leaving a "safety margin" of 12 bits for indices for
+ // induced structures (RangeFragIx, InstIx, VirtualRangeIx, RealRangeIx,
+ // etc).
+ return Err(AnalysisError::ImplementationLimitsExceeded);
+ }
+
+ // Similarly, limit the number of instructions to 16 million. This allows
+ // 16 insns per block with the worst-case number of blocks. Because each
+ // insn typically generates somewhat less than one new value, this check
+ // also has the effect of limiting the number of virtual registers to
+ // roughly the same amount (16 million).
+ if func.insns().len() >= 16 * 1024 * 1024 {
+ return Err(AnalysisError::ImplementationLimitsExceeded);
+ }
+
+ // Now we know we're safe to narrow it to u32.
+ let num_blocks = num_blocks_usize as u32;
+
+ // === BEGIN compute successor and predecessor maps ===
+ //
+ let (pred_map, succ_map) = calc_preds_and_succs(func, num_blocks);
+ assert!(pred_map.len() == num_blocks);
+ assert!(succ_map.len() == num_blocks);
+ //
+ // === END compute successor and predecessor maps ===
+
+ // === BEGIN check that critical edges have been split ===
+ //
+ for (src, dst_set) in (0..).zip(succ_map.iter()) {
+ if dst_set.card() < 2 {
+ continue;
+ }
+ for dst in dst_set.iter() {
+ if pred_map[*dst].card() >= 2 {
+ return Err(AnalysisError::CriticalEdge {
+ from: BlockIx::new(src),
+ to: *dst,
+ });
+ }
+ }
+ }
+ //
+ // === END check that critical edges have been split ===
+
+ // === BEGIN compute preord/postord sequences ===
+ //
+ let mb_pre_ord_and_post_ord = calc_preord_and_postord(func, num_blocks, &succ_map);
+ if mb_pre_ord_and_post_ord.is_none() {
+ return Err(AnalysisError::UnreachableBlocks);
+ }
+
+ let (pre_ord, post_ord) = mb_pre_ord_and_post_ord.unwrap();
+ assert!(pre_ord.len() == num_blocks as usize);
+ assert!(post_ord.len() == num_blocks as usize);
+ //
+ // === END compute preord/postord sequences ===
+
+ // === BEGIN compute loop depth of all Blocks
+ //
+ let depth_map = calc_loop_depths(
+ num_blocks,
+ &pred_map,
+ &succ_map,
+ &post_ord,
+ func.entry_block(),
+ );
+ debug_assert!(depth_map.len() == num_blocks);
+ //
+ // === END compute loop depth of all Blocks
+
+ info!(" CFGInfo::create: end");
+ Ok(CFGInfo {
+ pred_map,
+ succ_map,
+ pre_ord,
+ _post_ord: post_ord,
+ depth_map,
+ })
+ }
+}
diff --git a/third_party/rust/regalloc/src/analysis_data_flow.rs b/third_party/rust/regalloc/src/analysis_data_flow.rs
new file mode 100644
index 0000000000..9f3c544af7
--- /dev/null
+++ b/third_party/rust/regalloc/src/analysis_data_flow.rs
@@ -0,0 +1,1981 @@
+//! Performs dataflow and liveness analysis, including live range construction.
+
+use log::{debug, info, log_enabled, Level};
+use smallvec::{smallvec, SmallVec};
+use std::cmp::min;
+use std::fmt;
+
+use crate::analysis_control_flow::CFGInfo;
+use crate::data_structures::{
+ BlockIx, InstIx, InstPoint, MoveInfo, MoveInfoElem, Point, Queue, RangeFrag, RangeFragIx,
+ RangeFragKind, RangeFragMetrics, RealRange, RealRangeIx, RealReg, RealRegUniverse, Reg,
+ RegClass, RegSets, RegToRangesMaps, RegUsageCollector, RegVecBounds, RegVecs, RegVecsAndBounds,
+ SortedRangeFragIxs, SortedRangeFrags, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
+ VirtualReg,
+};
+use crate::sparse_set::SparseSet;
+use crate::union_find::{ToFromU32, UnionFind};
+use crate::Function;
+
+//===========================================================================//
+// //
+// DATA FLOW AND LIVENESS ANALYSIS //
+// //
+//===========================================================================//
+
+//=============================================================================
+// Data flow analysis: extraction and sanitization of reg-use information: low
+// level interface
+
+// === The meaning of "sanitization" ===
+//
+// The meaning of "sanitization" is as follows. Incoming virtual-registerised
+// code may mention a mixture of virtual and real registers. Those real
+// registers may include some which aren't available for the allocators to
+// use. Rather than scatter ad-hoc logic all over the analysis phase and the
+// allocators, we simply remove all non-available real registers from the
+// per-instruction use/def/mod sets. The effect is that, after this point, we
+// can operate on the assumption that any register we come across is either a
+// virtual register or a real register available to the allocator.
+//
+// A real register is available to the allocator iff its index number is less
+// than `RealRegUniverse.allocable`.
+//
+// Furthermore, it is not allowed that any incoming instruction mentions one
+// of the per-class scratch registers listed in
+// `RealRegUniverse.allocable_by_class[..].suggested_scratch` in either a use
+// or mod role. Sanitisation will also detect this case and return an error.
+// Mentions of a scratch register in a def role are tolerated; however, since
+// no instruction may use or modify a scratch register, all such writes are
+// dead.
+//
+// In all of the above, "mentions" of a real register really means "uses,
+// defines or modifications of said register". It doesn't matter whether the
+// instruction explicitly mentions the register or whether it is an implicit
+// mention (eg, %cl in x86 shift-by-a-variable-amount instructions). In other
+// words, a "mention" is any use, def or mod as detected by the client's
+// `get_regs` routine.
+
+// === Filtering of register groups in `RegVec`s ===
+//
+// Filtering on a group is done by leaving the start point unchanged, sliding
+// back retained registers to fill the holes from non-retained registers, and
+// reducing the group length accordingly. The effect is to "leak"
+// some registers in the group, but that's not a problem.
+//
+// Extraction of register usages for the whole function is done by
+// `get_sanitized_reg_uses_for_func`. For each instruction, their used,
+// defined and modified register sets are acquired by calling the client's
+// `get_regs` function. Then each of those three sets are cleaned up as
+// follows:
+//
+// (1) duplicates are removed (after which they really are sets)
+//
+// (2) any registers in the modified set are removed from the used and defined
+// sets. This enforces the invariant that
+// `intersect(modified, union(used, defined))` is the empty set. Live range
+// fragment computation (get_range_frags_for_block) depends on this property.
+//
+// (3) real registers unavailable to the allocator are removed, per the
+// abovementioned sanitization rules.
+
+// ==== LOCAL FN ====
+// Given a register group in `regs[start, +len)`, remove duplicates from the
+// group. The new group size is written to `*len`.
+#[inline(never)]
+fn remove_dups_from_group(regs: &mut Vec<Reg>, start: u32, len: &mut u8) {
+ // First sort the group, to facilitate de-duplication.
+ regs[start as usize..start as usize + *len as usize].sort_unstable();
+
+ // Now make a compacting pass over the group. 'rd' = read point in the
+ // group, 'wr' = write point in the group.
+ let mut wr = start as usize;
+ for rd in start as usize..start as usize + *len as usize {
+ let reg = regs[rd];
+ if rd == start as usize || regs[rd - 1] != reg {
+ // It's not a duplicate.
+ if wr != rd {
+ regs[wr] = reg;
+ }
+ wr += 1;
+ }
+ }
+
+ let new_len_usize = wr - start as usize;
+ assert!(new_len_usize <= *len as usize);
+ // This narrowing is safe because the old `len` fitted in 8 bits.
+ *len = new_len_usize as u8;
+}
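The rd/wr compacting pass above is the same in-place filter used by `remove_mods_from_group` and `sanitize_group` below; only the retention test differs (here it compares each element against its predecessor in the sorted group). A minimal standalone sketch of the shared pattern, with a caller-supplied predicate (names are hypothetical):

    // Keep only the elements of v[start .. start + len] that satisfy `keep`,
    // sliding the survivors left; returns the new length of the group.
    fn compact_group<T: Copy>(v: &mut [T], start: usize, len: usize, keep: impl Fn(&T) -> bool) -> usize {
        let mut wr = start;
        for rd in start..start + len {
            let x = v[rd];
            if keep(&x) {
                if wr != rd {
                    v[wr] = x;
                }
                wr += 1;
            }
        }
        wr - start
    }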
+
+// ==== LOCAL FN ====
+// Remove from `group[group_start, +group_len)` any registers mentioned in
+// `mods[mods_start, +mods_len)`, and update `*group_len` accordingly.
+#[inline(never)]
+fn remove_mods_from_group(
+ group: &mut Vec<Reg>,
+ group_start: u32,
+ group_len: &mut u8,
+ mods: &Vec<Reg>,
+ mods_start: u32,
+ mods_len: u8,
+) {
+ let mut wr = group_start as usize;
+ for rd in group_start as usize..group_start as usize + *group_len as usize {
+ let reg = group[rd];
+ // Only retain `reg` if it is not mentioned in `mods[mods_start, +mods_len)`
+ let mut retain = true;
+ for i in mods_start as usize..mods_start as usize + mods_len as usize {
+ if reg == mods[i] {
+ retain = false;
+ break;
+ }
+ }
+ if retain {
+ if wr != rd {
+ group[wr] = reg;
+ }
+ wr += 1;
+ }
+ }
+ let new_group_len_usize = wr - group_start as usize;
+ assert!(new_group_len_usize <= *group_len as usize);
+ // This narrowing is safe because the old `group_len` fitted in 8 bits.
+ *group_len = new_group_len_usize as u8;
+}
+
+// ==== EXPORTED FN ====
+// For instruction `inst`, add the register uses to the ends of `reg_vecs`,
+// and write bounds information into `bounds`. The register uses are raw
+// (unsanitized) but they are guaranteed to be duplicate-free and also to have
+// no `mod` mentions in the `use` or `def` groups. That is, cleanups (1) and
+// (2) above have been done.
+#[inline(never)]
+pub fn add_raw_reg_vecs_for_insn<F: Function>(
+ inst: &F::Inst,
+ reg_vecs: &mut RegVecs,
+ bounds: &mut RegVecBounds,
+) {
+ bounds.uses_start = reg_vecs.uses.len() as u32;
+ bounds.defs_start = reg_vecs.defs.len() as u32;
+ bounds.mods_start = reg_vecs.mods.len() as u32;
+
+ let mut collector = RegUsageCollector::new(reg_vecs);
+ F::get_regs(inst, &mut collector);
+
+ let uses_len = collector.reg_vecs.uses.len() as u32 - bounds.uses_start;
+ let defs_len = collector.reg_vecs.defs.len() as u32 - bounds.defs_start;
+ let mods_len = collector.reg_vecs.mods.len() as u32 - bounds.mods_start;
+
+ // This assertion is important -- the cleanup logic also depends on it.
+ assert!((uses_len | defs_len | mods_len) < 256);
+ bounds.uses_len = uses_len as u8;
+ bounds.defs_len = defs_len as u8;
+ bounds.mods_len = mods_len as u8;
+
+ // First, de-dup the three new groups.
+ if bounds.uses_len > 0 {
+ remove_dups_from_group(
+ &mut collector.reg_vecs.uses,
+ bounds.uses_start,
+ &mut bounds.uses_len,
+ );
+ }
+ if bounds.defs_len > 0 {
+ remove_dups_from_group(
+ &mut collector.reg_vecs.defs,
+ bounds.defs_start,
+ &mut bounds.defs_len,
+ );
+ }
+ if bounds.mods_len > 0 {
+ remove_dups_from_group(
+ &mut collector.reg_vecs.mods,
+ bounds.mods_start,
+ &mut bounds.mods_len,
+ );
+ }
+
+ // And finally, remove modified registers from the set of used and defined
+ // registers, so we don't have to make the client do so.
+ if bounds.mods_len > 0 {
+ if bounds.uses_len > 0 {
+ remove_mods_from_group(
+ &mut collector.reg_vecs.uses,
+ bounds.uses_start,
+ &mut bounds.uses_len,
+ &collector.reg_vecs.mods,
+ bounds.mods_start,
+ bounds.mods_len,
+ );
+ }
+ if bounds.defs_len > 0 {
+ remove_mods_from_group(
+ &mut collector.reg_vecs.defs,
+ bounds.defs_start,
+ &mut bounds.defs_len,
+ &collector.reg_vecs.mods,
+ bounds.mods_start,
+ bounds.mods_len,
+ );
+ }
+ }
+}
+
+// ==== LOCAL FN ====
+// This is the fundamental keep-or-don't-keep? predicate for sanitization. To
+// do this exactly right we also need to know whether the register is
+// mentioned in a def role (as opposed to a use or mod role). Note that this
+// function can fail, and the error must be propagated.
+#[inline(never)]
+fn sanitize_should_retain_reg(
+ reg_universe: &RealRegUniverse,
+ reg: Reg,
+ reg_is_defd: bool,
+) -> Result<bool, RealReg> {
+ // Retain all virtual regs.
+ if reg.is_virtual() {
+ return Ok(true);
+ }
+
+ // So it's a RealReg.
+ let rreg_ix = reg.get_index();
+
+ // Check that this RealReg is mentioned in the universe.
+ if rreg_ix >= reg_universe.regs.len() {
+ // This is a serious error which should be investigated. It means the
+ // client gave us an instruction which mentions a RealReg which isn't
+ // listed in the RealRegUniverse it gave us. That's not allowed.
+ return Err(reg.as_real_reg().unwrap());
+ }
+
+ // Discard all real regs that aren't available to the allocator.
+ if rreg_ix >= reg_universe.allocable {
+ return Ok(false);
+ }
+
+ // It isn't allowed for the client to give us an instruction which reads or
+ // modifies one of the scratch registers. It is however allowed to write a
+ // scratch register.
+ for reg_info in &reg_universe.allocable_by_class {
+ if let Some(reg_info) = reg_info {
+ if let Some(scratch_idx) = &reg_info.suggested_scratch {
+ let scratch_reg = reg_universe.regs[*scratch_idx].0;
+ if reg.to_real_reg() == scratch_reg {
+ if !reg_is_defd {
+ // This is an error (on the part of the client).
+ return Err(reg.as_real_reg().unwrap());
+ }
+ }
+ }
+ }
+ }
+
+ // `reg` is mentioned in the universe, is available to the allocator, and if
+ // it is one of the scratch regs, it is only written, not read or modified.
+ Ok(true)
+}
+// END helper fn
+
+// ==== LOCAL FN ====
+// Given a register group in `regs[start, +len)`, sanitize the group. To do
+// this exactly right we also need to know whether the registers in the group
+// are mentioned in def roles (as opposed to use or mod roles). Sanitisation
+// can fail, in which case we must propagate the error. If it is successful,
+// the new group size is written to `*len`.
+#[inline(never)]
+fn sanitize_group(
+ reg_universe: &RealRegUniverse,
+ regs: &mut Vec<Reg>,
+ start: u32,
+ len: &mut u8,
+ is_def_group: bool,
+) -> Result<(), RealReg> {
+ // Make a single compacting pass over the group. 'rd' = read point in the
+ // group, 'wr' = write point in the group.
+ let mut wr = start as usize;
+ for rd in start as usize..start as usize + *len as usize {
+ let reg = regs[rd];
+ // This call can fail:
+ if sanitize_should_retain_reg(reg_universe, reg, is_def_group)? {
+ if wr != rd {
+ regs[wr] = reg;
+ }
+ wr += 1;
+ }
+ }
+
+ let new_len_usize = wr - start as usize;
+ assert!(new_len_usize <= *len as usize);
+ // This narrowing is safe because the old `len` fitted in 8 bits.
+ *len = new_len_usize as u8;
+ Ok(())
+}
+
+// ==== LOCAL FN ====
+// For instruction `inst`, add the fully cleaned-up register uses to the ends
+// of `reg_vecs`, and write bounds information into `bounds`. Cleanups (1)
+// (2) and (3) mentioned above have been done. Note, this can fail, and the
+// error must be propagated.
+#[inline(never)]
+fn add_san_reg_vecs_for_insn<F: Function>(
+ inst: &F::Inst,
+ reg_universe: &RealRegUniverse,
+ reg_vecs: &mut RegVecs,
+ bounds: &mut RegVecBounds,
+) -> Result<(), RealReg> {
+ // Get the raw reg usages. These will be dup-free and mod-cleaned-up
+ // (meaning cleanups (1) and (2) have been done).
+ add_raw_reg_vecs_for_insn::<F>(inst, reg_vecs, bounds);
+
+ // Finally, sanitize them. Any errors from sanitization are propagated.
+ if bounds.uses_len > 0 {
+ sanitize_group(
+ &reg_universe,
+ &mut reg_vecs.uses,
+ bounds.uses_start,
+ &mut bounds.uses_len,
+ /*is_def_group=*/ false,
+ )?;
+ }
+ if bounds.defs_len > 0 {
+ sanitize_group(
+ &reg_universe,
+ &mut reg_vecs.defs,
+ bounds.defs_start,
+ &mut bounds.defs_len,
+ /*is_def_group=*/ true,
+ )?;
+ }
+ if bounds.mods_len > 0 {
+ sanitize_group(
+ &reg_universe,
+ &mut reg_vecs.mods,
+ bounds.mods_start,
+ &mut bounds.mods_len,
+ /*is_def_group=*/ false,
+ )?;
+ }
+
+ Ok(())
+}
+
+// ==== MAIN FN ====
+#[inline(never)]
+pub fn get_sanitized_reg_uses_for_func<F: Function>(
+ func: &F,
+ reg_universe: &RealRegUniverse,
+) -> Result<RegVecsAndBounds, RealReg> {
+ // These are modified by the per-insn loop.
+ let mut reg_vecs = RegVecs::new(false);
+ let mut bounds_vec = TypedIxVec::<InstIx, RegVecBounds>::new();
+ bounds_vec.reserve(func.insns().len());
+
+ // For each insn, add its register uses to the ends of the 3 vectors in
+ // `reg_vecs`, and create an admin entry to describe the 3 new groups. Any
+ // errors from sanitization are propagated.
+ for insn in func.insns() {
+ let mut bounds = RegVecBounds::new();
+ add_san_reg_vecs_for_insn::<F>(insn, &reg_universe, &mut reg_vecs, &mut bounds)?;
+
+ bounds_vec.push(bounds);
+ }
+
+ assert!(!reg_vecs.is_sanitized());
+ reg_vecs.set_sanitized(true);
+
+ if log_enabled!(Level::Debug) {
+ let show_reg = |r: Reg| {
+ if r.is_real() {
+ reg_universe.regs[r.get_index()].1.clone()
+ } else {
+ format!("{:?}", r).to_string()
+ }
+ };
+ let show_regs = |r_vec: &[Reg]| {
+ let mut s = "".to_string();
+ for r in r_vec {
+ s = s + &show_reg(*r) + &" ".to_string();
+ }
+ s
+ };
+
+ for i in 0..bounds_vec.len() {
+ let iix = InstIx::new(i);
+ let s_use = show_regs(
+ &reg_vecs.uses[bounds_vec[iix].uses_start as usize
+ ..bounds_vec[iix].uses_start as usize + bounds_vec[iix].uses_len as usize],
+ );
+ let s_mod = show_regs(
+ &reg_vecs.mods[bounds_vec[iix].mods_start as usize
+ ..bounds_vec[iix].mods_start as usize + bounds_vec[iix].mods_len as usize],
+ );
+ let s_def = show_regs(
+ &reg_vecs.defs[bounds_vec[iix].defs_start as usize
+ ..bounds_vec[iix].defs_start as usize + bounds_vec[iix].defs_len as usize],
+ );
+ debug!(
+ "{:?} SAN_RU: use {{ {}}} mod {{ {}}} def {{ {}}}",
+ iix, s_use, s_mod, s_def
+ );
+ }
+ }
+
+ Ok(RegVecsAndBounds::new(reg_vecs, bounds_vec))
+}
+// END main function
+
+//=============================================================================
+// Data flow analysis: extraction and sanitization of reg-use information:
+// convenience interface
+
+// ==== EXPORTED ====
+#[inline(always)]
+pub fn does_inst_use_def_or_mod_reg(
+ rvb: &RegVecsAndBounds,
+ iix: InstIx,
+ reg: Reg,
+) -> (/*uses*/ bool, /*defs*/ bool, /*mods*/ bool) {
+ let bounds = &rvb.bounds[iix];
+ let vecs = &rvb.vecs;
+ let mut uses = false;
+ let mut defs = false;
+ let mut mods = false;
+ // Since each group of registers is in order and duplicate-free (as a result
+ // of `remove_dups_from_group`), we could in theory binary-search here. But
+ // it'd almost certainly be a net loss; the group sizes are very small,
+ // often zero.
+ for i in bounds.uses_start as usize..bounds.uses_start as usize + bounds.uses_len as usize {
+ if vecs.uses[i] == reg {
+ uses = true;
+ break;
+ }
+ }
+ for i in bounds.defs_start as usize..bounds.defs_start as usize + bounds.defs_len as usize {
+ if vecs.defs[i] == reg {
+ defs = true;
+ break;
+ }
+ }
+ for i in bounds.mods_start as usize..bounds.mods_start as usize + bounds.mods_len as usize {
+ if vecs.mods[i] == reg {
+ mods = true;
+ break;
+ }
+ }
+ (uses, defs, mods)
+}
+
+// ==== EXPORTED ====
+// This is slow, really slow. Don't use it on critical paths. This applies
+// `get_regs` to `inst`, performs cleanups (1) and (2), but does not sanitize
+// the results. The results are wrapped up as Sets for convenience.
+// JRS 2020Apr09: remove this if no further use for it appears soon.
+#[allow(dead_code)]
+#[inline(never)]
+pub fn get_raw_reg_sets_for_insn<F: Function>(inst: &F::Inst) -> RegSets {
+ let mut reg_vecs = RegVecs::new(false);
+ let mut bounds = RegVecBounds::new();
+
+ add_raw_reg_vecs_for_insn::<F>(inst, &mut reg_vecs, &mut bounds);
+
+ // Make up a fake RegVecsAndBounds for just this insn, so we can hand it to
+ // RegVecsAndBounds::get_reg_sets_for_iix.
+ let mut single_insn_bounds = TypedIxVec::<InstIx, RegVecBounds>::new();
+ single_insn_bounds.push(bounds);
+
+ assert!(!reg_vecs.is_sanitized());
+ let single_insn_rvb = RegVecsAndBounds::new(reg_vecs, single_insn_bounds);
+ single_insn_rvb.get_reg_sets_for_iix(InstIx::new(0))
+}
+
+// ==== EXPORTED ====
+// This is even slower. This applies `get_regs` to `inst`, performs cleanups
+// (1) (2) and (3). The results are wrapped up as Sets for convenience. Note
+// this function can fail.
+#[inline(never)]
+pub fn get_san_reg_sets_for_insn<F: Function>(
+ inst: &F::Inst,
+ reg_universe: &RealRegUniverse,
+) -> Result<RegSets, RealReg> {
+ let mut reg_vecs = RegVecs::new(false);
+ let mut bounds = RegVecBounds::new();
+
+ add_san_reg_vecs_for_insn::<F>(inst, &reg_universe, &mut reg_vecs, &mut bounds)?;
+
+ // Make up a fake RegVecsAndBounds for just this insn, so we can hand it to
+ // RegVecsAndBounds::get_reg_sets_for_iix.
+ let mut single_insn_bounds = TypedIxVec::<InstIx, RegVecBounds>::new();
+ single_insn_bounds.push(bounds);
+
+ assert!(!reg_vecs.is_sanitized());
+ reg_vecs.set_sanitized(true);
+ let single_insn_rvb = RegVecsAndBounds::new(reg_vecs, single_insn_bounds);
+ Ok(single_insn_rvb.get_reg_sets_for_iix(InstIx::new(0)))
+}
+
+//=============================================================================
+// Data flow analysis: calculation of per-block register def and use sets
+
+// Returned TypedIxVecs contain one element per block
+#[inline(never)]
+pub fn calc_def_and_use<F: Function>(
+ func: &F,
+ rvb: &RegVecsAndBounds,
+ univ: &RealRegUniverse,
+) -> (
+ TypedIxVec<BlockIx, SparseSet<Reg>>,
+ TypedIxVec<BlockIx, SparseSet<Reg>>,
+) {
+ info!(" calc_def_and_use: begin");
+ assert!(rvb.is_sanitized());
+ let mut def_sets = TypedIxVec::new();
+ let mut use_sets = TypedIxVec::new();
+ for b in func.blocks() {
+ let mut def = SparseSet::empty();
+ let mut uce = SparseSet::empty();
+ for iix in func.block_insns(b) {
+ let bounds_for_iix = &rvb.bounds[iix];
+ // Add to `uce`, any registers for which the first event in this block
+ // is a read. Dealing with the "first event" constraint is a bit
+ // tricky. In the next two loops, `u` is a register the instruction reads and
+ // `m` is one it modifies. Whether or not we should consider such a register
+ // live-in for the block depends on whether it has been written earlier
+ // in the block. We can determine that by checking whether it is
+ // already in the def set for the block.
+ // FIXME: isn't this just:
+ // uce union= (regs_u minus def) followed by
+ // uce union= (regs_m minus def)
+ for i in bounds_for_iix.uses_start as usize
+ ..bounds_for_iix.uses_start as usize + bounds_for_iix.uses_len as usize
+ {
+ let u = rvb.vecs.uses[i];
+ if !def.contains(u) {
+ uce.insert(u);
+ }
+ }
+ for i in bounds_for_iix.mods_start as usize
+ ..bounds_for_iix.mods_start as usize + bounds_for_iix.mods_len as usize
+ {
+ let m = rvb.vecs.mods[i];
+ if !def.contains(m) {
+ uce.insert(m);
+ }
+ }
+
+ // Now add to `def`, all registers written by the instruction.
+ // This is simpler.
+ // FIXME: isn't this just: def union= (regs_d union regs_m) ?
+ for i in bounds_for_iix.defs_start as usize
+ ..bounds_for_iix.defs_start as usize + bounds_for_iix.defs_len as usize
+ {
+ let d = rvb.vecs.defs[i];
+ def.insert(d);
+ }
+ for i in bounds_for_iix.mods_start as usize
+ ..bounds_for_iix.mods_start as usize + bounds_for_iix.mods_len as usize
+ {
+ let m = rvb.vecs.mods[i];
+ def.insert(m);
+ }
+ }
+ def_sets.push(def);
+ use_sets.push(uce);
+ }
+
+ assert!(def_sets.len() == use_sets.len());
+
+ if log_enabled!(Level::Debug) {
+ let mut n = 0;
+ debug!("");
+ for (def_set, use_set) in def_sets.iter().zip(use_sets.iter()) {
+ let mut first = true;
+ let mut defs_str = "".to_string();
+ for def in def_set.to_vec() {
+ if !first {
+ defs_str = defs_str + &" ".to_string();
+ }
+ first = false;
+ defs_str = defs_str + &def.show_with_rru(univ);
+ }
+ first = true;
+ let mut uses_str = "".to_string();
+ for uce in use_set.to_vec() {
+ if !first {
+ uses_str = uses_str + &" ".to_string();
+ }
+ first = false;
+ uses_str = uses_str + &uce.show_with_rru(univ);
+ }
+ debug!(
+ "{:<3?} def {{{}}} use {{{}}}",
+ BlockIx::new(n),
+ defs_str,
+ uses_str
+ );
+ n += 1;
+ }
+ }
+
+ info!(" calc_def_and_use: end");
+ (def_sets, use_sets)
+}
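Stripped of the flat `RegVecsAndBounds` indexing, the per-block rule above reduces to what the FIXMEs suggest: a register is in `use` if it is read or modified before any write in the block, and in `def` if it is written or modified anywhere in the block. A minimal standalone sketch over one block, where each instruction is a hypothetical (uses, defs, mods) triple of plain register numbers:

    use std::collections::BTreeSet;

    fn block_def_and_use(insns: &[(Vec<u32>, Vec<u32>, Vec<u32>)]) -> (BTreeSet<u32>, BTreeSet<u32>) {
        let (mut def, mut uses) = (BTreeSet::new(), BTreeSet::new());
        for (u, d, m) in insns {
            // Reads and modifies are upward-exposed uses only if the register has
            // not already been written earlier in this block.
            for &r in u.iter().chain(m.iter()) {
                if !def.contains(&r) {
                    uses.insert(r);
                }
            }
            // Writes and modifies both make the register locally defined.
            for &r in d.iter().chain(m.iter()) {
                def.insert(r);
            }
        }
        (def, uses)
    }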
+
+//=============================================================================
+// Data flow analysis: computation of per-block register live-in and live-out
+// sets
+
+// Returned vectors contain one element per block
+#[inline(never)]
+pub fn calc_livein_and_liveout<F: Function>(
+ func: &F,
+ def_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ use_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ cfg_info: &CFGInfo,
+ univ: &RealRegUniverse,
+) -> (
+ TypedIxVec<BlockIx, SparseSet<Reg>>,
+ TypedIxVec<BlockIx, SparseSet<Reg>>,
+) {
+ info!(" calc_livein_and_liveout: begin");
+ let num_blocks = func.blocks().len() as u32;
+ let empty = SparseSet::<Reg>::empty();
+
+ let mut num_evals = 0;
+ let mut liveouts = TypedIxVec::<BlockIx, SparseSet<Reg>>::new();
+ liveouts.resize(num_blocks, empty.clone());
+
+ // Initialise the work queue so as to do a reverse preorder traversal
+ // through the graph, after which blocks are re-evaluated on demand.
+ let mut work_queue = Queue::<BlockIx>::new();
+ for i in 0..num_blocks {
+ // block_ix travels in "reverse preorder"
+ let block_ix = cfg_info.pre_ord[(num_blocks - 1 - i) as usize];
+ work_queue.push_back(block_ix);
+ }
+
+ // in_queue is an optimisation -- this routine works fine without it. in_queue is
+ // used to avoid inserting duplicate work items in work_queue. This avoids some
+ // number of duplicate re-evaluations and gets us to a fixed point faster.
+ // Very roughly, it reduces the number of evaluations per block from around
+ // 3 to around 2.
+ let mut in_queue = Vec::<bool>::new();
+ in_queue.resize(num_blocks as usize, true);
+
+ while let Some(block_ix) = work_queue.pop_front() {
+ let i = block_ix.get() as usize;
+ assert!(in_queue[i]);
+ in_queue[i] = false;
+
+ // Compute a new value for liveouts[block_ix]
+ let mut set = SparseSet::<Reg>::empty();
+ for block_j_ix in cfg_info.succ_map[block_ix].iter() {
+ let mut live_in_j = liveouts[*block_j_ix].clone();
+ live_in_j.remove(&def_sets_per_block[*block_j_ix]);
+ live_in_j.union(&use_sets_per_block[*block_j_ix]);
+ set.union(&live_in_j);
+ }
+ num_evals += 1;
+
+ if !set.equals(&liveouts[block_ix]) {
+ liveouts[block_ix] = set;
+ // Add `block_ix`'s predecessors to the work queue, since their
+ // liveout values might be affected.
+ for block_j_ix in cfg_info.pred_map[block_ix].iter() {
+ let j = block_j_ix.get() as usize;
+ if !in_queue[j] {
+ work_queue.push_back(*block_j_ix);
+ in_queue[j] = true;
+ }
+ }
+ }
+ }
+
+ // The liveout values are done, but we need to compute the liveins
+ // too.
+ let mut liveins = TypedIxVec::<BlockIx, SparseSet<Reg>>::new();
+ liveins.resize(num_blocks, empty.clone());
+ for block_ix in BlockIx::new(0).dotdot(BlockIx::new(num_blocks)) {
+ let mut live_in = liveouts[block_ix].clone();
+ live_in.remove(&def_sets_per_block[block_ix]);
+ live_in.union(&use_sets_per_block[block_ix]);
+ liveins[block_ix] = live_in;
+ }
+
+ if false {
+ let mut sum_card_live_in = 0;
+ let mut sum_card_live_out = 0;
+ for bix in BlockIx::new(0).dotdot(BlockIx::new(num_blocks)) {
+ sum_card_live_in += liveins[bix].card();
+ sum_card_live_out += liveouts[bix].card();
+ }
+ println!(
+ "QQQQ calc_LI/LO: num_evals {}, tot LI {}, tot LO {}",
+ num_evals, sum_card_live_in, sum_card_live_out
+ );
+ }
+
+ let ratio: f32 = (num_evals as f32) / ((if num_blocks == 0 { 1 } else { num_blocks }) as f32);
+ info!(
+ " calc_livein_and_liveout: {} blocks, {} evals ({:<.2} per block)",
+ num_blocks, num_evals, ratio
+ );
+
+ if log_enabled!(Level::Debug) {
+ let mut n = 0;
+ debug!("");
+ for (livein, liveout) in liveins.iter().zip(liveouts.iter()) {
+ let mut first = true;
+ let mut li_str = "".to_string();
+ for li in livein.to_vec() {
+ if !first {
+ li_str = li_str + &" ".to_string();
+ }
+ first = false;
+ li_str = li_str + &li.show_with_rru(univ);
+ }
+ first = true;
+ let mut lo_str = "".to_string();
+ for lo in liveout.to_vec() {
+ if !first {
+ lo_str = lo_str + &" ".to_string();
+ }
+ first = false;
+ lo_str = lo_str + &lo.show_with_rru(univ);
+ }
+ debug!(
+ "{:<3?} livein {{{}}} liveout {{{}}}",
+ BlockIx::new(n),
+ li_str,
+ lo_str
+ );
+ n += 1;
+ }
+ }
+
+ info!(" calc_livein_and_liveout: end");
+ (liveins, liveouts)
+}
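A minimal standalone sketch of the same backward worklist fixpoint: liveout(b) is the union over successors s of (use(s) union (liveout(s) minus def(s))), and a block's predecessors are re-queued whenever its liveout set changes. Plain `std` collections and `usize` block indices stand in for the crate's types, and the seeding order is simplified; names are hypothetical.

    use std::collections::{BTreeSet, VecDeque};

    fn compute_liveouts(
        succs: &[Vec<usize>],
        preds: &[Vec<usize>],
        defs: &[BTreeSet<u32>],
        uses: &[BTreeSet<u32>],
    ) -> Vec<BTreeSet<u32>> {
        let n = succs.len();
        let mut liveout: Vec<BTreeSet<u32>> = vec![BTreeSet::new(); n];
        let mut queue: VecDeque<usize> = (0..n).collect();
        let mut in_queue = vec![true; n];
        while let Some(b) = queue.pop_front() {
            in_queue[b] = false;
            // liveout(b) = union over successors s of (use(s) union (liveout(s) minus def(s))).
            let mut new_out = BTreeSet::new();
            for &s in &succs[b] {
                new_out.extend(uses[s].iter().cloned());
                new_out.extend(liveout[s].difference(&defs[s]).cloned());
            }
            if new_out != liveout[b] {
                liveout[b] = new_out;
                // A changed liveout can only affect the predecessors' liveouts.
                for &p in &preds[b] {
                    if !in_queue[p] {
                        in_queue[p] = true;
                        queue.push_back(p);
                    }
                }
            }
        }
        liveout
    }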
+
+//=============================================================================
+// Computation of RangeFrags (Live Range Fragments), aggregated per register.
+// This does not produce complete live ranges. That is done later, by
+// `merge_range_frags` below, using the information computed in this section
+// by `get_range_frags`.
+
+// This is surprisingly complex, in part because of the need to correctly
+// handle (1) live-in and live-out Regs, (2) dead writes, and (3) instructions
+// that modify registers rather than merely reading or writing them.
+
+/// A ProtoRangeFrag carries information about a [write .. read] range, within a Block, which
+/// we will later turn into a fully-fledged RangeFrag. It basically records the first and
+/// last-known InstPoints for appearances of a Reg.
+///
+/// ProtoRangeFrag also keeps count of the number of appearances of the Reg to which it
+/// pertains, using `uses`. The counts get rolled into the resulting RangeFrags, and later are
+/// used to calculate spill costs.
+///
+/// The running state of this function is a map from Reg to ProtoRangeFrag. Only Regs that
+/// actually appear in the Block (or are live-in to it) are mapped. This has the advantage of
+/// economy, since most Regs will not appear in (or be live-in to) most Blocks.
+#[derive(Clone)]
+struct ProtoRangeFrag {
+ /// The InstPoint in this Block at which the associated Reg most recently became live (when
+ /// moving forwards though the Block). If this value is the first InstPoint for the Block
+ /// (the U point for the Block's lowest InstIx), that indicates the associated Reg is
+ /// live-in to the Block.
+ first: InstPoint,
+
+ /// This is the InstPoint which is the end point (most recently observed read, in general)
+ /// for the current RangeFrag under construction. In general we will move `last` forwards
+ /// as we discover reads of the associated Reg. If this is the last InstPoint for the
+ /// Block (the D point for the Block's highest InstIx), that indicates that the associated
+ /// reg is live-out from the Block.
+ last: InstPoint,
+
+ /// Number of mentions of the associated Reg in this ProtoRangeFrag.
+ num_mentions: u16,
+}
+
+impl fmt::Debug for ProtoRangeFrag {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "{:?}x {:?} - {:?}",
+ self.num_mentions, self.first, self.last
+ )
+ }
+}
+
+// `fn get_range_frags` and `fn get_range_frags_for_block` below work with two vectors,
+// `out_map` and `state`, that are indexed by register. This allows them to entirely avoid the
+// use of hash-based `Map`s. However, it does create a problem in that real and virtual registers
+// occupy separate, zero-based index spaces. To solve this, we map `Reg`s to a "unified index
+// space" as follows:
+//
+// a `RealReg` is mapped to its `.get_index()` value
+//
+// a `VirtualReg` is mapped to its `.get_index()` value + the number of real registers
+//
+// To make this not too inconvenient, `fn reg_to_reg_ix` and `fn reg_ix_to_reg` convert `Reg`s
+// to and from the unified index space. This has the annoying side effect that reconstructing a
+// `Reg` from an index space value requires having available both the register universe, and a
+// table specifying the class for each virtual register.
+//
+// Really, we ought to rework the `Reg`/`RealReg`/`VirtualReg` abstractions, so as to (1) impose
+// a single index space for both register kinds, and (2) so as to separate the concepts of the
+// register index from the `Reg` itself. This second point would have the additional benefit of
+// making it feasible to represent sets of registers using bit sets.
+
+#[inline(always)]
+pub(crate) fn reg_to_reg_ix(num_real_regs: u32, r: Reg) -> u32 {
+ if r.is_real() {
+ r.get_index_u32()
+ } else {
+ num_real_regs + r.get_index_u32()
+ }
+}
+
+#[inline(always)]
+pub(crate) fn reg_ix_to_reg(
+ reg_universe: &RealRegUniverse,
+ vreg_classes: &Vec</*vreg index,*/ RegClass>,
+ reg_ix: u32,
+) -> Reg {
+ let reg_ix = reg_ix as usize;
+ let num_real_regs = reg_universe.regs.len();
+ if reg_ix < num_real_regs {
+ reg_universe.regs[reg_ix].0.to_reg()
+ } else {
+ let vreg_ix = reg_ix - num_real_regs;
+ Reg::new_virtual(vreg_classes[vreg_ix], vreg_ix as u32)
+ }
+}
+
+// HELPER FUNCTION
+// Add to `out_map`, a binding from `reg` to the frags-and-metrics pair specified by `frag` and
+// `frag_metrics`. As a space-saving optimisation, make some attempt to avoid creating
+// duplicate entries in `out_frags` and `out_frag_metrics`.
+#[inline(always)]
+fn emit_range_frag(
+ out_map: &mut Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
+ out_frags: &mut TypedIxVec<RangeFragIx, RangeFrag>,
+ out_frag_metrics: &mut TypedIxVec<RangeFragIx, RangeFragMetrics>,
+ num_real_regs: u32,
+ reg: Reg,
+ frag: &RangeFrag,
+ frag_metrics: &RangeFragMetrics,
+) {
+ debug_assert!(out_frags.len() == out_frag_metrics.len());
+
+ // Allocate a new RangeFragIx for `frag`, except, make some minimal effort to avoid huge
+ // numbers of duplicates by inspecting the previous two entries, and using them if
+ // possible.
+ let mut new_fix = None;
+
+ let num_out_frags = out_frags.len();
+ if num_out_frags >= 2 {
+ let back_0 = RangeFragIx::new(num_out_frags - 1);
+ let back_1 = RangeFragIx::new(num_out_frags - 2);
+ if out_frags[back_0] == *frag && out_frag_metrics[back_0] == *frag_metrics {
+ new_fix = Some(back_0);
+ } else if out_frags[back_1] == *frag && out_frag_metrics[back_1] == *frag_metrics {
+ new_fix = Some(back_1);
+ }
+ }
+
+ let new_fix = match new_fix {
+ Some(fix) => fix,
+ None => {
+ // We can't look back or there was no match; create a new one.
+ out_frags.push(frag.clone());
+ out_frag_metrics.push(frag_metrics.clone());
+ RangeFragIx::new(out_frags.len() as u32 - 1)
+ }
+ };
+
+ // And use the new RangeFragIx.
+ out_map[reg_to_reg_ix(num_real_regs, reg) as usize].push(new_fix);
+}
+
+/// Calculate all the RangeFrags for `bix`. Add them to `out_frags` and corresponding metrics
+/// data to `out_frag_metrics`. Add the associated RangeFragIxs to `out_map`, segregated by
+/// Reg. `bix`, `livein`, `liveout` and `rvb` are expected to be valid in the context of the
+/// Function `func`.
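+///
+/// As a rough illustration, consider a three-instruction block in which `v0` is not
+/// live-in but is live-out:
+///
+///    I1:  v0 := ...       (def)
+///    I2:  ... := v0       (use)
+///    I3:  v0 := ...       (def)
+///
+/// This produces two fragments for `v0`: one spanning I1.d .. I2.u, terminated by the
+/// overwriting def at I3, and one spanning I3.d .. the last point of the block, emitted
+/// because `v0` appears in `liveout`.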
+#[inline(never)]
+fn get_range_frags_for_block<F: Function>(
+ // Constants
+ func: &F,
+ rvb: &RegVecsAndBounds,
+ reg_universe: &RealRegUniverse,
+ vreg_classes: &Vec</*vreg index,*/ RegClass>,
+ bix: BlockIx,
+ livein: &SparseSet<Reg>,
+ liveout: &SparseSet<Reg>,
+ // Preallocated storage for use in this function. They do not carry any useful information
+ // in between calls here.
+ visited: &mut Vec<u32>,
+ state: &mut Vec</*rreg index, then vreg index, */ Option<ProtoRangeFrag>>,
+ // These accumulate the results of RangeFrag/RangeFragMetrics across multiple calls here.
+ out_map: &mut Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
+ out_frags: &mut TypedIxVec<RangeFragIx, RangeFrag>,
+ out_frag_metrics: &mut TypedIxVec<RangeFragIx, RangeFragMetrics>,
+) {
+ #[inline(always)]
+ fn plus1(n: u16) -> u16 {
+ if n == 0xFFFFu16 {
+ n
+ } else {
+ n + 1
+ }
+ }
+
+ // Invariants for the preallocated storage:
+ //
+ // * `visited` is always irrelevant (and cleared) at the start
+ //
+ // * `state` always has size (# real regs + # virtual regs). However, all its entries
+ // should be `None` in between calls here.
+
+ // We use `visited` to keep track of which `state` entries need processing at the end of
+ // this function. Since `state` is indexed by unified-reg-index, it follows that `visited`
+ // is a vector of unified-reg-indices. We add an entry to `visited` whenever we change a
+ // `state` entry from `None` to `Some`. This guarantees that we can find all the `Some`
+ // `state` entries at the end of the function, change them back to `None`, and emit the
+ // corresponding fragment.
+ visited.clear();
+
+ // Some handy constants.
+ assert!(func.block_insns(bix).len() >= 1);
+ let first_iix_in_block = func.block_insns(bix).first();
+ let last_iix_in_block = func.block_insns(bix).last();
+ let first_pt_in_block = InstPoint::new_use(first_iix_in_block);
+ let last_pt_in_block = InstPoint::new_def(last_iix_in_block);
+ let num_real_regs = reg_universe.regs.len() as u32;
+
+ // First, set up `state` as if all of `livein` had been written just prior to the block.
+ for r in livein.iter() {
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+ debug_assert!(state[r_state_ix].is_none());
+ state[r_state_ix] = Some(ProtoRangeFrag {
+ num_mentions: 0,
+ first: first_pt_in_block,
+ last: first_pt_in_block,
+ });
+ visited.push(r_state_ix as u32);
+ }
+
+ // Now visit each instruction in turn, examining first the registers it reads, then those it
+ // modifies, and finally those it writes.
+ for iix in func.block_insns(bix) {
+ let bounds_for_iix = &rvb.bounds[iix];
+
+ // Examine reads. This is pretty simple. They simply extend an existing ProtoRangeFrag
+ // to the U point of the reading insn.
+ for i in
+ bounds_for_iix.uses_start..bounds_for_iix.uses_start + bounds_for_iix.uses_len as u32
+ {
+ let r = rvb.vecs.uses[i as usize];
+ let r_state_ix = reg_to_reg_ix(num_real_regs, r) as usize;
+ match &mut state[r_state_ix] {
+ // First event for `r` is a read, but it's not listed in `livein`, since otherwise
+ // `state` would have an entry for it.
+ None => panic!("get_range_frags_for_block: fail #1"),
+ Some(ref mut pf) => {
+                    // This is the first or subsequent read after a write. Note that the "write" can
+ // be either a real write, or due to the fact that `r` is listed in `livein`.
+ // We don't care here.
+ pf.num_mentions = plus1(pf.num_mentions);
+ let new_last = InstPoint::new_use(iix);
+ debug_assert!(pf.last <= new_last);
+ pf.last = new_last;
+ }
+ }
+ }
+
+ // Examine modifies. These are handled almost identically to reads, except that they
+ // extend an existing ProtoRangeFrag down to the D point of the modifying insn.
+ for i in
+ bounds_for_iix.mods_start..bounds_for_iix.mods_start + bounds_for_iix.mods_len as u32
+ {
+ let r = &rvb.vecs.mods[i as usize];
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+ match &mut state[r_state_ix] {
+ // First event for `r` is a read (really, since this insn modifies `r`), but it's
+ // not listed in `livein`, since otherwise `state` would have an entry for it.
+ None => panic!("get_range_frags_for_block: fail #2"),
+ Some(ref mut pf) => {
+                    // This is the first or subsequent modify after a write. Add two to the
+ // mentions count, as that reflects the implied spill cost increment more
+ // accurately than just adding one: if we spill the live range in which this
+ // ends up, we'll generate both a reload and a spill instruction.
+ pf.num_mentions = plus1(plus1(pf.num_mentions));
+ let new_last = InstPoint::new_def(iix);
+ debug_assert!(pf.last <= new_last);
+ pf.last = new_last;
+ }
+ }
+ }
+
+ // Examine writes (but not writes implied by modifies). The general idea is that a
+ // write causes us to terminate and emit the existing ProtoRangeFrag, if any, and start
+ // a new frag.
+ for i in
+ bounds_for_iix.defs_start..bounds_for_iix.defs_start + bounds_for_iix.defs_len as u32
+ {
+ let r = &rvb.vecs.defs[i as usize];
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+ match &mut state[r_state_ix] {
+ // First mention of a Reg we've never heard of before. Start a new
+ // ProtoRangeFrag for it and keep going.
+ None => {
+ let new_pt = InstPoint::new_def(iix);
+ let new_pf = ProtoRangeFrag {
+ num_mentions: 1,
+ first: new_pt,
+ last: new_pt,
+ };
+ state[r_state_ix] = Some(new_pf);
+ visited.push(r_state_ix as u32);
+ }
+
+ // There's already a ProtoRangeFrag for `r`. This write will start a new one,
+ // so emit the existing one and note this write.
+ Some(ProtoRangeFrag {
+ ref mut num_mentions,
+ ref mut first,
+ ref mut last,
+ }) => {
+ if first == last {
+ debug_assert!(*num_mentions == 1);
+ }
+
+ let (frag, frag_metrics) =
+ RangeFrag::new_with_metrics(func, bix, *first, *last, *num_mentions);
+ emit_range_frag(
+ out_map,
+ out_frags,
+ out_frag_metrics,
+ num_real_regs,
+ *r,
+ &frag,
+ &frag_metrics,
+ );
+ let new_pt = InstPoint::new_def(iix);
+ // Reuse the previous entry for this new definition of the same vreg.
+ *num_mentions = 1;
+ *first = new_pt;
+ *last = new_pt;
+ }
+ }
+ }
+ }
+
+ // We are at the end of the block. We still have to deal with live-out Regs. We must also
+ // deal with ProtoRangeFrags in `state` that are for registers not listed as live-out.
+
+ // Deal with live-out Regs. Treat each one as if it is read just after the block.
+ for r in liveout.iter() {
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+ let state_elem_p = &mut state[r_state_ix];
+ match state_elem_p {
+ // This can't happen. `r` is in `liveout`, but this implies that it is neither
+ // defined in the block nor present in `livein`.
+ None => panic!("get_range_frags_for_block: fail #3"),
+ Some(ref pf) => {
+ // `r` is written (or modified), either literally or by virtue of being present
+ // in `livein`, and may or may not subsequently be read -- we don't care,
+ // because it must be read "after" the block. Create a `LiveOut` or `Thru` frag
+ // accordingly.
+ let (frag, frag_metrics) = RangeFrag::new_with_metrics(
+ func,
+ bix,
+ pf.first,
+ last_pt_in_block,
+ pf.num_mentions,
+ );
+ emit_range_frag(
+ out_map,
+ out_frags,
+ out_frag_metrics,
+ num_real_regs,
+ *r,
+ &frag,
+ &frag_metrics,
+ );
+ // Remove the entry from `state` so that the following loop doesn't process it
+ // again.
+ *state_elem_p = None;
+ }
+ }
+ }
+
+ // Finally, round up any remaining ProtoRangeFrags left in `state`. This is what `visited`
+ // is used for.
+ for r_state_ix in visited {
+ let state_elem_p = &mut state[*r_state_ix as usize];
+ match state_elem_p {
+ None => {}
+ Some(pf) => {
+ if pf.first == pf.last {
+ debug_assert!(pf.num_mentions == 1);
+ }
+ let (frag, frag_metrics) =
+ RangeFrag::new_with_metrics(func, bix, pf.first, pf.last, pf.num_mentions);
+ let r = reg_ix_to_reg(reg_universe, vreg_classes, *r_state_ix);
+ emit_range_frag(
+ out_map,
+ out_frags,
+ out_frag_metrics,
+ num_real_regs,
+ r,
+ &frag,
+ &frag_metrics,
+ );
+ // Maintain invariant that all `state` entries are `None` in between calls to
+ // this function.
+ *state_elem_p = None;
+ }
+ }
+ }
+}
+
+#[inline(never)]
+pub fn get_range_frags<F: Function>(
+ func: &F,
+ rvb: &RegVecsAndBounds,
+ reg_universe: &RealRegUniverse,
+ livein_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ liveout_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+) -> (
+ Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
+ TypedIxVec<RangeFragIx, RangeFrag>,
+ TypedIxVec<RangeFragIx, RangeFragMetrics>,
+ Vec</*vreg index,*/ RegClass>,
+) {
+ info!(" get_range_frags: begin");
+ assert!(livein_sets_per_block.len() == func.blocks().len() as u32);
+ assert!(liveout_sets_per_block.len() == func.blocks().len() as u32);
+ assert!(rvb.is_sanitized());
+
+ // In order that we can work with unified-reg-indices (see comments above), we need to know
+ // the `RegClass` for each virtual register. That info is collected here.
+ let mut vreg_classes = vec![RegClass::INVALID; func.get_num_vregs()];
+ for r in rvb
+ .vecs
+ .uses
+ .iter()
+ .chain(rvb.vecs.defs.iter())
+ .chain(rvb.vecs.mods.iter())
+ {
+ if r.is_real() {
+ continue;
+ }
+ let r_ix = r.get_index();
+ // rustc 1.43.0 appears to have problems avoiding duplicate bounds checks for
+ // `vreg_classes[r_ix]`; hence give it a helping hand here.
+ let vreg_classes_ptr = &mut vreg_classes[r_ix];
+ if *vreg_classes_ptr == RegClass::INVALID {
+ *vreg_classes_ptr = r.get_class();
+ } else {
+ assert_eq!(*vreg_classes_ptr, r.get_class());
+ }
+ }
+
+ let num_real_regs = reg_universe.regs.len();
+ let num_virtual_regs = vreg_classes.len();
+ let num_regs = num_real_regs + num_virtual_regs;
+
+ // A state variable that's reused across calls to `get_range_frags_for_block`. When not in
+ // a call to `get_range_frags_for_block`, all entries should be `None`.
+ let mut state = Vec::</*rreg index, then vreg index, */ Option<ProtoRangeFrag>>::new();
+ state.resize(num_regs, None);
+
+ // Scratch storage needed by `get_range_frags_for_block`. Doesn't carry any useful info in
+ // between calls. Start it off not-quite-empty since it will always get used at least a
+ // bit.
+ let mut visited = Vec::<u32>::with_capacity(32);
+
+ // `RangeFrag`/`RangeFragMetrics` are collected across multiple calls to
+    // `get_range_frags_for_block` in these three vectors. In other words, they collect the
+ // overall results for this function.
+ let mut result_frags = TypedIxVec::<RangeFragIx, RangeFrag>::new();
+ let mut result_frag_metrics = TypedIxVec::<RangeFragIx, RangeFragMetrics>::new();
+ let mut result_map =
+ Vec::</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>::default();
+ result_map.resize(num_regs, smallvec![]);
+
+ for bix in func.blocks() {
+ get_range_frags_for_block(
+ func,
+ rvb,
+ reg_universe,
+ &vreg_classes,
+ bix,
+ &livein_sets_per_block[bix],
+ &liveout_sets_per_block[bix],
+ &mut visited,
+ &mut state,
+ &mut result_map,
+ &mut result_frags,
+ &mut result_frag_metrics,
+ );
+ }
+
+ assert!(state.len() == num_regs);
+ assert!(result_map.len() == num_regs);
+ assert!(vreg_classes.len() == num_virtual_regs);
+ // This is pretty cheap (once per fn) and any failure will be catastrophic since it means we
+ // may have forgotten some live range fragments. Hence `assert!` and not `debug_assert!`.
+ for state_elem in &state {
+ assert!(state_elem.is_none());
+ }
+
+ if log_enabled!(Level::Debug) {
+ debug!("");
+ let mut n = 0;
+ for frag in result_frags.iter() {
+ debug!("{:<3?} {:?}", RangeFragIx::new(n), frag);
+ n += 1;
+ }
+
+ debug!("");
+ for (reg_ix, frag_ixs) in result_map.iter().enumerate() {
+ if frag_ixs.len() == 0 {
+ continue;
+ }
+ let reg = reg_ix_to_reg(reg_universe, &vreg_classes, reg_ix as u32);
+ debug!(
+ "frags for {} {:?}",
+ reg.show_with_rru(reg_universe),
+ frag_ixs
+ );
+ }
+ }
+
+ info!(" get_range_frags: end");
+ assert!(result_frags.len() == result_frag_metrics.len());
+ (result_map, result_frags, result_frag_metrics, vreg_classes)
+}
+
+//=============================================================================
+// Auxiliary tasks involved in creating a single VirtualRange from its
+// constituent RangeFragIxs:
+//
+// * The RangeFragIxs we are given here are purely within single blocks.
+// Here, we "compress" them, that is, merge those pairs that flow from one
+//   block into the one that immediately follows it in the instruction
+// stream. This does not imply anything about control flow; it is purely a
+// scheme for reducing the total number of fragments that need to be dealt
+// with during interference detection (later on).
+//
+// * Computation of metrics for the VirtualRange. This is done by examining
+// metrics of the individual fragments, and must be done before they are
+// compressed.
+
+// HELPER FUNCTION
+// Does `frag1` describe some range of instructions that is followed
+// immediately by `frag2` ? Note that this assumes (and checks) that there
+// are no spill or reload ranges in play at this point; there should not be.
+// Note also, this is very conservative: it only merges the case where the two
+// fragments are directly adjacent across a block boundary. From measurements, it
+// appears that this is the only case where merging is actually a win, though.
+fn frags_are_mergeable(
+ frag1: &RangeFrag,
+ frag1metrics: &RangeFragMetrics,
+ frag2: &RangeFrag,
+ frag2metrics: &RangeFragMetrics,
+) -> bool {
+ assert!(frag1.first.pt().is_use_or_def());
+ assert!(frag1.last.pt().is_use_or_def());
+ assert!(frag2.first.pt().is_use_or_def());
+ assert!(frag2.last.pt().is_use_or_def());
+
+ if frag1metrics.bix != frag2metrics.bix
+ && frag1.last.iix().plus(1) == frag2.first.iix()
+ && frag1.last.pt() == Point::Def
+ && frag2.first.pt() == Point::Use
+ {
+ assert!(
+ frag1metrics.kind == RangeFragKind::LiveOut || frag1metrics.kind == RangeFragKind::Thru
+ );
+ assert!(
+ frag2metrics.kind == RangeFragKind::LiveIn || frag2metrics.kind == RangeFragKind::Thru
+ );
+ return true;
+ }
+
+ false
+}
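+
+// For instance (a purely hypothetical layout): if block B1 ends at instruction I7 and the
+// next block B2 starts at I8, then a fragment ending at I7.d (kind LiveOut or Thru) and a
+// fragment for the same register starting at I8.u (kind LiveIn or Thru) pass the test
+// above, and the compression loop below folds such a pair into a single fragment running
+// from the first fragment's start point to the second fragment's end point.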
+
+// HELPER FUNCTION
+// Create a compressed version of the fragments listed in `sorted_frag_ixs`,
+// taking the opportunity to dereference them (look them up in `frag_env`) in
+// the process. Assumes that `sorted_frag_ixs` is indeed ordered so that the
+// dereferenced frag sequence is in instruction order.
+#[inline(never)]
+fn deref_and_compress_sorted_range_frag_ixs(
+ stats_num_vfrags_uncompressed: &mut usize,
+ stats_num_vfrags_compressed: &mut usize,
+ sorted_frag_ixs: &SortedRangeFragIxs,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
+) -> SortedRangeFrags {
+ let mut res = SortedRangeFrags::empty();
+
+ let frag_ixs = &sorted_frag_ixs.frag_ixs;
+ let num_frags = frag_ixs.len();
+ *stats_num_vfrags_uncompressed += num_frags;
+
+ if num_frags == 1 {
+ // Nothing we can do. Shortcut.
+ res.frags.push(frag_env[frag_ixs[0]].clone());
+ *stats_num_vfrags_compressed += 1;
+ return res;
+ }
+
+ // BEGIN merge this frag sequence as much as possible
+ assert!(num_frags > 1);
+
+ let mut s = 0; // start point of current group
+ let mut e = 0; // end point of current group
+ loop {
+ if s >= num_frags {
+ break;
+ }
+ while e + 1 < num_frags
+ && frags_are_mergeable(
+ &frag_env[frag_ixs[e]],
+ &frag_metrics_env[frag_ixs[e]],
+ &frag_env[frag_ixs[e + 1]],
+ &frag_metrics_env[frag_ixs[e + 1]],
+ )
+ {
+ e += 1;
+ }
+ // s to e inclusive is a maximal group
+ // emit (s, e)
+ if s == e {
+ // Can't compress this one
+ res.frags.push(frag_env[frag_ixs[s]].clone());
+ } else {
+ let compressed_frag = RangeFrag {
+ first: frag_env[frag_ixs[s]].first,
+ last: frag_env[frag_ixs[e]].last,
+ };
+ res.frags.push(compressed_frag);
+ }
+ // move on
+ s = e + 1;
+ e = s;
+ }
+ // END merge this frag sequence as much as possible
+
+ *stats_num_vfrags_compressed += res.frags.len();
+ res
+}
+
+// HELPER FUNCTION
+// Computes the `size`, `total_cost` and `spill_cost` values for a
+// VirtualRange, while being very careful to avoid overflow.
+fn calc_virtual_range_metrics(
+ sorted_frag_ixs: &SortedRangeFragIxs,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
+ estimated_frequencies: &TypedIxVec<BlockIx, u32>,
+) -> (u16, u32, SpillCost) {
+ assert!(frag_env.len() == frag_metrics_env.len());
+
+ let mut tot_size: u32 = 0;
+ let mut tot_cost: u32 = 0;
+
+ for fix in &sorted_frag_ixs.frag_ixs {
+ let frag = &frag_env[*fix];
+ let frag_metrics = &frag_metrics_env[*fix];
+
+ // Add on the size of this fragment, but make sure we can't
+ // overflow a u32 no matter how many fragments there are.
+ let mut frag_size: u32 = frag.last.iix().get() - frag.first.iix().get() + 1;
+ frag_size = min(frag_size, 0xFFFFu32);
+ tot_size += frag_size;
+ tot_size = min(tot_size, 0xFFFFu32);
+
+        // Here, tot_size <= 0xFFFF. frag_metrics.count is u16. estFreq[] is u32.
+ // We must be careful not to overflow tot_cost, which is u32.
+ let mut new_tot_cost: u64 = frag_metrics.count as u64; // at max 16 bits
+ new_tot_cost *= estimated_frequencies[frag_metrics.bix] as u64; // at max 48 bits
+ new_tot_cost += tot_cost as u64; // at max 48 bits + epsilon
+ new_tot_cost = min(new_tot_cost, 0xFFFF_FFFFu64);
+
+ // Hence this is safe.
+ tot_cost = new_tot_cost as u32;
+ }
+
+ debug_assert!(tot_size <= 0xFFFF);
+ let size = tot_size as u16;
+ let total_cost = tot_cost;
+
+    // Divide tot_cost by the total length, so as to increase the apparent
+    // spill cost of short LRs. This gives short LRs an advantage in the
+    // competition for registers. This seems a bit of a hack to me, but hey ..
+ debug_assert!(tot_size >= 1);
+ let spill_cost = SpillCost::finite(tot_cost as f32 / tot_size as f32);
+
+ (size, total_cost, spill_cost)
+}
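+
+// A small worked example (with made-up numbers): two fragments of sizes 3 and 5 insns,
+// with mention counts 2 and 4, in blocks whose estimated frequencies are 10 and 100,
+// give size = 3 + 5 = 8, total_cost = 2*10 + 4*100 = 420, and
+// spill_cost = 420 / 8 = 52.5. Both accumulations saturate (at 0xFFFF and 0xFFFF_FFFF
+// respectively) rather than overflowing.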
+
+// MAIN FUNCTION in this section
+#[inline(never)]
+fn create_and_add_range(
+ stats_num_vfrags_uncompressed: &mut usize,
+ stats_num_vfrags_compressed: &mut usize,
+ result_real: &mut TypedIxVec<RealRangeIx, RealRange>,
+ result_virtual: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
+ reg: Reg,
+ sorted_frag_ixs: SortedRangeFragIxs,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
+ estimated_frequencies: &TypedIxVec<BlockIx, u32>,
+) {
+ if reg.is_virtual() {
+ // First, compute the VirtualRange metrics. This has to be done
+ // before fragment compression.
+ let (size, total_cost, spill_cost) = calc_virtual_range_metrics(
+ &sorted_frag_ixs,
+ frag_env,
+ frag_metrics_env,
+ estimated_frequencies,
+ );
+
+ // Now it's safe to compress the fragments.
+ let sorted_frags = deref_and_compress_sorted_range_frag_ixs(
+ stats_num_vfrags_uncompressed,
+ stats_num_vfrags_compressed,
+ &sorted_frag_ixs,
+ frag_env,
+ frag_metrics_env,
+ );
+
+ result_virtual.push(VirtualRange {
+ vreg: reg.to_virtual_reg(),
+ rreg: None,
+ sorted_frags,
+ is_ref: false, // analysis_reftypes.rs may later change this
+ size,
+ total_cost,
+ spill_cost,
+ });
+ } else {
+ result_real.push(RealRange {
+ rreg: reg.to_real_reg(),
+ sorted_frags: sorted_frag_ixs,
+ is_ref: false, // analysis_reftypes.rs may later change this
+ });
+ }
+}
+
+//=============================================================================
+// Merging of RangeFrags, producing the final LRs, including metrication and
+// compression
+
+// We need this in order to construct a UnionFind<usize>.
+impl ToFromU32 for usize {
+ // 64 bit
+ #[cfg(target_pointer_width = "64")]
+ fn to_u32(x: usize) -> u32 {
+ if x < 0x1_0000_0000usize {
+ x as u32
+ } else {
+ panic!("impl ToFromU32 for usize: to_u32: out of range")
+ }
+ }
+ #[cfg(target_pointer_width = "64")]
+ fn from_u32(x: u32) -> usize {
+ x as usize
+ }
+ // 32 bit
+ #[cfg(target_pointer_width = "32")]
+ fn to_u32(x: usize) -> u32 {
+ x as u32
+ }
+ #[cfg(target_pointer_width = "32")]
+ fn from_u32(x: u32) -> usize {
+ x as usize
+ }
+}
+
+#[inline(never)]
+pub fn merge_range_frags(
+ frag_ix_vec_per_reg: &Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
+ estimated_frequencies: &TypedIxVec<BlockIx, u32>,
+ cfg_info: &CFGInfo,
+ reg_universe: &RealRegUniverse,
+ vreg_classes: &Vec</*vreg index,*/ RegClass>,
+) -> (
+ TypedIxVec<RealRangeIx, RealRange>,
+ TypedIxVec<VirtualRangeIx, VirtualRange>,
+) {
+ assert!(frag_env.len() == frag_metrics_env.len());
+ let mut stats_num_total_incoming_frags = 0;
+ let mut stats_num_total_incoming_regs = 0;
+ for all_frag_ixs_for_reg in frag_ix_vec_per_reg {
+ stats_num_total_incoming_frags += all_frag_ixs_for_reg.len();
+ if all_frag_ixs_for_reg.len() > 0 {
+ stats_num_total_incoming_regs += 1;
+ }
+ }
+ info!(" merge_range_frags: begin");
+ info!(" in: {} in frag_env", frag_env.len());
+ info!(
+ " in: {} regs containing in total {} frags",
+ stats_num_total_incoming_regs, stats_num_total_incoming_frags
+ );
+
+ let mut stats_num_single_grps = 0;
+ let mut stats_num_local_frags = 0;
+
+ let mut stats_num_multi_grps_small = 0;
+ let mut stats_num_multi_grps_large = 0;
+ let mut stats_size_multi_grps_small = 0;
+ let mut stats_size_multi_grps_large = 0;
+
+ let mut stats_num_vfrags_uncompressed = 0;
+ let mut stats_num_vfrags_compressed = 0;
+
+ let mut result_real = TypedIxVec::<RealRangeIx, RealRange>::new();
+ let mut result_virtual = TypedIxVec::<VirtualRangeIx, VirtualRange>::new();
+
+ // BEGIN per_reg_loop
+ for (reg_ix, all_frag_ixs_for_reg) in frag_ix_vec_per_reg.iter().enumerate() {
+ let n_frags_for_this_reg = all_frag_ixs_for_reg.len();
+
+ // The reg might never have been mentioned at all, especially if it's a real reg.
+ if n_frags_for_this_reg == 0 {
+ continue;
+ }
+
+ let reg_ix = reg_ix as u32;
+ let reg = reg_ix_to_reg(reg_universe, vreg_classes, reg_ix);
+
+ // Do some shortcutting. First off, if there's only one frag for this reg, we can directly
+ // give it its own live range, and have done.
+ if n_frags_for_this_reg == 1 {
+ create_and_add_range(
+ &mut stats_num_vfrags_uncompressed,
+ &mut stats_num_vfrags_compressed,
+ &mut result_real,
+ &mut result_virtual,
+ reg,
+ SortedRangeFragIxs::unit(all_frag_ixs_for_reg[0], frag_env),
+ frag_env,
+ frag_metrics_env,
+ estimated_frequencies,
+ );
+ stats_num_single_grps += 1;
+ continue;
+ }
+
+ // BEGIN merge `all_frag_ixs_for_reg` entries as much as possible.
+ //
+        // But .. if we come across independents (RangeFragKind::Local), pull them out immediately.
+
+ // Try to avoid heap allocation if at all possible. Up to 100 entries are very
+ // common, so this is sized large to be effective. Each entry is definitely
+ // 16 bytes at most, so this will use 4KB stack at most, which is reasonable.
+ let mut triples = SmallVec::<[(RangeFragIx, RangeFragKind, BlockIx); 256]>::new();
+
+ // Create `triples`. We will use it to guide the merging phase, but it is immutable there.
+ for fix in all_frag_ixs_for_reg {
+ let frag_metrics = &frag_metrics_env[*fix];
+
+ if frag_metrics.kind == RangeFragKind::Local {
+ // This frag is Local (standalone). Give it its own Range and move on. This is an
+ // optimisation, but it's also necessary: the main fragment-merging logic below
+ // relies on the fact that the fragments it is presented with are all either
+ // LiveIn, LiveOut or Thru.
+ create_and_add_range(
+ &mut stats_num_vfrags_uncompressed,
+ &mut stats_num_vfrags_compressed,
+ &mut result_real,
+ &mut result_virtual,
+ reg,
+ SortedRangeFragIxs::unit(*fix, frag_env),
+ frag_env,
+ frag_metrics_env,
+ estimated_frequencies,
+ );
+ stats_num_local_frags += 1;
+ continue;
+ }
+
+ // This frag isn't Local (standalone) so we have to process it the slow way.
+ triples.push((*fix, frag_metrics.kind, frag_metrics.bix));
+ }
+
+ let triples_len = triples.len();
+
+ // This is the core of the merging algorithm.
+ //
+ // For each ix@(fix, kind, bix) in `triples` (order unimportant):
+ //
+ // (1) "Merge with blocks that are live 'downstream' from here":
+ // if fix is live-out or live-through:
+ // for b in succs[bix]
+ // for each ix2@(fix2, kind2, bix2) in `triples`
+ // if bix2 == b && kind2 is live-in or live-through:
+ // merge(ix, ix2)
+ //
+ // (2) "Merge with blocks that are live 'upstream' from here":
+ // if fix is live-in or live-through:
+ // for b in preds[bix]
+ // for each ix2@(fix2, kind2, bix2) in `triples`
+ // if bix2 == b && kind2 is live-out or live-through:
+ // merge(ix, ix2)
+ //
+ // `triples` remains unchanged. The equivalence class info is accumulated
+ // in `eclasses_uf` instead. `eclasses_uf` entries are indices into
+ // `triples`.
+ //
+        // Now, you might think it necessary to do both (1) and (2). But no, they
+        // are mutually redundant: any merge that (2) would discover at some triple
+        // is also discovered by (1) when processing the corresponding triple in the
+        // predecessor block. Hence checking one of the directions is enough.
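+        //
+        // As a small illustrative example: suppose this reg has three triples, one per
+        // block, with kinds Thru in B1, Thru in B2 and LiveIn in B3, where B1 -> B2 and
+        // B2 -> B3 are CFG edges. Step (1) applied to the B1 triple merges it with the
+        // B2 triple, and applied to the B2 triple merges that with the B3 triple, so
+        // all three end up in one equivalence class and hence in a single live range.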
+ let mut eclasses_uf = UnionFind::<usize>::new(triples_len);
+
+ // We have two schemes for group merging, one of which is N^2 in the
+ // length of triples, the other is N-log-N, but with higher constant
+ // factors. Some experimentation with the bz2 test on a Cortex A57 puts
+ // the optimal crossover point between 200 and 300; it's not critical.
+ // Having this protects us against bad behaviour for huge inputs whilst
+ // still being fast for small inputs.
+ if triples_len <= 250 {
+ // The simple way, which is N^2 in the length of `triples`.
+ for (ix, (_fix, kind, bix)) in triples.iter().enumerate() {
+ // Deal with liveness flows outbound from `fix`. Meaning, (1) above.
+ if *kind == RangeFragKind::LiveOut || *kind == RangeFragKind::Thru {
+ for b in cfg_info.succ_map[*bix].iter() {
+ // Visit all entries in `triples` that are for `b`.
+ for (ix2, (_fix2, kind2, bix2)) in triples.iter().enumerate() {
+ if *bix2 != *b || *kind2 == RangeFragKind::LiveOut {
+ continue;
+ }
+ debug_assert!(
+ *kind2 == RangeFragKind::LiveIn || *kind2 == RangeFragKind::Thru
+ );
+ // Now we know that liveness for this reg "flows" from `triples[ix]` to
+ // `triples[ix2]`. So those two frags must be part of the same live
+ // range. Note this.
+ if ix != ix2 {
+ eclasses_uf.union(ix, ix2); // Order of args irrelevant
+ }
+ }
+ }
+ }
+ } // outermost iteration over `triples`
+
+ stats_num_multi_grps_small += 1;
+ stats_size_multi_grps_small += triples_len;
+ } else {
+ // The more complex way, which is N-log-N in the length of `triples`. This is the same
+ // as the simple way, except that the innermost loop, which is a linear search in
+ // `triples` to find entries for some block `b`, is replaced by a binary search. This
+ // means that `triples` first needs to be sorted by block index.
+ triples.sort_unstable_by_key(|(_, _, bix)| *bix);
+
+ for (ix, (_fix, kind, bix)) in triples.iter().enumerate() {
+ // Deal with liveness flows outbound from `fix`. Meaning, (1) above.
+ if *kind == RangeFragKind::LiveOut || *kind == RangeFragKind::Thru {
+ for b in cfg_info.succ_map[*bix].iter() {
+ // Visit all entries in `triples` that are for `b`. Binary search
+ // `triples` to find the lowest-indexed entry for `b`.
+ let mut ix_left = 0;
+ let mut ix_right = triples_len;
+ while ix_left < ix_right {
+ let m = (ix_left + ix_right) >> 1;
+ if triples[m].2 < *b {
+ ix_left = m + 1;
+ } else {
+ ix_right = m;
+ }
+ }
+
+ // It might be that there is no block for `b` in the sequence. That's
+ // legit; it just means that block `bix` jumps to a successor where the
+ // associated register isn't live-in/thru. A failure to find `b` can be
+                        // indicated in one of two ways:
+                        //
+                        // * ix_left == triples_len
+                        // * ix_left < triples_len and b < triples[ix_left].2
+                        //
+                        // In both cases I *think* the loop over entries for `b` below will not do
+ // anything. But this is all a bit hairy, so let's convert the second
+ // variant into the first, so as to make it obvious that the loop won't do
+ // anything.
+
+ // ix_left now holds the lowest index of any `triples` entry for block `b`.
+ // Assert this.
+ if ix_left < triples_len && *b < triples[ix_left].2 {
+ ix_left = triples_len;
+ }
+ if ix_left < triples_len {
+ assert!(ix_left == 0 || triples[ix_left - 1].2 < *b);
+ }
+
+ // ix2 plays the same role as in the quadratic version. ix_left and
+ // ix_right are not used after this point.
+ let mut ix2 = ix_left;
+ loop {
+ let (_fix2, kind2, bix2) = match triples.get(ix2) {
+ None => break,
+ Some(triple) => *triple,
+ };
+ if *b < bix2 {
+ // We've come to the end of the sequence of `b`-blocks.
+ break;
+ }
+ debug_assert!(*b == bix2);
+ if kind2 == RangeFragKind::LiveOut {
+ ix2 += 1;
+ continue;
+ }
+ // Now we know that liveness for this reg "flows" from `triples[ix]` to
+ // `triples[ix2]`. So those two frags must be part of the same live
+ // range. Note this.
+ eclasses_uf.union(ix, ix2);
+ ix2 += 1;
+ }
+
+ if ix2 + 1 < triples_len {
+ debug_assert!(*b < triples[ix2 + 1].2);
+ }
+ }
+ }
+ }
+
+ stats_num_multi_grps_large += 1;
+ stats_size_multi_grps_large += triples_len;
+ }
+
+ // Now `eclasses_uf` contains the results of the merging-search. Visit each of its
+ // equivalence classes in turn, and convert each into a virtual or real live range as
+ // appropriate.
+ let eclasses = eclasses_uf.get_equiv_classes();
+ for leader_triple_ix in eclasses.equiv_class_leaders_iter() {
+ // `leader_triple_ix` is an eclass leader. Enumerate the whole eclass.
+ let mut frag_ixs = SmallVec::<[RangeFragIx; 4]>::new();
+ for triple_ix in eclasses.equiv_class_elems_iter(leader_triple_ix) {
+ frag_ixs.push(triples[triple_ix].0 /*first field is frag ix*/);
+ }
+ let sorted_frags = SortedRangeFragIxs::new(frag_ixs, &frag_env);
+ create_and_add_range(
+ &mut stats_num_vfrags_uncompressed,
+ &mut stats_num_vfrags_compressed,
+ &mut result_real,
+ &mut result_virtual,
+ reg,
+ sorted_frags,
+ frag_env,
+ frag_metrics_env,
+ estimated_frequencies,
+ );
+ }
+ // END merge `all_frag_ixs_for_reg` entries as much as possible
+ } // END per reg loop
+
+ info!(" in: {} single groups", stats_num_single_grps);
+ info!(
+ " in: {} local frags in multi groups",
+ stats_num_local_frags
+ );
+ info!(
+ " in: {} small multi groups, {} small multi group total size",
+ stats_num_multi_grps_small, stats_size_multi_grps_small
+ );
+ info!(
+ " in: {} large multi groups, {} large multi group total size",
+ stats_num_multi_grps_large, stats_size_multi_grps_large
+ );
+ info!(
+ " out: {} VLRs, {} RLRs",
+ result_virtual.len(),
+ result_real.len()
+ );
+ info!(
+ " compress vfrags: in {}, out {}",
+ stats_num_vfrags_uncompressed, stats_num_vfrags_compressed
+ );
+ info!(" merge_range_frags: end");
+
+ (result_real, result_virtual)
+}
+
+//=============================================================================
+// Auxiliary activities that mostly fall under the category "dataflow analysis", but are not
+// part of the main dataflow analysis pipeline.
+
+// Dataflow and liveness together create vectors of VirtualRanges and RealRanges. These define
+// (amongst other things) mappings from VirtualRanges to VirtualRegs and from RealRanges to
+// RealRegs. However, we often need the inverse mappings: from VirtualRegs to (sets of
+// VirtualRanges) and from RealRegs to (sets of) RealRanges. This function computes those
+// inverse mappings. They are used by BT's coalescing analysis, and for the dataflow analysis
+// that supports reftype handling.
+#[inline(never)]
+pub fn compute_reg_to_ranges_maps<F: Function>(
+ func: &F,
+ univ: &RealRegUniverse,
+ rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+) -> RegToRangesMaps {
+ // Arbitrary, but chosen after quite some profiling, so as to minimise both instruction
+ // count and number of `malloc` calls. Don't mess with this without first collecting
+ // comprehensive measurements. Note that if you set this above 255, the type of
+ // `r/vreg_approx_frag_counts` below will need to change accordingly.
+ const MANY_FRAGS_THRESH: u8 = 200;
+
+ // Adds `to_add` to `*counter`, taking care not to overflow it in the process.
+ let add_u8_usize_saturate_to_u8 = |counter: &mut u8, mut to_add: usize| {
+ if to_add > 0xFF {
+ to_add = 0xFF;
+ }
+ let mut n = *counter as usize;
+ n += to_add as usize;
+ // n is at max 0x1FE (510)
+ if n > 0xFF {
+ n = 0xFF;
+ }
+ *counter = n as u8;
+ };
+
+ // We have in hand the virtual live ranges. Each of these carries its
+ // associated vreg. So in effect we have a VLR -> VReg mapping. We now
+ // invert that, so as to generate a mapping from VRegs to their containing
+ // VLRs.
+ //
+ // Note that multiple VLRs may map to the same VReg. So the inverse mapping
+ // will actually be from VRegs to a set of VLRs. In most cases, we expect
+ // the virtual-registerised-code given to this allocator to be derived from
+ // SSA, in which case each VReg will have only one VLR. So in this case,
+ // the cost of first creating the mapping, and then looking up all the VRegs
+ // in moves in it, will have cost linear in the size of the input function.
+ //
+ // NB re the SmallVec. That has set semantics (no dups).
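+    //
+    // For example (hypothetical indices): if the VLRs are VLR0 and VLR1, both for v3,
+    // and VLR2 for v7, then the inverse map ends up with vreg_to_vlrs_map[3] ==
+    // [VLR0, VLR1] and vreg_to_vlrs_map[7] == [VLR2], all other entries being empty.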
+
+ let num_vregs = func.get_num_vregs();
+ let num_rregs = univ.allocable;
+
+ let mut vreg_approx_frag_counts = vec![0u8; num_vregs];
+ let mut vreg_to_vlrs_map = vec![SmallVec::<[VirtualRangeIx; 3]>::new(); num_vregs];
+ for (vlr, n) in vlr_env.iter().zip(0..) {
+ let vlrix = VirtualRangeIx::new(n);
+ let vreg: VirtualReg = vlr.vreg;
+ // Now we know that there's a VLR `vlr` that is for VReg `vreg`. Update the inverse
+ // mapping accordingly. We know we are stepping sequentially through the VLR (index)
+ // space, so we'll never see the same VLRIx twice. Hence there's no need to check for
+ // dups when adding a VLR index to an existing binding for a VReg.
+ //
+ // If this array-indexing fails, it means the client's `.get_num_vregs()` function
+ // claims there are fewer virtual regs than we actually observe in the code it gave us.
+ // So it's a bug in the client.
+ let vreg_index = vreg.get_index();
+ vreg_to_vlrs_map[vreg_index].push(vlrix);
+
+ let vlr_num_frags = vlr.sorted_frags.frags.len();
+ add_u8_usize_saturate_to_u8(&mut vreg_approx_frag_counts[vreg_index], vlr_num_frags);
+ }
+
+ // Same for the real live ranges.
+ let mut rreg_approx_frag_counts = vec![0u8; num_rregs];
+ let mut rreg_to_rlrs_map = vec![SmallVec::<[RealRangeIx; 6]>::new(); num_rregs];
+ for (rlr, n) in rlr_env.iter().zip(0..) {
+ let rlrix = RealRangeIx::new(n);
+ let rreg: RealReg = rlr.rreg;
+ // If this array-indexing fails, it means something has gone wrong with sanitisation of
+ // real registers -- that should ensure that we never see a real register with an index
+ // greater than `univ.allocable`. So it's a bug in the allocator's analysis phases.
+ let rreg_index = rreg.get_index();
+ rreg_to_rlrs_map[rreg_index].push(rlrix);
+
+ let rlr_num_frags = rlr.sorted_frags.frag_ixs.len();
+ add_u8_usize_saturate_to_u8(&mut rreg_approx_frag_counts[rreg_index], rlr_num_frags);
+ }
+
+    // Create sets indicating which regs have "many" frags. Hopefully there are very few such regs.
+ // Since the `push`ed-in values are supplied by the `zip(0..)` iterator, they are
+ // guaranteed duplicate-free, as required by the defn of `RegToRangesMaps`.
+ let mut vregs_with_many_frags = Vec::<u32 /*VirtualReg index*/>::with_capacity(16);
+ for (count, vreg_ix) in vreg_approx_frag_counts.iter().zip(0..) {
+ if *count >= MANY_FRAGS_THRESH {
+ vregs_with_many_frags.push(vreg_ix);
+ }
+ }
+
+ let mut rregs_with_many_frags = Vec::<u32 /*RealReg index*/>::with_capacity(64);
+ for (count, rreg_ix) in rreg_approx_frag_counts.iter().zip(0..) {
+ if *count >= MANY_FRAGS_THRESH {
+ rregs_with_many_frags.push(rreg_ix);
+ }
+ }
+
+ RegToRangesMaps {
+ rreg_to_rlrs_map,
+ vreg_to_vlrs_map,
+ rregs_with_many_frags,
+ vregs_with_many_frags,
+ many_frags_thresh: MANY_FRAGS_THRESH as usize,
+ }
+}
+
+// Collect info about registers that are connected by moves.
+#[inline(never)]
+pub fn collect_move_info<F: Function>(
+ func: &F,
+ reg_vecs_and_bounds: &RegVecsAndBounds,
+ est_freqs: &TypedIxVec<BlockIx, u32>,
+) -> MoveInfo {
+ let mut moves = Vec::<MoveInfoElem>::new();
+ for b in func.blocks() {
+ let block_eef = est_freqs[b];
+ for iix in func.block_insns(b) {
+ let insn = &func.get_insn(iix);
+ let im = func.is_move(insn);
+ match im {
+ None => {}
+ Some((wreg, reg)) => {
+ let iix_bounds = &reg_vecs_and_bounds.bounds[iix];
+ // It might seem strange to assert that `defs_len` and/or
+ // `uses_len` is <= 1 rather than == 1. The reason is
+ // that either or even both registers might be ones which
+ // are not available to the allocator. Hence they will
+ // have been removed by the sanitisation machinery before
+ // we get to this point. If either is missing, we
+ // unfortunately can't coalesce the move away, and just
+ // have to live with it.
+ //
+ // If any of the following five assertions fail, the
+ // client's `is_move` is probably lying to us.
+ assert!(iix_bounds.uses_len <= 1);
+ assert!(iix_bounds.defs_len <= 1);
+ assert!(iix_bounds.mods_len == 0);
+ if iix_bounds.uses_len == 1 && iix_bounds.defs_len == 1 {
+ let reg_vecs = &reg_vecs_and_bounds.vecs;
+ assert!(reg_vecs.uses[iix_bounds.uses_start as usize] == reg);
+ assert!(reg_vecs.defs[iix_bounds.defs_start as usize] == wreg.to_reg());
+ let dst = wreg.to_reg();
+ let src = reg;
+ let est_freq = block_eef;
+ moves.push(MoveInfoElem {
+ dst,
+ src,
+ iix,
+ est_freq,
+ });
+ }
+ }
+ }
+ }
+ }
+
+ MoveInfo { moves }
+}
diff --git a/third_party/rust/regalloc/src/analysis_main.rs b/third_party/rust/regalloc/src/analysis_main.rs
new file mode 100644
index 0000000000..105ab338de
--- /dev/null
+++ b/third_party/rust/regalloc/src/analysis_main.rs
@@ -0,0 +1,317 @@
+//! Top level module for all analysis activities.
+
+use log::{debug, info};
+
+use crate::analysis_control_flow::{CFGInfo, InstIxToBlockIxMap};
+use crate::analysis_data_flow::{
+ calc_def_and_use, calc_livein_and_liveout, collect_move_info, compute_reg_to_ranges_maps,
+ get_range_frags, get_sanitized_reg_uses_for_func, merge_range_frags,
+};
+use crate::analysis_reftypes::do_reftypes_analysis;
+use crate::data_structures::{
+ BlockIx, MoveInfo, RangeFrag, RangeFragIx, RangeFragMetrics, RealRange, RealRangeIx, RealReg,
+ RealRegUniverse, RegClass, RegToRangesMaps, RegVecsAndBounds, TypedIxVec, VirtualRange,
+ VirtualRangeIx, VirtualReg,
+};
+use crate::sparse_set::SparseSet;
+use crate::AlgorithmWithDefaults;
+use crate::{Function, Reg};
+
+//=============================================================================
+// Overall analysis return results, for both control- and data-flow analyses.
+// All of these failures refer to various problems with the code that the
+// client (caller) supplied to us.
+
+#[derive(Clone, Debug)]
+pub enum AnalysisError {
+ /// A critical edge from "from" to "to" has been found, and should have been
+ /// removed by the caller in the first place.
+ CriticalEdge { from: BlockIx, to: BlockIx },
+
+ /// Some values in the entry block are live in to the function, but are not
+ /// declared as such.
+ EntryLiveinValues(Vec<Reg>),
+
+ /// The incoming code has an explicit or implicit mention (use, def or mod)
+ /// of a real register, which either (1) isn't listed in the universe at
+ /// all, or (2) is one of the `suggested_scratch` registers in the universe.
+ /// (1) isn't allowed because the client must mention *all* real registers
+    /// (1) isn't allowed because the universe must list *all* real registers that
+    /// the incoming code mentions. (2) isn't allowed because the client promises to us
+ /// the incoming code, so that the allocator can use them at literally any
+ /// point it wants.
+ IllegalRealReg(RealReg),
+
+ /// At least one block is dead.
+ UnreachableBlocks,
+
+ /// Implementation limits exceeded. The incoming function is too big. It
+ /// may contain at most 1 million basic blocks and 16 million instructions.
+ ImplementationLimitsExceeded,
+
+ /// Currently LSRA can't generate stackmaps, but the client has requested LSRA *and*
+ /// stackmaps.
+ LSRACantDoStackmaps,
+}
+
+impl ToString for AnalysisError {
+ fn to_string(&self) -> String {
+ match self {
+ AnalysisError::CriticalEdge { from, to } => {
+ format!("critical edge detected, from {:?} to {:?}", from, to)
+ }
+ AnalysisError::EntryLiveinValues(regs) => {
+ let regs_string = regs.iter().map(|reg| format!("{:?}", reg)).collect::<Vec<_>>().join(", ");
+                format!("entry block has live-in values not present in function liveins: {}", regs_string)
+ }
+ AnalysisError::IllegalRealReg(reg) => {
+ format!("instructions mention real register {:?}, which either isn't defined in the register universe, or is a 'suggested_scratch' register", reg)
+ }
+ AnalysisError::UnreachableBlocks => {
+ "at least one block is unreachable".to_string()
+ }
+ AnalysisError::ImplementationLimitsExceeded => {
+ "implementation limits exceeded (more than 1 million blocks or 16 million insns)".to_string()
+ }
+ AnalysisError::LSRACantDoStackmaps => {
+ "LSRA *and* stackmap creation requested; but this combination is not yet supported".to_string()
+ }
+ }
+ }
+}
+
+//=============================================================================
+// Top level for all analysis activities.
+
+pub struct AnalysisInfo {
+ /// The sanitized per-insn reg-use info
+ pub(crate) reg_vecs_and_bounds: RegVecsAndBounds,
+ /// The real-reg live ranges
+ pub(crate) real_ranges: TypedIxVec<RealRangeIx, RealRange>,
+ /// The virtual-reg live ranges
+ pub(crate) virtual_ranges: TypedIxVec<VirtualRangeIx, VirtualRange>,
+ /// The fragment table
+ pub(crate) range_frags: TypedIxVec<RangeFragIx, RangeFrag>,
+ /// The fragment metrics table
+ pub(crate) range_metrics: TypedIxVec<RangeFragIx, RangeFragMetrics>,
+ /// Estimated execution frequency per block
+ pub(crate) estimated_frequencies: TypedIxVec<BlockIx, u32>,
+ /// Maps InstIxs to BlockIxs
+ pub(crate) inst_to_block_map: InstIxToBlockIxMap,
+ /// Maps from RealRegs to sets of RealRanges and VirtualRegs to sets of VirtualRanges
+ /// (all operating on indices, not the actual objects). This is only generated in
+ /// situations where we need it, hence the `Option`.
+ pub(crate) reg_to_ranges_maps: Option<RegToRangesMaps>,
+ /// Information about registers connected by moves. This is only generated in situations
+ /// where we need it, hence the `Option`.
+ pub(crate) move_info: Option<MoveInfo>,
+}
+
+#[inline(never)]
+pub fn run_analysis<F: Function>(
+ func: &F,
+ reg_universe: &RealRegUniverse,
+ algorithm: AlgorithmWithDefaults,
+ client_wants_stackmaps: bool,
+ reftype_class: RegClass,
+ reftyped_vregs: &Vec<VirtualReg>, // as supplied by the client
+) -> Result<AnalysisInfo, AnalysisError> {
+ info!("run_analysis: begin");
+ info!(
+ " run_analysis: {} blocks, {} insns",
+ func.blocks().len(),
+ func.insns().len()
+ );
+
+ // LSRA can't do reftypes yet. That should have been checked at the top level already.
+ if client_wants_stackmaps {
+ assert!(algorithm != AlgorithmWithDefaults::LinearScan);
+ }
+
+ info!(" run_analysis: begin control flow analysis");
+
+ // First do control flow analysis. This is (relatively) simple. Note that
+ // this can fail, for various reasons; we propagate the failure if so.
+ let cfg_info = CFGInfo::create(func)?;
+
+ // Create the InstIx-to-BlockIx map. This isn't really control-flow
+ // analysis, but needs to be done at some point.
+ let inst_to_block_map = InstIxToBlockIxMap::new(func);
+
+ // Annotate each Block with its estimated execution frequency
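+    // (a block at loop depth d gets an estimated frequency of 10^min(d, 3): 1 for
+    // straight-line code, then 10, 100 and 1000 for successively deeper loop nests).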
+ let mut estimated_frequencies = TypedIxVec::new();
+ for bix in func.blocks() {
+ let mut estimated_frequency = 1;
+ let depth = u32::min(cfg_info.depth_map[bix], 3);
+ for _ in 0..depth {
+ estimated_frequency *= 10;
+ }
+ assert!(bix == BlockIx::new(estimated_frequencies.len()));
+ estimated_frequencies.push(estimated_frequency);
+ }
+
+ info!(" run_analysis: end control flow analysis");
+
+ // Now perform dataflow analysis. This is somewhat more complex.
+ info!(" run_analysis: begin data flow analysis");
+
+ // See `get_sanitized_reg_uses_for_func` for the meaning of "sanitized".
+ let reg_vecs_and_bounds = get_sanitized_reg_uses_for_func(func, reg_universe)
+ .map_err(|reg| AnalysisError::IllegalRealReg(reg))?;
+ assert!(reg_vecs_and_bounds.is_sanitized());
+
+ // Calculate block-local def/use sets.
+ let (def_sets_per_block, use_sets_per_block) =
+ calc_def_and_use(func, &reg_vecs_and_bounds, &reg_universe);
+ debug_assert!(def_sets_per_block.len() == func.blocks().len() as u32);
+ debug_assert!(use_sets_per_block.len() == func.blocks().len() as u32);
+
+ // Calculate live-in and live-out sets per block, using the traditional
+ // iterate-to-a-fixed-point scheme.
+
+ // `liveout_sets_per_block` is amended below for return blocks, hence `mut`.
+ let (livein_sets_per_block, mut liveout_sets_per_block) = calc_livein_and_liveout(
+ func,
+ &def_sets_per_block,
+ &use_sets_per_block,
+ &cfg_info,
+ &reg_universe,
+ );
+ debug_assert!(livein_sets_per_block.len() == func.blocks().len() as u32);
+ debug_assert!(liveout_sets_per_block.len() == func.blocks().len() as u32);
+
+ // Verify livein set of entry block against liveins specified by function
+ // (e.g., ABI params).
+ let func_liveins = SparseSet::from_vec(
+ func.func_liveins()
+ .to_vec()
+ .into_iter()
+ .map(|rreg| rreg.to_reg())
+ .collect(),
+ );
+ if !livein_sets_per_block[func.entry_block()].is_subset_of(&func_liveins) {
+ let mut regs = livein_sets_per_block[func.entry_block()].clone();
+ regs.remove(&func_liveins);
+ return Err(AnalysisError::EntryLiveinValues(regs.to_vec()));
+ }
+
+ // Add function liveouts to every block ending in a return.
+ let func_liveouts = SparseSet::from_vec(
+ func.func_liveouts()
+ .to_vec()
+ .into_iter()
+ .map(|rreg| rreg.to_reg())
+ .collect(),
+ );
+ for block in func.blocks() {
+ let last_iix = func.block_insns(block).last();
+ if func.is_ret(last_iix) {
+ liveout_sets_per_block[block].union(&func_liveouts);
+ }
+ }
+
+ info!(" run_analysis: end data flow analysis");
+
+ // Dataflow analysis is now complete. Now compute the virtual and real live
+ // ranges, in two steps: (1) compute RangeFrags, and (2) merge them
+ // together, guided by flow and liveness info, so as to create the final
+ // VirtualRanges and RealRanges.
+ info!(" run_analysis: begin liveness analysis");
+
+ let (frag_ixs_per_reg, frag_env, frag_metrics_env, vreg_classes) = get_range_frags(
+ func,
+ &reg_vecs_and_bounds,
+ &reg_universe,
+ &livein_sets_per_block,
+ &liveout_sets_per_block,
+ );
+
+ // These have to be mut because they may get changed below by the call to
+    // `do_reftypes_analysis`.
+ let (mut rlr_env, mut vlr_env) = merge_range_frags(
+ &frag_ixs_per_reg,
+ &frag_env,
+ &frag_metrics_env,
+ &estimated_frequencies,
+ &cfg_info,
+ &reg_universe,
+ &vreg_classes,
+ );
+
+ debug_assert!(liveout_sets_per_block.len() == estimated_frequencies.len());
+
+ debug!("");
+ let mut n = 0;
+ for rlr in rlr_env.iter() {
+ debug!(
+ "{:<4?} {}",
+ RealRangeIx::new(n),
+ rlr.show_with_rru(&reg_universe)
+ );
+ n += 1;
+ }
+
+ debug!("");
+ n = 0;
+ for vlr in vlr_env.iter() {
+ debug!("{:<4?} {:?}", VirtualRangeIx::new(n), vlr);
+ n += 1;
+ }
+
+ // Now a bit of auxiliary info collection, which isn't really either control- or data-flow
+ // analysis.
+
+ // For BT and/or reftypes, we'll also need the reg-to-ranges maps.
+ let reg_to_ranges_maps =
+ if client_wants_stackmaps || algorithm == AlgorithmWithDefaults::Backtracking {
+ Some(compute_reg_to_ranges_maps(
+ func,
+ &reg_universe,
+ &rlr_env,
+ &vlr_env,
+ ))
+ } else {
+ None
+ };
+
+ // For BT and/or reftypes, we'll also need information about moves.
+ let move_info = if client_wants_stackmaps || algorithm == AlgorithmWithDefaults::Backtracking {
+ Some(collect_move_info(
+ func,
+ &reg_vecs_and_bounds,
+ &estimated_frequencies,
+ ))
+ } else {
+ None
+ };
+
+ info!(" run_analysis: end liveness analysis");
+
+ if client_wants_stackmaps {
+ info!(" run_analysis: begin reftypes analysis");
+ do_reftypes_analysis(
+ &mut rlr_env,
+ &mut vlr_env,
+ &frag_env,
+ reg_to_ranges_maps.as_ref().unwrap(), /* safe because of logic just above */
+ &move_info.as_ref().unwrap(), /* ditto */
+ reftype_class,
+ reftyped_vregs,
+ );
+ info!(" run_analysis: end reftypes analysis");
+ }
+
+ info!("run_analysis: end");
+
+ Ok(AnalysisInfo {
+ reg_vecs_and_bounds,
+ real_ranges: rlr_env,
+ virtual_ranges: vlr_env,
+ range_frags: frag_env,
+ range_metrics: frag_metrics_env,
+ estimated_frequencies,
+ inst_to_block_map,
+ reg_to_ranges_maps,
+ move_info,
+ })
+}
diff --git a/third_party/rust/regalloc/src/analysis_reftypes.rs b/third_party/rust/regalloc/src/analysis_reftypes.rs
new file mode 100644
index 0000000000..2a0aafa0d2
--- /dev/null
+++ b/third_party/rust/regalloc/src/analysis_reftypes.rs
@@ -0,0 +1,137 @@
+//! Performs a simple taint analysis, to find all live ranges that are reftyped.
+
+use crate::data_structures::{
+ InstPoint, Map, MoveInfo, MoveInfoElem, RangeFrag, RangeFragIx, RangeId, RealRange,
+ RealRangeIx, Reg, RegClass, RegToRangesMaps, TypedIxVec, VirtualRange, VirtualRangeIx,
+ VirtualReg,
+};
+use crate::sparse_set::{SparseSet, SparseSetU};
+
+use log::debug;
+use smallvec::SmallVec;
+
+pub fn do_reftypes_analysis(
+ // From dataflow/liveness analysis. Modified by setting their is_ref bit.
+ rlr_env: &mut TypedIxVec<RealRangeIx, RealRange>,
+ vlr_env: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
+ // From dataflow analysis
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ reg_to_ranges_maps: &RegToRangesMaps,
+ move_info: &MoveInfo,
+ // As supplied by the client
+ reftype_class: RegClass,
+ reftyped_vregs: &Vec<VirtualReg>,
+) {
+ // Helper: find the RangeId (RealRange or VirtualRange) for a register at an InstPoint.
+ let find_range_id_for_reg = |pt: InstPoint, reg: Reg| -> RangeId {
+ if reg.is_real() {
+ for &rlrix in &reg_to_ranges_maps.rreg_to_rlrs_map[reg.get_index() as usize] {
+ if rlr_env[rlrix].sorted_frags.contains_pt(frag_env, pt) {
+ return RangeId::new_real(rlrix);
+ }
+ }
+ } else {
+ for &vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[reg.get_index() as usize] {
+ if vlr_env[vlrix].sorted_frags.contains_pt(pt) {
+ return RangeId::new_virtual(vlrix);
+ }
+ }
+ }
+    panic!("do_reftypes_analysis::find_range_id_for_reg: can't find range");
+ };
+
+ // The game here is: starting with `reftyped_vregs`, find *all* the VirtualRanges and
+ // RealRanges to which refness can flow, via instructions which the client's `is_move`
+ // function considers to be moves.
+
+ // This is done in three stages:
+ //
+ // (1) Create a mapping from source (virtual or real) ranges to sets of destination ranges.
+ // We have `move_info`, which tells us which (virtual or real) regs are connected by
+ // moves. However, that's not directly useful -- we need to know which *ranges* are
+    //     connected by moves. `move_info` only supplies the source and destination *regs* and
+    //     the instruction index, so we use `find_range_id_for_reg` (above) to map the source
+    //     (at the instruction's use point) and the destination (at its def point) to their
+    //     containing ranges.
+ //
+ // (2) Similarly, convert `reftyped_vregs` into a set of reftyped ranges by consulting
+ // `reg_to_ranges_maps`.
+ //
+ // (3) Compute the transitive closure of (1) starting from the ranges in (2). This is done
+ // by a depth first search of the graph implied by (1).
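+    //
+    // A small example of the intended effect (hypothetical registers): if the client
+    // declares v1 as reftyped and the function contains moves `v2 := v1` and `v3 := v2`,
+    // then the ranges carrying v1, v2 and v3 (and any further ranges reached through
+    // additional moves) all end up with their `is_ref` bit set.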
+
+ // ====== Compute (1) above ======
+ // Each entry in `succ` maps from `src` to a `SparseSet<dsts>`, so to speak. That is, for
+ // `d1`, `d2`, etc, in `dsts`, the function contains moves `d1 := src`, `d2 := src`, etc.
+ let mut succ = Map::<RangeId, SparseSetU<[RangeId; 4]>>::default();
+ for &MoveInfoElem { dst, src, iix, .. } in &move_info.moves {
+ // Don't waste time processing moves which can't possibly be of reftyped values.
+ debug_assert!(dst.get_class() == src.get_class());
+ if dst.get_class() != reftype_class {
+ continue;
+ }
+ let src_range = find_range_id_for_reg(InstPoint::new_use(iix), src);
+ let dst_range = find_range_id_for_reg(InstPoint::new_def(iix), dst);
+ debug!(
+ "move from {:?} (range {:?}) to {:?} (range {:?}) at inst {:?}",
+ src, src_range, dst, dst_range, iix
+ );
+ match succ.get_mut(&src_range) {
+ Some(dst_ranges) => dst_ranges.insert(dst_range),
+ None => {
+ // Re `; 4`: we expect most copies copy a register to only a few destinations.
+ let mut dst_ranges = SparseSetU::<[RangeId; 4]>::empty();
+ dst_ranges.insert(dst_range);
+ let r = succ.insert(src_range, dst_ranges);
+ assert!(r.is_none());
+ }
+ }
+ }
+
+ // ====== Compute (2) above ======
+ let mut reftyped_ranges = SparseSet::<RangeId>::empty();
+ for vreg in reftyped_vregs {
+        // If this fails, the client has been telling us that some virtual reg is reftyped, yet
+ // it doesn't belong to the class of regs that it claims can carry refs. So the client
+ // is buggy.
+ debug_assert!(vreg.get_class() == reftype_class);
+ for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[vreg.get_index()] {
+ debug!("range {:?} is reffy due to reffy vreg {:?}", vlrix, vreg);
+ reftyped_ranges.insert(RangeId::new_virtual(*vlrix));
+ }
+ }
+
+ // ====== Compute (3) above ======
+ // Almost all chains of copies will be less than 64 long, I would guess.
+ let mut stack = SmallVec::<[RangeId; 64]>::new();
+ let mut visited = reftyped_ranges.clone();
+ for start_point_range in reftyped_ranges.iter() {
+ // Perform DFS from `start_point_range`.
+ stack.clear();
+ stack.push(*start_point_range);
+ while let Some(src_range) = stack.pop() {
+ visited.insert(src_range);
+ if let Some(dst_ranges) = succ.get(&src_range) {
+ for dst_range in dst_ranges.iter() {
+ if !visited.contains(*dst_range) {
+ stack.push(*dst_range);
+ }
+ }
+ }
+ }
+ }
+
+ // Finally, annotate rlr_env/vlr_env with the results of the analysis. (That was the whole
+ // point!)
+ for range in visited.iter() {
+ if range.is_real() {
+ let rrange = &mut rlr_env[range.to_real()];
+ debug_assert!(!rrange.is_ref);
+ debug!(" -> rrange {:?} is reffy", range.to_real());
+ rrange.is_ref = true;
+ } else {
+ let vrange = &mut vlr_env[range.to_virtual()];
+ debug_assert!(!vrange.is_ref);
+            debug!("  -> vrange {:?} is reffy", range.to_virtual());
+ vrange.is_ref = true;
+ }
+ }
+}
diff --git a/third_party/rust/regalloc/src/avl_tree.rs b/third_party/rust/regalloc/src/avl_tree.rs
new file mode 100644
index 0000000000..e42208425f
--- /dev/null
+++ b/third_party/rust/regalloc/src/avl_tree.rs
@@ -0,0 +1,1281 @@
+//! AVL trees with a private allocation pool.
+//!
+//! AVL tree internals are public, so that backtracking.rs can do custom
+//! traversals of the tree as it wishes.
+
+use smallvec::SmallVec;
+use std::cmp::Ordering;
+
+//=============================================================================
+// Data structures for AVLTree
+
+#[derive(Clone, PartialEq)]
+pub enum AVLTag {
+ Free, // This pool entry is not in use
+ None, // This pool entry is in use. Neither subtree is higher.
+ Left, // This pool entry is in use. The left subtree is higher.
+ Right, // This pool entry is in use. The right subtree is higher.
+}
+
+#[derive(Clone)]
+pub struct AVLNode<T> {
+ pub tag: AVLTag,
+ pub left: u32,
+ pub right: u32,
+ pub item: T,
+}
+impl<T> AVLNode<T> {
+ fn new(tag: AVLTag, left: u32, right: u32, item: T) -> Self {
+ Self {
+ tag,
+ left,
+ right,
+ item,
+ }
+ }
+}
+
+pub const AVL_NULL: u32 = 0xFFFF_FFFF;
+
+pub struct AVLTree<T> {
+ // The storage area. There can be at most 2^32-2 entries, since AVL_NULL
+ // (== 2^32-1) is used to mean "the null pointer".
+ pub pool: Vec<AVLNode<T>>,
+ // A default value for the stored item. We don't care what this is;
+ // unfortunately Rust forces us to have one so that additions to the free
+ // list will be fully initialised.
+ default: T,
+ // The freelist head. This is a list of available entries. Each item on
+ // the freelist must have its .tag be AVLTag::Free, and will use its .left
+ // field as the link to the next freelist item. A freelist link value of
+ // AVL_NULL denotes the end of the list. If `freelist` itself is AVL_NULL
+ // then the list is empty.
+ freelist: u32,
+ // Last but not least, the root node.
+ pub root: u32,
+}
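+
+// A purely illustrative example of the pool representation: with four entries, of which
+// entries 1 and 3 are free and `freelist == 1`, the pool looks like
+//
+//    index:  0          1                2          3
+//    tag:    (in use)   Free             (in use)   Free
+//    left:   ...        3 (next free)    ...        AVL_NULL (end of the freelist)
+//
+// that is, free entries are chained through their `left` fields and the chain is
+// terminated by AVL_NULL.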
+
+//=============================================================================
+// Storage management functions for AVLTree
+
+impl<T: Clone> AVLTree<T> {
+ // Create a new tree and its associated storage pool. This requires knowing
+ // the default item value.
+ pub fn new(default: T) -> Self {
+ // Pre-allocate a few entries so as to save a few reallocs later, on the
+ // assumption that most trees will get quite large.
+ let pool = Vec::with_capacity(16);
+ let freelist = AVL_NULL;
+ let root = AVL_NULL;
+ Self {
+ pool,
+ default,
+ freelist,
+ root,
+ }
+ }
+
+ // Private function: free a tree node and put it back on the storage pool's
+ // freelist.
+ fn free(&mut self, index: u32) {
+ assert!(index != AVL_NULL);
+ assert!(self.pool[index as usize].tag != AVLTag::Free);
+ self.pool[index as usize] =
+ AVLNode::new(AVLTag::Free, self.freelist, AVL_NULL, self.default.clone());
+ self.freelist = index;
+ }
+
+ // Private function: allocate a tree node from the storage pool, resizing
+ // the pool if necessary. This will decline to expand the tree past about
+ // 1.75 billion items.
+ fn alloc(&mut self) -> u32 {
+ // Check to see if the freelist is empty, and if so allocate a bunch more
+ // slots.
+ if self.freelist == AVL_NULL {
+ let start = self.pool.len();
+ let fill_item = AVLNode::new(AVLTag::Free, AVL_NULL, AVL_NULL, self.default.clone());
+ // What happens if this OOMs? At least guard against u32 overflow by
+ // doing this:
+ if start >= 0x7000_0000 {
+ // 1.75 billion elements in the tree should be enough for any
+ // reasonable use of this register allocator.
+ panic!("AVLTree<T>::alloc: too many items");
+ }
+ self.pool.resize(2 * start + 2, fill_item);
+ let end_plus_1 = self.pool.len();
+ debug_assert!(end_plus_1 >= 2);
+ self.pool[end_plus_1 - 1].left = self.freelist;
+ let mut i = end_plus_1 - 2;
+ while i >= start {
+ // The entry is already marked as free, but we must set the link.
+ self.pool[i].left = i as u32 + 1;
+ if i == 0 {
+ break;
+ }
+ i -= 1;
+ }
+ self.freelist = start as u32;
+ debug_assert!(self.freelist != AVL_NULL);
+ }
+ // And now allocate.
+ let new = self.freelist;
+ assert!(self.pool[new as usize].tag == AVLTag::Free);
+ // The caller is responsible for filling in the entry. But at least set
+ // the tag to non-Free, for sanity.
+ self.pool[new as usize].tag = AVLTag::None;
+ self.freelist = self.pool[new as usize].left;
+ new
+ }
+}
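+
+// A small illustration of the freelist discipline described above (a sketch only, since
+// `alloc` and `free` are private): starting from a new tree,
+//
+//     let mut t = AVLTree::<u32>::new(0);
+//     let n = t.alloc();   // grows the pool and threads the fresh slots onto the freelist
+//     t.free(n);           // the slot goes back on the front of the freelist
+//
+// leaves `t.freelist == n` and `t.pool[n as usize].tag == AVLTag::Free`.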
+
+//=============================================================================
+// Tree-wrangling machinery for AVLTree (private)
+
+// For the public interface, see below.
+
+// The functions 'insert' and 'delete', and all supporting functions reachable
+// from them, are derived from a public domain implementation by Georg Kraml.
+// Unfortunately the relevant web site is long gone, and can only be found on
+// the Wayback Machine.
+//
+// https://web.archive.org/web/20010419134337/
+// http://www.kraml.at/georg/avltree/index.html
+//
+// https://web.archive.org/web/20030926063347/
+// http://www.kraml.at/georg/avltree/avlmonolithic.c
+//
+// https://web.archive.org/web/20030401124003/http://www.kraml.at/src/howto/
+//
+// For relicensing clearance, see Mozilla bug 1620332, at
+// https://bugzilla.mozilla.org/show_bug.cgi?id=1620332.
+
+// Did a given insertion/deletion succeed, and what do we do next?
+#[derive(Clone, Copy, PartialEq)]
+enum AVLRes {
+ Error,
+ OK,
+ Balance,
+}
+
+impl<T: Clone + PartialOrd> AVLTree<T> {
+ // Private function: rotleft: perform counterclockwise rotation
+ // Takes the root of the tree to rotate, returns the new root
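+ //
+ // For example, with B as the old root and D as the new root:
+ //
+ //        B                  D
+ //       / \                / \
+ //      A   D     ==>      B   E
+ //         / \            / \
+ //        C   E          A   C
+ //
+ // (`rotright` below is the mirror image of this.)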
+ fn rotleft(&mut self, old_root: u32) -> u32 {
+ let new_root = self.pool[old_root as usize].right;
+ self.pool[old_root as usize].right = self.pool[new_root as usize].left;
+ self.pool[new_root as usize].left = old_root;
+ new_root
+ }
+
+ // Private function: rotright: perform clockwise rotation
+ // Takes the root of the tree to rotate, returns the new root
+ fn rotright(&mut self, old_root: u32) -> u32 {
+ let new_root = self.pool[old_root as usize].left;
+ self.pool[old_root as usize].left = self.pool[new_root as usize].right;
+ self.pool[new_root as usize].right = old_root;
+ new_root
+ }
+
+ // Private function: leftgrown: helper function for `insert`
+ //
+ // Parameters:
+ //
+ // root Root of a tree. This node's left
+ // subtree has just grown due to item insertion; its
+ // "tag" flag needs adjustment, and the local tree
+ // (the subtree of which this node is the root node) may
+ // have become unbalanced.
+ //
+ // Return values:
+ //
+ // The new root of the subtree, plus either:
+ //
+ // OK The local tree could be rebalanced or was balanced
+ // from the start. The parent activations of the `insert_wrk`
+ // activation that called this function may assume the
+ // entire tree is valid.
+ // or
+ // BALANCE The local tree was balanced, but has grown in height.
+ // Do not assume the entire tree is valid.
+ //
+ // This function has been split into two pieces: `leftgrown`, which is small and hot, and is
+ // marked always-inline, and `leftgrown_left`, which handles a more complex and less
+ // frequent case, and is marked never-inline. The intent is to have the common case always
+ // inlined without having to deal with the extra register pressure from inlining the less
+ // frequent code. The dual function `rightgrown` is split similarly.
+ #[inline(never)]
+ fn leftgrown_left(&mut self, mut root: u32) -> (u32, AVLRes) {
+ if self.pool[self.pool[root as usize].left as usize].tag == AVLTag::Left {
+ self.pool[root as usize].tag = AVLTag::None;
+ let t = self.pool[root as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ root = self.rotright(root);
+ } else {
+ match self.pool[self.pool[self.pool[root as usize].left as usize].right as usize].tag {
+ AVLTag::Left => {
+ self.pool[root as usize].tag = AVLTag::Right;
+ let t = self.pool[root as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Right => {
+ self.pool[root as usize].tag = AVLTag::None;
+ let t = self.pool[root as usize].left;
+ self.pool[t as usize].tag = AVLTag::Left;
+ }
+ AVLTag::None => {
+ self.pool[root as usize].tag = AVLTag::None;
+ let t = self.pool[root as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Free => panic!("AVLTree::leftgrown_left: unallocated node in tree"),
+ }
+ let t = self.pool[self.pool[root as usize].left as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ self.pool[root as usize].left = self.rotleft(self.pool[root as usize].left);
+ root = self.rotright(root);
+ }
+ return (root, AVLRes::OK);
+ }
+
+ #[inline(always)]
+ fn leftgrown(&mut self, root: u32) -> (u32, AVLRes) {
+ let root_node = &mut self.pool[root as usize];
+ match root_node.tag {
+ AVLTag::Left => self.leftgrown_left(root),
+ AVLTag::Right => {
+ root_node.tag = AVLTag::None;
+ return (root, AVLRes::OK);
+ }
+ AVLTag::None => {
+ root_node.tag = AVLTag::Left;
+ return (root, AVLRes::Balance);
+ }
+ AVLTag::Free => panic!("AVLTree::leftgrown: unallocated node in tree"),
+ }
+ }
+
+ // Private function: rightgrown: helper function for `insert`
+ //
+ // See leftgrown for details.
+ #[inline(never)]
+ fn rightgrown_right(&mut self, mut root: u32) -> (u32, AVLRes) {
+ if self.pool[self.pool[root as usize].right as usize].tag == AVLTag::Right {
+ self.pool[root as usize].tag = AVLTag::None;
+ let t = self.pool[root as usize].right as usize;
+ self.pool[t].tag = AVLTag::None;
+ root = self.rotleft(root);
+ } else {
+ match self.pool[self.pool[self.pool[root as usize].right as usize].left as usize].tag {
+ AVLTag::Right => {
+ self.pool[root as usize].tag = AVLTag::Left;
+ let t = self.pool[root as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Left => {
+ self.pool[root as usize].tag = AVLTag::None;
+ let t = self.pool[root as usize].right;
+ self.pool[t as usize].tag = AVLTag::Right;
+ }
+ AVLTag::None => {
+ self.pool[root as usize].tag = AVLTag::None;
+ let t = self.pool[root as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Free => panic!("AVLTree::rightgrown_right: unallocated node in tree"),
+ }
+ let t = self.pool[self.pool[root as usize].right as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ self.pool[root as usize].right = self.rotright(self.pool[root as usize].right);
+ root = self.rotleft(root);
+ }
+ return (root, AVLRes::OK);
+ }
+
+ #[inline(always)]
+ fn rightgrown(&mut self, root: u32) -> (u32, AVLRes) {
+ match self.pool[root as usize].tag {
+ AVLTag::Left => {
+ self.pool[root as usize].tag = AVLTag::None;
+ return (root, AVLRes::OK);
+ }
+ AVLTag::Right => self.rightgrown_right(root),
+ AVLTag::None => {
+ self.pool[root as usize].tag = AVLTag::Right;
+ return (root, AVLRes::Balance);
+ }
+ AVLTag::Free => panic!("AVLTree::rightgrown: unallocated node in tree"),
+ }
+ }
+
+ // Private function: insert_wrk: insert a node into the AVL tree
+ // (worker function)
+ //
+ // Parameters:
+ //
+ // root Root of the tree in which to insert `item`.
+ //
+ // item Item to be inserted.
+ //
+ // Returns AVL_NULL if the value is already in the tree. Otherwise returns the index of the
+ // new root (which is obviously always non-AVL_NULL). This is infallible in the sense that,
+ // if allocation of a new node fails, it won't return -- `self.alloc()` will panic.
+ //
+ // This function relies on the fact that any non-AVL_NULL value will have its top bit (bit
+ // 31) clear, since that bit is used as a boolean in the `stack`. That property is
+ // guaranteed us by `fn alloc`, which ensures that the max number of nodes in the tree is
+ // 0x70000000.
+ fn insert_wrk<F>(&mut self, mut root: u32, item: T, mb_cmp: Option<&F>) -> u32
+ where
+ F: Fn(T, T) -> Option<Ordering>,
+ {
+ #[inline(always)]
+ fn stack_entry_set_is_left(node: u32) -> u32 {
+ node | 0x8000_0000
+ }
+ #[inline(always)]
+ fn stack_entry_get_is_left(ent: u32) -> u32 {
+ ent & 0x8000_0000
+ }
+ #[inline(always)]
+ fn stack_entry_get_node(ent: u32) -> u32 {
+ ent & 0x7FFF_FFFF
+ }
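+ // For example, tagging node index 5 gives 0x8000_0005; `stack_entry_get_node`
+ // recovers 5, and `stack_entry_get_is_left` returns a non-zero value (it returns
+ // zero for an untagged entry).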
+
+ // The stack will hold a root-leaf path. Given that the max number of elements allowed
+ // in the tree is 0x70000000, which is (7/8) * 2^31, and that the max depth is 1.44 *
+ // log2(elems), a 64 entry stack should always be sufficient. Hence there should never
+ // be any dynamic allocation here. In fact a 48 entry stack would also suffice, but
+ // SmallVec doesn't provide that size.
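+ // As a rough check of that claim: 1.44 * log2(0x70000000) = 1.44 * 30.8, which is just
+ // under 45, comfortably below both 48 and 64.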
+ let mut stack = SmallVec::<[u32; 64]>::new();
+
+ // In the first phase, walk down the tree to find the place where the new node should be
+ // inserted. This loop is cloned so as to allow the test on `mb_cmp` to be done just
+ // once.
+ match mb_cmp {
+ None => {
+ while root != AVL_NULL {
+ let cmp_loc_right = &self.pool[root as usize];
+ let cmp_arg_right: T = cmp_loc_right.item.clone();
+ let cmp_arg_left: T = item.clone();
+ debug_assert!(stack_entry_get_is_left(root) == 0);
+ match cmp_arg_left.partial_cmp(&cmp_arg_right) {
+ None => panic!("AVLTree::insert_wrk: unordered elements(1)"),
+ Some(Ordering::Less) => {
+ stack.push(stack_entry_set_is_left(root));
+ root = cmp_loc_right.left;
+ }
+ Some(Ordering::Greater) => {
+ stack.push(root);
+ root = cmp_loc_right.right;
+ }
+ Some(Ordering::Equal) => {
+ // Item is already in the tree.
+ return AVL_NULL;
+ }
+ }
+ }
+ }
+ Some(cmp) => {
+ while root != AVL_NULL {
+ let cmp_loc_right = &self.pool[root as usize];
+ let cmp_arg_right: T = cmp_loc_right.item.clone();
+ let cmp_arg_left: T = item.clone();
+ debug_assert!(stack_entry_get_is_left(root) == 0);
+ match cmp(cmp_arg_left, cmp_arg_right) {
+ None => panic!("AVLTree::insert_wrk: unordered elements(2)"),
+ Some(Ordering::Less) => {
+ stack.push(stack_entry_set_is_left(root));
+ root = cmp_loc_right.left;
+ }
+ Some(Ordering::Greater) => {
+ stack.push(root);
+ root = cmp_loc_right.right;
+ }
+ Some(Ordering::Equal) => {
+ // Item is already in the tree.
+ return AVL_NULL;
+ }
+ }
+ }
+ }
+ }
+
+ // Now allocate the new node.
+ debug_assert!(root == AVL_NULL);
+ let new_node = self.alloc();
+ self.pool[new_node as usize] = AVLNode::new(AVLTag::None, AVL_NULL, AVL_NULL, item.clone());
+
+ // And unwind the stack, back to the root, rebalancing as we go. Once we get to a place
+ // where the new subtree doesn't need to be rebalanced, we can stop this upward scan,
+ // because no nodes above it will need to be rebalanced either.
+ let mut curr_node = new_node;
+ let mut curr_node_action = AVLRes::Balance;
+
+ while let Some(parent_node_tagged) = stack.pop() {
+ let parent_node = stack_entry_get_node(parent_node_tagged);
+ if stack_entry_get_is_left(parent_node_tagged) != 0 {
+ self.pool[parent_node as usize].left = curr_node;
+ if curr_node_action == AVLRes::Balance {
+ let pair = self.leftgrown(parent_node);
+ curr_node = pair.0;
+ curr_node_action = pair.1;
+ } else {
+ curr_node = parent_node;
+ break;
+ }
+ } else {
+ self.pool[parent_node as usize].right = curr_node;
+ if curr_node_action == AVLRes::Balance {
+ let pair = self.rightgrown(parent_node);
+ curr_node = pair.0;
+ curr_node_action = pair.1;
+ } else {
+ curr_node = parent_node;
+ break;
+ }
+ }
+ }
+
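+ // If we broke out of the loop early, the nodes still on the stack were not restructured,
+ // so the overall root is unchanged: it is the entry at the bottom of the stack.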
+ if !stack.is_empty() {
+ curr_node = stack_entry_get_node(stack[0]);
+ }
+
+ debug_assert!(curr_node != AVL_NULL);
+ return curr_node;
+ }
+
+ // Private function: leftshrunk: helper function for delete and
+ // findlowest
+ //
+ // Parameters:
+ //
+ // n Index of a node. The node's left
+ // subtree has just shrunk due to item removal; its
+ // "tag" flag needs adjustment, and the local tree
+ // (the subtree of which this node is the root node) may
+ // have become unbalanced.
+ //
+ // Return values:
+ //
+ // The new root of the subtree, plus either:
+ //
+ // OK The parent activation of the delete activation
+ // that called this function may assume the entire
+ // tree is valid.
+ //
+ // BALANCE Do not assume the entire tree is valid.
+ fn leftshrunk(&mut self, mut n: u32) -> (u32, AVLRes) {
+ match self.pool[n as usize].tag {
+ AVLTag::Left => {
+ self.pool[n as usize].tag = AVLTag::None;
+ return (n, AVLRes::Balance);
+ }
+ AVLTag::Right => {
+ if self.pool[self.pool[n as usize].right as usize].tag == AVLTag::Right {
+ self.pool[n as usize].tag = AVLTag::None;
+ let t = self.pool[n as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ n = self.rotleft(n);
+ return (n, AVLRes::Balance);
+ } else if self.pool[self.pool[n as usize].right as usize].tag == AVLTag::None {
+ self.pool[n as usize].tag = AVLTag::Right;
+ let t = self.pool[n as usize].right;
+ self.pool[t as usize].tag = AVLTag::Left;
+ n = self.rotleft(n);
+ return (n, AVLRes::OK);
+ } else {
+ match self.pool[self.pool[self.pool[n as usize].right as usize].left as usize]
+ .tag
+ {
+ AVLTag::Left => {
+ self.pool[n as usize].tag = AVLTag::None;
+ let t = self.pool[n as usize].right;
+ self.pool[t as usize].tag = AVLTag::Right;
+ }
+ AVLTag::Right => {
+ self.pool[n as usize].tag = AVLTag::Left;
+ let t = self.pool[n as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::None => {
+ self.pool[n as usize].tag = AVLTag::None;
+ let t = self.pool[n as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Free => {
+ panic!("AVLTree::leftshrunk(1): unallocated node in tree");
+ }
+ }
+ {
+ let t = self.pool[self.pool[n as usize].right as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ {
+ let t = self.rotright(self.pool[n as usize].right);
+ self.pool[n as usize].right = t;
+ }
+ n = self.rotleft(n);
+ return (n, AVLRes::Balance);
+ }
+ }
+ AVLTag::None => {
+ self.pool[n as usize].tag = AVLTag::Right;
+ return (n, AVLRes::OK);
+ }
+ AVLTag::Free => {
+ panic!("AVLTree::leftshrunk(2): unallocated node in tree");
+ }
+ }
+ }
+
+ // Private function: rightshrunk: helper function for delete and
+ // findhighest
+ //
+ // See leftshrunk for details.
+ fn rightshrunk(&mut self, mut n: u32) -> (u32, AVLRes) {
+ match self.pool[n as usize].tag {
+ AVLTag::Right => {
+ self.pool[n as usize].tag = AVLTag::None;
+ return (n, AVLRes::Balance);
+ }
+ AVLTag::Left => {
+ if self.pool[self.pool[n as usize].left as usize].tag == AVLTag::Left {
+ self.pool[n as usize].tag = AVLTag::None;
+ let t = self.pool[n as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ n = self.rotright(n);
+ return (n, AVLRes::Balance);
+ } else if self.pool[self.pool[n as usize].left as usize].tag == AVLTag::None {
+ self.pool[n as usize].tag = AVLTag::Left;
+ let t = self.pool[n as usize].left;
+ self.pool[t as usize].tag = AVLTag::Right;
+ n = self.rotright(n);
+ return (n, AVLRes::OK);
+ } else {
+ match self.pool[self.pool[self.pool[n as usize].left as usize].right as usize]
+ .tag
+ {
+ AVLTag::Left => {
+ self.pool[n as usize].tag = AVLTag::Right;
+ let t = self.pool[n as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Right => {
+ self.pool[n as usize].tag = AVLTag::None;
+ let t = self.pool[n as usize].left;
+ self.pool[t as usize].tag = AVLTag::Left;
+ }
+ AVLTag::None => {
+ self.pool[n as usize].tag = AVLTag::None;
+ let t = self.pool[n as usize].left;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ AVLTag::Free => {
+ panic!("AVLTree::rightshrunk(1): unallocated node in tree");
+ }
+ }
+ {
+ let t = self.pool[self.pool[n as usize].left as usize].right;
+ self.pool[t as usize].tag = AVLTag::None;
+ }
+ {
+ let t = self.rotleft(self.pool[n as usize].left);
+ self.pool[n as usize].left = t;
+ }
+ n = self.rotright(n);
+ return (n, AVLRes::Balance);
+ }
+ }
+ AVLTag::None => {
+ self.pool[n as usize].tag = AVLTag::Left;
+ return (n, AVLRes::OK);
+ }
+ AVLTag::Free => {
+ panic!("AVLTree::rightshrunk(2): unallocated node in tree");
+ }
+ }
+ }
+
+ // Private function: findhighest: replace a node with a subtree's
+ // highest-ranking item.
+ //
+ // Parameters:
+ //
+ // target Index of the node to be replaced.
+ //
+ // n Index of the root of the subtree to search.
+ //
+ // Return values:
+ //
+ // Some((new_n, res)) A node was found; the target node has been replaced. `new_n`
+ // is the new root of the subtree, and `res` tells the caller
+ // whether further rebalancing checks are necessary, analogously
+ // to the return values of leftgrown and leftshrunk (see there).
+ //
+ // None The target node could not be replaced because
+ // the subtree provided was empty.
+ //
+ fn findhighest(&mut self, target: u32, mut n: u32) -> Option<(u32, AVLRes)> {
+ if n == AVL_NULL {
+ return None;
+ }
+ let mut res = AVLRes::Balance;
+ if self.pool[n as usize].right != AVL_NULL {
+ let rec = self.findhighest(target, self.pool[n as usize].right);
+ if let Some((new_n_right, new_res)) = rec {
+ self.pool[n as usize].right = new_n_right;
+ res = new_res;
+ if res == AVLRes::Balance {
+ let (new_n, new_res) = self.rightshrunk(n);
+ n = new_n;
+ res = new_res;
+ }
+ return Some((n, res));
+ } else {
+ return None;
+ }
+ }
+ self.pool[target as usize].item = self.pool[n as usize].item.clone();
+ let tmp = n;
+ n = self.pool[n as usize].left;
+ self.free(tmp);
+ Some((n, res))
+ }
+
+ // Private function: findlowest: replace node with a subtree's
+ // lowest-ranking item.
+ //
+ // See findhighest for the details.
+ fn findlowest(&mut self, target: u32, mut n: u32) -> Option<(u32, AVLRes)> {
+ if n == AVL_NULL {
+ return None;
+ }
+ let mut res = AVLRes::Balance;
+ if self.pool[n as usize].left != AVL_NULL {
+ let rec = self.findlowest(target, self.pool[n as usize].left);
+ if let Some((new_n_left, new_res)) = rec {
+ self.pool[n as usize].left = new_n_left;
+ res = new_res;
+ if res == AVLRes::Balance {
+ let (new_n, new_res) = self.leftshrunk(n);
+ n = new_n;
+ res = new_res;
+ }
+ return Some((n, res));
+ } else {
+ return None;
+ }
+ }
+ self.pool[target as usize].item = self.pool[n as usize].item.clone();
+ let tmp = n;
+ n = self.pool[n as usize].right;
+ self.free(tmp);
+ Some((n, res))
+ }
+
+ // Private function: delete_wrk: delete an item from the tree.
+ // (worker function)
+ //
+ // Parameters:
+ //
+ // root Index of the root of the (sub)tree to delete from.
+ //
+ // item Item to be removed.
+ //
+ // Return values: a pair (new_root, res), where res is one of:
+ //
+ // OK or BALANCE The item has been removed. When delete_wrk recursively
+ // calls itself, res tells the parent activation whether the
+ // AVL tree may have become unbalanced; specifically:
+ //
+ // OK Neither subtree of the node that `root` denotes has
+ // shrunk; the AVL tree is valid.
+ //
+ // BALANCE One of the subtrees of the node that `root` denotes has
+ // shrunk; the node's "tag" flag needs adjustment, and the
+ // AVL tree may have become unbalanced.
+ //
+ // Error The tree does not contain an item equal (per the
+ // comparator) to the item provided by the caller.
+ fn delete_wrk<F>(&mut self, mut root: u32, item: T, mb_cmp: Option<&F>) -> (u32, AVLRes)
+ where
+ F: Fn(T, T) -> Option<Ordering>,
+ {
+ let mut tmp = AVLRes::Balance;
+ if root == AVL_NULL {
+ return (root, AVLRes::Error);
+ }
+
+ let cmp_arg_left: T = item.clone();
+ let cmp_arg_right: T = self.pool[root as usize].item.clone();
+ let cmp_res = match mb_cmp {
+ None => cmp_arg_left.partial_cmp(&cmp_arg_right),
+ Some(cmp) => cmp(cmp_arg_left, cmp_arg_right),
+ };
+ match cmp_res {
+ None => panic!("AVLTree::delete_wrk: unordered elements"),
+ Some(Ordering::Less) => {
+ let root_left = self.pool[root as usize].left;
+ let (new_root_left, new_tmp) = self.delete_wrk(root_left, item, mb_cmp);
+ self.pool[root as usize].left = new_root_left;
+ tmp = new_tmp;
+ if tmp == AVLRes::Balance {
+ let (new_root, new_res) = self.leftshrunk(root);
+ root = new_root;
+ tmp = new_res;
+ }
+ return (root, tmp);
+ }
+ Some(Ordering::Greater) => {
+ let root_right = self.pool[root as usize].right;
+ let (new_root_right, new_tmp) = self.delete_wrk(root_right, item, mb_cmp);
+ self.pool[root as usize].right = new_root_right;
+ tmp = new_tmp;
+ if tmp == AVLRes::Balance {
+ let (new_root, new_res) = self.rightshrunk(root);
+ root = new_root;
+ tmp = new_res;
+ }
+ return (root, tmp);
+ }
+ Some(Ordering::Equal) => {
+ if self.pool[root as usize].left != AVL_NULL {
+ let root_left = self.pool[root as usize].left;
+ if let Some((new_root_left, new_tmp)) = self.findhighest(root, root_left) {
+ self.pool[root as usize].left = new_root_left;
+ tmp = new_tmp;
+ if new_tmp == AVLRes::Balance {
+ let (new_root, new_res) = self.leftshrunk(root);
+ root = new_root;
+ tmp = new_res;
+ }
+ }
+ return (root, tmp);
+ }
+ if self.pool[root as usize].right != AVL_NULL {
+ let root_right = self.pool[root as usize].right;
+ if let Some((new_root_right, new_tmp)) = self.findlowest(root, root_right) {
+ self.pool[root as usize].right = new_root_right;
+ tmp = new_tmp;
+ if new_tmp == AVLRes::Balance {
+ let (new_root, new_res) = self.rightshrunk(root);
+ root = new_root;
+ tmp = new_res;
+ }
+ }
+ return (root, tmp);
+ }
+ self.free(root);
+ root = AVL_NULL;
+ return (root, AVLRes::Balance);
+ }
+ }
+ }
+
+ // Private fn: count the number of items in the tree. Warning: costs O(N) !
+ #[cfg(test)]
+ fn count_wrk(&self, n: u32) -> usize {
+ if n == AVL_NULL {
+ return 0;
+ }
+ 1 + self.count_wrk(self.pool[n as usize].left) + self.count_wrk(self.pool[n as usize].right)
+ }
+
+ // Private fn: find the max depth of the tree. Warning: costs O(N) !
+ #[cfg(test)]
+ fn depth_wrk(&self, n: u32) -> usize {
+ if n == AVL_NULL {
+ return 0;
+ }
+ let d_left = self.depth_wrk(self.pool[n as usize].left);
+ let d_right = self.depth_wrk(self.pool[n as usize].right);
+ 1 + if d_left > d_right { d_left } else { d_right }
+ }
+}
+
+// Machinery for iterating over the tree, enumerating nodes in ascending order.
+// Unfortunately AVLTreeIter has to be public.
+pub struct AVLTreeIter<'t, 's, T> {
+ tree: &'t AVLTree<T>,
+ stack: &'s mut Vec<u32>,
+}
+
+impl<'t, 's, T> AVLTreeIter<'t, 's, T> {
+ #[allow(dead_code)]
+ fn new(tree: &'t AVLTree<T>, stack: &'s mut Vec<u32>) -> Self {
+ let mut iter = AVLTreeIter { tree, stack };
+ if tree.root != AVL_NULL {
+ iter.stack.push(tree.root);
+ iter.visit_left_children(tree.root);
+ }
+ iter
+ }
+
+ fn visit_left_children(&mut self, root: u32) {
+ let mut cur = root;
+ loop {
+ let left = self.tree.pool[cur as usize].left;
+ if left == AVL_NULL {
+ break;
+ }
+ self.stack.push(left);
+ cur = left;
+ }
+ }
+}
+
+impl<'s, 't, T: Copy> Iterator for AVLTreeIter<'s, 't, T> {
+ type Item = T;
+ fn next(&mut self) -> Option<Self::Item> {
+ let ret = match self.stack.pop() {
+ Some(ret) => ret,
+ None => return None,
+ };
+ let right = self.tree.pool[ret as usize].right;
+ if right != AVL_NULL {
+ self.stack.push(right);
+ self.visit_left_children(right);
+ }
+ Some(self.tree.pool[ret as usize].item)
+ }
+}
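+
+// A minimal usage sketch (mirroring `test_avl_tree_iter` below): iteration borrows an
+// external Vec as its work stack, so callers can reuse one scratch buffer across many
+// traversals, and items are yielded in ascending order:
+//
+//     let mut scratch = Vec::<u32>::new();
+//     for item in tree.iter(&mut scratch) {
+//         // ... use `item` ...
+//     }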
+
+//=============================================================================
+// Public interface for AVLTree
+
+impl<T: Clone + PartialOrd> AVLTree<T> {
+ // The core functions (insert, delete, contains) take a comparator argument
+ //
+ // mb_cmp: Option<&F>
+ // where
+ // F: Fn(T, T) -> Option<Ordering>
+ //
+ // which allows control over how node comparison is done. If this is None,
+ // then comparison is done directly using PartialOrd for the T values.
+ //
+ // If this is Some(cmp), then comparison is done by passing the two T values
+ // to `cmp`. In this case, the routines will complain (panic) if `cmp`
+ // indicates that its arguments are unordered.
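+ //
+ // For example (a sketch; see the tests at the end of this file for complete usage):
+ //
+ //     tree.insert::<fn(u32, u32) -> Option<Ordering>>(3, None); // direct PartialOrd
+ //     tree.insert(5, Some(&|a: u32, b: u32| b.partial_cmp(&a))); // custom (reversed) order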
+
+ // Insert a value in the tree. Returns true if an insert happened, false if
+ // the item was already present.
+ pub fn insert<F>(&mut self, item: T, mb_cmp: Option<&F>) -> bool
+ where
+ F: Fn(T, T) -> Option<Ordering>,
+ {
+ let new_root = self.insert_wrk(self.root, item, mb_cmp);
+ if new_root == AVL_NULL {
+ return false; // already in tree
+ } else {
+ self.root = new_root;
+ return true;
+ }
+ }
+
+ // Remove an item from the tree. Returns a bool which indicates whether the
+ // value was in there in the first place. (meaning, true == a removal
+ // actually occurred).
+ pub fn delete<F>(&mut self, item: T, mb_cmp: Option<&F>) -> bool
+ where
+ F: Fn(T, T) -> Option<Ordering>,
+ {
+ let (new_root, res) = self.delete_wrk(self.root, item, mb_cmp);
+ if res == AVLRes::Error {
+ return false;
+ } else {
+ self.root = new_root;
+ return true;
+ }
+ }
+
+ // Find `item` in the tree, and replace it with `replacement`. `item` and `replacement`
+ // must compare equal per the comparison function `cmp`. Returns a bool indicating whether
+ // `item` was found (and hence, replaced). There's no comparison fast-path here
+ // (meaning, `cmp` is `&F` and not `Option<&F>`) only because so far there is no use case
+ // for it.
+ pub fn find_and_replace<F>(&mut self, item: T, replacement: T, cmp: &F) -> bool
+ where
+ F: Fn(T, T) -> Option<Ordering>,
+ {
+ let mut n = self.root;
+ loop {
+ if n == AVL_NULL {
+ return false;
+ }
+ let cmp_arg_left: T = item.clone();
+ let cmp_arg_right: T = self.pool[n as usize].item.clone();
+ match cmp(cmp_arg_left, cmp_arg_right) {
+ Some(Ordering::Less) => {
+ n = self.pool[n as usize].left;
+ }
+ Some(Ordering::Greater) => {
+ n = self.pool[n as usize].right;
+ }
+ Some(Ordering::Equal) => {
+ // Do what we can to ensure the caller can't mess up the total ordering in
+ // the tree. This is more restrictive than it needs to be, but loosening
+ // it requires finding the largest item below `item` and the smallest one
+ // above it, which is expensive.
+ assert!(cmp(item, replacement.clone()) == Some(Ordering::Equal));
+ self.pool[n as usize].item = replacement.clone();
+ return true;
+ }
+ None => {
+ panic!("AVLTree::find_and_replace: unordered elements in search!");
+ }
+ }
+ }
+ }
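+
+ // For example (a hypothetical sketch, in which entries are ordered on the first tuple
+ // field only, so the second field is a payload that `find_and_replace` can swap out
+ // in place):
+ //
+ //     let cmp = |a: (u32, &str), b: (u32, &str)| a.0.partial_cmp(&b.0);
+ //     tree.insert((7, "old"), Some(&cmp));
+ //     assert!(tree.find_and_replace((7, "ignored"), (7, "new"), &cmp));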
+
+ // Determine whether an item is in the tree.
+ // sewardj 2020Mar31: this is not used; I assume all users of the trees
+ // do their own custom traversals. Remove #[cfg(test)] if any real uses
+ // appear.
+ #[cfg(test)]
+ pub fn contains<F>(&self, item: T, mb_cmp: Option<&F>) -> bool
+ where
+ F: Fn(T, T) -> Option<Ordering>,
+ {
+ let mut n = self.root;
+ // Lookup needs to be really fast, so have two versions of the loop, one
+ // for direct comparison, one for indirect.
+ match mb_cmp {
+ None => {
+ // Do comparisons directly on the items.
+ loop {
+ if n == AVL_NULL {
+ return false;
+ }
+ let cmp_arg_left: T = item.clone();
+ let cmp_arg_right: T = self.pool[n as usize].item.clone();
+ match cmp_arg_left.partial_cmp(&cmp_arg_right) {
+ Some(Ordering::Less) => {
+ n = self.pool[n as usize].left;
+ }
+ Some(Ordering::Greater) => {
+ n = self.pool[n as usize].right;
+ }
+ Some(Ordering::Equal) => {
+ return true;
+ }
+ None => {
+ panic!("AVLTree::contains(1): unordered elements in search!");
+ }
+ }
+ }
+ }
+ Some(cmp) => {
+ // Do comparisons by handing off to the supplied function.
+ loop {
+ if n == AVL_NULL {
+ return false;
+ }
+ let cmp_arg_left: T = item.clone();
+ let cmp_arg_right: T = self.pool[n as usize].item.clone();
+ match cmp(cmp_arg_left, cmp_arg_right) {
+ Some(Ordering::Less) => {
+ n = self.pool[n as usize].left;
+ }
+ Some(Ordering::Greater) => {
+ n = self.pool[n as usize].right;
+ }
+ Some(Ordering::Equal) => {
+ return true;
+ }
+ None => {
+ panic!("AVLTree::contains(2): unordered elements in search!");
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Count the number of items in the tree. Warning: costs O(N) !
+ #[cfg(test)]
+ fn count(&self) -> usize {
+ self.count_wrk(self.root)
+ }
+
+ // Private fn: find the max depth of the tree. Warning: costs O(N) !
+ #[cfg(test)]
+ fn depth(&self) -> usize {
+ self.depth_wrk(self.root)
+ }
+
+ pub fn to_vec(&self) -> Vec<T> {
+ // BEGIN helper fn
+ fn walk<U: Clone>(res: &mut Vec<U>, root: u32, pool: &Vec<AVLNode<U>>) {
+ let root_left = pool[root as usize].left;
+ if root_left != AVL_NULL {
+ walk(res, root_left, pool);
+ }
+ res.push(pool[root as usize].item.clone());
+ let root_right = pool[root as usize].right;
+ if root_right != AVL_NULL {
+ walk(res, root_right, pool);
+ }
+ }
+ // END helper fn
+
+ let mut res = Vec::<T>::new();
+ if self.root != AVL_NULL {
+ walk(&mut res, self.root, &self.pool);
+ }
+ res
+ }
+
+ #[allow(dead_code)]
+ pub fn iter<'t, 's>(&'t self, storage: &'s mut Vec<u32>) -> AVLTreeIter<'t, 's, T> {
+ storage.clear();
+ AVLTreeIter::new(self, storage)
+ }
+
+ // Show the tree. (For debugging only.)
+ //pub fn show(&self, depth: i32, node: u32) {
+ // if node != AVL_NULL {
+ // self.show(depth + 1, self.pool[node as usize].left);
+ // for _ in 0..depth {
+ // print!(" ");
+ // }
+ // println!("{}", ToFromU32::to_u32(self.pool[node as usize].item));
+ // self.show(depth + 1, self.pool[node as usize].right);
+ // }
+ //}
+}
+
+//=============================================================================
+// Testing machinery for AVLTree
+
+#[cfg(test)]
+mod avl_tree_test_utils {
+ use crate::data_structures::Set;
+ use std::cmp::Ordering;
+
+ // Perform various checks on the tree, and assert if it is not OK.
+ pub fn check_tree(
+ tree: &super::AVLTree<u32>,
+ should_be_in_tree: &Set<u32>,
+ univ_min: u32,
+ univ_max: u32,
+ ) {
+ // Same cardinality
+ let n_in_set = should_be_in_tree.card();
+ let n_in_tree = tree.count();
+ assert!(n_in_set == n_in_tree);
+
+ // Tree is not wildly out of balance. Depth should not exceed 1.44 *
+ // log2(size).
+ let tree_depth = tree.depth();
+ let mut log2_size = 0;
+ {
+ let mut n: usize = n_in_tree;
+ while n > 0 {
+ n = n >> 1;
+ log2_size += 1;
+ }
+ }
+ // Actually a tighter limit than stated above. For these test cases, the
+ // tree is either perfectly balanced or within one level of being so
+ // (hence the +1).
+ assert!(tree_depth <= log2_size + 1);
+
+ // Check that everything that should be in the tree is in it, and vice
+ // versa.
+ for i in univ_min..univ_max {
+ let should_be_in = should_be_in_tree.contains(i);
+
+ // Look it up with a null comparator (so `contains` compares
+ // directly)
+ let is_in = tree.contains::<fn(u32, u32) -> Option<Ordering>>(i, None);
+ assert!(is_in == should_be_in);
+
+ // We should get the same result with a custom comparator that does the
+ // same as the null comparator.
+ let is_in_w_cmp = tree.contains(
+ i,
+ Some(&(|x_left: u32, x_right: u32| x_left.partial_cmp(&x_right))),
+ );
+ assert!(is_in_w_cmp == is_in);
+
+ // And even when the comparator is actually a closure
+ let forty_two: u32 = 42;
+ let is_in_w_cmp_closure = tree.contains(
+ i,
+ Some(
+ &(|x_left: u32, x_right: u32| {
+ (x_left + forty_two).partial_cmp(&(x_right + forty_two))
+ }),
+ ),
+ );
+ assert!(is_in_w_cmp_closure == is_in);
+ }
+
+ // We could even test that the tree items are in-order, but it hardly
+ // seems worth the hassle, since the previous test would surely have
+ // failed if that wasn't the case.
+ }
+}
+
+#[test]
+fn test_avl_tree1() {
+ use crate::data_structures::Set;
+
+ // Perform tests on an AVL tree. Use as values, every third number between
+ // 5000 and 5999 inclusive. This is to ensure that there's no confusion
+ // between element values and internal tree indices (although I think the
+ // typechecker guarantees that anyway).
+ //
+ // Also carry along a Set<u32>, which keeps track of which values should be
+ // in the tree at the current point.
+ const UNIV_MIN: u32 = 5000;
+ const UNIV_MAX: u32 = 5999;
+ const UNIV_SIZE: u32 = UNIV_MAX - UNIV_MIN + 1;
+
+ let mut tree = AVLTree::<u32>::new(0);
+ let mut should_be_in_tree = Set::<u32>::empty();
+
+ // Add numbers to the tree, checking as we go.
+ for i in UNIV_MIN..UNIV_MAX {
+ // Idiotic but simple
+ if i % 3 != 0 {
+ continue;
+ }
+ let was_added = tree.insert::<fn(u32, u32) -> Option<Ordering>>(i, None);
+ should_be_in_tree.insert(i);
+ assert!(was_added == true);
+ avl_tree_test_utils::check_tree(&tree, &should_be_in_tree, UNIV_MIN, UNIV_MAX);
+ }
+
+ // Then remove the middle half of the tree, also checking.
+ for i in UNIV_MIN + UNIV_SIZE / 4..UNIV_MIN + 3 * (UNIV_SIZE / 4) {
+ // Note that here, we're asking to delete a bunch of numbers that aren't
+ // in the tree. It should remain valid throughout.
+ let was_removed = tree.delete::<fn(u32, u32) -> Option<Ordering>>(i, None);
+ let should_have_been_removed = should_be_in_tree.contains(i);
+ assert!(was_removed == should_have_been_removed);
+ should_be_in_tree.delete(i);
+ avl_tree_test_utils::check_tree(&tree, &should_be_in_tree, UNIV_MIN, UNIV_MAX);
+ }
+
+ // Now add some numbers which are already in the tree.
+ for i in UNIV_MIN..UNIV_MIN + UNIV_SIZE / 4 {
+ if i % 3 != 0 {
+ continue;
+ }
+ let was_added = tree.insert::<fn(u32, u32) -> Option<Ordering>>(i, None);
+ let should_have_been_added = !should_be_in_tree.contains(i);
+ assert!(was_added == should_have_been_added);
+ should_be_in_tree.insert(i);
+ avl_tree_test_utils::check_tree(&tree, &should_be_in_tree, UNIV_MIN, UNIV_MAX);
+ }
+
+ // Then remove all numbers from the tree, in reverse order.
+ for ir in UNIV_MIN..UNIV_MAX {
+ let i = UNIV_MIN + (UNIV_MAX - ir);
+ let was_removed = tree.delete::<fn(u32, u32) -> Option<Ordering>>(i, None);
+ let should_have_been_removed = should_be_in_tree.contains(i);
+ assert!(was_removed == should_have_been_removed);
+ should_be_in_tree.delete(i);
+ avl_tree_test_utils::check_tree(&tree, &should_be_in_tree, UNIV_MIN, UNIV_MAX);
+ }
+
+ // Now the tree should be empty.
+ assert!(should_be_in_tree.is_empty());
+ assert!(tree.count() == 0);
+
+ // Now delete some more stuff. Tree should still be empty :-)
+ for i in UNIV_MIN + 10..UNIV_MIN + 100 {
+ assert!(should_be_in_tree.is_empty());
+ assert!(tree.count() == 0);
+ tree.delete::<fn(u32, u32) -> Option<Ordering>>(i, None);
+ avl_tree_test_utils::check_tree(&tree, &should_be_in_tree, UNIV_MIN, UNIV_MAX);
+ }
+
+ // The tree root should be NULL.
+ assert!(tree.root == AVL_NULL);
+ assert!(tree.freelist != AVL_NULL);
+
+ // Check the freelist: all entries are of the expected form.
+ for e in &tree.pool {
+ assert!(e.tag == AVLTag::Free);
+ assert!(e.left == AVL_NULL || (e.left as usize) < tree.pool.len());
+ assert!(e.right == AVL_NULL);
+ assert!(e.item == 0);
+ }
+
+ // Check the freelist: it's non-circular, and contains the expected number
+ // of elements.
+ let mut n_in_freelist = 0;
+ let mut cursor: u32 = tree.freelist;
+ while cursor != AVL_NULL {
+ assert!((cursor as usize) < tree.pool.len());
+ n_in_freelist += 1;
+ assert!(n_in_freelist < 100000 /*arbitrary*/); // else it has a cycle
+ cursor = tree.pool[cursor as usize].left;
+ }
+ // If we get here, the freelist at least doesn't have a cycle.
+
+ // All elements in the pool are on the freelist.
+ assert!(n_in_freelist == tree.pool.len());
+}
+
+#[test]
+fn test_avl_tree2() {
+ use std::cmp::Ordering;
+
+ // Do some simple testing using a custom comparator, which inverts the order
+ // of items in the tree, so as to check custom comparators work right.
+ let mut tree = AVLTree::<u32>::new(0);
+
+ let nums = [31, 41, 59, 27, 14, 35, 62, 25, 18, 28, 45, 90, 61];
+
+ fn reverse_cmp(x: u32, y: u32) -> Option<Ordering> {
+ y.partial_cmp(&x)
+ }
+
+ // Insert
+ for n in &nums {
+ let insert_happened = tree.insert(*n, Some(&reverse_cmp));
+ assert!(insert_happened == true);
+ }
+
+ // Check membership
+ for n in 0..100 {
+ let is_in = tree.contains(n, Some(&reverse_cmp));
+ let should_be_in = nums.iter().any(|m| n == *m);
+ assert!(is_in == should_be_in);
+ }
+
+ // Delete
+ for n in 0..100 {
+ let remove_happened = tree.delete(n, Some(&reverse_cmp));
+ let remove_should_have_happened = nums.iter().any(|m| n == *m);
+ assert!(remove_happened == remove_should_have_happened);
+ }
+
+ // Final check
+ assert!(tree.root == AVL_NULL);
+ assert!(tree.count() == 0);
+}
+
+#[test]
+fn test_avl_tree_iter() {
+ let mut storage = Vec::new();
+ let tree = AVLTree::<u32>::new(0);
+ assert!(tree.iter(&mut storage).next().is_none());
+
+ const FROM: u32 = 0;
+ const TO: u32 = 10000;
+
+ let mut tree = AVLTree::<u32>::new(0);
+ for i in FROM..TO {
+ tree.insert(i, Some(&|a: u32, b: u32| a.partial_cmp(&b)));
+ }
+
+ let as_vec = tree.to_vec();
+ for (i, val) in tree.iter(&mut storage).enumerate() {
+ assert_eq!(as_vec[i], val, "not equal for i={}", i);
+ }
+}
diff --git a/third_party/rust/regalloc/src/bt_coalescing_analysis.rs b/third_party/rust/regalloc/src/bt_coalescing_analysis.rs
new file mode 100644
index 0000000000..0b81de70dc
--- /dev/null
+++ b/third_party/rust/regalloc/src/bt_coalescing_analysis.rs
@@ -0,0 +1,672 @@
+//! Analysis in support of copy coalescing for the backtracking allocator.
+//!
+//! This detects and collects information about all copy coalescing
+//! opportunities in the incoming function. It does not use that information
+//! at all -- that is for the main allocation loop and the spill slot allocator
+//! to do.
+//!
+//! Coalescing analysis creates 3 pieces of information:
+//!
+//! * a map from `VirtualRangeIx` to a set of `Hint`s (see below) which state a
+//! preference for which register that range would prefer to be allocated to.
+//!
+//! * equivalence class groupings for the virtual ranges. Two virtual ranges
+//! will be assigned the same equivalence class if there is a move instruction
+//! that transfers a value from one range to the other. The equivalence
+//! classes created are the transitive closure of this pairwise relation.
+//!
+//! * a simple mapping from instruction index to bool, indicating those
+//! instructions that are moves between virtual registers, and that have been
+//! used to construct the equivalence classes above.
+//!
+//! (It also relies on a mapping from virtual registers to virtual ranges, but in this
+//! version that mapping is supplied precomputed by the caller, as part of the
+//! `RegToRangesMaps` argument, rather than being produced and returned here.)
+
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+use log::{debug, info, log_enabled, Level};
+use smallvec::{smallvec, SmallVec};
+
+use crate::data_structures::{
+ InstIx, InstPoint, Map, MoveInfo, MoveInfoElem, RangeFrag, RangeFragIx, RealRange, RealRangeIx,
+ RealReg, RealRegUniverse, RegToRangesMaps, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
+ VirtualReg,
+};
+use crate::union_find::{ToFromU32, UnionFind, UnionFindEquivClasses};
+use crate::Function;
+
+//=============================================================================
+//
+// *** Some important comments about the interaction between this coalescing
+// *** analysis, the main allocation loop and the spill slot allocator.
+//
+// The main allocation loop tries to assign the same register to all the
+// VirtualRanges in an equivalence class. Similarly, the spill slot allocator
+// tries to allocate the same spill slot to all the VirtualRanges in an
+// equivalence class. In most cases they are successful, and so the moves
+// between those VirtualRanges will later disappear. However, the complete
+// story is not quite so simple.
+//
+// It is only safe to assign the VirtualRanges in the same equivalence class
+// to a single register or spill slot if those VirtualRanges are
+// non-overlapping. That is, if their overall collection of RangeFrags is
+// disjoint. If two such VirtualRanges overlapped, then they could be
+// carrying different values, and so they would need separate registers or
+// spill slots.
+//
+// Most of the time, these equivalence classes are indeed internally
+// non-overlapping. But that's just luck -- that's how the input VCode mostly
+// is. The coalescing analysis *doesn't* properly check for overlaps within an
+// equivalence class, so it can be the case that the members of an equivalence
+// class overlap. The users of that information -- the main allocation loop
+// and the spill slot allocator -- currently check for, and handle, such
+// situations. So the generated allocation is correct.
+//
+// It does, however, cause imprecision and unnecessary spilling, and, in the
+// main allocation loop, slightly increased evictions.
+//
+// The "proper" fix for all this would be to fix the coalescing analysis so as
+// only to build non-internally-overlapping VirtualRange equivalence classes.
+// However, that sounds expensive. Instead there is a half-hearted effort
+// made to avoid creating equivalence classes whose elements (VirtualRanges)
+// overlap. This is done by doing an overlap check on two VirtualRanges
+// connected by a move, and not merging their equivalence classes if they
+// overlap. That helps, but it doesn't completely avoid the problem because
+// there might be overlaps between other members (VirtualRanges) of the
+// about-to-be-merged equivalence classes.
+
+//=============================================================================
+// Coalescing analysis: Hints
+//
+// A coalescing hint for a virtual live range. The u32 is an arbitrary
+// "weight" value which indicates a relative strength-of-preference for the
+// hint. It exists because a VLR can have arbitrarily many copy
+// instructions at its "boundary", and hence arbitrarily many hints. Of
+// course the allocator core can honour at most one of them, so it needs a
+// way to choose between them. In this implementation, the u32s are simply
+// the estimated execution count of the associated copy instruction.
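+//
+// For example, `Hint::SameAs(vlrix_of_v1, 100)` attached to some virtual range means
+// "prefer whatever real register the range `vlrix_of_v1` ends up in", with strength 100,
+// where 100 is the estimated execution count of the copy instruction that produced the
+// hint.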
+#[derive(Clone)]
+pub enum Hint {
+ // I would like to have the same real register as some other virtual range.
+ SameAs(VirtualRangeIx, u32),
+ // I would like to have exactly this real register.
+ Exactly(RealReg, u32),
+}
+fn show_hint(h: &Hint, univ: &RealRegUniverse) -> String {
+ match h {
+ Hint::SameAs(vlrix, weight) => format!("(SameAs {:?}, weight={})", vlrix, weight),
+ Hint::Exactly(rreg, weight) => format!(
+ "(Exactly {}, weight={})",
+ rreg.to_reg().show_with_rru(&univ),
+ weight
+ ),
+ }
+}
+impl Hint {
+ #[inline(always)]
+ fn get_weight(&self) -> u32 {
+ match self {
+ Hint::SameAs(_vlrix, weight) => *weight,
+ Hint::Exactly(_rreg, weight) => *weight,
+ }
+ }
+}
+
+// We need this in order to construct a UnionFind<VirtualRangeIx>.
+impl ToFromU32 for VirtualRangeIx {
+ fn to_u32(x: VirtualRangeIx) -> u32 {
+ x.get()
+ }
+ fn from_u32(x: u32) -> VirtualRangeIx {
+ VirtualRangeIx::new(x)
+ }
+}
+
+//=============================================================================
+// Coalescing analysis: top level function
+
+// This performs coalescing analysis and returns info as a 3-tuple. Note that
+// it also may change the spill costs for some of the VLRs in `vlr_env` to
+// better reflect the spill cost situation in the presence of coalescing.
+#[inline(never)]
+pub fn do_coalescing_analysis<F: Function>(
+ func: &F,
+ univ: &RealRegUniverse,
+ rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+ vlr_env: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ reg_to_ranges_maps: &RegToRangesMaps,
+ move_info: &MoveInfo,
+) -> (
+ TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>>,
+ UnionFindEquivClasses<VirtualRangeIx>,
+ TypedIxVec<InstIx, bool>,
+) {
+ info!("");
+ info!("do_coalescing_analysis: begin");
+
+ // This function contains significant additional complexity due to the requirement to handle
+ // pathological cases in reasonable time without unduly burdening the common cases.
+ //
+ // ========================================================================================
+ //
+ // The core questions that the coalescing analysis asks are:
+ //
+ // For an instruction I and a reg V:
+ //
+ // * does I start a live range fragment for V? In other words, is it a "first def of V" ?
+ //
+ // * and dually: does I end a live range fragment for V? IOW, is it a "last use of V" ?
+ //
+ // V may be a real or virtual register -- we must handle both. I is invariably a move
+ // instruction. We could ask such questions about other kinds of insns, but we don't care
+ // about those.
+ //
+ // The reason we care about this is as follows. If we can find some move insn I, which is
+ // the last use of some reg V1 and the first def of some other reg V2, then V1 and V2 can at
+ // least in principle be allocated to the same real register.
+ //
+ // Note that the "last" and "first" aspect is critical for correctness. Consider this:
+ //
+ // V1 = ...
+ // I V2 = V1
+ // * V2 = V2 - 99
+ // # V3 = V1 + 47
+ //
+ // Here, I might be a first def of V2, but it's certainly not the last use of V1, and so if
+ // we allocate V1 and V2 to the same real register, the insn marked * will trash the value
+ // of V1 while it's still needed at #, and we'll create wrong code. For the same reason, we
+ // can only coalesce out a move if the destination is a first def.
+ //
+ // The use of names V* in the above example is slightly misleading. As mentioned, the above
+ // criteria apply to both real and virtual registers. The only limitation is that,
+ // obviously, we can't coalesce out a move if both registers involved are real. But if only
+ // one is real then we have at least the possibility to do that.
+ //
+ // Now to the question of compiler performance. The simplest way to establish whether (for
+ // example) I is a first def of V is to visit all of V's `RangeFrag`s, to see if any of them
+ // start at `I.d`. That can be done by iterating over all of the live ranges that belong to
+ // V, and through each `RangeFrag` in each live range. Hence it's a linear search through
+ // V's `RangeFrag`s.
+ //
+ // For the vast majority of cases, this works well because most regs -- and especially, most
+ // virtual regs, in code derived from an SSA precursor -- have short live ranges, and
+ // usually only one, and so there are very few `RangeFrag`s to visit. However, there are
+ // cases where a register has many `RangeFrag`s -- we've seen inputs where that number
+ // exceeds 100,000 -- in which case a linear search is disastrously slow.
+ //
+ // To fix this, there is a Plan B scheme for establishing the same facts. It relies on the
+ // observation that the `RangeFrag`s for each register are mutually non-overlapping. Hence
+ // their start points are all unique, so we can park them all in a vector, sort it, and
+ // binary search it. And the same for the end points. This is the purpose of structs
+ // `ManyFragsInfoR` and `ManyFragsInfoV` below.
+ //
+ // Although this plan keeps us out of performance black holes in pathological cases, it is
+ // expensive in a constant-factors sense: it requires dynamic memory allocation for these
+ // vectors, and it requires sorting them. Hence we try to avoid it as much as possible, and
+ // route almost all work via the simple linear-search scheme.
+ //
+ // The linear-vs-binary-search choice is made for each register individually. Incoming
+ // parameter `reg_to_ranges_maps` contains fields `r/vregs_with_many_frags`, and it is only
+ // for those that sorted vectors are prepared. Those vectors are tracked by the maps
+ // `r/v_many_map` below. `reg_to_ranges_maps` also contains field `many_frags_thresh` which
+ // tells us what the size threshold actually was, and this is used to opportunistically
+ // pre-size the vectors. It's not required for correctness.
+ //
+ // All this complexity is brought together in the four closures `doesVRegHaveLastUseAt`,
+ // `doesVRegHaveFirstDefAt`, `doesRRegHaveLastUseAt` and `doesRRegHaveFirstDefAt`. In each
+ // case, they first try to resolve the query by binary search, which usually fails, in which
+ // case they fall back to a linear search, which will always give a correct result. In
+ // debug builds, if the binary search does produce an answer, it is crosschecked against the
+ // linear search result.
+ //
+ // The duplication in the four closures is undesirable but hard to avoid. The real- and
+ // virtual-reg cases have different types. Similarly, the first/last cases are slightly
+ // different. If there were a way to guarantee that rustc would inline closures, then it
+ // might be worth trying to common them up, on the basis that rustc can inline and
+ // specialise, leading back to what we currently have here. However, in the absence of such
+ // a facility, I didn't want to risk it, given that these closures are performance-critical.
+ //
+ // Finally, note that the coalescing analysis proper involves more than just the above
+ // described searches, and one sees the code for the rest of it following the search
+ // closures below. However, the rest of it isn't performance critical, and is not described
+ // in this comment.
+ //
+ // ========================================================================================
+
+ // So, first: for the registers which `reg_to_ranges_maps` tells us have "many" fragments,
+ // prepare the binary-search vectors. This is done first for the real regs and then below
+ // for virtual regs.
+
+ struct ManyFragsInfoR {
+ sorted_firsts: Vec<(InstPoint, RealRangeIx)>,
+ sorted_lasts: Vec<(InstPoint, RealRangeIx)>,
+ }
+ let r_many_card = reg_to_ranges_maps.rregs_with_many_frags.len();
+ let mut r_many_map = Map::<u32 /*RealReg index*/, ManyFragsInfoR>::default();
+ r_many_map.reserve(r_many_card);
+
+ for rreg_no in &reg_to_ranges_maps.rregs_with_many_frags {
+ // `2 * reg_to_ranges_maps.many_frags_thresh` is clearly a heuristic hack, but we do
+ // know for sure that each vector will contain at least
+ // `reg_to_ranges_maps.many_frags_thresh` and very likely more. And that threshold is
+ // already quite high, so pre-sizing the vectors at this point avoids quite a number of
+ // resize-reallocations later.
+ let mut many_frags_info = ManyFragsInfoR {
+ sorted_firsts: Vec::with_capacity(2 * reg_to_ranges_maps.many_frags_thresh),
+ sorted_lasts: Vec::with_capacity(2 * reg_to_ranges_maps.many_frags_thresh),
+ };
+ let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[*rreg_no as usize];
+ for rlrix in rlrixs {
+ for fix in &rlr_env[*rlrix].sorted_frags.frag_ixs {
+ let frag = &frag_env[*fix];
+ many_frags_info.sorted_firsts.push((frag.first, *rlrix));
+ many_frags_info.sorted_lasts.push((frag.last, *rlrix));
+ }
+ }
+ many_frags_info
+ .sorted_firsts
+ .sort_unstable_by_key(|&(point, _)| point);
+ many_frags_info
+ .sorted_lasts
+ .sort_unstable_by_key(|&(point, _)| point);
+ debug_assert!(many_frags_info.sorted_firsts.len() == many_frags_info.sorted_lasts.len());
+ // Because the RangeFrags for any reg (virtual or real) are non-overlapping, it follows
+ // that both the sorted first points and sorted last points contain no duplicates. (In
+ // fact the implied condition (no duplicates) is weaker than the premise
+ // (non-overlapping), but this is nevertheless correct.)
+ for i in 1..(many_frags_info.sorted_firsts.len()) {
+ debug_assert!(
+ many_frags_info.sorted_firsts[i - 1].0 < many_frags_info.sorted_firsts[i].0
+ );
+ }
+ for i in 1..(many_frags_info.sorted_lasts.len()) {
+ debug_assert!(
+ many_frags_info.sorted_lasts[i - 1].0 < many_frags_info.sorted_lasts[i].0
+ );
+ }
+ r_many_map.insert(*rreg_no, many_frags_info);
+ }
+
+ // And the same for virtual regs.
+ struct ManyFragsInfoV {
+ sorted_firsts: Vec<(InstPoint, VirtualRangeIx)>,
+ sorted_lasts: Vec<(InstPoint, VirtualRangeIx)>,
+ }
+ let v_many_card = reg_to_ranges_maps.vregs_with_many_frags.len();
+ let mut v_many_map = Map::<u32 /*VirtualReg index*/, ManyFragsInfoV>::default();
+ v_many_map.reserve(v_many_card);
+
+ for vreg_no in &reg_to_ranges_maps.vregs_with_many_frags {
+ let mut many_frags_info = ManyFragsInfoV {
+ sorted_firsts: Vec::with_capacity(2 * reg_to_ranges_maps.many_frags_thresh),
+ sorted_lasts: Vec::with_capacity(2 * reg_to_ranges_maps.many_frags_thresh),
+ };
+ let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[*vreg_no as usize];
+ for vlrix in vlrixs {
+ for frag in &vlr_env[*vlrix].sorted_frags.frags {
+ many_frags_info.sorted_firsts.push((frag.first, *vlrix));
+ many_frags_info.sorted_lasts.push((frag.last, *vlrix));
+ }
+ }
+ many_frags_info
+ .sorted_firsts
+ .sort_unstable_by_key(|&(point, _)| point);
+ many_frags_info
+ .sorted_lasts
+ .sort_unstable_by_key(|&(point, _)| point);
+ debug_assert!(many_frags_info.sorted_firsts.len() == many_frags_info.sorted_lasts.len());
+ for i in 1..(many_frags_info.sorted_firsts.len()) {
+ debug_assert!(
+ many_frags_info.sorted_firsts[i - 1].0 < many_frags_info.sorted_firsts[i].0
+ );
+ }
+ for i in 1..(many_frags_info.sorted_lasts.len()) {
+ debug_assert!(
+ many_frags_info.sorted_lasts[i - 1].0 < many_frags_info.sorted_lasts[i].0
+ );
+ }
+ v_many_map.insert(*vreg_no, many_frags_info);
+ }
+
+ // There now follows the abovementioned four (well, actually, eight) closures, which are
+ // used to find out whether a real or virtual reg has a last use or first def at some
+ // instruction. This is the central activity of the coalescing analysis -- finding move
+ // instructions that are the last use of the src reg and the first def of the dst reg.
+
+ // ---------------- Range checks for VirtualRegs: last use ----------------
+ // Checks whether `vreg` has a last use at `iix`.u.
+
+ let doesVRegHaveLastUseAt_LINEAR = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
+ let point_to_find = InstPoint::new_use(iix);
+ let vreg_no = vreg.get_index();
+ let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[vreg_no];
+ for vlrix in vlrixs {
+ for frag in &vlr_env[*vlrix].sorted_frags.frags {
+ if frag.last == point_to_find {
+ return Some(*vlrix);
+ }
+ }
+ }
+ None
+ };
+ let doesVRegHaveLastUseAt = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
+ let point_to_find = InstPoint::new_use(iix);
+ let vreg_no = vreg.get_index();
+ let mut binary_search_result = None;
+ if let Some(ref mfi) = v_many_map.get(&(vreg_no as u32)) {
+ match mfi
+ .sorted_lasts
+ .binary_search_by_key(&point_to_find, |(point, _)| *point)
+ {
+ Ok(found_at_ix) => binary_search_result = Some(mfi.sorted_lasts[found_at_ix].1),
+ Err(_) => {}
+ }
+ }
+ match binary_search_result {
+ None => doesVRegHaveLastUseAt_LINEAR(vreg, iix),
+ Some(_) => {
+ debug_assert!(binary_search_result == doesVRegHaveLastUseAt_LINEAR(vreg, iix));
+ binary_search_result
+ }
+ }
+ };
+
+ // ---------------- Range checks for VirtualRegs: first def ----------------
+ // Checks whether `vreg` has a first def at `iix`.d.
+
+ let doesVRegHaveFirstDefAt_LINEAR = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
+ let point_to_find = InstPoint::new_def(iix);
+ let vreg_no = vreg.get_index();
+ let vlrixs = &reg_to_ranges_maps.vreg_to_vlrs_map[vreg_no];
+ for vlrix in vlrixs {
+ for frag in &vlr_env[*vlrix].sorted_frags.frags {
+ if frag.first == point_to_find {
+ return Some(*vlrix);
+ }
+ }
+ }
+ None
+ };
+ let doesVRegHaveFirstDefAt = |vreg: VirtualReg, iix: InstIx| -> Option<VirtualRangeIx> {
+ let point_to_find = InstPoint::new_def(iix);
+ let vreg_no = vreg.get_index();
+ let mut binary_search_result = None;
+ if let Some(ref mfi) = v_many_map.get(&(vreg_no as u32)) {
+ match mfi
+ .sorted_firsts
+ .binary_search_by_key(&point_to_find, |(point, _)| *point)
+ {
+ Ok(found_at_ix) => binary_search_result = Some(mfi.sorted_firsts[found_at_ix].1),
+ Err(_) => {}
+ }
+ }
+ match binary_search_result {
+ None => doesVRegHaveFirstDefAt_LINEAR(vreg, iix),
+ Some(_) => {
+ debug_assert!(binary_search_result == doesVRegHaveFirstDefAt_LINEAR(vreg, iix));
+ binary_search_result
+ }
+ }
+ };
+
+ // ---------------- Range checks for RealRegs: last use ----------------
+ // Checks whether `rreg` has a last use at `iix`.u.
+
+ let doesRRegHaveLastUseAt_LINEAR = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
+ let point_to_find = InstPoint::new_use(iix);
+ let rreg_no = rreg.get_index();
+ let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[rreg_no];
+ for rlrix in rlrixs {
+ let frags = &rlr_env[*rlrix].sorted_frags;
+ for fix in &frags.frag_ixs {
+ let frag = &frag_env[*fix];
+ if frag.last == point_to_find {
+ return Some(*rlrix);
+ }
+ }
+ }
+ None
+ };
+ let doesRRegHaveLastUseAt = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
+ let point_to_find = InstPoint::new_use(iix);
+ let rreg_no = rreg.get_index();
+ let mut binary_search_result = None;
+ if let Some(ref mfi) = r_many_map.get(&(rreg_no as u32)) {
+ match mfi
+ .sorted_lasts
+ .binary_search_by_key(&point_to_find, |(point, _)| *point)
+ {
+ Ok(found_at_ix) => binary_search_result = Some(mfi.sorted_lasts[found_at_ix].1),
+ Err(_) => {}
+ }
+ }
+ match binary_search_result {
+ None => doesRRegHaveLastUseAt_LINEAR(rreg, iix),
+ Some(_) => {
+ debug_assert!(binary_search_result == doesRRegHaveLastUseAt_LINEAR(rreg, iix));
+ binary_search_result
+ }
+ }
+ };
+
+ // ---------------- Range checks for RealRegs: first def ----------------
+ // Checks whether `rreg` has a first def at `iix`.d.
+
+ let doesRRegHaveFirstDefAt_LINEAR = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
+ let point_to_find = InstPoint::new_def(iix);
+ let rreg_no = rreg.get_index();
+ let rlrixs = &reg_to_ranges_maps.rreg_to_rlrs_map[rreg_no];
+ for rlrix in rlrixs {
+ let frags = &rlr_env[*rlrix].sorted_frags;
+ for fix in &frags.frag_ixs {
+ let frag = &frag_env[*fix];
+ if frag.first == point_to_find {
+ return Some(*rlrix);
+ }
+ }
+ }
+ None
+ };
+ let doesRRegHaveFirstDefAt = |rreg: RealReg, iix: InstIx| -> Option<RealRangeIx> {
+ let point_to_find = InstPoint::new_def(iix);
+ let rreg_no = rreg.get_index();
+ let mut binary_search_result = None;
+ if let Some(ref mfi) = r_many_map.get(&(rreg_no as u32)) {
+ match mfi
+ .sorted_firsts
+ .binary_search_by_key(&point_to_find, |(point, _)| *point)
+ {
+ Ok(found_at_ix) => binary_search_result = Some(mfi.sorted_firsts[found_at_ix].1),
+ Err(_) => {}
+ }
+ }
+ match binary_search_result {
+ None => doesRRegHaveFirstDefAt_LINEAR(rreg, iix),
+ Some(_) => {
+ debug_assert!(binary_search_result == doesRRegHaveFirstDefAt_LINEAR(rreg, iix));
+ binary_search_result
+ }
+ }
+ };
+
+ // Finally we come to the core logic of the coalescing analysis. It uses the complex
+ // hybrid-search mechanism described extensively above. Note, however, that the comments
+ // above don't describe any of the logic after this point.
+
+ // RETURNED TO CALLER
+ // Hints for each VirtualRange. Note that the SmallVecs could contain duplicates, I
+ // suppose, for example if there are two identical copy insns at different points on the
+ // "boundary" for some VLR. I don't think it matters though since we're going to rank the
+ // hints by strength and then choose at most one.
+ let mut hints = TypedIxVec::<VirtualRangeIx, SmallVec<[Hint; 8]>>::new();
+ hints.resize(vlr_env.len(), smallvec![]);
+
+ // RETURNED TO CALLER
+ // A vector that simply records which insns are v-to-v boundary moves, as established by the
+ // analysis below. This info is collected here because (1) the caller (BT) needs to have it
+ // and (2) this is the first point at which we can efficiently compute it.
+ let mut is_vv_boundary_move = TypedIxVec::<InstIx, bool>::new();
+ is_vv_boundary_move.resize(func.insns().len() as u32, false);
+
+ // RETURNED TO CALLER (after finalisation)
+ // The virtual-to-virtual equivalence classes we're collecting.
+ let mut vlrEquivClassesUF = UnionFind::<VirtualRangeIx>::new(vlr_env.len() as usize);
+
+ // Not returned to caller; for use only in this function.
+ // A list of `VirtualRange`s for which the `total_cost` (hence also their
+ // `spill_cost`) should be adjusted downwards by the supplied `u32`. We
+ // can't do this directly in the loop below due to borrowing constraints,
+ // hence we collect the required info in this vector and do it in a second
+ // loop.
+ let mut decVLRcosts = Vec::<(VirtualRangeIx, VirtualRangeIx, u32)>::new();
+
+ for MoveInfoElem {
+ dst,
+ src,
+ iix,
+ est_freq,
+ ..
+ } in &move_info.moves
+ {
+ debug!(
+ "connected by moves: {:?} {:?} <- {:?} (est_freq {})",
+ iix, dst, src, est_freq
+ );
+ match (dst.is_virtual(), src.is_virtual()) {
+ (true, true) => {
+ // Check for a V <- V hint.
+ let srcV = src.to_virtual_reg();
+ let dstV = dst.to_virtual_reg();
+ let mb_vlrixSrc = doesVRegHaveLastUseAt(srcV, *iix);
+ let mb_vlrixDst = doesVRegHaveFirstDefAt(dstV, *iix);
+ if mb_vlrixSrc.is_some() && mb_vlrixDst.is_some() {
+ let vlrixSrc = mb_vlrixSrc.unwrap();
+ let vlrixDst = mb_vlrixDst.unwrap();
+ // Per block comment at top of file, make a half-hearted
+ // attempt to avoid creating equivalence classes with
+ // internal overlaps. Note this can't be completely
+ // effective as presently implemented.
+ if !vlr_env[vlrixSrc].overlaps(&vlr_env[vlrixDst]) {
+ // Add hints for both VLRs, since we don't know which one will
+ // assign first. Indeed, a VLR may be assigned and un-assigned
+ // arbitrarily many times.
+ hints[vlrixSrc].push(Hint::SameAs(vlrixDst, *est_freq));
+ hints[vlrixDst].push(Hint::SameAs(vlrixSrc, *est_freq));
+ vlrEquivClassesUF.union(vlrixDst, vlrixSrc);
+ is_vv_boundary_move[*iix] = true;
+ // Reduce the total cost, and hence the spill cost, of
+ // both `vlrixSrc` and `vlrixDst`. This is so as to reduce to
+ // zero the cost of a VLR whose only instructions are its
+ // v-v boundary copies.
+ debug!("reduce cost of {:?} and {:?}", vlrixSrc, vlrixDst);
+ decVLRcosts.push((vlrixSrc, vlrixDst, 1 * est_freq));
+ }
+ }
+ }
+ (true, false) => {
+ // Check for a V <- R hint.
+ let srcR = src.to_real_reg();
+ let dstV = dst.to_virtual_reg();
+ let mb_rlrSrc = doesRRegHaveLastUseAt(srcR, *iix);
+ let mb_vlrDst = doesVRegHaveFirstDefAt(dstV, *iix);
+ if mb_rlrSrc.is_some() && mb_vlrDst.is_some() {
+ let vlrDst = mb_vlrDst.unwrap();
+ hints[vlrDst].push(Hint::Exactly(srcR, *est_freq));
+ }
+ }
+ (false, true) => {
+ // Check for a R <- V hint.
+ let srcV = src.to_virtual_reg();
+ let dstR = dst.to_real_reg();
+ let mb_vlrSrc = doesVRegHaveLastUseAt(srcV, *iix);
+ let mb_rlrDst = doesRRegHaveFirstDefAt(dstR, *iix);
+ if mb_vlrSrc.is_some() && mb_rlrDst.is_some() {
+ let vlrSrc = mb_vlrSrc.unwrap();
+ hints[vlrSrc].push(Hint::Exactly(dstR, *est_freq));
+ }
+ }
+ (false, false) => {
+ // This is a real-to-real move. There's nothing we can do. Ignore it.
+ }
+ }
+ }
+
+ // Now decrease the `total_cost` and `spill_cost` fields of selected
+ // `VirtualRange`s, as detected by the previous loop. Don't decrease the
+ // `spill_cost` literally to zero; doing that causes various assertion
+ // failures and boundary problems later on, in the `CommitmentMap`s. In
+ // such a case, make the `spill_cost` be tiny but nonzero.
+ fn decrease_vlr_total_cost_by(vlr: &mut VirtualRange, decrease_total_cost_by: u32) {
+ // Adjust `total_cost`.
+ if vlr.total_cost < decrease_total_cost_by {
+ vlr.total_cost = 0;
+ } else {
+ vlr.total_cost -= decrease_total_cost_by;
+ }
+ // And recompute `spill_cost` accordingly.
+ if vlr.total_cost == 0 {
+ vlr.spill_cost = SpillCost::finite(1.0e-6);
+ } else {
+ assert!(vlr.size > 0);
+ vlr.spill_cost = SpillCost::finite(vlr.total_cost as f32 / vlr.size as f32);
+ }
+ }
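+
+ // Worked example (illustrative numbers only): a VLR with total_cost = 6 and size = 3 has
+ // spill_cost = 6.0 / 3.0 = 2.0. Subtracting a boundary-copy est_freq of 6 drives
+ // total_cost to 0, at which point spill_cost is clamped to the tiny-but-nonzero 1.0e-6
+ // rather than literally zero, for the reasons given above.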
+
+ for (vlrix1, vlrix2, decrease_total_cost_by) in decVLRcosts {
+ decrease_vlr_total_cost_by(&mut vlr_env[vlrix1], decrease_total_cost_by);
+ decrease_vlr_total_cost_by(&mut vlr_env[vlrix2], decrease_total_cost_by);
+ }
+
+ // For the convenience of the allocator core, sort the hints for each VLR so
+ // as to move the most preferred to the front.
+ for hints_for_one_vlr in hints.iter_mut() {
+ hints_for_one_vlr.sort_by(|h1, h2| h2.get_weight().partial_cmp(&h1.get_weight()).unwrap());
+ }
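+
+ // The comparator above sorts descending by weight by swapping the comparands. The same
+ // idiom on plain `f32` weights (sketch, std only):
+ //
+ //    let mut hints = vec![("a", 1.5f32), ("b", 3.0), ("c", 0.5)];
+ //    hints.sort_by(|h1, h2| h2.1.partial_cmp(&h1.1).unwrap());
+ //    assert_eq!(hints[0].0, "b"); // heaviest hint first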
+
+ let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> =
+ vlrEquivClassesUF.get_equiv_classes();
+
+ if log_enabled!(Level::Debug) {
+ debug!("Revised VLRs:");
+ let mut n = 0;
+ for vlr in vlr_env.iter() {
+ debug!("{:<4?} {:?}", VirtualRangeIx::new(n), vlr);
+ n += 1;
+ }
+
+ debug!("Coalescing hints:");
+ n = 0;
+ for hints_for_one_vlr in hints.iter() {
+ let mut s = "".to_string();
+ for hint in hints_for_one_vlr {
+ s = s + &show_hint(hint, &univ) + &" ".to_string();
+ }
+ debug!(" hintsfor {:<4?} = {}", VirtualRangeIx::new(n), s);
+ n += 1;
+ }
+
+ for n in 0..vlr_env.len() {
+ let vlrix = VirtualRangeIx::new(n);
+ let mut tmpvec = vec![];
+ for elem in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
+ tmpvec.reverse();
+ tmpvec.push(elem);
+ tmpvec.reverse();
+ }
+ debug!(" eclassof {:?} = {:?}", vlrix, tmpvec);
+ }
+
+ for (b, i) in is_vv_boundary_move.iter().zip(0..) {
+ if *b {
+ debug!(" vv_boundary_move at {:?}", InstIx::new(i));
+ }
+ }
+ }
+
+ info!("do_coalescing_analysis: end");
+ info!("");
+
+ (hints, vlrEquivClasses, is_vv_boundary_move)
+}
diff --git a/third_party/rust/regalloc/src/bt_commitment_map.rs b/third_party/rust/regalloc/src/bt_commitment_map.rs
new file mode 100644
index 0000000000..03c989321c
--- /dev/null
+++ b/third_party/rust/regalloc/src/bt_commitment_map.rs
@@ -0,0 +1,170 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+//! Backtracking allocator: per-real-register commitment maps
+
+use std::cmp::Ordering;
+use std::fmt;
+
+use crate::avl_tree::{AVLTree, AVL_NULL};
+use crate::data_structures::{
+ cmp_range_frags, InstPoint, RangeFrag, RangeFragIx, RangeId, SortedRangeFragIxs,
+ SortedRangeFrags, TypedIxVec,
+};
+
+//=============================================================================
+// Per-real-register commitment maps
+//
+
+// Something that pairs a fragment index with the identity of the virtual or real range to which
+// this fragment conceptually "belongs", at least for the purposes of this commitment map. If
+// the `lr_id` field denotes a real range, the associated fragment belongs to a real-reg live
+// range and is therefore non-evictable. The identity of the range is necessary because:
+//
+// * for VirtualRanges, (1) we may need to evict the mapping, so we will need to get hold of the
+// VirtualRange, so that we have all fragments of the VirtualRange to hand, and (2) if the
+// client requires stackmaps, we need to look at the VirtualRange to see if it is reftyped.
+//
+// * for RealRanges, only (2) applies; (1) is irrelevant since RealRange assignments are
+// non-evictable.
+//
+// (A fragment merely denotes a sequence of instruction (points), but within the context of a
+// commitment map for a real register, obviously any particular fragment can't be part of two
+// different virtual live ranges.)
+//
+// Note that we don't intend to actually use the PartialOrd methods for RangeFragAndRangeId.
+// However, they need to exist since we want to construct an AVLTree<RangeFragAndRangeId>, and
+// that requires PartialOrd for its element type. For working with such trees we will supply
+// our own comparison function; hence PartialOrd here serves only to placate the typechecker.
+// It should never actually be used.
+#[derive(Clone)]
+pub struct RangeFragAndRangeId {
+ pub frag: RangeFrag,
+ pub id: RangeId,
+}
+impl RangeFragAndRangeId {
+ fn new(frag: RangeFrag, id: RangeId) -> Self {
+ Self { frag, id }
+ }
+}
+impl PartialEq for RangeFragAndRangeId {
+ fn eq(&self, _other: &Self) -> bool {
+ // See comments above.
+ panic!("impl PartialEq for RangeFragAndRangeId: should never be used");
+ }
+}
+impl PartialOrd for RangeFragAndRangeId {
+ fn partial_cmp(&self, _other: &Self) -> Option<Ordering> {
+ // See comments above.
+ panic!("impl PartialOrd for RangeFragAndRangeId: should never be used");
+ }
+}
+impl fmt::Debug for RangeFragAndRangeId {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "(FnV {:?} {:?})", self.frag, self.id)
+ }
+}
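+
+ // Note how this plays out below: every `insert`/`delete` on the tree in this file passes
+ // an explicit comparator closure wrapping `cmp_range_frags`, in the shape (sketch of the
+ // call form used later in this file):
+ //
+ //    tree.insert(item, Some(&|a: RangeFragAndRangeId, b: RangeFragAndRangeId| {
+ //        cmp_range_frags(&a.frag, &b.frag)
+ //    }));
+ //
+ // so the panicking PartialEq/PartialOrd implementations above are never reached.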
+
+//=============================================================================
+// Per-real-register commitment maps
+//
+
+// This indicates the current set of fragments to which some real register is
+// currently "committed". The fragments *must* be non-overlapping. Hence
+// they form a total order, and so we may validly build an AVL tree of them.
+
+pub struct CommitmentMap {
+ pub tree: AVLTree<RangeFragAndRangeId>,
+}
+impl fmt::Debug for CommitmentMap {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let as_vec = self.tree.to_vec();
+ as_vec.fmt(fmt)
+ }
+}
+
+impl CommitmentMap {
+ pub fn new() -> Self {
+ // The AVL tree constructor needs a default value for the elements. It
+ // will never be used. The RangeId index value will show as
+ // obviously bogus if we ever try to "dereference" any part of it.
+ let dflt = RangeFragAndRangeId::new(RangeFrag::invalid_value(), RangeId::invalid_value());
+ Self {
+ tree: AVLTree::<RangeFragAndRangeId>::new(dflt),
+ }
+ }
+
+ pub fn add(&mut self, to_add_frags: &SortedRangeFrags, to_add_lr_id: RangeId) {
+ for frag in &to_add_frags.frags {
+ let to_add = RangeFragAndRangeId::new(frag.clone(), to_add_lr_id);
+ let added = self.tree.insert(
+ to_add,
+ Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
+ cmp_range_frags(&pair1.frag, &pair2.frag)
+ }),
+ );
+ // If this fails, it means the fragment overlaps one that has already
+ // been committed to. That's a serious error.
+ assert!(added);
+ }
+ }
+
+ pub fn add_indirect(
+ &mut self,
+ to_add_frags: &SortedRangeFragIxs,
+ to_add_lr_id: RangeId,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ ) {
+ for fix in &to_add_frags.frag_ixs {
+ let to_add = RangeFragAndRangeId::new(frag_env[*fix].clone(), to_add_lr_id);
+ let added = self.tree.insert(
+ to_add,
+ Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
+ cmp_range_frags(&pair1.frag, &pair2.frag)
+ }),
+ );
+ // If this fails, it means the fragment overlaps one that has already
+ // been committed to. That's a serious error.
+ assert!(added);
+ }
+ }
+
+ pub fn del(&mut self, to_del_frags: &SortedRangeFrags) {
+ for frag in &to_del_frags.frags {
+ // re RangeId::invalid_value(): we don't care what the RangeId is, since we're
+ // deleting by RangeFrags alone.
+ let to_del = RangeFragAndRangeId::new(frag.clone(), RangeId::invalid_value());
+ let deleted = self.tree.delete(
+ to_del,
+ Some(&|pair1: RangeFragAndRangeId, pair2: RangeFragAndRangeId| {
+ cmp_range_frags(&pair1.frag, &pair2.frag)
+ }),
+ );
+ // If this fails, it means the fragment wasn't already committed to.
+ // That's also a serious error.
+ assert!(deleted);
+ }
+ }
+
+ // Find the RangeId for the RangeFrag that overlaps `pt`, if one exists.
+ // This is conceptually equivalent to LogicalSpillSlot::get_refness_at_inst_point.
+ pub fn lookup_inst_point(&self, pt: InstPoint) -> Option<RangeId> {
+ let mut root = self.tree.root;
+ while root != AVL_NULL {
+ let root_node = &self.tree.pool[root as usize];
+ let root_item = &root_node.item;
+ if pt < root_item.frag.first {
+ // `pt` is to the left of the `root`. So there's no
+ // overlap with `root`. Continue by inspecting the left subtree.
+ root = root_node.left;
+ } else if root_item.frag.last < pt {
+ // Ditto for the right subtree.
+ root = root_node.right;
+ } else {
+ // `pt` overlaps the `root`, so we have what we want.
+ return Some(root_item.id);
+ }
+ }
+ None
+ }
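+
+ // The lookup above is a stabbing query over disjoint closed intervals held in a search
+ // tree. The same query over a sorted Vec of disjoint (first, last, id) triples can be
+ // written with a plain binary search (generic sketch, std only, not used here):
+ //
+ //    fn lookup(intervals: &[(u32, u32, usize)], pt: u32) -> Option<usize> {
+ //        // Index of the first interval whose start is strictly greater than `pt` ...
+ //        let ix = intervals.partition_point(|&(first, _, _)| first <= pt);
+ //        if ix == 0 { return None; }
+ //        // ... so the only possible hit is the interval just before it.
+ //        let (_, last, id) = intervals[ix - 1];
+ //        if pt <= last { Some(id) } else { None }
+ //    }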
+}
diff --git a/third_party/rust/regalloc/src/bt_main.rs b/third_party/rust/regalloc/src/bt_main.rs
new file mode 100644
index 0000000000..9c33348667
--- /dev/null
+++ b/third_party/rust/regalloc/src/bt_main.rs
@@ -0,0 +1,1844 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+//! Core implementation of the backtracking allocator.
+
+use log::{debug, info, log_enabled, Level};
+use smallvec::SmallVec;
+use std::default;
+use std::fmt;
+
+use crate::analysis_data_flow::{add_raw_reg_vecs_for_insn, does_inst_use_def_or_mod_reg};
+use crate::analysis_main::{run_analysis, AnalysisInfo};
+use crate::avl_tree::{AVLTree, AVL_NULL};
+use crate::bt_coalescing_analysis::{do_coalescing_analysis, Hint};
+use crate::bt_commitment_map::{CommitmentMap, RangeFragAndRangeId};
+use crate::bt_spillslot_allocator::SpillSlotAllocator;
+use crate::bt_vlr_priority_queue::VirtualRangePrioQ;
+use crate::data_structures::{
+ BlockIx, InstIx, InstPoint, Map, Point, RangeFrag, RangeFragIx, RangeId, RealRange,
+ RealRangeIx, RealReg, RealRegUniverse, Reg, RegClass, RegVecBounds, RegVecs, RegVecsAndBounds,
+ Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec, VirtualRange, VirtualRangeIx,
+ VirtualReg, Writable,
+};
+use crate::inst_stream::{
+ edit_inst_stream, ExtPoint, InstExtPoint, InstToInsert, InstToInsertAndExtPoint,
+};
+use crate::sparse_set::SparseSetU;
+use crate::union_find::UnionFindEquivClasses;
+use crate::{AlgorithmWithDefaults, Function, RegAllocError, RegAllocResult, StackmapRequestInfo};
+
+#[derive(Clone)]
+pub struct BacktrackingOptions {
+ /// Should the register allocator generate block annotations?
+ pub request_block_annotations: bool,
+}
+
+impl default::Default for BacktrackingOptions {
+ fn default() -> Self {
+ Self {
+ request_block_annotations: false,
+ }
+ }
+}
+
+impl fmt::Debug for BacktrackingOptions {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "backtracking (block annotations: {})",
+ self.request_block_annotations
+ )
+ }
+}
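+
+ // For example (illustrative): a client wanting per-block annotations would construct
+ //
+ //    let opts = BacktrackingOptions { request_block_annotations: true };
+ //
+ // and pass `&opts` to `alloc_main` below; the default keeps annotations off.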
+
+//=============================================================================
+// The per-real-register state
+//
+// Relevant methods are expected to be parameterised by the same VirtualRange
+// env as used in calls to `VirtualRangePrioQ`.
+
+struct PerRealReg {
+ // The current committed fragments for this RealReg.
+ committed: CommitmentMap,
+
+ // The set of VirtualRanges which have been assigned to this RealReg. The
+ // union of their frags will be equal to `committed` only if this RealReg
+ // has no RealRanges. If this RealReg does have RealRanges the
+ // aforementioned union will be exactly the subset of `committed` not used
+ // by the RealRanges.
+ vlrixs_assigned: Set<VirtualRangeIx>,
+}
+impl PerRealReg {
+ fn new() -> Self {
+ Self {
+ committed: CommitmentMap::new(),
+ vlrixs_assigned: Set::<VirtualRangeIx>::empty(),
+ }
+ }
+
+ #[inline(never)]
+ fn add_RealRange(
+ &mut self,
+ to_add_rlrix: RealRangeIx,
+ rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+ frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+ ) {
+ // Commit this register to `to_add`, irrevocably. Don't add it to `vlrixs_assigned`
+ // since we will never want to later evict the assignment. (Also, from a types point of
+ // view that would be impossible.)
+ let to_add_rlr = &rlr_env[to_add_rlrix];
+ self.committed.add_indirect(
+ &to_add_rlr.sorted_frags,
+ RangeId::new_real(to_add_rlrix),
+ frag_env,
+ );
+ }
+
+ #[inline(never)]
+ fn add_VirtualRange(
+ &mut self,
+ to_add_vlrix: VirtualRangeIx,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ ) {
+ let to_add_vlr = &vlr_env[to_add_vlrix];
+ self.committed
+ .add(&to_add_vlr.sorted_frags, RangeId::new_virtual(to_add_vlrix));
+ assert!(!self.vlrixs_assigned.contains(to_add_vlrix));
+ self.vlrixs_assigned.insert(to_add_vlrix);
+ }
+
+ #[inline(never)]
+ fn del_VirtualRange(
+ &mut self,
+ to_del_vlrix: VirtualRangeIx,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ ) {
+ // Remove it from `vlrixs_assigned`
+ // FIXME 2020June18: we could do this more efficiently by inspecting
+ // the return value from `delete`.
+ if self.vlrixs_assigned.contains(to_del_vlrix) {
+ self.vlrixs_assigned.delete(to_del_vlrix);
+ } else {
+ panic!("PerRealReg: del_VirtualRange on VR not in vlrixs_assigned");
+ }
+ // Remove it from `committed`
+ let to_del_vlr = &vlr_env[to_del_vlrix];
+ self.committed.del(&to_del_vlr.sorted_frags);
+ }
+}
+
+// HELPER FUNCTION
+// For a given `RangeFrag`, traverse the commitment tree rooted at `root`,
+// adding to `running_set` the set of VLRIxs that the frag intersects, and
+// adding their spill costs to `running_cost`. Return false if, for one of
+// the 4 reasons documented below, the traversal has been abandoned, and true
+// if the search completed successfully.
+fn search_commitment_tree<IsAllowedToEvict>(
+ // The running state, threaded through the tree traversal. These
+ // accumulate ranges and costs as we traverse the tree. These are mutable
+ // because our caller (`find_evict_set`) will want to try and allocate
+ // multiple `RangeFrag`s in this tree, not just a single one, and so it
+ // needs a way to accumulate the total evict-cost and evict-set for all
+ // the `RangeFrag`s it iterates over.
+ running_set: &mut SparseSetU<[VirtualRangeIx; 4]>,
+ running_cost: &mut SpillCost,
+ // The tree to search.
+ tree: &AVLTree<RangeFragAndRangeId>,
+ // The RangeFrag we want to accommodate.
+ pair_frag: &RangeFrag,
+ spill_cost_budget: &SpillCost,
+ allowed_to_evict: &IsAllowedToEvict,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+) -> bool
+where
+ IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
+{
+ let mut stack = SmallVec::<[u32; 32]>::new();
+ assert!(tree.root != AVL_NULL);
+ stack.push(tree.root);
+
+ while let Some(curr) = stack.pop() {
+ let curr_node = &tree.pool[curr as usize];
+ let curr_node_item = &curr_node.item;
+ let curr_frag = &curr_node_item.frag;
+
+ // Figure out whether `pair_frag` overlaps the root of the current
+ // subtree.
+ let overlaps_curr = pair_frag.last >= curr_frag.first && pair_frag.first <= curr_frag.last;
+
+ // Let's first consider the current node. If we need it but it's not
+ // evictable, we might as well stop now.
+ if overlaps_curr {
+ // This frag is committed to a real range, not a virtual one, and hence is not
+ // evictable.
+ if curr_node_item.id.is_real() {
+ return false;
+ }
+ // Maybe this one is a spill range, in which case, it can't be
+ // evicted.
+ let vlrix_to_evict = curr_node_item.id.to_virtual();
+ let vlr_to_evict = &vlr_env[vlrix_to_evict];
+ if vlr_to_evict.spill_cost.is_infinite() {
+ return false;
+ }
+ // Check that this range alone doesn't exceed our total spill
+ // cost. NB: given the check XXX below, this isn't actually
+ // necessary; however it means that we avoid doing two
+ // SparseSet::contains operations before exiting. This saves
+ // around 0.3% instruction count for large inputs.
+ if !vlr_to_evict.spill_cost.is_less_than(spill_cost_budget) {
+ return false;
+ }
+ // Maybe our caller doesn't want us to evict this one.
+ if !allowed_to_evict(vlrix_to_evict) {
+ return false;
+ }
+ // Ok! We can evict the current node. Update the running state
+ // accordingly. Note that we may be presented with the same VLRIx
+ // to evict multiple times, so we must be careful to add the cost
+ // of it only once.
+ if !running_set.contains(vlrix_to_evict) {
+ let mut tmp_cost = *running_cost;
+ tmp_cost.add(&vlr_to_evict.spill_cost);
+ // See above XXX
+ if !tmp_cost.is_less_than(spill_cost_budget) {
+ return false;
+ }
+ *running_cost = tmp_cost;
+ running_set.insert(vlrix_to_evict);
+ }
+ }
+
+ // Now figure out if we need to visit the subtrees, and if so push the
+ // relevant nodes. Whether we visit the left or right subtree first
+ // is unimportant, at least from a correctness perspective.
+ let must_check_left = pair_frag.first < curr_frag.first;
+ if must_check_left {
+ let left_of_curr = tree.pool[curr as usize].left;
+ if left_of_curr != AVL_NULL {
+ stack.push(left_of_curr);
+ }
+ }
+
+ let must_check_right = pair_frag.last > curr_frag.last;
+ if must_check_right {
+ let right_of_curr = tree.pool[curr as usize].right;
+ if right_of_curr != AVL_NULL {
+ stack.push(right_of_curr);
+ }
+ }
+ }
+
+ // If we get here, it means that `pair_frag` can be accommodated if we
+ // evict all the frags it overlaps in `tree`.
+ //
+ // Stay sane ..
+ assert!(running_cost.is_finite());
+ assert!(running_cost.is_less_than(spill_cost_budget));
+ true
+}
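+
+ // The overlap and pruning tests above are ordinary closed-interval arithmetic. Reduced to
+ // plain integers (sketch, std only):
+ //
+ //    fn overlaps(a: (u32, u32), b: (u32, u32)) -> bool {
+ //        // Closed intervals [a.0, a.1] and [b.0, b.1] overlap iff neither lies
+ //        // entirely to one side of the other.
+ //        a.1 >= b.0 && a.0 <= b.1
+ //    }
+ //
+ // The left subtree is visited only if the query interval can extend to the left of the
+ // current node (pair_frag.first < curr_frag.first), and similarly for the right.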
+
+impl PerRealReg {
+ // Find the set of VirtualRangeIxs that would need to be evicted in order to
+ // allocate `would_like_to_add` to this register. Virtual ranges mentioned
+ // in `do_not_evict` must not be considered as candidates for eviction.
+ // Also returns the total associated spill cost. That spill cost cannot be
+ // infinite.
+ //
+ // This can fail (return None) for four different reasons:
+ //
+ // - `would_like_to_add` interferes with a real-register-live-range
+ // commitment, so the register would be unavailable even if we evicted
+ // *all* virtual ranges assigned to it.
+ //
+ // - `would_like_to_add` interferes with a virtual range which is a spill
+ // range (has infinite cost). We cannot evict those without risking
+ // non-termination of the overall allocation algorithm.
+ //
+ // - `would_like_to_add` interferes with a virtual range listed in
+ // `do_not_evict`. Our caller uses this mechanism when trying to do
+ // coalescing, to avoid the nonsensicality of evicting some part of a
+ // virtual live range group in order to allocate a member of the same
+ // group.
+ //
+ // - The total spill cost of the candidate set exceeds the spill cost of
+ // `would_like_to_add`. This means that spilling them would be a net loss
+ // per our cost model. Note that `would_like_to_add` may have an infinite
+ // spill cost, in which case it will "win" over all other
+ // non-infinite-cost eviction candidates. This is by design (so as to
+ // guarantee that we can always allocate spill/reload bridges).
+ #[inline(never)]
+ fn find_evict_set<IsAllowedToEvict>(
+ &self,
+ would_like_to_add: VirtualRangeIx,
+ allowed_to_evict: &IsAllowedToEvict,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ ) -> Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)>
+ where
+ IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
+ {
+ // Firstly, if the commitment tree for this reg is empty, we can
+ // declare success immediately.
+ if self.committed.tree.root == AVL_NULL {
+ let evict_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
+ let evict_cost = SpillCost::zero();
+ return Some((evict_set, evict_cost));
+ }
+
+ // The tree isn't empty, so we will have to do this the hard way: iterate
+ // over all fragments in `would_like_to_add` and check them against the
+ // tree.
+
+ // Useful constants for the main loop
+ let would_like_to_add_vlr = &vlr_env[would_like_to_add];
+ let evict_cost_budget = would_like_to_add_vlr.spill_cost;
+ // Note that `evict_cost_budget` can be infinite because
+ // `would_like_to_add` might be a spill/reload range.
+
+ // The overall evict set and cost so far. These are updated as we iterate
+ // over the fragments that make up `would_like_to_add`.
+ let mut running_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
+ let mut running_cost = SpillCost::zero();
+
+ // "wlta" = would like to add
+ for wlta_frag in &would_like_to_add_vlr.sorted_frags.frags {
+ let wlta_frag_ok = search_commitment_tree(
+ &mut running_set,
+ &mut running_cost,
+ &self.committed.tree,
+ &wlta_frag,
+ &evict_cost_budget,
+ allowed_to_evict,
+ vlr_env,
+ );
+ if !wlta_frag_ok {
+ // This fragment won't fit, for one of the four reasons listed
+ // above. So give up now.
+ return None;
+ }
+ // And move on to the next fragment.
+ }
+
+ // If we got here, it means that `would_like_to_add` can be accommodated \o/
+ assert!(running_cost.is_finite());
+ assert!(running_cost.is_less_than(&evict_cost_budget));
+ Some((running_set, running_cost))
+ }
+
+ #[allow(dead_code)]
+ #[inline(never)]
+ fn show1_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
+ //"in_use: ".to_string() + &self.committed.show_with_frag_env(&frag_env)
+ "(show1_with_envs:FIXME)".to_string()
+ }
+ #[allow(dead_code)]
+ #[inline(never)]
+ fn show2_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
+ //"assigned: ".to_string() + &format!("{:?}", &self.vlrixs_assigned)
+ "(show2_with_envs:FIXME)".to_string()
+ }
+}
+
+//=============================================================================
+// Printing the allocator's top level state
+
+#[inline(never)]
+fn print_RA_state(
+ who: &str,
+ _universe: &RealRegUniverse,
+ // State components
+ prioQ: &VirtualRangePrioQ,
+ _perRealReg: &Vec<PerRealReg>,
+ edit_list_move: &Vec<EditListItem>,
+ edit_list_other: &Vec<EditListItem>,
+ // The context (environment)
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
+) {
+ debug!("<<<<====---- RA state at '{}' ----====", who);
+ //for ix in 0..perRealReg.len() {
+ // if !&perRealReg[ix].committed.pairs.is_empty() {
+ // debug!(
+ // "{:<5} {}",
+ // universe.regs[ix].1,
+ // &perRealReg[ix].show1_with_envs(&frag_env)
+ // );
+ // debug!(" {}", &perRealReg[ix].show2_with_envs(&frag_env));
+ // debug!("");
+ // }
+ //}
+ if !prioQ.is_empty() {
+ for s in prioQ.show_with_envs(vlr_env) {
+ debug!("{}", s);
+ }
+ }
+ for eli in edit_list_move {
+ debug!("ELI MOVE: {:?}", eli);
+ }
+ for eli in edit_list_other {
+ debug!("ELI other: {:?}", eli);
+ }
+ debug!(">>>>");
+}
+
+//=============================================================================
+// Reftype/stackmap support
+
+// This creates the artefacts for a safepoint/stackmap at some insn `iix`: the set of reftyped
+// spill slots, the spills to be placed at `iix.r` (yes, you read that right) and the reloads to
+// be placed at `iix.s`.
+//
+// This consults:
+//
+// * the commitment maps, to figure out which real registers are live and reftyped at `iix.u`.
+//
+// * the spillslot allocator, to figure out which spill slots are live and reftyped at `iix.u`.
+//
+// This may fail, meaning the request is in some way nonsensical; failure is propagated upwards.
+
+fn get_stackmap_artefacts_at(
+ spill_slot_allocator: &mut SpillSlotAllocator,
+ univ: &RealRegUniverse,
+ reftype_class: RegClass,
+ reg_vecs_and_bounds: &RegVecsAndBounds,
+ per_real_reg: &Vec<PerRealReg>,
+ rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ iix: InstIx,
+) -> Result<(Vec<InstToInsert>, Vec<InstToInsert>, Vec<SpillSlot>), RegAllocError> {
+ // From a code generation perspective, what we need to compute is:
+ //
+ // * Sbefore: real regs that are live at `iix.u`, that are reftypes
+ //
+ // * Safter: Sbefore - real regs written by `iix`
+ //
+ // Then:
+ //
+ // * for r in Sbefore . add "spill r" at `iix.r` *after* all the reloads that are already
+ // there
+ //
+ // * for r in Safter . add "reload r" at `iix.s` *before* all the spills that are already
+ // there
+ //
+ // Once those spills have been "recorded" by the `spill_slot_allocator`, we can then ask it
+ // to tell us all the reftyped spill slots at `iix.u`, and that's our stackmap! This routine
+ // only computes the stackmap and the vectors of spills and reloads. It doesn't deal with
+ // interleaving them into the final code sequence.
+ //
+ // Note that this scheme isn't as runtime-inefficient as it sounds, at least in the
+ // SpiderMonkey use case and where `iix` is a call insn. That's because SM's calling
+ // convention has no callee saved registers. Hence "real regs written by `iix`" will be
+ // "all real regs" and so Safter will be empty. And Sbefore is in any case pretty small.
+ //
+ // (/me thinks ..) hmm, if Safter is empty, then what is the point of dumping Sbefore on the
+ // stack before the GC? For r in Sbefore, either r is the only reference to some object, in
+ // which case there's no point in presenting that ref to the GC since r is dead after call,
+ // or r isn't the only ref to the object, in which case some other ref to it must exist
+ // elsewhere in the stack, and that will keep the object alive. Maybe this needs a rethink.
+ // Maybe the spills before the call should be only for the set Safter?
+
+ let pt = InstPoint::new_use(iix);
+
+ // Compute Sbefore.
+
+ // FIXME change this to SparseSet
+ let mut s_before = Set::<RealReg>::empty();
+
+ let rci = univ.allocable_by_class[reftype_class.rc_to_usize()];
+ if rci.is_none() {
+ return Err(RegAllocError::Other(
+ "stackmap request: no regs in specified reftype class".to_string(),
+ ));
+ }
+ let rci = rci.unwrap();
+
+ debug!("computing stackmap info at {:?}", pt);
+
+ for rreg_no in rci.first..rci.last + 1 {
+ // Get the RangeId, if any, assigned for `rreg_no` at `iix.u`. From that we can figure
+ // out if it is reftyped.
+ let mb_range_id = per_real_reg[rreg_no].committed.lookup_inst_point(pt);
+ if let Some(range_id) = mb_range_id {
+ // `rreg_no` is live at `iix.u`.
+ let is_ref = if range_id.is_real() {
+ debug!(
+ " real reg {:?} is real-range {:?}",
+ rreg_no,
+ rlr_env[range_id.to_real()]
+ );
+ rlr_env[range_id.to_real()].is_ref
+ } else {
+ debug!(
+ " real reg {:?} is virtual-range {:?}",
+ rreg_no,
+ vlr_env[range_id.to_virtual()]
+ );
+ vlr_env[range_id.to_virtual()].is_ref
+ };
+ if is_ref {
+ // Finally .. we know that `rreg_no` is reftyped and live at `iix.u`.
+ let rreg = univ.regs[rreg_no].0;
+ s_before.insert(rreg);
+ }
+ }
+ }
+
+ debug!("Sbefore = {:?}", s_before);
+
+ // Compute Safter.
+
+ let mut s_after = s_before.clone();
+ let bounds = &reg_vecs_and_bounds.bounds[iix];
+ if bounds.mods_len != 0 {
+ // Only the GC is allowed to modify reftyped regs at this insn!
+ return Err(RegAllocError::Other(
+ "stackmap request: safepoint insn modifies a reftyped reg".to_string(),
+ ));
+ }
+
+ for i in bounds.defs_start..bounds.defs_start + bounds.defs_len as u32 {
+ let r_defd = reg_vecs_and_bounds.vecs.defs[i as usize];
+ if r_defd.is_real() && r_defd.get_class() == reftype_class {
+ s_after.delete(r_defd.to_real_reg());
+ }
+ }
+
+ debug!("Safter = {:?}", s_before);
+
+ // Create the spill insns, as defined by Sbefore. This has the side effect of recording the
+ // spill in `spill_slot_allocator`, so we can later ask it to tell us all the reftyped spill
+ // slots.
+
+ let frag = RangeFrag::new(InstPoint::new_reload(iix), InstPoint::new_spill(iix));
+
+ let mut spill_insns = Vec::<InstToInsert>::new();
+ let mut where_reg_got_spilled_to = Map::<RealReg, SpillSlot>::default();
+
+ for from_reg in s_before.iter() {
+ let to_slot = spill_slot_allocator.alloc_reftyped_spillslot_for_frag(frag.clone());
+ let spill = InstToInsert::Spill {
+ to_slot,
+ from_reg: *from_reg,
+ for_vreg: None, // spill isn't associated with any virtual reg
+ };
+ spill_insns.push(spill);
+ // We also need to remember where we stashed it, so we can reload it, if it is in Safter.
+ if s_after.contains(*from_reg) {
+ where_reg_got_spilled_to.insert(*from_reg, to_slot);
+ }
+ }
+
+ // Create the reload insns, as defined by Safter. Except, we might as well use the map we
+ // just made, since its domain is the same as Safter.
+
+ let mut reload_insns = Vec::<InstToInsert>::new();
+
+ for (to_reg, from_slot) in where_reg_got_spilled_to.iter() {
+ let reload = InstToInsert::Reload {
+ to_reg: Writable::from_reg(*to_reg),
+ from_slot: *from_slot,
+ for_vreg: None, // reload isn't associated with any virtual reg
+ };
+ reload_insns.push(reload);
+ }
+
+ // And finally .. round up all the reftyped spill slots. That includes both "normal" spill
+ // slots that happen to hold reftyped values, as well as the "extras" we created here, to
+ // hold values of reftyped regs that are live over this instruction.
+
+ let reftyped_spillslots = spill_slot_allocator.get_reftyped_spillslots_at_inst_point(pt);
+
+ debug!("reftyped_spillslots = {:?}", reftyped_spillslots);
+
+ // And we're done!
+
+ Ok((spill_insns, reload_insns, reftyped_spillslots))
+}
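+
+ // A tiny concrete illustration of the Sbefore/Safter set algebra described at the top of
+ // this function, with registers stood in for by strings (sketch, std only):
+ //
+ //    use std::collections::HashSet;
+ //    let s_before: HashSet<&str> = ["r1", "r2", "r3"].iter().copied().collect();
+ //    let defs: HashSet<&str> = ["r2"].iter().copied().collect(); // written by the insn
+ //    let s_after: HashSet<&str> = s_before.difference(&defs).copied().collect();
+ //    // Spills at iix.r cover Sbefore; reloads at iix.s cover only Safter = {"r1", "r3"}.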
+
+//=============================================================================
+// Allocator top level
+
+/* (const) For each virtual live range
+ - its sorted RangeFrags
+ - its total size
+ - its spill cost
+ - (eventually) its assigned real register
+ New VirtualRanges are created as we go, but existing ones are unchanged,
+ apart from being tagged with its assigned real register.
+
+ (mut) For each real register
+ - the sorted RangeFrags assigned to it (todo: rm the M)
+ - the virtual LR indices assigned to it. This is so we can eject
+ a VirtualRange from the commitment, as needed
+
+ (mut) the set of VirtualRanges not yet allocated, prioritised by total size
+
+ (mut) the edit list that is produced
+*/
+/*
+fn show_commit_tab(commit_tab: &Vec::<SortedRangeFragIxs>,
+ who: &str,
+ context: &TypedIxVec::<RangeFragIx, RangeFrag>) -> String {
+ let mut res = "Commit Table at '".to_string()
+ + &who.to_string() + &"'\n".to_string();
+ let mut rregNo = 0;
+ for smf in commit_tab.iter() {
+ res += &" ".to_string();
+ res += &mkRealReg(rregNo).show();
+ res += &" ".to_string();
+ res += &smf.show_with_fenv(&context);
+ res += &"\n".to_string();
+ rregNo += 1;
+ }
+ res
+}
+*/
+
+// VirtualRanges created by spilling all pertain to a single InstIx. But
+// within that InstIx, there are three kinds of "bridges":
+#[derive(Clone, Copy, PartialEq)]
+pub(crate) enum BridgeKind {
+ RtoU, // A bridge for a USE. This connects the reload to the use.
+ RtoS, // A bridge for a MOD. This connects the reload, the use/def
+ // and the spill, since the value must first be reloaded, then
+ // operated on, and finally re-spilled.
+ DtoS, // A bridge for a DEF. This connects the def to the spill.
+}
+
+impl fmt::Debug for BridgeKind {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ BridgeKind::RtoU => write!(fmt, "R->U"),
+ BridgeKind::RtoS => write!(fmt, "R->S"),
+ BridgeKind::DtoS => write!(fmt, "D->S"),
+ }
+ }
+}
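+
+ // For example (illustrative only): once `v` has been spilled to some slot S, an
+ // instruction that modifies `v` (a MOD) gets an RtoS bridge, which at edit-application
+ // time turns into roughly
+ //
+ //    reload r <- S     // at iix.r, r being the real reg assigned to the bridge
+ //    <the original instruction, with v rewritten to r>
+ //    spill  S <- r     // at iix.s
+ //
+ // whereas a pure USE needs only the reload half (RtoU) and a pure DEF only the spill
+ // half (DtoS).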
+
+#[derive(Clone, Copy)]
+struct EditListItem {
+ // This holds enough information to create a spill or reload instruction,
+ // or both, and also specifies where in the instruction stream it/they
+ // should be added. Note that if the edit list as a whole specifies
+ // multiple items for the same location, then it is assumed that the order
+ // in which they execute isn't important.
+ //
+ // Some of the relevant info can be found via the VirtualRangeIx link:
+ // (1) the real reg involved
+ // (2) the place where the insn should go, since the VirtualRange should
+ // only have one RangeFrag, and we can deduce the correct location
+ // from that.
+ // Despite (2) we also carry here the InstIx of the affected instruction
+ // (there should be only one) since computing it via (2) is expensive.
+ // This, however, duplicates information that (2) already determines. Beware!
+ slot: SpillSlot,
+ vlrix: VirtualRangeIx,
+ kind: BridgeKind,
+ iix: InstIx,
+}
+
+impl fmt::Debug for EditListItem {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "(ELI: at {:?} for {:?} add {:?}, slot={:?})",
+ self.iix, self.vlrix, self.kind, self.slot
+ )
+ }
+}
+
+// Allocator top level. This function returns a result struct that contains
+// the final sequence of instructions, possibly with fills/spills/moves
+// spliced in and redundant moves elided, and with all virtual registers
+// replaced with real registers. Allocation can fail if there are insufficient
+// registers to even generate spill/reload code, or if the function appears to
+// have any undefined VirtualReg/RealReg uses.
+
+#[inline(never)]
+pub fn alloc_main<F: Function>(
+ func: &mut F,
+ reg_universe: &RealRegUniverse,
+ stackmap_request: Option<&StackmapRequestInfo>,
+ use_checker: bool,
+ opts: &BacktrackingOptions,
+) -> Result<RegAllocResult<F>, RegAllocError> {
+ // -------- Initial arrangements for stackmaps --------
+ let empty_vec_vregs = vec![];
+ let empty_vec_iixs = vec![];
+ let (client_wants_stackmaps, reftype_class, reftyped_vregs, safepoint_insns) =
+ match stackmap_request {
+ Some(&StackmapRequestInfo {
+ reftype_class,
+ ref reftyped_vregs,
+ ref safepoint_insns,
+ }) => (true, reftype_class, reftyped_vregs, safepoint_insns),
+ None => (false, RegClass::INVALID, &empty_vec_vregs, &empty_vec_iixs),
+ };
+
+ // -------- Perform initial liveness analysis --------
+ // Note that the analysis phase can fail; hence we propagate any error.
+ let AnalysisInfo {
+ reg_vecs_and_bounds,
+ real_ranges: rlr_env,
+ virtual_ranges: mut vlr_env,
+ range_frags: frag_env,
+ range_metrics: frag_metrics_env,
+ estimated_frequencies: est_freqs,
+ inst_to_block_map,
+ reg_to_ranges_maps: mb_reg_to_ranges_maps,
+ move_info: mb_move_info,
+ } = run_analysis(
+ func,
+ reg_universe,
+ AlgorithmWithDefaults::Backtracking,
+ client_wants_stackmaps,
+ reftype_class,
+ reftyped_vregs,
+ )
+ .map_err(|err| RegAllocError::Analysis(err))?;
+
+ assert!(reg_vecs_and_bounds.is_sanitized());
+ assert!(frag_env.len() == frag_metrics_env.len());
+ assert!(mb_reg_to_ranges_maps.is_some()); // ensured by `run_analysis`
+ assert!(mb_move_info.is_some()); // ensured by `run_analysis`
+ let reg_to_ranges_maps = mb_reg_to_ranges_maps.unwrap();
+ let move_info = mb_move_info.unwrap();
+
+ // Also perform analysis that finds all coalescing opportunities.
+ let coalescing_info = do_coalescing_analysis(
+ func,
+ &reg_universe,
+ &rlr_env,
+ &mut vlr_env,
+ &frag_env,
+ &reg_to_ranges_maps,
+ &move_info,
+ );
+ let mut hints: TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>> = coalescing_info.0;
+ let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> = coalescing_info.1;
+ let is_vv_boundary_move: TypedIxVec<InstIx, bool> = coalescing_info.2;
+ assert!(hints.len() == vlr_env.len());
+
+ // -------- Alloc main --------
+
+ // Create initial state
+ info!("alloc_main: begin");
+ info!(
+ "alloc_main: in: {} insns in {} blocks",
+ func.insns().len(),
+ func.blocks().len()
+ );
+ let num_vlrs_initial = vlr_env.len();
+ info!(
+ "alloc_main: in: {} VLRs, {} RLRs",
+ num_vlrs_initial,
+ rlr_env.len()
+ );
+
+ // This is fully populated by the ::new call.
+ let mut prioQ = VirtualRangePrioQ::new(&vlr_env);
+
+ // Whereas this is empty. We have to populate it "by hand", by
+ // effectively cloning the allocatable part (prefix) of the universe.
+ let mut per_real_reg = Vec::<PerRealReg>::new();
+ for _ in 0..reg_universe.allocable {
+ // Doing this instead of simply .resize avoids needing Clone for
+ // PerRealReg
+ per_real_reg.push(PerRealReg::new());
+ }
+ for (rlrix_no, rlr) in rlr_env.iter().enumerate() {
+ let rlrix = RealRangeIx::new(rlrix_no as u32);
+ let rregIndex = rlr.rreg.get_index();
+ // Ignore RealRanges for RealRegs that are not part of the allocatable
+ // set. As far as the allocator is concerned, such RealRegs simply
+ // don't exist.
+ if rregIndex >= reg_universe.allocable {
+ continue;
+ }
+ per_real_reg[rregIndex].add_RealRange(rlrix, &rlr_env, &frag_env);
+ }
+
+ let mut edit_list_move = Vec::<EditListItem>::new();
+ let mut edit_list_other = Vec::<EditListItem>::new();
+ if log_enabled!(Level::Debug) {
+ debug!("");
+ print_RA_state(
+ "Initial",
+ &reg_universe,
+ &prioQ,
+ &per_real_reg,
+ &edit_list_move,
+ &edit_list_other,
+ &vlr_env,
+ &frag_env,
+ );
+ }
+
+ // This is also part of the running state. `vlr_slot_env` tells us the
+ // assigned spill slot for each VirtualRange, if any.
+ // `spill_slot_allocator` decides on the assignments and writes them into
+ // `vlr_slot_env`.
+ let mut vlr_slot_env = TypedIxVec::<VirtualRangeIx, Option<SpillSlot>>::new();
+ vlr_slot_env.resize(num_vlrs_initial, None);
+ let mut spill_slot_allocator = SpillSlotAllocator::new();
+
+ // Main allocation loop. Each time round, pull out the longest
+ // unallocated VirtualRange, and do one of three things:
+ //
+ // * allocate it to a RealReg, possibly by ejecting some existing
+ // allocation, but only one with a lower spill cost than this one, or
+ //
+ // * spill it. This causes the VirtualRange to disappear. It is replaced
+ // by a set of very short VirtualRanges to carry the spill and reload
+ // values. Or,
+ //
+ // * split it. This causes it to disappear but be replaced by two
+ // VirtualRanges which together constitute the original.
+ debug!("");
+ debug!("-- MAIN ALLOCATION LOOP (DI means 'direct', CO means 'coalesced'):");
+
+ info!("alloc_main: main allocation loop: begin");
+
+ // ======== BEGIN Main allocation loop ========
+ let mut num_vlrs_processed = 0; // stats only
+ let mut num_vlrs_spilled = 0; // stats only
+ let mut num_vlrs_evicted = 0; // stats only
+
+ 'main_allocation_loop: loop {
+ debug!("-- still TODO {}", prioQ.len());
+ if false {
+ if log_enabled!(Level::Debug) {
+ debug!("");
+ print_RA_state(
+ "Loop Top",
+ &reg_universe,
+ &prioQ,
+ &per_real_reg,
+ &edit_list_move,
+ &edit_list_other,
+ &vlr_env,
+ &frag_env,
+ );
+ debug!("");
+ }
+ }
+
+ let mb_curr_vlrix = prioQ.get_longest_VirtualRange();
+ if mb_curr_vlrix.is_none() {
+ break 'main_allocation_loop;
+ }
+
+ num_vlrs_processed += 1;
+ let curr_vlrix = mb_curr_vlrix.unwrap();
+ let curr_vlr = &vlr_env[curr_vlrix];
+
+ debug!("-- considering {:?}: {:?}", curr_vlrix, curr_vlr);
+
+ assert!(curr_vlr.vreg.to_reg().is_virtual());
+ assert!(curr_vlr.rreg.is_none());
+ let curr_vlr_regclass = curr_vlr.vreg.get_class();
+ let curr_vlr_rc = curr_vlr_regclass.rc_to_usize();
+
+ // ====== BEGIN Try to do coalescing ======
+ //
+ // First, look through the hints for `curr_vlr`, collecting up candidate
+ // real regs, in decreasing order of preference, in `hinted_regs`. Note
+ // that we don't have to consider the weights here, because the coalescing
+ // analysis phase has already sorted the hints for the VLR so as to
+ // present the most favoured (weighty) first, so we merely need to retain
+ // that ordering when copying into `hinted_regs`.
+ assert!(hints.len() == vlr_env.len());
+ let mut hinted_regs = SmallVec::<[RealReg; 8]>::new();
+
+ // === BEGIN collect all hints for `curr_vlr` ===
+ // `hints` has one entry per VLR, but only for VLRs which existed
+ // initially (viz, not for any created by spilling/splitting/whatever).
+ // Similarly, `vlrEquivClasses` can only map VLRs that existed initially,
+ // and will panic otherwise. Hence the following check:
+ if curr_vlrix.get() < hints.len() {
+ for hint in &hints[curr_vlrix] {
+ // BEGIN for each hint
+ let mb_cand = match hint {
+ Hint::SameAs(other_vlrix, _weight) => {
+ // It wants the same reg as some other VLR, but we can only honour
+ // that if the other VLR actually *has* a reg at this point. Its
+ // `rreg` field will tell us exactly that.
+ vlr_env[*other_vlrix].rreg
+ }
+ Hint::Exactly(rreg, _weight) => Some(*rreg),
+ };
+ // So now `mb_cand` might have a preferred real reg. If so, add it to
+ // the list of cands. De-dup as we go, since that is way cheaper than
+ // effectively doing the same via repeated lookups in the
+ // CommitmentMaps.
+ if let Some(rreg) = mb_cand {
+ if !hinted_regs.iter().any(|r| *r == rreg) {
+ hinted_regs.push(rreg);
+ }
+ }
+ // END for each hint
+ }
+
+ // At this point, we have in `hinted_regs`, the hint candidates that
+ // arise from copies between `curr_vlr` and its immediate neighbouring
+ // VLRs or RLRs, in order of declining preference. And that is a good
+ // start.
+ //
+ // However, it may be the case that there is some other VLR which
+ // is in the same equivalence class as `curr_vlr`, but is not a
+ // direct neighbour, and which has already been assigned a
+ // register. We really ought to take those into account too, as
+ // the least-preferred candidates. Hence we need to iterate over
+ // the equivalence class and "round up the secondary candidates."
+ //
+ // Note that the equivalence class might contain VirtualRanges
+ // that are mutually overlapping. That is handled correctly,
+ // since we always consult the relevant CommitmentMaps (in the
+ // PerRealRegs) to detect interference. To more fully understand
+ // this, see the big block comment at the top of
+ // bt_coalescing_analysis.rs.
+ let n_primary_cands = hinted_regs.len();
+
+ // Work the equivalence class set for `curr_vlrix` to pick up any
+ // rreg hints. Equivalence class info exists only for "initial" VLRs.
+ if curr_vlrix.get() < num_vlrs_initial {
+ // `curr_vlrix` is an "initial" VLR.
+ for vlrix in vlrEquivClasses.equiv_class_elems_iter(curr_vlrix) {
+ if vlrix != curr_vlrix {
+ if let Some(rreg) = vlr_env[vlrix].rreg {
+ // Add `rreg` as a cand, if we don't already have it.
+ if !hinted_regs.iter().any(|r| *r == rreg) {
+ hinted_regs.push(rreg);
+ }
+ }
+ }
+ }
+
+ // Sort the secondary cands, so as to try and impose more consistency
+ // across the group. I don't know if this is worthwhile, but it seems
+ // sensible.
+ hinted_regs[n_primary_cands..].sort_by(|rreg1, rreg2| {
+ rreg1.get_index().partial_cmp(&rreg2.get_index()).unwrap()
+ });
+ }
+
+ if log_enabled!(Level::Debug) {
+ if !hinted_regs.is_empty() {
+ let mut candStr = "pri {".to_string();
+ for (rreg, n) in hinted_regs.iter().zip(0..) {
+ if n == n_primary_cands {
+ candStr = candStr + &" } sec {".to_string();
+ }
+ candStr =
+ candStr + &" ".to_string() + &reg_universe.regs[rreg.get_index()].1;
+ }
+ candStr = candStr + &" }";
+ debug!("-- CO candidates {}", candStr);
+ }
+ }
+ }
+ // === END collect all hints for `curr_vlr` ===
+
+ // === BEGIN try to use the hints for `curr_vlr` ===
+ // Now work through the list of preferences, to see if we can honour any
+ // of them.
+ for rreg in &hinted_regs {
+ let rregNo = rreg.get_index();
+
+ // Find the set of ranges which we'd have to evict in order to honour
+ // this hint. In the best case the set will be empty. In the worst
+ // case, we will get None either because (1) it would require evicting a
+ // spill range, which is disallowed so as to guarantee termination of
+ // the algorithm, or (2) because it would require evicting a real reg
+ // live range, which we can't do.
+ //
+ // We take care not to evict any range which is in the same equivalence
+ // class as `curr_vlr` since those are (by definition) connected to
+ // `curr_vlr` via V-V copies, and so evicting any of them would be
+ // counterproductive from the point of view of removing copies.
+
+ let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
+ per_real_reg[rregNo].find_evict_set(
+ curr_vlrix,
+ &|vlrix_to_evict| {
+ // What this means is: don't evict `vlrix_to_evict` if
+ // it is in the same equivalence class as `curr_vlrix`
+ // (the VLR which we're trying to allocate) AND we
+ // actually know the equivalence classes for both
+ // (hence the `Some`). Spill/reload ("non-original")
+ // VLRS don't have entries in `vlrEquivClasses`.
+ vlrEquivClasses.in_same_equivalence_class(vlrix_to_evict, curr_vlrix)
+ != Some(true)
+ },
+ &vlr_env,
+ );
+ if let Some((vlrixs_to_evict, total_evict_cost)) = mb_evict_info {
+ // Stay sane #1
+ assert!(curr_vlr.rreg.is_none());
+ // Stay sane #2
+ assert!(vlrixs_to_evict.is_empty() == total_evict_cost.is_zero());
+ // Can't evict if any in the set are spill ranges
+ assert!(total_evict_cost.is_finite());
+ // Ensure forward progress
+ assert!(total_evict_cost.is_less_than(&curr_vlr.spill_cost));
+ // Evict all evictees in the set
+ for vlrix_to_evict in vlrixs_to_evict.iter() {
+ // Ensure we're not evicting anything in `curr_vlrix`'s eclass.
+ // This should be guaranteed us by find_evict_set.
+ assert!(
+ vlrEquivClasses.in_same_equivalence_class(*vlrix_to_evict, curr_vlrix)
+ != Some(true)
+ );
+ // Evict ..
+ debug!(
+ "-- CO evict {:?}: {:?}",
+ *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
+ );
+ per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
+ prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
+ // Directly modify bits of vlr_env. This means we have to abandon
+ // the immutable borrow for curr_vlr, but that's OK -- we won't need
+ // it again (in this loop iteration).
+ debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
+ vlr_env[*vlrix_to_evict].rreg = None;
+ num_vlrs_evicted += 1;
+ }
+ // .. and reassign.
+ debug!("-- CO alloc to {}", reg_universe.regs[rregNo].1);
+ per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
+ vlr_env[curr_vlrix].rreg = Some(*rreg);
+ // We're done!
+ continue 'main_allocation_loop;
+ }
+ } /* for rreg in hinted_regs */
+ // === END try to use the hints for `curr_vlr` ===
+
+ // ====== END Try to do coalescing ======
+
+ // We get here if we failed to find a viable assignment by the process of
+ // looking at the coalescing hints.
+ //
+ // So: do almost exactly as we did in the "try for coalescing" stage
+ // above. Except, instead of trying each coalescing candidate
+ // individually, iterate over all the registers in the class, to find the
+ // one (if any) that has the lowest total evict cost. If we find one that
+ // has zero cost -- that is, we can make the assignment without evicting
+ // anything -- then stop the search at that point, since searching further
+ // is pointless.
+
+ let (first_in_rc, last_in_rc) = match &reg_universe.allocable_by_class[curr_vlr_rc] {
+ &None => {
+ return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
+ }
+ &Some(ref info) => (info.first, info.last),
+ };
+
+ let mut best_so_far: Option<(
+ /*rreg index*/ usize,
+ SparseSetU<[VirtualRangeIx; 4]>,
+ SpillCost,
+ )> = None;
+
+ 'search_through_cand_rregs_loop: for rregNo in first_in_rc..last_in_rc + 1 {
+ //debug!("-- Cand {} ...",
+ // reg_universe.regs[rregNo].1);
+
+ let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
+ per_real_reg[rregNo].find_evict_set(
+ curr_vlrix,
+ // We pass a closure that ignores its arg and returns `true`.
+ // Meaning, "we are not specifying any particular
+ // can't-be-evicted VLRs in this call."
+ &|_vlrix_to_evict| true,
+ &vlr_env,
+ );
+ //
+ //match mb_evict_info {
+ // None => debug!("-- Cand {}: Unavail",
+ // reg_universe.regs[rregNo].1),
+ // Some((ref evict_set, ref evict_cost)) =>
+ // debug!("-- Cand {}: Avail, evict cost {:?}, set {:?}",
+ // reg_universe.regs[rregNo].1, evict_cost, evict_set)
+ //}
+ //
+ if let Some((cand_vlrixs_to_evict, cand_total_evict_cost)) = mb_evict_info {
+ // Stay sane ..
+ assert!(cand_vlrixs_to_evict.is_empty() == cand_total_evict_cost.is_zero());
+ // We can't evict if any in the set are spill ranges, and
+ // find_evict_set should not offer us that possibility.
+ assert!(cand_total_evict_cost.is_finite());
+ // Ensure forward progress
+ assert!(cand_total_evict_cost.is_less_than(&curr_vlr.spill_cost));
+ // Update the "best so far". First, if the evict set is empty, then
+ // the candidate is by definition better than all others, and we will
+ // quit searching just below.
+ let mut cand_is_better = cand_vlrixs_to_evict.is_empty();
+ if !cand_is_better {
+ if let Some((_, _, best_spill_cost)) = best_so_far {
+ // If we've already got a candidate, this one is better if it has
+ // a lower total spill cost.
+ if cand_total_evict_cost.is_less_than(&best_spill_cost) {
+ cand_is_better = true;
+ }
+ } else {
+ // We don't have any candidate so far, so what we just got is
+ // better (than nothing).
+ cand_is_better = true;
+ }
+ }
+ // Remember the candidate if required.
+ let cand_vlrixs_to_evict_is_empty = cand_vlrixs_to_evict.is_empty();
+ if cand_is_better {
+ best_so_far = Some((rregNo, cand_vlrixs_to_evict, cand_total_evict_cost));
+ }
+ // If we've found a no-evictions-necessary candidate, quit searching
+ // immediately. We won't find anything better.
+ if cand_vlrixs_to_evict_is_empty {
+ break 'search_through_cand_rregs_loop;
+ }
+ }
+ } // for rregNo in first_in_rc..last_in_rc + 1 {
+
+ // Examine the results of the search. Did we find any usable candidate?
+ if let Some((rregNo, vlrixs_to_evict, total_spill_cost)) = best_so_far {
+ // We are still Totally Paranoid (tm)
+ // Stay sane #1
+ debug_assert!(curr_vlr.rreg.is_none());
+ // Can't spill a spill range
+ assert!(total_spill_cost.is_finite());
+ // Ensure forward progress
+ assert!(total_spill_cost.is_less_than(&curr_vlr.spill_cost));
+ // Now the same evict-reassign section as with the coalescing logic above.
+ // Evict all evictees in the set
+ for vlrix_to_evict in vlrixs_to_evict.iter() {
+ // Evict ..
+ debug!(
+ "-- DI evict {:?}: {:?}",
+ *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
+ );
+ per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
+ prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
+ debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
+ vlr_env[*vlrix_to_evict].rreg = None;
+ num_vlrs_evicted += 1;
+ }
+ // .. and reassign.
+ debug!("-- DI alloc to {}", reg_universe.regs[rregNo].1);
+ per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
+ let rreg = reg_universe.regs[rregNo].0;
+ vlr_env[curr_vlrix].rreg = Some(rreg);
+ // We're done!
+ continue 'main_allocation_loop;
+ }
+
+ // Still no luck. We can't find a register to put it in, so we'll
+ // have to spill it, since splitting it isn't yet implemented.
+ debug!("-- spill");
+
+ // If the live range already pertains to a spill or restore, then
+ // it's Game Over. The constraints (availability of RealRegs vs
+ // requirement for them) are impossible to fulfill, and so we cannot
+ // generate any valid allocation for this function.
+ if curr_vlr.spill_cost.is_infinite() {
+ return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
+ }
+
+ // Generate a new spill slot number, S
+ /* Spilling vreg V with virtual live range VirtualRange to slot S:
+ for each frag F in VirtualRange {
+ for each insn I in F.first.iix .. F.last.iix {
+ if I does not mention V
+ continue
+ if I mentions V in a Read role {
+ // invar: I cannot mention V in a Mod role
+ add new VirtualRange I.reload -> I.use,
+ vreg V, spillcost Inf
+ add eli @ I.reload V SpillSlot
+ }
+ if I mentions V in a Mod role {
+ // invar: I cannot mention V in a Read or Write Role
+ add new VirtualRange I.reload -> I.spill,
+ vreg V, spillcost Inf
+ add eli @ I.reload V SpillSlot
+ add eli @ I.spill SpillSlot V
+ }
+ if I mentions V in a Write role {
+ // invar: I cannot mention V in a Mod role
+ add new VirtualRange I.def -> I.spill,
+ vreg V, spillcost Inf
+ add eli @ I.spill V SpillSlot
+ }
+ }
+ }
+ */
+
+ // We will be spilling vreg `curr_vlr.reg` with VirtualRange `curr_vlr` to
+ // a spill slot that the spill slot allocator will choose for us, just a
+ // bit further down this function.
+
+ // This holds enough info to create reload or spill (or both)
+ // instructions around an instruction that references a VirtualReg
+ // that has been spilled.
+ struct SpillAndOrReloadInfo {
+ bix: BlockIx, // that `iix` is in
+ iix: InstIx, // this is the Inst we are spilling/reloading for
+ kind: BridgeKind, // says whether to create a spill or reload or both
+ }
+
+ // Most spills won't require anywhere near 32 entries, so this avoids
+ // almost all heap allocation.
+ let mut sri_vec = SmallVec::<[SpillAndOrReloadInfo; 32]>::new();
+
+ let curr_vlr_vreg = curr_vlr.vreg;
+ let curr_vlr_reg = curr_vlr_vreg.to_reg();
+ let curr_vlr_is_ref = curr_vlr.is_ref;
+
+ for frag in &curr_vlr.sorted_frags.frags {
+ for iix in frag.first.iix().dotdot(frag.last.iix().plus(1)) {
+ let (iix_uses_curr_vlr_reg, iix_defs_curr_vlr_reg, iix_mods_curr_vlr_reg) =
+ does_inst_use_def_or_mod_reg(&reg_vecs_and_bounds, iix, curr_vlr_reg);
+ // If this insn doesn't mention the vreg we're spilling for,
+ // move on.
+ if !iix_defs_curr_vlr_reg && !iix_mods_curr_vlr_reg && !iix_uses_curr_vlr_reg {
+ continue;
+ }
+ // USES: Do we need to create a reload-to-use bridge
+ // (VirtualRange) ?
+ if iix_uses_curr_vlr_reg && frag.contains(&InstPoint::new_use(iix)) {
+ debug_assert!(!iix_mods_curr_vlr_reg);
+ // Stash enough info that we can create a new VirtualRange
+ // and a new edit list entry for the reload.
+ let bix = inst_to_block_map.map(iix);
+ let sri = SpillAndOrReloadInfo {
+ bix,
+ iix,
+ kind: BridgeKind::RtoU,
+ };
+ sri_vec.push(sri);
+ }
+ // MODS: Do we need to create a reload-to-spill bridge? This
+ // can only happen for instructions which modify a register.
+ // Note this has to be a single VirtualRange, since if it were
+ // two (one for the reload, one for the spill) they could
+ // later end up being assigned to different RealRegs, which is
+ // obviously nonsensical.
+ if iix_mods_curr_vlr_reg
+ && frag.contains(&InstPoint::new_use(iix))
+ && frag.contains(&InstPoint::new_def(iix))
+ {
+ debug_assert!(!iix_uses_curr_vlr_reg);
+ debug_assert!(!iix_defs_curr_vlr_reg);
+ let bix = inst_to_block_map.map(iix);
+ let sri = SpillAndOrReloadInfo {
+ bix,
+ iix,
+ kind: BridgeKind::RtoS,
+ };
+ sri_vec.push(sri);
+ }
+ // DEFS: Do we need to create a def-to-spill bridge?
+ if iix_defs_curr_vlr_reg && frag.contains(&InstPoint::new_def(iix)) {
+ debug_assert!(!iix_mods_curr_vlr_reg);
+ let bix = inst_to_block_map.map(iix);
+ let sri = SpillAndOrReloadInfo {
+ bix,
+ iix,
+ kind: BridgeKind::DtoS,
+ };
+ sri_vec.push(sri);
+ }
+ }
+ }
+
+ // Now that we no longer need to access `frag_env` or `vlr_env` for
+ // the remainder of this iteration of the main allocation loop, we can
+ // actually generate the required spill/reload artefacts.
+
+ // First off, poke the spill slot allocator to get an intelligent choice
+ // of slot. Note that this will fail for "non-initial" VirtualRanges; but
+ // the only non-initial ones will have been created by spilling anyway,
+ // and we definitely shouldn't be trying to spill them again. Hence we
+ // can assert.
+ assert!(vlr_slot_env.len() == num_vlrs_initial);
+ assert!(curr_vlrix < VirtualRangeIx::new(num_vlrs_initial));
+ if vlr_slot_env[curr_vlrix].is_none() {
+ // It hasn't been decided yet. Cause it to be so by asking for an
+ // allocation for the entire eclass that `curr_vlrix` belongs to.
+ spill_slot_allocator.alloc_spill_slots(
+ &mut vlr_slot_env,
+ func,
+ &vlr_env,
+ &vlrEquivClasses,
+ curr_vlrix,
+ );
+ assert!(vlr_slot_env[curr_vlrix].is_some());
+ }
+ let spill_slot_to_use = vlr_slot_env[curr_vlrix].unwrap();
+
+ // If we're spilling a reffy VLR, we'll need to tell the spillslot allocator that. The
+ // VLR will already have been allocated to some spill slot, and relevant RangeFrags in
+ // the slot should have already been reserved for it, by the above call to
+ // `alloc_spill_slots` (although possibly relating to a prior VLR in the same
+ // equivalence class, and not this one). However, those RangeFrags will have all been
+ // marked non-reffy, because we don't know, in general, at spillslot-allocation-time,
+ // whether a VLR will actually be spilled, and we don't want the resulting stack maps to
+ // mention stack entries which are dead at the point of the safepoint insn. Hence the
+ // need to update those RangeFrags pertaining to just this VLR -- now that we *know*
+ // it's going to be spilled.
+ if curr_vlr.is_ref {
+ spill_slot_allocator
+ .notify_spillage_of_reftyped_vlr(spill_slot_to_use, &curr_vlr.sorted_frags);
+ }
+
+ for sri in sri_vec {
+ let (new_vlr_first_pt, new_vlr_last_pt) = match sri.kind {
+ BridgeKind::RtoU => (Point::Reload, Point::Use),
+ BridgeKind::RtoS => (Point::Reload, Point::Spill),
+ BridgeKind::DtoS => (Point::Def, Point::Spill),
+ };
+ let new_vlr_frag = RangeFrag {
+ first: InstPoint::new(sri.iix, new_vlr_first_pt),
+ last: InstPoint::new(sri.iix, new_vlr_last_pt),
+ };
+ debug!("-- new RangeFrag {:?}", &new_vlr_frag);
+ let new_vlr_sfrags = SortedRangeFrags::unit(new_vlr_frag);
+ let new_vlr = VirtualRange {
+ vreg: curr_vlr_vreg,
+ rreg: None,
+ sorted_frags: new_vlr_sfrags,
+ is_ref: curr_vlr_is_ref, // "inherit" refness
+ size: 1,
+ // Effectively infinite. We'll never look at this again anyway.
+ total_cost: 0xFFFF_FFFFu32,
+ spill_cost: SpillCost::infinite(),
+ };
+ let new_vlrix = VirtualRangeIx::new(vlr_env.len() as u32);
+ debug!(
+ "-- new VirtRange {:?} := {:?}",
+ new_vlrix, &new_vlr
+ );
+ vlr_env.push(new_vlr);
+ prioQ.add_VirtualRange(&vlr_env, new_vlrix);
+
+ // BEGIN (optimisation only) see if we can create any coalescing hints
+ // for this new VLR.
+ let mut new_vlr_hint = SmallVec::<[Hint; 8]>::new();
+ if is_vv_boundary_move[sri.iix] {
+ // Collect the src and dst regs for the move. It must be a
+ // move because `is_vv_boundary_move` claims that to be true.
+ let im = func.is_move(&func.get_insn(sri.iix));
+ assert!(im.is_some());
+ let (wdst_reg, src_reg): (Writable<Reg>, Reg) = im.unwrap();
+ let dst_reg: Reg = wdst_reg.to_reg();
+ assert!(src_reg.is_virtual() && dst_reg.is_virtual());
+ let dst_vreg: VirtualReg = dst_reg.to_virtual_reg();
+ let src_vreg: VirtualReg = src_reg.to_virtual_reg();
+ let bridge_eef = est_freqs[sri.bix];
+ match sri.kind {
+ BridgeKind::RtoU => {
+ // Reload-to-Use bridge. Hint that we want to be
+ // allocated to the same reg as the destination of the
+ // move. That means we have to find the VLR that owns
+ // the destination vreg.
+ for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[dst_vreg.get_index()] {
+ if vlr_env[*vlrix].vreg == dst_vreg {
+ new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
+ break;
+ }
+ }
+ }
+ BridgeKind::DtoS => {
+ // Def-to-Spill bridge. Hint that we want to be
+ // allocated to the same reg as the source of the
+ // move.
+ for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[src_vreg.get_index()] {
+ if vlr_env[*vlrix].vreg == src_vreg {
+ new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
+ break;
+ }
+ }
+ }
+ BridgeKind::RtoS => {
+ // A Reload-to-Spill bridge. This can't happen. It
+ // implies that the instruction modifies (both reads
+ // and writes) one of its operands, but the client's
+ // `is_move` function claims this instruction is a
+ // plain "move" (reads source, writes dest). These
+ // claims are mutually contradictory.
+ panic!("RtoS bridge for v-v boundary move");
+ }
+ }
+ }
+ hints.push(new_vlr_hint);
+ // END see if we can create any coalescing hints for this new VLR.
+
+ // Finally, create a new EditListItem. This holds enough
+ // information that a suitable spill or reload instruction can
+ // later be created.
+ let new_eli = EditListItem {
+ slot: spill_slot_to_use,
+ vlrix: new_vlrix,
+ kind: sri.kind,
+ iix: sri.iix,
+ };
+ if is_vv_boundary_move[sri.iix] {
+ debug!("-- new ELI MOVE {:?}", &new_eli);
+ edit_list_move.push(new_eli);
+ } else {
+ debug!("-- new ELI other {:?}", &new_eli);
+ edit_list_other.push(new_eli);
+ }
+ }
+
+ num_vlrs_spilled += 1;
+ // And implicitly "continue 'main_allocation_loop"
+ }
+ // ======== END Main allocation loop ========
+
+ info!("alloc_main: main allocation loop: end");
+
+ if log_enabled!(Level::Debug) {
+ debug!("");
+ print_RA_state(
+ "Final",
+ &reg_universe,
+ &prioQ,
+ &per_real_reg,
+ &edit_list_move,
+ &edit_list_other,
+ &vlr_env,
+ &frag_env,
+ );
+ }
+
+ // ======== BEGIN Do spill slot coalescing ========
+
+ debug!("");
+ info!("alloc_main: create spills_n_reloads for MOVE insns");
+
+ // Sort `edit_list_move` by the insn with which each item is associated.
+ edit_list_move.sort_unstable_by(|eli1, eli2| eli1.iix.cmp(&eli2.iix));
+
+ // Now go through `edit_list_move` and find pairs which constitute a
+ // spillslot-to-the-same-spillslot move. What we have in `edit_list_move` is
+ // heavily constrained, as follows:
+ //
+ // * each entry should reference an InstIx which the coalescing analysis
+ // identified as a virtual-to-virtual copy which exists at the boundary
+ // between two VirtualRanges. The "boundary" aspect is important; we
+ // can't coalesce out moves in which the source vreg is not the "last use"
+ // or for which the destination vreg is not the "first def". (The same is
+ // true for coalescing of unspilled moves).
+ //
+ // * each entry must reference a VirtualRange which has only a single
+ // RangeFrag, and that frag must exist entirely "within" the referenced
+ // instruction. Specifically, it may only be a R->U frag (bridge) or a
+ // D->S frag.
+ //
+ // * For a referenced instruction, there may be at most two entries in this
+ // list: one that references the R->U frag and one that references the
+ // D->S frag. Furthermore, the two entries must carry values of the same
+ // RegClass; if that isn't true, the client's `is_move` function is
+ // defective.
+ //
+ // For any such pair identified, if both frags mention the same spill slot,
+ // we skip generating both the reload and the spill instruction. We also
+ // note that the instruction itself is to be deleted (converted to a
+ // zero-len nop). In a sense we have "cancelled out" a reload/spill pair.
+ // Entries that can't be cancelled out are handled the same way as for
+ // entries in `edit_list_other`, by simply copying them there.
+ //
+ // Since `edit_list_move` is sorted by insn ix, we can scan linearly over
+ // it, looking for adjacent pairs. We'll have to accept them in either
+ // order though (first R->U then D->S, or the other way round). There's no
+ // fixed ordering since there is no particular ordering in the way
+ // VirtualRanges are allocated.
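+ //
+ // A concrete (hypothetical) example: suppose insn I28 is a v-v boundary
+ // move "v7 := v5", and both v5 and v7 ended up spilled to the same slot S3.
+ // Then `edit_list_move` contains two entries for I28: an R->U bridge for v5
+ // (covering I28.reload .. I28.use, in slot S3) and a D->S bridge for v7
+ // (covering I28.def .. I28.spill, in slot S3). Because both entries name
+ // S3, the reload and the spill cancel out, and I28 itself is nopped out.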
+
+ // As a result of spill slot coalescing, we'll need to delete the move
+ // instructions leading to a mergeable spill slot move. The insn stream
+ // editor can't delete instructions, so instead it'll replace them with
+ // zero-len nops obtained from the client. `iixs_to_nop_out` records the
+ // insns that this has to happen to. It is in increasing order of InstIx
+ // (because the sorted `edit_list_move` is), and the indices in it refer to
+ // the original virtual-registerised code.
+ let mut iixs_to_nop_out = Vec::<InstIx>::new();
+ let mut ghost_moves = vec![];
+
+ let n_edit_list_move = edit_list_move.len();
+ let mut n_edit_list_move_processed = 0; // for assertions only
+ let mut i_min = 0;
+ loop {
+ if i_min >= n_edit_list_move {
+ break;
+ }
+ // Find the bounds of the current group.
+ debug!("editlist entry (MOVE): min: {:?}", &edit_list_move[i_min]);
+ let i_min_iix = edit_list_move[i_min].iix;
+ let mut i_max = i_min;
+ while i_max + 1 < n_edit_list_move && edit_list_move[i_max + 1].iix == i_min_iix {
+ i_max += 1;
+ debug!("editlist entry (MOVE): max: {:?}", &edit_list_move[i_max]);
+ }
+ // Current group is from i_min to i_max inclusive. At most 2 entries are
+ // allowed per group.
+ assert!(i_max - i_min <= 1);
+ // Check for a mergeable pair.
+ if i_max - i_min == 1 {
+ assert!(is_vv_boundary_move[i_min_iix]);
+ let vlrix1 = edit_list_move[i_min].vlrix;
+ let vlrix2 = edit_list_move[i_max].vlrix;
+ assert!(vlrix1 != vlrix2);
+ let vlr1 = &vlr_env[vlrix1];
+ let vlr2 = &vlr_env[vlrix2];
+ let frags1 = &vlr1.sorted_frags;
+ let frags2 = &vlr2.sorted_frags;
+ assert!(frags1.frags.len() == 1);
+ assert!(frags2.frags.len() == 1);
+ let frag1 = &frags1.frags[0];
+ let frag2 = &frags2.frags[0];
+ assert!(frag1.first.iix() == i_min_iix);
+ assert!(frag1.last.iix() == i_min_iix);
+ assert!(frag2.first.iix() == i_min_iix);
+ assert!(frag2.last.iix() == i_min_iix);
+ // frag1 must be R->U and frag2 must be D->S, or vice versa
+ match (
+ frag1.first.pt(),
+ frag1.last.pt(),
+ frag2.first.pt(),
+ frag2.last.pt(),
+ ) {
+ (Point::Reload, Point::Use, Point::Def, Point::Spill)
+ | (Point::Def, Point::Spill, Point::Reload, Point::Use) => {
+ let slot1 = edit_list_move[i_min].slot;
+ let slot2 = edit_list_move[i_max].slot;
+ if slot1 == slot2 {
+ // Yay. We've found a coalescable pair. We can just ignore the
+ // two entries and move on. Except we have to mark the insn
+ // itself for deletion.
+ debug!("editlist entry (MOVE): delete {:?}", i_min_iix);
+ iixs_to_nop_out.push(i_min_iix);
+ i_min = i_max + 1;
+ n_edit_list_move_processed += 2;
+ if use_checker {
+ let (from_reg, to_reg) = if frag1.last.pt() == Point::Use {
+ (vlr1.vreg.to_reg(), vlr2.vreg.to_reg())
+ } else {
+ (vlr2.vreg.to_reg(), vlr1.vreg.to_reg())
+ };
+ ghost_moves.push(InstToInsertAndExtPoint::new(
+ InstToInsert::ChangeSpillSlotOwnership {
+ inst_ix: i_min_iix,
+ slot: slot1,
+ from_reg,
+ to_reg,
+ },
+ InstExtPoint::new(i_min_iix, ExtPoint::Reload),
+ ));
+ }
+ continue;
+ }
+ }
+ (_, _, _, _) => {
+ panic!("spill slot coalescing, edit_list_move: unexpected frags");
+ }
+ }
+ }
+ // If we get here for whatever reason, this group is uninteresting. Copy
+ // it into `edit_list_other` so that it gets processed in the normal way.
+ for i in i_min..=i_max {
+ edit_list_other.push(edit_list_move[i]);
+ n_edit_list_move_processed += 1;
+ }
+ i_min = i_max + 1;
+ }
+ assert!(n_edit_list_move_processed == n_edit_list_move);
+
+ // ======== END Do spill slot coalescing ========
+
+ // ======== BEGIN Create all other spills and reloads ========
+
+ debug!("");
+ info!("alloc_main: create spills_n_reloads for other insns");
+
+ // Reload and spill instructions are missing. To generate them, go through
+ // the "edit list", which contains info on both how to generate the
+ // instructions, and where to insert them.
+ let mut spills_n_reloads = Vec::<InstToInsertAndExtPoint>::new();
+ let mut num_spills = 0; // stats only
+ let mut num_reloads = 0; // stats only
+ for eli in &edit_list_other {
+ debug!("editlist entry (other): {:?}", eli);
+ let vlr = &vlr_env[eli.vlrix];
+ let vlr_sfrags = &vlr.sorted_frags;
+ assert!(vlr_sfrags.frags.len() == 1);
+ let vlr_frag = &vlr_sfrags.frags[0];
+ let rreg = vlr.rreg.expect("Gen of spill/reload: reg not assigned?!");
+ let vreg = vlr.vreg;
+ match eli.kind {
+ BridgeKind::RtoU => {
+ debug_assert!(vlr_frag.first.pt().is_reload());
+ debug_assert!(vlr_frag.last.pt().is_use());
+ debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
+ let insnR = InstToInsert::Reload {
+ to_reg: Writable::from_reg(rreg),
+ from_slot: eli.slot,
+ for_vreg: Some(vreg),
+ };
+ let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
+ spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
+ num_reloads += 1;
+ }
+ BridgeKind::RtoS => {
+ debug_assert!(vlr_frag.first.pt().is_reload());
+ debug_assert!(vlr_frag.last.pt().is_spill());
+ debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
+ let insnR = InstToInsert::Reload {
+ to_reg: Writable::from_reg(rreg),
+ from_slot: eli.slot,
+ for_vreg: Some(vreg),
+ };
+ let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
+ let insnS = InstToInsert::Spill {
+ to_slot: eli.slot,
+ from_reg: rreg,
+ for_vreg: Some(vreg),
+ };
+ let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
+ spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
+ spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
+ num_reloads += 1;
+ num_spills += 1;
+ }
+ BridgeKind::DtoS => {
+ debug_assert!(vlr_frag.first.pt().is_def());
+ debug_assert!(vlr_frag.last.pt().is_spill());
+ debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
+ let insnS = InstToInsert::Spill {
+ to_slot: eli.slot,
+ from_reg: rreg,
+ for_vreg: Some(vreg),
+ };
+ let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
+ spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
+ num_spills += 1;
+ }
+ }
+ }
+
+ // Append all ghost moves.
+ if use_checker {
+ spills_n_reloads.extend(ghost_moves.into_iter());
+ spills_n_reloads.sort_by_key(|inst_and_point| inst_and_point.iep.clone());
+ }
+
+ // ======== END Create all other spills and reloads ========
+
+ // ======== BEGIN Create final instruction stream ========
+
+ // Gather up a vector of (RangeFrag, VirtualReg, RealReg) resulting from
+ // the previous phase. This fundamentally is the result of the allocation
+ // and tells us how the instruction stream must be edited. Note it does
+ // not take account of spill or reload instructions. Dealing with those
+ // is relatively simple and happens later.
+
+ info!("alloc_main: create frag_map");
+
+ let mut frag_map = Vec::<(RangeFrag, VirtualReg, RealReg)>::new();
+ // For each real register under our control ..
+ for i in 0..reg_universe.allocable {
+ let rreg = reg_universe.regs[i].0;
+ // .. look at all the VirtualRanges assigned to it. And for each such
+ // VirtualRange ..
+ for vlrix_assigned in per_real_reg[i].vlrixs_assigned.iter() {
+ let VirtualRange {
+ vreg, sorted_frags, ..
+ } = &vlr_env[*vlrix_assigned];
+ // All the RangeFrags in the VirtualRange `*vlrix_assigned` require its
+ // `vreg` to be mapped to the real reg `rreg` (universe index `i`)
+ // .. collect up all its constituent RangeFrags.
+ for frag in &sorted_frags.frags {
+ frag_map.push((frag.clone(), *vreg, rreg));
+ }
+ }
+ }
+
+ // There is one of these for every entry in `safepoint_insns`.
+ let mut stackmaps = Vec::<Vec<SpillSlot>>::new();
+
+ if !safepoint_insns.is_empty() {
+ info!("alloc_main: create safepoints and stackmaps");
+ for safepoint_iix in safepoint_insns {
+ // Create the stackmap artefacts for `safepoint_iix`. Save the stackmap (the
+ // reftyped spillslots); we'll have to return it to the client as part of the
+ // overall allocation result. The extra spill and reload instructions can simply
+ // be added to `spills_n_reloads` though, and `edit_inst_stream` will correctly
+ // merge them in.
+ //
+ // Note: this modifies `spill_slot_allocator`, since at this point we have to
+ // allocate spill slots to hold reftyped real regs across the safepoint insn.
+ //
+ // Because the SB (spill-before) and RA (reload-after) `ExtPoint`s are "closer" to
+ // the "core" of an instruction than the R (reload) and S (spill) `ExtPoint`s, any
+ // "normal" reload or spill ranges that are reftyped will be handled correctly.
+ // From `get_stackmap_artefacts_at`'s point of view, such spill/reload ranges are
+ // just like any other real-reg live range that it will have to spill around the
+ // safepoint. The fact that they are for spills or reloads doesn't make any
+ // difference.
+ //
+ // Note also: this call can fail; failure is propagated upwards.
+ //
+ // FIXME Passing these 3 small vectors around is inefficient. Use SmallVec or
+ // (better) owned-by-this-function vectors instead.
+ let (spills_before, reloads_after, reftyped_spillslots) = get_stackmap_artefacts_at(
+ &mut spill_slot_allocator,
+ &reg_universe,
+ reftype_class,
+ &reg_vecs_and_bounds,
+ &per_real_reg,
+ &rlr_env,
+ &vlr_env,
+ *safepoint_iix,
+ )?;
+ stackmaps.push(reftyped_spillslots);
+ for spill_before in spills_before {
+ spills_n_reloads.push(InstToInsertAndExtPoint::new(
+ spill_before,
+ InstExtPoint::new(*safepoint_iix, ExtPoint::SpillBefore),
+ ));
+ }
+ for reload_after in reloads_after {
+ spills_n_reloads.push(InstToInsertAndExtPoint::new(
+ reload_after,
+ InstExtPoint::new(*safepoint_iix, ExtPoint::ReloadAfter),
+ ));
+ }
+ }
+ }
+
+ info!("alloc_main: edit_inst_stream");
+
+ let final_insns_and_targetmap_and_new_safepoints__or_err = edit_inst_stream(
+ func,
+ &safepoint_insns,
+ spills_n_reloads,
+ &iixs_to_nop_out,
+ frag_map,
+ &reg_universe,
+ use_checker,
+ &stackmaps[..],
+ &reftyped_vregs[..],
+ );
+
+ // ======== END Create final instruction stream ========
+
+ // ======== BEGIN Create the RegAllocResult ========
+
+ match final_insns_and_targetmap_and_new_safepoints__or_err {
+ Ok((ref final_insns, ..)) => {
+ info!(
+ "alloc_main: out: VLRs: {} initially, {} processed",
+ num_vlrs_initial, num_vlrs_processed
+ );
+ info!(
+ "alloc_main: out: VLRs: {} evicted, {} spilled",
+ num_vlrs_evicted, num_vlrs_spilled
+ );
+ info!(
+ "alloc_main: out: insns: {} total, {} spills, {} reloads, {} nopzs",
+ final_insns.len(),
+ num_spills,
+ num_reloads,
+ iixs_to_nop_out.len()
+ );
+ info!(
+ "alloc_main: out: spill slots: {} used",
+ spill_slot_allocator.num_slots_in_use()
+ );
+ }
+ Err(_) => {
+ info!("alloc_main: allocation failed!");
+ }
+ }
+
+ let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
+ match final_insns_and_targetmap_and_new_safepoints__or_err {
+ Err(e) => {
+ info!("alloc_main: fail");
+ return Err(e);
+ }
+ Ok(quad) => {
+ info!("alloc_main: creating RegAllocResult");
+ quad
+ }
+ };
+
+ // Compute clobbered registers with one final, quick pass.
+ //
+ // FIXME: derive this information directly from the allocation data
+ // structures used above.
+ //
+ // NB at this point, the `san_reg_uses` that was computed in the analysis
+ // phase is no longer valid, because we've added and removed instructions to
+ // the function relative to the one that `san_reg_uses` was computed from,
+ // so we have to re-visit all insns with `add_raw_reg_vecs_for_insn`.
+ // That's inefficient, but we don't care .. this should only be a temporary
+ // fix.
+
+ let mut clobbered_registers: Set<RealReg> = Set::empty();
+
+ // We'll dump all the reg uses in here. We don't care about the bounds, so just
+ // pass a dummy one in the loop.
+ let mut reg_vecs = RegVecs::new(/*sanitized=*/ false);
+ let mut dummy_bounds = RegVecBounds::new();
+ for insn in &final_insns {
+ if func.is_included_in_clobbers(insn) {
+ add_raw_reg_vecs_for_insn::<F>(insn, &mut reg_vecs, &mut dummy_bounds);
+ }
+ }
+ for reg in reg_vecs.defs.iter().chain(reg_vecs.mods.iter()) {
+ assert!(reg.is_real());
+ clobbered_registers.insert(reg.to_real_reg());
+ }
+
+ // And now remove from the set all those not available to the allocator,
+ // but don't remove the reserved regs, since we might have modified those.
+ clobbered_registers.filter_map(|&reg| {
+ if reg.get_index() >= reg_universe.allocable {
+ None
+ } else {
+ Some(reg)
+ }
+ });
+
+ assert!(est_freqs.len() as usize == func.blocks().len());
+ let mut block_annotations = None;
+ if opts.request_block_annotations {
+ let mut anns = TypedIxVec::<BlockIx, Vec<String>>::new();
+ for (estFreq, i) in est_freqs.iter().zip(0..) {
+ let bix = BlockIx::new(i);
+ let ef_str = format!("RA: bix {:?}, estFreq {}", bix, estFreq);
+ anns.push(vec![ef_str]);
+ }
+ block_annotations = Some(anns);
+ }
+
+ assert!(stackmaps.len() == safepoint_insns.len());
+ assert!(new_safepoint_insns.len() == safepoint_insns.len());
+ let ra_res = RegAllocResult {
+ insns: final_insns,
+ target_map,
+ orig_insn_map: new_to_old_insn_map,
+ clobbered_registers,
+ num_spill_slots: spill_slot_allocator.num_slots_in_use() as u32,
+ block_annotations,
+ stackmaps,
+ new_safepoint_insns,
+ };
+
+ info!("alloc_main: end");
+
+ // ======== END Create the RegAllocResult ========
+
+ Ok(ra_res)
+}
diff --git a/third_party/rust/regalloc/src/bt_spillslot_allocator.rs b/third_party/rust/regalloc/src/bt_spillslot_allocator.rs
new file mode 100644
index 0000000000..a85f2c0354
--- /dev/null
+++ b/third_party/rust/regalloc/src/bt_spillslot_allocator.rs
@@ -0,0 +1,522 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+//! Allocation of spill slots for the backtracking allocator.
+
+use crate::avl_tree::{AVLTree, AVL_NULL};
+use crate::data_structures::{
+ cmp_range_frags, InstPoint, RangeFrag, SortedRangeFrags, SpillSlot, TypedIxVec, VirtualRange,
+ VirtualRangeIx,
+};
+use crate::union_find::UnionFindEquivClasses;
+use crate::Function;
+
+//=============================================================================
+// A spill slot allocator. This could be implemented more simply than it is.
+// The reason for the extra complexity is to support copy-coalescing at the
+// spill-slot level. That is, it tries to make it possible to allocate all
+// members of a VirtualRange group to the same spill slot, so that moves
+// between two spilled members of the same group can be turned into no-ops.
+//
+// All of the `size` metrics in this bit are in terms of "logical spill slot
+// units", per the interface's description for `get_spillslot_size`.
+
+// *** Important: to fully understand this allocator and how it interacts with
+// coalescing analysis, you need to read the big block comment at the top of
+// bt_coalescing_analysis.rs.
+
+//=============================================================================
+// Logical spill slots
+
+// In the trees, we keep track of which frags are reftyped, so we can later create stackmaps by
+// slicing all of the trees at some `InstPoint`. Unfortunately this requires storing 65 bits of
+// data in each node -- 64 bits for the RangeFrag and 1 bit for the reftype. A TODO would be to
+// steal one bit from the RangeFrag. For now though, we do the simple thing.
+
+#[derive(Clone, PartialEq, PartialOrd)]
+struct RangeFragAndRefness {
+ frag: RangeFrag,
+ is_ref: bool,
+}
+impl RangeFragAndRefness {
+ fn new(frag: RangeFrag, is_ref: bool) -> Self {
+ Self { frag, is_ref }
+ }
+}
+
+// We keep one of these for every "logical spill slot" in use.
+enum LogicalSpillSlot {
+ // This slot is in use and can hold values of size `size` (only). Note that
+ // `InUse` may only appear in `SpillSlotAllocator::slots` positions that
+ // have indices that are 0 % `size`. Furthermore, after such an entry in
+ // `SpillSlotAllocator::slots`, the next `size` - 1 entries must be
+ // `Unavail`. This is a hard invariant, violation of which will cause
+ // overlapping spill slots and potential chaos.
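+ // For example, if `slots[8]` is `InUse { size: 4, .. }`, then index 8 is a
+ // multiple of 4 and `slots[9]`, `slots[10]` and `slots[11]` must all be
+ // `Unavail`.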
+ InUse {
+ size: u32,
+ tree: AVLTree<RangeFragAndRefness>,
+ },
+ // This slot is unavailable, as described above. It's unavailable because
+ // it holds some part of the values associated with the nearest lower
+ // numbered entry which isn't `Unavail`, and that entry must be an `InUse`
+ // entry.
+ Unavail,
+}
+impl LogicalSpillSlot {
+ fn is_Unavail(&self) -> bool {
+ match self {
+ LogicalSpillSlot::Unavail => true,
+ _ => false,
+ }
+ }
+ fn is_InUse(&self) -> bool {
+ !self.is_Unavail()
+ }
+ fn get_tree(&self) -> &AVLTree<RangeFragAndRefness> {
+ match self {
+ LogicalSpillSlot::InUse { ref tree, .. } => tree,
+ LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_tree"),
+ }
+ }
+ fn get_mut_tree(&mut self) -> &mut AVLTree<RangeFragAndRefness> {
+ match self {
+ LogicalSpillSlot::InUse { ref mut tree, .. } => tree,
+ LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_mut_tree"),
+ }
+ }
+ fn get_size(&self) -> u32 {
+ match self {
+ LogicalSpillSlot::InUse { size, .. } => *size,
+ LogicalSpillSlot::Unavail => panic!("LogicalSpillSlot::get_size"),
+ }
+ }
+ // If this spill slot is occupied at `pt`, return the refness of the value (VirtualRange)
+ // stored in it. This is conceptually equivalent to CommitmentMap::lookup_inst_point.
+ fn get_refness_at_inst_point(&self, pt: InstPoint) -> Option<bool> {
+ match self {
+ LogicalSpillSlot::InUse { size: 1, tree } => {
+ // Search the tree to see if a reffy commitment intersects `pt`.
+ let mut root = tree.root;
+ while root != AVL_NULL {
+ let root_node = &tree.pool[root as usize];
+ let root_item = &root_node.item;
+ if pt < root_item.frag.first {
+ // `pt` is to the left of the `root`. So there's no
+ // overlap with `root`. Continue by inspecting the left subtree.
+ root = root_node.left;
+ } else if root_item.frag.last < pt {
+ // Ditto for the right subtree.
+ root = root_node.right;
+ } else {
+ // `pt` overlaps the `root`, so we have what we want.
+ return Some(root_item.is_ref);
+ }
+ }
+ None
+ }
+ LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
+ // Slot isn't in use, or is in use but for values of some non-ref size
+ None
+ }
+ }
+ }
+}
+
+// HELPER FUNCTION
+// Find out whether it is possible to add `frag` to `tree`.
+#[inline(always)]
+fn ssal_is_add_frag_possible(tree: &AVLTree<RangeFragAndRefness>, frag: &RangeFrag) -> bool {
+ // BEGIN check `frag` for any overlap against `tree`.
+ let mut root = tree.root;
+ while root != AVL_NULL {
+ let root_node = &tree.pool[root as usize];
+ let root_item = &root_node.item;
+ if frag.last < root_item.frag.first {
+ // `frag` is entirely to the left of the `root`. So there's no
+ // overlap with root. Continue by inspecting the left subtree.
+ root = root_node.left;
+ } else if root_item.frag.last < frag.first {
+ // Ditto for the right subtree.
+ root = root_node.right;
+ } else {
+ // `frag` overlaps the `root`. Give up.
+ return false;
+ }
+ }
+ // END check `frag` for any overlap against `tree`.
+ // `frag` doesn't overlap.
+ true
+}
+
+// HELPER FUNCTION
+// Find out whether it is possible to add all of `frags` to `tree`. Returns
+// true if possible, false if not. This routine relies on the fact that
+// SortedFrags is non-overlapping. However, this is a bit subtle. We know
+// that both `tree` and `frags` individually are non-overlapping, but there's
+// no guarantee that elements of `frags` don't overlap `tree`. Hence we have
+// to do a custom walk of `tree` to check for overlap; we can't just use
+// `AVLTree::contains`.
+fn ssal_is_add_possible(tree: &AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
+ // Figure out whether all the frags will go in.
+ for frag in &frags.frags {
+ if !ssal_is_add_frag_possible(&tree, frag) {
+ return false;
+ }
+ // `frag` doesn't overlap. Move on to the next one.
+ }
+ true
+}
+
+// HELPER FUNCTION
+// Try to add all of `frags` to `tree`. Return `true` if possible, `false` if not possible. If
+// `false` is returned, `tree` is unchanged (this is important). This routine relies on the
+// fact that SortedFrags is non-overlapping. They are initially all marked as non-reffy. That
+// may later be changed by calls to `SpillSlotAllocator::notify_spillage_of_reftyped_vlr`.
+fn ssal_add_if_possible(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) -> bool {
+ // Check if all the frags will go in.
+ if !ssal_is_add_possible(tree, frags) {
+ return false;
+ }
+ // They will. So now insert them.
+ for frag in &frags.frags {
+ let inserted = tree.insert(
+ RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false),
+ Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+ cmp_range_frags(&item1.frag, &item2.frag)
+ }),
+ );
+ // This can't fail
+ assert!(inserted);
+ }
+ true
+}
+
+// HELPER FUNCTION
+// Let `frags` be the RangeFrags for some VirtualRange, that have already been allocated in
+// `tree`. Mark each such RangeFrag as reffy.
+fn ssal_mark_frags_as_reftyped(tree: &mut AVLTree<RangeFragAndRefness>, frags: &SortedRangeFrags) {
+ for frag in &frags.frags {
+ // Be paranoid. (1) `frag` must already exist in `tree`. (2) it must not be marked as
+ // reffy.
+ let del_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ false);
+ let add_this = RangeFragAndRefness::new(frag.clone(), /*is_ref=*/ true);
+ let replaced_ok = tree.find_and_replace(
+ del_this,
+ add_this,
+ &|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+ cmp_range_frags(&item1.frag, &item2.frag)
+ },
+ );
+ // This assertion effectively encompasses both (1) and (2) above.
+ assert!(replaced_ok);
+ }
+}
+
+//=============================================================================
+// SpillSlotAllocator: public interface
+
+pub struct SpillSlotAllocator {
+ slots: Vec<LogicalSpillSlot>,
+}
+impl SpillSlotAllocator {
+ pub fn new() -> Self {
+ Self { slots: vec![] }
+ }
+
+ pub fn num_slots_in_use(&self) -> usize {
+ self.slots.len()
+ }
+
+ // This adds a new, empty slot, for items of the given size, and returns
+ // its index. This isn't clever, in the sense that it fails to use some
+ // slots that it could use, but at least it's simple. Note, this is a
+ // private method.
+ fn add_new_slot(&mut self, req_size: u32) -> u32 {
+ assert!(req_size == 1 || req_size == 2 || req_size == 4 || req_size == 8);
+ // Satisfy alignment constraints. These entries will unfortunately be
+ // wasted (never used).
+ while self.slots.len() % (req_size as usize) != 0 {
+ self.slots.push(LogicalSpillSlot::Unavail);
+ }
+ // And now the new slot. The `dflt` value is needed by `AVLTree` to initialise storage
+ // slots for tree nodes, but we will never actually see those values. So it doesn't
+ // matter what they are.
+ let dflt = RangeFragAndRefness::new(RangeFrag::invalid_value(), false);
+ let tree = AVLTree::<RangeFragAndRefness>::new(dflt);
+ let res = self.slots.len() as u32;
+ self.slots.push(LogicalSpillSlot::InUse {
+ size: req_size,
+ tree,
+ });
+ // And now "block out subsequent slots that `req_size` implies.
+ // viz: req_size == 1 -> block out 0 more
+ // viz: req_size == 2 -> block out 1 more
+ // viz: req_size == 4 -> block out 3 more
+ // viz: req_size == 8 -> block out 7 more
+ for _ in 1..req_size {
+ self.slots.push(LogicalSpillSlot::Unavail);
+ }
+ assert!(self.slots.len() % (req_size as usize) == 0);
+
+ res
+ }
+
+ // THE MAIN FUNCTION
+ // Allocate spill slots for all the VirtualRanges in `vlrix`s eclass,
+ // including `vlrix` itself. Since we are allocating spill slots for
+ // complete eclasses at once, none of the members of the class should
+ // currently have any allocation. This routine will try to allocate all
+ // class members the same slot, but it can only guarantee to do so if the
+ // class members are mutually non-overlapping. Hence it can't guarantee that
+ // in general.
+ pub fn alloc_spill_slots<F: Function>(
+ &mut self,
+ vlr_slot_env: &mut TypedIxVec<VirtualRangeIx, Option<SpillSlot>>,
+ func: &F,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ vlrEquivClasses: &UnionFindEquivClasses<VirtualRangeIx>,
+ vlrix: VirtualRangeIx,
+ ) {
+ let is_ref = vlr_env[vlrix].is_ref;
+ for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
+ // "None of the VLRs in this equivalence class have an allocated spill slot."
+ // This should be true because we allocate spill slots for all of the members of an
+ // eclass at once.
+ assert!(vlr_slot_env[cand_vlrix].is_none());
+
+ // "All of the VLRs in this eclass have the same ref-ness as this VLR."
+ // Why this is true is a bit subtle. The equivalence classes are computed by
+ // `do_coalescing_analysis`, fundamentally by looking at all the move instructions
+ // and computing the transitive closure induced by them. The ref-ness annotations
+ // on each VLR are computed in `do_reftypes_analysis`, and they are also computed
+ // as a transitive closure on the same move instructions. Hence the results should
+ // be identical.
+ //
+ // With all that said, note that these equivalence classes are *not* guaranteed to
+ // be internally non-overlapping. This is explained in the big block comment at the
+ // top of bt_coalescing_analysis.rs.
+ assert!(vlr_env[cand_vlrix].is_ref == is_ref);
+ }
+
+ // Do this in two passes. It's a bit cumbersome.
+ //
+ // In the first pass, find a spill slot which can take all of the
+ // candidates when we try them *individually*, but don't update the tree
+ // yet. We will always find such a slot, because if none of the existing
+ // slots can do it, we can always start a new one.
+ //
+ // Now, that doesn't guarantee that all the candidates can *together*
+ // be assigned to the chosen slot. That's only possible when they
+ // are non-overlapping. Rather than laboriously try to determine
+ // that, simply proceed with the second pass, the assignment pass, as
+ // follows. For each candidate, try to allocate it to the slot chosen
+ // in the first pass. If it goes in without interference, fine. If
+ // not, that means it overlaps with some other member of the class --
+ // in which case we must find some other slot for it. It's too bad.
+ //
+ // The result is: all members will get a valid spill slot. And if they
+ // were all non-overlapping then we are guaranteed that they all get the
+ // same slot. Which is as good as we can hope for.
+ //
+ // In both passes, only the highest-numbered 8 slots are checked for
+ // availability. This is a heuristic hack which both reduces
+ // allocation time and reduces the eventual resulting spilling:
+ //
+ // - It avoids lots of pointless repeated checking of low-numbered
+ // spill slots that long ago became full(ish) and are unlikely to be
+ // able to take any new VirtualRanges
+ //
+ // - More subtly, it interacts with the question of whether or not
+ // each VirtualRange equivalence class is internally overlapping.
+ // When no overlaps are present, the spill slot allocator guarantees
+ // to find a slot which is free for the entire equivalence class,
+ // which is the ideal solution. When there are overlaps present, the
+ // allocator is forced to allocate at least some of the VirtualRanges
+ // in the class to different slots. By restricting the number of
+ // slots it can choose to 8 (+ extras if it needs them), we reduce the
+ // tendency for the VirtualRanges to be assigned a large number of
+ // different slots, which in turn reduces the amount of spilling in
+ // the end.
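+ //
+ // As a small (hypothetical) illustration of the two passes: suppose the
+ // eclass is {VLR1, VLR2} and VLR1 overlaps VLR2. Pass 1 may well pick some
+ // slot S, because each of VLR1 and VLR2 fits in S when tested on its own
+ // against S's existing commitments. Pass 2 then inserts VLR1 into S, but
+ // VLR2 now clashes with VLR1, so VLR2 has to go to some other slot (or to
+ // a freshly allocated one).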
+
+ // We need to know what regclass, and hence what slot size, we're looking
+ // for. Just look at the representative; all VirtualRanges in the eclass
+ // must have the same regclass. (If they don't, the client's is_move
+ // function has been giving us wrong information.)
+ let vlrix_vreg = vlr_env[vlrix].vreg;
+ let req_size = func.get_spillslot_size(vlrix_vreg.get_class(), vlrix_vreg);
+ assert!(req_size == 1 || req_size == 2 || req_size == 4 || req_size == 8);
+
+ // Sanity check: if the VLR is reftyped, then it must need a 1-word slot
+ // (anything else is nonsensical.)
+ if is_ref {
+ assert!(req_size == 1);
+ }
+
+ // Pass 1: find a slot which can take all VirtualRanges in `vlrix`s
+ // eclass when tested individually.
+ //
+ // Pass 1a: search existing slots
+ let search_start_slotno: u32 = {
+ // We will only search from `search_start_slotno` upwards. See
+ // block comment above for significance of the value `8`.
+ let window = 8;
+ if self.slots.len() >= window {
+ (self.slots.len() - window) as u32
+ } else {
+ 0
+ }
+ };
+ let mut mb_chosen_slotno: Option<u32> = None;
+ // BEGIN search existing slots
+ for cand_slot_no in search_start_slotno..self.slots.len() as u32 {
+ let cand_slot = &self.slots[cand_slot_no as usize];
+ if !cand_slot.is_InUse() {
+ continue;
+ }
+ if cand_slot.get_size() != req_size {
+ continue;
+ }
+ let tree = &cand_slot.get_tree();
+ assert!(mb_chosen_slotno.is_none());
+
+ // BEGIN see if `cand_slot` can hold all eclass members individually
+ let mut all_cands_fit_individually = true;
+ for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
+ let cand_vlr = &vlr_env[cand_vlrix];
+ let this_cand_fits = ssal_is_add_possible(&tree, &cand_vlr.sorted_frags);
+ if !this_cand_fits {
+ all_cands_fit_individually = false;
+ break;
+ }
+ }
+ // END see if `cand_slot` can hold all eclass members individually
+ if !all_cands_fit_individually {
+ continue;
+ }
+
+ // Ok. All eclass members will fit individually in `cand_slot_no`.
+ mb_chosen_slotno = Some(cand_slot_no);
+ break;
+ }
+ // END search existing slots
+
+ // Pass 1b. If we didn't find a usable slot, allocate a new one.
+ let chosen_slotno: u32 = if mb_chosen_slotno.is_none() {
+ self.add_new_slot(req_size)
+ } else {
+ mb_chosen_slotno.unwrap()
+ };
+
+ // Pass 2. Try to allocate each eclass member individually to the chosen
+ // slot. If that fails, just allocate them anywhere.
+ let mut _all_in_chosen = true;
+ 'pass2_per_equiv_class: for cand_vlrix in vlrEquivClasses.equiv_class_elems_iter(vlrix) {
+ let cand_vlr = &vlr_env[cand_vlrix];
+ let mut tree = self.slots[chosen_slotno as usize].get_mut_tree();
+ let added = ssal_add_if_possible(&mut tree, &cand_vlr.sorted_frags);
+ if added {
+ vlr_slot_env[cand_vlrix] = Some(SpillSlot::new(chosen_slotno));
+ continue 'pass2_per_equiv_class;
+ }
+ _all_in_chosen = false;
+ // It won't fit in `chosen_slotno`, so try somewhere (anywhere) else.
+ for alt_slotno in search_start_slotno..self.slots.len() as u32 {
+ let alt_slot = &self.slots[alt_slotno as usize];
+ if !alt_slot.is_InUse() {
+ continue;
+ }
+ if alt_slot.get_size() != req_size {
+ continue;
+ }
+ if alt_slotno == chosen_slotno {
+ // We already know this won't work.
+ continue;
+ }
+ let mut tree = self.slots[alt_slotno as usize].get_mut_tree();
+ let added = ssal_add_if_possible(&mut tree, &cand_vlr.sorted_frags);
+ if added {
+ vlr_slot_env[cand_vlrix] = Some(SpillSlot::new(alt_slotno));
+ continue 'pass2_per_equiv_class;
+ }
+ }
+ // If we get here, it means it won't fit in any slot we currently have.
+ // So allocate a new one and use that.
+ let new_slotno = self.add_new_slot(req_size);
+ let mut tree = self.slots[new_slotno as usize].get_mut_tree();
+ let added = ssal_add_if_possible(&mut tree, &cand_vlr.sorted_frags);
+ if added {
+ vlr_slot_env[cand_vlrix] = Some(SpillSlot::new(new_slotno));
+ continue 'pass2_per_equiv_class;
+ }
+ // We failed to allocate it to any empty slot! This can't happen.
+ panic!("SpillSlotAllocator: alloc_spill_slots: failed?!?!");
+ /*NOTREACHED*/
+ } /* 'pass2_per_equiv_class */
+ }
+
+ // STACKMAP SUPPORT
+ // Mark the `frags` for `slot_no` as being reftyped. They are expected to already exist in
+ // the relevant tree, and not currently be marked as reftyped.
+ pub fn notify_spillage_of_reftyped_vlr(
+ &mut self,
+ slot_no: SpillSlot,
+ frags: &SortedRangeFrags,
+ ) {
+ let slot_ix = slot_no.get_usize();
+ assert!(slot_ix < self.slots.len());
+ let slot = &mut self.slots[slot_ix];
+ match slot {
+ LogicalSpillSlot::InUse { size, tree } if *size == 1 => {
+ ssal_mark_frags_as_reftyped(tree, frags)
+ }
+ _ => panic!("SpillSlotAllocator::notify_spillage_of_reftyped_vlr: invalid slot"),
+ }
+ }
+
+ // STACKMAP SUPPORT
+ // Allocate a size-1 (word!) spill slot for `frag` and return it. The slot is marked
+ // reftyped so that a later call to `get_reftyped_spillslots_at_inst_point` will return it.
+ pub fn alloc_reftyped_spillslot_for_frag(&mut self, frag: RangeFrag) -> SpillSlot {
+ for i in 0..self.slots.len() {
+ match &mut self.slots[i] {
+ LogicalSpillSlot::InUse { size: 1, tree } => {
+ if ssal_is_add_frag_possible(&tree, &frag) {
+ // We're in luck.
+ let inserted = tree.insert(
+ RangeFragAndRefness::new(frag, /*is_ref=*/ true),
+ Some(&|item1: RangeFragAndRefness, item2: RangeFragAndRefness| {
+ cmp_range_frags(&item1.frag, &item2.frag)
+ }),
+ );
+ // This can't fail -- we just checked for it!
+ assert!(inserted);
+ return SpillSlot::new(i as u32);
+ }
+ // Otherwise move on.
+ }
+ LogicalSpillSlot::InUse { .. } | LogicalSpillSlot::Unavail => {
+ // Slot isn't in use, or is in use but for values of some non-ref size.
+ // Move on.
+ }
+ }
+ }
+ // We tried all slots, but without success. Add a new one and try again. This time we
+ // must succeed. Calling recursively is a bit stupid in the sense that we then search
+ // again to find the slot we just allocated, but hey.
+ self.add_new_slot(1 /*word*/);
+ self.alloc_reftyped_spillslot_for_frag(frag) // \o/ tailcall \o/
+ }
+
+ // STACKMAP SUPPORT
+ // Examine all the spill slots at `pt` and return those that are reftyped. This is
+ // fundamentally what creates a stack map.
+ pub fn get_reftyped_spillslots_at_inst_point(&self, pt: InstPoint) -> Vec<SpillSlot> {
+ let mut res = Vec::<SpillSlot>::new();
+ for (i, slot) in self.slots.iter().enumerate() {
+ if slot.get_refness_at_inst_point(pt) == Some(true) {
+ res.push(SpillSlot::new(i as u32));
+ }
+ }
+ res
+ }
+}
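+
+// The following is a minimal illustrative sketch, not part of the upstream
+// crate: it shows how `add_new_slot` pads for alignment and then blocks out
+// the `Unavail` entries implied by `req_size`, per the `LogicalSpillSlot`
+// invariant above. The module and test names are invented for illustration.
+#[cfg(test)]
+mod spillslot_layout_sketch {
+ use super::*;
+
+ #[test]
+ fn add_new_slot_pads_and_blocks_out() {
+ let mut ssa = SpillSlotAllocator::new();
+ // A size-1 slot needs no alignment padding and lands at index 0.
+ assert_eq!(ssa.add_new_slot(1), 0);
+ assert_eq!(ssa.num_slots_in_use(), 1);
+ // A size-4 slot must start at a multiple of 4: indices 1..4 become
+ // `Unavail` padding, the slot itself lands at index 4, and indices
+ // 5..8 are blocked out as `Unavail`.
+ assert_eq!(ssa.add_new_slot(4), 4);
+ assert_eq!(ssa.num_slots_in_use(), 8);
+ assert!(ssa.slots[4].is_InUse());
+ assert!(ssa.slots[1].is_Unavail() && ssa.slots[7].is_Unavail());
+ }
+}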
diff --git a/third_party/rust/regalloc/src/bt_vlr_priority_queue.rs b/third_party/rust/regalloc/src/bt_vlr_priority_queue.rs
new file mode 100644
index 0000000000..1be9502e49
--- /dev/null
+++ b/third_party/rust/regalloc/src/bt_vlr_priority_queue.rs
@@ -0,0 +1,172 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+//! Backtracking allocator: the as-yet-unallocated VirtualReg LR prio queue.
+
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+
+use crate::data_structures::{TypedIxVec, VirtualRange, VirtualRangeIx};
+
+//=============================================================================
+// The as-yet-unallocated VirtualReg LR prio queue, `VirtualRangePrioQ`.
+//
+// Relevant methods are parameterised by a VirtualRange env.
+
+// What we seek to do with `VirtualRangePrioQ` is to implement a priority
+// queue of as-yet unallocated virtual live ranges. For each iteration of the
+// main allocation loop, we pull out the highest-priority unallocated
+// VirtualRange, and either allocate it (somehow), or spill it.
+//
+// The Rust standard type BinaryHeap gives us an efficient way to implement
+// the priority queue. However, it requires that the queue items supply the
+// necessary cost-comparisons by implementing `Ord` on that type. Hence we
+// have to wrap up the items we fundamentally want in the queue, viz,
+// `VirtualRangeIx`, into a new type `VirtualRangeIxAndSize` that also carries
+// the relevant cost field, `size`. Then we implement `Ord` for
+// `VirtualRangeIxAndSize` so as to only look at the `size` fields.
+//
+// There is a small twist, however. Most virtual ranges are small and so will
+// have a small `size` field (less than 20, let's say). For such cases,
+// `BinaryHeap` will presumably choose between contenders with the same `size`
+// field in some arbitrary way. This has two disadvantages:
+//
+// * it makes the exact allocation order, and hence allocation results,
+// dependent on `BinaryHeap`'s arbitrary-choice scheme. This seems
+// undesirable, and given recent shenanigans resulting from `HashMap` being
+// nondeterministic even in a single-threaded scenario, I don't entirely
+// trust `BinaryHeap` even to be deterministic.
+//
+// * experimentation with the "qsort" test case shows that breaking ties by
+// selecting the entry that has been in the queue the longest, rather than
+// choosing arbitrarily, gives slightly better allocations (slightly less
+// spilling) in spill-heavy situations (where there are few registers).
+// When there is not much spilling, it makes no difference.
+//
+// For these reasons, `VirtualRangeIxAndSize` also contains a `tiebreaker`
+// field. The `VirtualRangePrioQ` logic gives a different value of this for
+// each `VirtualRangeIxAndSize` it creates. These numbers start off at 2^32-1
+// and decrease towards zero. They are used as a secondary comparison key in
+// the case where the `size` fields are equal. The effect is that (1)
+// tiebreaking is made completely deterministic, and (2) it breaks ties in
+// favour of the oldest entry (since that will have the highest `tiebreaker`
+// field).
+//
+// The tiebreaker field will wrap around when it hits zero, but that can only
+// happen after processing 2^32-1 virtual live ranges. In such cases I would
+// expect that the allocator would have run out of memory long before, so it's
+// academic in practice. Even if it does wrap around there is no danger to
+// the correctness of the allocations.
+
+// Wrap up a VirtualRangeIx and its size, so that we can implement Ord for it
+// on the basis of the `size` and `tiebreaker` fields.
+//
+// NB! Do not derive {,Partial}{Eq,Ord} for this. It has its own custom
+// implementations.
+struct VirtualRangeIxAndSize {
+ vlrix: VirtualRangeIx,
+ size: u16,
+ tiebreaker: u32,
+}
+impl VirtualRangeIxAndSize {
+ fn new(vlrix: VirtualRangeIx, size: u16, tiebreaker: u32) -> Self {
+ assert!(size > 0);
+ Self {
+ vlrix,
+ size,
+ tiebreaker,
+ }
+ }
+}
+impl PartialEq for VirtualRangeIxAndSize {
+ fn eq(&self, other: &Self) -> bool {
+ self.size == other.size && self.tiebreaker == other.tiebreaker
+ }
+}
+impl Eq for VirtualRangeIxAndSize {}
+impl PartialOrd for VirtualRangeIxAndSize {
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
+}
+impl Ord for VirtualRangeIxAndSize {
+ fn cmp(&self, other: &Self) -> Ordering {
+ match self.size.cmp(&other.size) {
+ Ordering::Less => Ordering::Less,
+ Ordering::Greater => Ordering::Greater,
+ Ordering::Equal => self.tiebreaker.cmp(&other.tiebreaker),
+ }
+ }
+}
+
+//=============================================================================
+// VirtualRangePrioQ: public interface
+
+pub struct VirtualRangePrioQ {
+ // The set of as-yet unallocated VirtualRangeIxs. These are indexes into a
+ // VirtualRange env that is not stored here. The VirtualRangeIxs are tagged
+ // with the VirtualRange size and a tiebreaker field.
+ heap: BinaryHeap<VirtualRangeIxAndSize>,
+ tiebreaker_ctr: u32,
+}
+impl VirtualRangePrioQ {
+ pub fn new(vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>) -> Self {
+ let mut res = Self {
+ heap: BinaryHeap::new(),
+ tiebreaker_ctr: 0xFFFF_FFFFu32,
+ };
+ for vlrix in VirtualRangeIx::new(0).dotdot(VirtualRangeIx::new(vlr_env.len())) {
+ let to_add = VirtualRangeIxAndSize::new(vlrix, vlr_env[vlrix].size, res.tiebreaker_ctr);
+ res.heap.push(to_add);
+ res.tiebreaker_ctr -= 1;
+ }
+ res
+ }
+
+ #[inline(never)]
+ pub fn is_empty(&self) -> bool {
+ self.heap.is_empty()
+ }
+
+ #[inline(never)]
+ pub fn len(&self) -> usize {
+ self.heap.len()
+ }
+
+ // Add a VirtualRange index.
+ #[inline(never)]
+ pub fn add_VirtualRange(
+ &mut self,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ vlrix: VirtualRangeIx,
+ ) {
+ let to_add = VirtualRangeIxAndSize::new(vlrix, vlr_env[vlrix].size, self.tiebreaker_ctr);
+ self.tiebreaker_ctr -= 1;
+ self.heap.push(to_add);
+ }
+
+ // Look in the queue to locate the entry referencing the VirtualRange with
+ // the largest `size` value. Remove that entry from the queue and return
+ // the VirtualRangeIx for said entry.
+ #[inline(never)]
+ pub fn get_longest_VirtualRange(&mut self) -> Option<VirtualRangeIx> {
+ match self.heap.pop() {
+ None => None,
+ Some(VirtualRangeIxAndSize { vlrix, .. }) => Some(vlrix),
+ }
+ }
+
+ #[inline(never)]
+ pub fn show_with_envs(
+ &self,
+ vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
+ ) -> Vec<String> {
+ let mut resV = vec![];
+ for VirtualRangeIxAndSize { vlrix, .. } in self.heap.iter() {
+ let mut res = "TODO ".to_string();
+ res += &format!("{:?} = {:?}", vlrix, &vlr_env[*vlrix]);
+ resV.push(res);
+ }
+ resV
+ }
+}
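+
+// The following is a minimal illustrative sketch, not part of the upstream
+// crate: it checks the ordering scheme described above -- a larger `size`
+// always wins, and equal sizes are broken in favour of the larger (i.e.
+// older) `tiebreaker`. The module and test names are invented for
+// illustration.
+#[cfg(test)]
+mod prio_order_sketch {
+ use super::*;
+
+ #[test]
+ fn ties_are_broken_in_favour_of_the_oldest_entry() {
+ // Older entries carry larger tiebreakers, since the counter starts at
+ // 2^32-1 and counts down.
+ let older = VirtualRangeIxAndSize::new(VirtualRangeIx::new(0), 5, 0xFFFF_FFFF);
+ let newer = VirtualRangeIxAndSize::new(VirtualRangeIx::new(1), 5, 0xFFFF_FFFE);
+ let bigger = VirtualRangeIxAndSize::new(VirtualRangeIx::new(2), 6, 0);
+ // Equal sizes: the older entry compares greater, so a max-heap
+ // (`BinaryHeap`) pops it first.
+ assert!(older > newer);
+ // A larger size wins regardless of the tiebreaker.
+ assert!(bigger > older);
+ }
+}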
diff --git a/third_party/rust/regalloc/src/checker.rs b/third_party/rust/regalloc/src/checker.rs
new file mode 100644
index 0000000000..eef2d1e6e2
--- /dev/null
+++ b/third_party/rust/regalloc/src/checker.rs
@@ -0,0 +1,717 @@
+//! Checker: verifies that spills/reloads/moves retain equivalent dataflow to original, vreg-based
+//! code.
+//!
+//! The basic idea is that we track symbolic values as they flow through spills and reloads.
+//! The symbolic values represent particular virtual or real registers in the original
+//! function body presented to the register allocator. Any instruction in the original
+//! function body (i.e., not added by the allocator) conceptually generates a symbolic
+//! value "Rn" or "Vn" when storing to (or modifying) a real or virtual register. This
+//! includes moves (from e.g. phi-node lowering): they also generate a new value.
+//!
+//! In other words, the dataflow analysis state at each program point is:
+//!
+//! - map `R` of: real reg -> lattice value (top > Rn/Vn symbols (unordered) > bottom)
+//! - map `S` of: spill slot -> lattice value (same)
+//!
+//! And the transfer functions for each statement type are:
+//!
+//! - spill (inserted by RA): [ store spill_i, R_j ]
+//!
+//! S[spill_i] := R[R_j]
+//!
+//! - reload (inserted by RA): [ load R_i, spill_j ]
+//!
+//! R[R_i] := S[spill_j]
+//!
+//! - move (inserted by RA): [ R_i := R_j ]
+//!
+//! R[R_i] := R[R_j]
+//!
+//! - statement in pre-regalloc function [ V_i := op V_j, V_k, ... ]
+//! with allocated form [ R_i := op R_j, R_k, ... ]
+//!
+//! R[R_i] := `V_i`
+//!
+//! In other words, a statement, even after allocation, generates a symbol
+//! that corresponds to its original virtual-register def.
+//!
+//! (N.B.: moves in pre-regalloc function fall into this last case -- they
+//! are "just another operation" and generate a new symbol)
+//!
+//! (Slight extension for multi-def ops, and ops with "modify" args: the op
+//! generates symbol `V_i` into reg `R_i` allocated for that particular def/mod).
+//!
+//! The initial state is: for each real reg R_livein where R_livein is in the livein set, we set
+//! R[R_livein] to `R_livein`.
+//!
+//! At control-flow join points, the symbols meet using a very simple lattice meet-function:
+//! two different symbols in the same real-reg or spill-slot meet to "conflicted"; otherwise,
+//! the symbol meets with itself to produce itself (idempotence).
+//!
+//! To check correctness, we first find the dataflow fixpoint with the above lattice and
+//! transfer/meet functions. Then, at each op, we examine the dataflow solution at the preceding
+//! program point, and check that the real reg for each op arg (input/use) contains the symbol
+//! corresponding to the original (usually virtual) register specified for this arg.
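+//!
+//! A small worked (hypothetical) example: if the original code contains
+//! [ v1 := add v0, v0 ] and its allocated form is [ R2 := add R1, R1 ],
+//! preceded by an inserted reload [ load R1, spill0 ], then the reload sets
+//! R[R1] := S[spill0], the check at the `add` requires R[R1] to be `v0`
+//! (which holds provided S[spill0] held `v0`), and after the `add` we have
+//! R[R2] := `v1`.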
+
+#![allow(dead_code)]
+
+use crate::analysis_data_flow::get_san_reg_sets_for_insn;
+use crate::data_structures::{
+ BlockIx, InstIx, Map, RealReg, RealRegUniverse, Reg, RegSets, SpillSlot, VirtualReg, Writable,
+};
+use crate::inst_stream::{ExtPoint, InstExtPoint, InstToInsertAndExtPoint};
+use crate::{Function, RegUsageMapper};
+
+use rustc_hash::FxHashSet;
+use std::collections::VecDeque;
+use std::default::Default;
+use std::hash::Hash;
+use std::result::Result;
+
+use log::debug;
+
+/// A set of errors detected by the regalloc checker.
+#[derive(Clone, Debug)]
+pub struct CheckerErrors {
+ errors: Vec<CheckerError>,
+}
+
+/// A single error detected by the regalloc checker.
+#[derive(Clone, Debug)]
+pub enum CheckerError {
+ MissingAllocationForReg {
+ reg: VirtualReg,
+ inst: InstIx,
+ },
+ UnknownValueInReg {
+ real_reg: RealReg,
+ inst: InstIx,
+ },
+ IncorrectValueInReg {
+ actual: Reg,
+ expected: Reg,
+ real_reg: RealReg,
+ inst: InstIx,
+ },
+ UnknownValueInSlot {
+ slot: SpillSlot,
+ expected: Reg,
+ inst: InstIx,
+ },
+ IncorrectValueInSlot {
+ slot: SpillSlot,
+ expected: Reg,
+ actual: Reg,
+ inst: InstIx,
+ },
+ StackMapSpecifiesNonRefSlot {
+ inst: InstIx,
+ slot: SpillSlot,
+ },
+ StackMapSpecifiesUndefinedSlot {
+ inst: InstIx,
+ slot: SpillSlot,
+ },
+}
+
+/// Abstract state for a storage slot (real register or spill slot).
+///
+/// Forms a lattice with \top (`Unknown`), \bot (`Conflicted`), and a number of mutually unordered
+/// value-points in between, one per real or virtual register. Any two different registers
+/// meet to \bot.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum CheckerValue {
+ /// "top" value: this storage slot has no known value.
+ Unknown,
+ /// "bottom" value: this storage slot has a conflicted value.
+ Conflicted,
+ /// Reg: this storage slot has a value that originated as a def into
+ /// the given register, either implicitly (RealRegs at beginning of
+ /// function) or explicitly (as an instruction's def).
+ ///
+ /// The boolean flag indicates whether the value is reference-typed.
+ Reg(Reg, bool),
+}
+
+impl Default for CheckerValue {
+ fn default() -> CheckerValue {
+ CheckerValue::Unknown
+ }
+}
+
+impl CheckerValue {
+ /// Meet function of the abstract-interpretation value lattice.
+ fn meet(&self, other: &CheckerValue) -> CheckerValue {
+ match (self, other) {
+ (&CheckerValue::Unknown, _) => *other,
+ (_, &CheckerValue::Unknown) => *self,
+ (&CheckerValue::Conflicted, _) => *self,
+ (_, &CheckerValue::Conflicted) => *other,
+ (&CheckerValue::Reg(r1, ref1), &CheckerValue::Reg(r2, ref2)) if r1 == r2 => {
+ CheckerValue::Reg(r1, ref1 || ref2)
+ }
+ _ => CheckerValue::Conflicted,
+ }
+ }
+}
+
+/// State that steps through program points as we scan over the instruction stream.
+#[derive(Clone, Debug, PartialEq, Eq)]
+struct CheckerState {
+ /// For each RealReg, abstract state.
+ reg_values: Map<RealReg, CheckerValue>,
+ /// For each spill slot, abstract state.
+ spill_slots: Map<SpillSlot, CheckerValue>,
+}
+
+impl Default for CheckerState {
+ fn default() -> CheckerState {
+ CheckerState {
+ reg_values: Map::default(),
+ spill_slots: Map::default(),
+ }
+ }
+}
+
+fn merge_map<K: Copy + Clone + PartialEq + Eq + Hash>(
+ into: &mut Map<K, CheckerValue>,
+ from: &Map<K, CheckerValue>,
+) {
+ for (k, v) in from {
+ let into_v = into.entry(*k).or_insert(Default::default());
+ let merged = into_v.meet(v);
+ *into_v = merged;
+ }
+}
+
+impl CheckerState {
+ /// Create a new checker state.
+ fn new() -> CheckerState {
+ Default::default()
+ }
+
+ /// Produce an entry checker state with all real regs holding themselves, symbolically.
+ fn entry_state(ru: &RealRegUniverse) -> CheckerState {
+ let mut state = CheckerState::new();
+ for &(rreg, _) in &ru.regs {
+ state
+ .reg_values
+ .insert(rreg, CheckerValue::Reg(rreg.to_reg(), false));
+ }
+ state
+ }
+
+ /// Merge this checker state with another at a CFG join-point.
+ fn meet_with(&mut self, other: &CheckerState) {
+ merge_map(&mut self.reg_values, &other.reg_values);
+ merge_map(&mut self.spill_slots, &other.spill_slots);
+ }
+
+ /// Check an instruction against this state.
+ fn check(&self, inst: &Inst) -> Result<(), CheckerError> {
+ match inst {
+ &Inst::Op {
+ inst_ix,
+ ref uses_orig,
+ ref uses,
+ ..
+ } => {
+ // For each use, check the mapped RealReg's symbolic value; it must
+ // be the original reg.
+ assert!(uses_orig.len() == uses.len());
+ for (orig, mapped) in uses_orig.iter().cloned().zip(uses.iter().cloned()) {
+ let val = self
+ .reg_values
+ .get(&mapped)
+ .cloned()
+ .unwrap_or(Default::default());
+ debug!(
+ "checker: inst {:?}: orig {:?}, mapped {:?}, checker state {:?}",
+ inst, orig, mapped, val
+ );
+ match val {
+ CheckerValue::Unknown | CheckerValue::Conflicted => {
+ return Err(CheckerError::UnknownValueInReg {
+ real_reg: mapped,
+ inst: inst_ix,
+ });
+ }
+ CheckerValue::Reg(r, _) if r != orig => {
+ return Err(CheckerError::IncorrectValueInReg {
+ actual: r,
+ expected: orig,
+ real_reg: mapped,
+ inst: inst_ix,
+ });
+ }
+ _ => {}
+ }
+ }
+ }
+ &Inst::ChangeSpillSlotOwnership {
+ inst_ix,
+ slot,
+ from_reg,
+ ..
+ } => {
+ let val = self
+ .spill_slots
+ .get(&slot)
+ .cloned()
+ .unwrap_or(Default::default());
+ debug!("checker: inst {:?}: slot value {:?}", inst, val);
+ match val {
+ CheckerValue::Unknown | CheckerValue::Conflicted => {
+ return Err(CheckerError::UnknownValueInSlot {
+ slot,
+ expected: from_reg,
+ inst: inst_ix,
+ });
+ }
+ CheckerValue::Reg(r, _) if r != from_reg => {
+ return Err(CheckerError::IncorrectValueInSlot {
+ slot,
+ expected: from_reg,
+ actual: r,
+ inst: inst_ix,
+ });
+ }
+ _ => {}
+ }
+ }
+ &Inst::Safepoint { inst_ix, ref slots } => {
+ self.check_stackmap(inst_ix, slots)?;
+ }
+ _ => {}
+ }
+ Ok(())
+ }
+
+ fn check_stackmap(&self, inst: InstIx, slots: &Vec<SpillSlot>) -> Result<(), CheckerError> {
+ // N.B.: it's OK for the stackmap to omit a slot that has a ref value in
+ // it; it might be dead. We simply update such a slot's value to
+ // 'undefined' in the transfer function.
+ for &slot in slots {
+ match self.spill_slots.get(&slot) {
+ Some(CheckerValue::Reg(_, false)) => {
+ return Err(CheckerError::StackMapSpecifiesNonRefSlot { inst, slot });
+ }
+ Some(CheckerValue::Reg(_, true)) => {
+ // OK.
+ }
+ _ => {
+ return Err(CheckerError::StackMapSpecifiesUndefinedSlot { inst, slot });
+ }
+ }
+ }
+ Ok(())
+ }
+
+ fn update_stackmap(&mut self, slots: &Vec<SpillSlot>) {
+ for (&slot, val) in &mut self.spill_slots {
+ if let &mut CheckerValue::Reg(_, true) = val {
+ let in_stackmap = slots.binary_search(&slot).is_ok();
+ if !in_stackmap {
+ *val = CheckerValue::Unknown;
+ }
+ }
+ }
+ }
+
+ /// Update according to instruction.
+ pub(crate) fn update(&mut self, inst: &Inst) {
+ match inst {
+ &Inst::Op {
+ ref defs_orig,
+ ref defs,
+ ref defs_reftyped,
+ ..
+ } => {
+ // For each def, set the symbolic value of the mapped RealReg to a
+ // symbol corresponding to the original def.
+ assert!(defs_orig.len() == defs.len());
+ for i in 0..defs.len() {
+ let orig = defs_orig[i];
+ let mapped = defs[i];
+ let reftyped = defs_reftyped[i];
+ self.reg_values
+ .insert(mapped, CheckerValue::Reg(orig, reftyped));
+ }
+ }
+ &Inst::Move { into, from } => {
+ let val = self
+ .reg_values
+ .get(&from)
+ .cloned()
+ .unwrap_or(Default::default());
+ self.reg_values.insert(into.to_reg(), val);
+ }
+ &Inst::ChangeSpillSlotOwnership { slot, to_reg, .. } => {
+ let reftyped = if let Some(val) = self.spill_slots.get(&slot) {
+ match val {
+ &CheckerValue::Reg(_, reftyped) => reftyped,
+ _ => false,
+ }
+ } else {
+ false
+ };
+ self.spill_slots
+ .insert(slot, CheckerValue::Reg(to_reg, reftyped));
+ }
+ &Inst::Spill { into, from } => {
+ let val = self
+ .reg_values
+ .get(&from)
+ .cloned()
+ .unwrap_or(Default::default());
+ self.spill_slots.insert(into, val);
+ }
+ &Inst::Reload { into, from } => {
+ let val = self
+ .spill_slots
+ .get(&from)
+ .cloned()
+ .unwrap_or(Default::default());
+ self.reg_values.insert(into.to_reg(), val);
+ }
+ &Inst::Safepoint { ref slots, .. } => {
+ self.update_stackmap(slots);
+ }
+ }
+ }
+}
+
+/// An instruction representation in the checker's BB summary.
+#[derive(Clone, Debug)]
+pub(crate) enum Inst {
+ /// A register spill into memory.
+ Spill { into: SpillSlot, from: RealReg },
+ /// A register reload from memory.
+ Reload {
+ into: Writable<RealReg>,
+ from: SpillSlot,
+ },
+ /// A regalloc-inserted move (not a move in the original program!)
+ Move {
+ into: Writable<RealReg>,
+ from: RealReg,
+ },
+    /// A spillslot ghost move (between vregs) resulting from a user-program
+ /// move whose source and destination regs are both vregs that are currently
+ /// spilled.
+ ChangeSpillSlotOwnership {
+ inst_ix: InstIx,
+ slot: SpillSlot,
+ from_reg: Reg,
+ to_reg: Reg,
+ },
+ /// A regular instruction with fixed use and def slots. Contains both
+ /// the original registers (as given to the regalloc) and the allocated ones.
+ Op {
+ inst_ix: InstIx,
+ defs_orig: Vec<Reg>,
+ uses_orig: Vec<Reg>,
+ defs: Vec<RealReg>,
+ uses: Vec<RealReg>,
+ defs_reftyped: Vec<bool>,
+ },
+ /// A safepoint, with a list of expected slots.
+ Safepoint {
+ inst_ix: InstIx,
+ slots: Vec<SpillSlot>,
+ },
+}
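+
+// A minimal sketch (not from the original source) of how a regalloc-inserted spill/reload
+// pair is modelled with the variants above, assuming `rax` and `rbx` are `RealReg`s and
+// that an earlier `Inst::Op` left v7's symbol in `rax`:
+//
+//     let spill  = Inst::Spill  { into: SpillSlot::new(0), from: rax };
+//     let reload = Inst::Reload { into: Writable::from_reg(rbx), from: SpillSlot::new(0) };
+//
+// After `CheckerState::update()` has stepped over both, slot S0 and register `rbx` carry
+// v7's symbol, so a later `Inst::Op` that reads v7 via `rbx` passes `check()`.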
+
+#[derive(Debug)]
+pub(crate) struct Checker {
+ bb_entry: BlockIx,
+ bb_in: Map<BlockIx, CheckerState>,
+ bb_succs: Map<BlockIx, Vec<BlockIx>>,
+ bb_insts: Map<BlockIx, Vec<Inst>>,
+ reftyped_vregs: FxHashSet<VirtualReg>,
+}
+
+fn map_regs<F: Fn(VirtualReg) -> Option<RealReg>>(
+ inst: InstIx,
+ regs: &[Reg],
+ f: &F,
+) -> Result<Vec<RealReg>, CheckerErrors> {
+ let mut errors = Vec::new();
+ let real_regs = regs
+ .iter()
+ .map(|r| {
+ if r.is_virtual() {
+ f(r.to_virtual_reg()).unwrap_or_else(|| {
+ errors.push(CheckerError::MissingAllocationForReg {
+ reg: r.to_virtual_reg(),
+ inst,
+ });
+ // Provide a dummy value for the register, it'll never be read.
+ Reg::new_real(r.get_class(), 0x0, 0).to_real_reg()
+ })
+ } else {
+ r.to_real_reg()
+ }
+ })
+ .collect();
+ if errors.is_empty() {
+ Ok(real_regs)
+ } else {
+ Err(CheckerErrors { errors })
+ }
+}
+
+impl Checker {
+ /// Create a new checker for the given function, initializing CFG info immediately.
+ /// The client should call the `add_*()` methods to add abstract instructions to each
+ /// BB before invoking `run()` to check for errors.
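+    ///
+    /// In practice `CheckerContext` below drives these calls; as a rough sketch of the direct
+    /// API, with hypothetical `func`, `universe` and `mapper` values (not a doctest):
+    ///
+    /// ```ignore
+    /// let mut checker = Checker::new(&func, &universe, /* reftyped_vregs = */ &[]);
+    /// for bix in func.blocks() {
+    ///     for iix in func.block_insns(bix) {
+    ///         let regsets =
+    ///             get_san_reg_sets_for_insn::<F>(func.get_insn(iix), &universe).unwrap();
+    ///         checker.add_op(bix, iix, &regsets, &mapper)?;
+    ///     }
+    /// }
+    /// checker.run()?;
+    /// ```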
+ pub(crate) fn new<F: Function>(
+ f: &F,
+ ru: &RealRegUniverse,
+ reftyped_vregs: &[VirtualReg],
+ ) -> Checker {
+ let mut bb_in = Map::default();
+ let mut bb_succs = Map::default();
+ let mut bb_insts = Map::default();
+
+ for block in f.blocks() {
+ bb_in.insert(block, Default::default());
+ bb_succs.insert(block, f.block_succs(block).to_vec());
+ bb_insts.insert(block, vec![]);
+ }
+
+ bb_in.insert(f.entry_block(), CheckerState::entry_state(ru));
+
+ let reftyped_vregs = reftyped_vregs.iter().cloned().collect::<FxHashSet<_>>();
+ Checker {
+ bb_entry: f.entry_block(),
+ bb_in,
+ bb_succs,
+ bb_insts,
+ reftyped_vregs,
+ }
+ }
+
+ /// Add an abstract instruction (spill, reload, or move) to a BB.
+ ///
+ /// Can also accept an `Inst::Op`, but `add_op()` is better-suited
+ /// for this.
+ pub(crate) fn add_inst(&mut self, block: BlockIx, inst: Inst) {
+ let insts = self.bb_insts.get_mut(&block).unwrap();
+ insts.push(inst);
+ }
+
+ /// Add a "normal" instruction that uses, modifies, and/or defines certain
+    /// registers. The given sanitized `RegSets` must be the pre-allocation state;
+ /// the `mapper` must be provided to give the virtual -> real mappings at
+ /// the program points immediately before and after this instruction.
+ pub(crate) fn add_op<RUM: RegUsageMapper>(
+ &mut self,
+ block: BlockIx,
+ inst_ix: InstIx,
+ regsets: &RegSets,
+ mapper: &RUM,
+ ) -> Result<(), CheckerErrors> {
+ debug!(
+ "add_op: block {} inst {} regsets {:?}",
+ block.get(),
+ inst_ix.get(),
+ regsets
+ );
+ assert!(regsets.is_sanitized());
+ let mut uses_set = regsets.uses.clone();
+ let mut defs_set = regsets.defs.clone();
+ uses_set.union(&regsets.mods);
+ defs_set.union(&regsets.mods);
+ if uses_set.is_empty() && defs_set.is_empty() {
+ return Ok(());
+ }
+
+ let uses_orig = uses_set.to_vec();
+ let defs_orig = defs_set.to_vec();
+ let uses = map_regs(inst_ix, &uses_orig[..], &|vreg| mapper.get_use(vreg))?;
+ let defs = map_regs(inst_ix, &defs_orig[..], &|vreg| mapper.get_def(vreg))?;
+ let defs_reftyped = defs_orig
+ .iter()
+ .map(|reg| reg.is_virtual() && self.reftyped_vregs.contains(&reg.to_virtual_reg()))
+ .collect();
+ let insts = self.bb_insts.get_mut(&block).unwrap();
+ let op = Inst::Op {
+ inst_ix,
+ uses_orig,
+ defs_orig,
+ uses,
+ defs,
+ defs_reftyped,
+ };
+ debug!("add_op: adding {:?}", op);
+ insts.push(op);
+ Ok(())
+ }
+
+ /// Perform the dataflow analysis to compute checker state at each BB entry.
+ fn analyze(&mut self) {
+ let mut queue = VecDeque::new();
+ queue.push_back(self.bb_entry);
+
+ while !queue.is_empty() {
+ let block = queue.pop_front().unwrap();
+ let mut state = self.bb_in.get(&block).cloned().unwrap();
+ debug!("analyze: block {} has state {:?}", block.get(), state);
+ for inst in self.bb_insts.get(&block).unwrap() {
+ state.update(inst);
+ debug!("analyze: inst {:?} -> state {:?}", inst, state);
+ }
+
+ for succ in self.bb_succs.get(&block).unwrap() {
+ let cur_succ_in = self.bb_in.get(succ).unwrap();
+ let mut new_state = state.clone();
+ new_state.meet_with(cur_succ_in);
+ let changed = &new_state != cur_succ_in;
+ if changed {
+ debug!(
+ "analyze: block {} state changed from {:?} to {:?}; pushing onto queue",
+ succ.get(),
+ cur_succ_in,
+ new_state
+ );
+ self.bb_in.insert(*succ, new_state);
+ queue.push_back(*succ);
+ }
+ }
+ }
+ }
+
+ /// Using BB-start state computed by `analyze()`, step the checker state
+ /// through each BB and check each instruction's register allocations
+ /// for errors.
+ fn find_errors(&self) -> Result<(), CheckerErrors> {
+ let mut errors = vec![];
+ for (block, input) in &self.bb_in {
+ let mut state = input.clone();
+ for inst in self.bb_insts.get(block).unwrap() {
+ if let Err(e) = state.check(inst) {
+ debug!("Checker error: {:?}", e);
+ errors.push(e);
+ }
+ state.update(inst);
+ }
+ }
+
+ if errors.is_empty() {
+ Ok(())
+ } else {
+ Err(CheckerErrors { errors })
+ }
+ }
+
+ /// Find any errors, returning `Err(CheckerErrors)` with all errors found
+ /// or `Ok(())` otherwise.
+ pub(crate) fn run(mut self) -> Result<(), CheckerErrors> {
+ debug!("Checker: full body is:\n{:?}", self.bb_insts);
+ self.analyze();
+ self.find_errors()
+ }
+}
+
+/// A wrapper around `Checker` that assists in using it together with
+/// `InstToInsertAndExtPoint`s and a `Function`.
+pub(crate) struct CheckerContext {
+ checker: Checker,
+ checker_inst_map: Map<InstExtPoint, Vec<Inst>>,
+}
+
+impl CheckerContext {
+ /// Create a new checker context for the given function, which is about to be edited with the
+ /// given instruction insertions.
+ pub(crate) fn new<F: Function>(
+ f: &F,
+ ru: &RealRegUniverse,
+ insts_to_add: &Vec<InstToInsertAndExtPoint>,
+ safepoint_insns: &[InstIx],
+ stackmaps: &[Vec<SpillSlot>],
+ reftyped_vregs: &[VirtualReg],
+ ) -> CheckerContext {
+ assert!(safepoint_insns.len() == stackmaps.len());
+ let mut checker_inst_map: Map<InstExtPoint, Vec<Inst>> = Map::default();
+ for &InstToInsertAndExtPoint { ref inst, ref iep } in insts_to_add {
+ let checker_insts = checker_inst_map
+ .entry(iep.clone())
+ .or_insert_with(|| vec![]);
+ checker_insts.push(inst.to_checker_inst());
+ }
+ for (iix, slots) in safepoint_insns.iter().zip(stackmaps.iter()) {
+ let iep = InstExtPoint::new(*iix, ExtPoint::Use);
+ let mut slots = slots.clone();
+ slots.sort();
+ checker_inst_map
+ .entry(iep)
+ .or_insert_with(|| vec![])
+ .push(Inst::Safepoint {
+ inst_ix: *iix,
+ slots,
+ });
+ }
+ let checker = Checker::new(f, ru, reftyped_vregs);
+ CheckerContext {
+ checker,
+ checker_inst_map,
+ }
+ }
+
+ /// Update the checker with the given instruction and the given pre- and post-maps. Instructions
+ /// within a block must be visited in program order.
+ pub(crate) fn handle_insn<F: Function, RUM: RegUsageMapper>(
+ &mut self,
+ ru: &RealRegUniverse,
+ func: &F,
+ bix: BlockIx,
+ iix: InstIx,
+ mapper: &RUM,
+ ) -> Result<(), CheckerErrors> {
+ let empty = vec![];
+ let mut skip_inst = false;
+
+ debug!("CheckerContext::handle_insn: inst {:?}", iix,);
+
+ for &pre_point in &[ExtPoint::Reload, ExtPoint::SpillBefore, ExtPoint::Use] {
+ let pre_point = InstExtPoint::new(iix, pre_point);
+ for checker_inst in self.checker_inst_map.get(&pre_point).unwrap_or(&empty) {
+ debug!("at inst {:?}: pre checker_inst: {:?}", iix, checker_inst);
+ self.checker.add_inst(bix, checker_inst.clone());
+ if let Inst::ChangeSpillSlotOwnership { .. } = checker_inst {
+ // Unlike spills/reloads/moves inserted by the regalloc, ChangeSpillSlotOwnership
+ // pseudo-insts replace the instruction itself.
+ skip_inst = true;
+ }
+ }
+ }
+
+ if !skip_inst {
+ let regsets = get_san_reg_sets_for_insn::<F>(func.get_insn(iix), ru)
+ .expect("only existing real registers at this point");
+ assert!(regsets.is_sanitized());
+
+ debug!(
+ "at inst {:?}: regsets {:?} mapper {:?}",
+ iix, regsets, mapper
+ );
+ self.checker.add_op(bix, iix, &regsets, mapper)?;
+ }
+
+ for &post_point in &[ExtPoint::ReloadAfter, ExtPoint::Spill] {
+ let post_point = InstExtPoint::new(iix, post_point);
+ for checker_inst in self.checker_inst_map.get(&post_point).unwrap_or(&empty) {
+ debug!("at inst {:?}: post checker_inst: {:?}", iix, checker_inst);
+ self.checker.add_inst(bix, checker_inst.clone());
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Run the underlying checker, once all instructions have been added.
+ pub(crate) fn run(self) -> Result<(), CheckerErrors> {
+ self.checker.run()
+ }
+}
diff --git a/third_party/rust/regalloc/src/data_structures.rs b/third_party/rust/regalloc/src/data_structures.rs
new file mode 100644
index 0000000000..e90672e95c
--- /dev/null
+++ b/third_party/rust/regalloc/src/data_structures.rs
@@ -0,0 +1,2505 @@
+//! Data structures for the whole crate.
+
+use rustc_hash::FxHashMap;
+use rustc_hash::FxHashSet;
+use smallvec::SmallVec;
+
+use std::cmp::Ordering;
+use std::collections::VecDeque;
+use std::fmt;
+use std::hash::Hash;
+use std::marker::PhantomData;
+use std::ops::Index;
+use std::ops::IndexMut;
+use std::slice::{Iter, IterMut};
+
+use crate::{Function, RegUsageMapper};
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+//=============================================================================
+// Queues
+
+pub type Queue<T> = VecDeque<T>;
+
+//=============================================================================
+// Maps
+
+// NOTE: plain HashMap is nondeterministic, even in a single-threaded
+// scenario, which can make debugging code that uses it really confusing. So
+// we use FxHashMap instead, as it *is* deterministic, and, allegedly, faster
+// too.
+pub type Map<K, V> = FxHashMap<K, V>;
+
+//=============================================================================
+// Sets of things
+
+// Same comment as above for FxHashMap.
+#[derive(Clone)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Set<T: Eq + Hash> {
+ set: FxHashSet<T>,
+}
+
+impl<T: Eq + Ord + Hash + Copy + fmt::Debug> Set<T> {
+ #[inline(never)]
+ pub fn empty() -> Self {
+ Self {
+ set: FxHashSet::<T>::default(),
+ }
+ }
+
+ #[inline(never)]
+ pub fn unit(item: T) -> Self {
+ let mut s = Self::empty();
+ s.insert(item);
+ s
+ }
+
+ #[inline(never)]
+ pub fn two(item1: T, item2: T) -> Self {
+ let mut s = Self::empty();
+ s.insert(item1);
+ s.insert(item2);
+ s
+ }
+
+ #[inline(never)]
+ pub fn card(&self) -> usize {
+ self.set.len()
+ }
+
+ #[inline(never)]
+ pub fn insert(&mut self, item: T) {
+ self.set.insert(item);
+ }
+
+ #[inline(never)]
+ pub fn delete(&mut self, item: T) {
+ self.set.remove(&item);
+ }
+
+ #[inline(never)]
+ pub fn is_empty(&self) -> bool {
+ self.set.is_empty()
+ }
+
+ #[inline(never)]
+ pub fn contains(&self, item: T) -> bool {
+ self.set.contains(&item)
+ }
+
+ #[inline(never)]
+ pub fn intersect(&mut self, other: &Self) {
+ let mut res = FxHashSet::<T>::default();
+ for item in self.set.iter() {
+ if other.set.contains(item) {
+ res.insert(*item);
+ }
+ }
+ self.set = res;
+ }
+
+ #[inline(never)]
+ pub fn union(&mut self, other: &Self) {
+ for item in other.set.iter() {
+ self.set.insert(*item);
+ }
+ }
+
+ #[inline(never)]
+ pub fn remove(&mut self, other: &Self) {
+ for item in other.set.iter() {
+ self.set.remove(item);
+ }
+ }
+
+ #[inline(never)]
+ pub fn intersects(&self, other: &Self) -> bool {
+ !self.set.is_disjoint(&other.set)
+ }
+
+ #[inline(never)]
+ pub fn is_subset_of(&self, other: &Self) -> bool {
+ self.set.is_subset(&other.set)
+ }
+
+ #[inline(never)]
+ pub fn to_vec(&self) -> Vec<T> {
+ let mut res = Vec::<T>::new();
+ for item in self.set.iter() {
+ res.push(*item)
+ }
+ // Don't delete this. It is important.
+ res.sort_unstable();
+ res
+ }
+
+ #[inline(never)]
+ pub fn from_vec(vec: Vec<T>) -> Self {
+ let mut res = Set::<T>::empty();
+ for x in vec {
+ res.insert(x);
+ }
+ res
+ }
+
+ #[inline(never)]
+ pub fn equals(&self, other: &Self) -> bool {
+ self.set == other.set
+ }
+
+ #[inline(never)]
+ pub fn retain<F>(&mut self, f: F)
+ where
+ F: FnMut(&T) -> bool,
+ {
+ self.set.retain(f)
+ }
+
+ #[inline(never)]
+ pub fn map<F, U>(&self, f: F) -> Set<U>
+ where
+ F: Fn(&T) -> U,
+ U: Eq + Ord + Hash + Copy + fmt::Debug,
+ {
+ Set {
+ set: self.set.iter().map(f).collect(),
+ }
+ }
+
+ #[inline(never)]
+ pub fn filter_map<F, U>(&self, f: F) -> Set<U>
+ where
+ F: Fn(&T) -> Option<U>,
+ U: Eq + Ord + Hash + Copy + fmt::Debug,
+ {
+ Set {
+ set: self.set.iter().filter_map(f).collect(),
+ }
+ }
+
+ pub fn clear(&mut self) {
+ self.set.clear();
+ }
+}
+
+impl<T: Eq + Ord + Hash + Copy + fmt::Debug> fmt::Debug for Set<T> {
+ #[inline(never)]
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ // Print the elements in some way which depends only on what is
+ // present in the set, and not on any other factor. In particular,
+    // <Debug for FxHashSet> has been observed to print the elements
+ // of a two element set in both orders on different occasions.
+ let sorted_vec = self.to_vec();
+ let mut s = "{".to_string();
+ for i in 0..sorted_vec.len() {
+ if i > 0 {
+ s = s + &", ".to_string();
+ }
+ s = s + &format!("{:?}", &sorted_vec[i]);
+ }
+ s = s + &"}".to_string();
+ write!(fmt, "{}", s)
+ }
+}
+
+pub struct SetIter<'a, T> {
+ set_iter: std::collections::hash_set::Iter<'a, T>,
+}
+impl<T: Eq + Hash> Set<T> {
+ pub fn iter(&self) -> SetIter<T> {
+ SetIter {
+ set_iter: self.set.iter(),
+ }
+ }
+}
+impl<'a, T> Iterator for SetIter<'a, T> {
+ type Item = &'a T;
+ fn next(&mut self) -> Option<Self::Item> {
+ self.set_iter.next()
+ }
+}
+
+//=============================================================================
+// Iteration boilerplate for entities. The only purpose of this is to support
+// constructions of the form
+//
+// for ent in startEnt .dotdot( endPlus1Ent ) {
+// }
+//
+// until such time as `trait Step` is available in stable Rust. At that point
+// `fn dotdot` and all of the following can be removed, and the loops
+// rewritten using the standard syntax:
+//
+// for ent in startEnt .. endPlus1Ent {
+// }
+
+pub trait Zero {
+ fn zero() -> Self;
+}
+
+pub trait PlusN {
+ fn plus_n(&self, n: usize) -> Self;
+}
+
+#[derive(Clone, Copy)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Range<T> {
+ first: T,
+ len: usize,
+}
+
+impl<T: Copy + PartialOrd + PlusN> IntoIterator for Range<T> {
+ type Item = T;
+ type IntoIter = MyIterator<T>;
+ fn into_iter(self) -> Self::IntoIter {
+ MyIterator {
+ range: self,
+ next: self.first,
+ }
+ }
+}
+
+impl<T: Copy + Eq + Ord + PlusN> Range<T> {
+ /// Create a new range object.
+ pub fn new(from: T, len: usize) -> Range<T> {
+ Range { first: from, len }
+ }
+
+ pub fn start(&self) -> T {
+ self.first
+ }
+
+ pub fn first(&self) -> T {
+ assert!(self.len() > 0);
+ self.start()
+ }
+
+ pub fn last(&self) -> T {
+ assert!(self.len() > 0);
+ self.start().plus_n(self.len() - 1)
+ }
+
+ pub fn last_plus1(&self) -> T {
+ self.start().plus_n(self.len())
+ }
+
+ pub fn len(&self) -> usize {
+ self.len
+ }
+
+ pub fn contains(&self, t: T) -> bool {
+ t >= self.first && t < self.first.plus_n(self.len)
+ }
+}
+
+pub struct MyIterator<T> {
+ range: Range<T>,
+ next: T,
+}
+impl<T: Copy + PartialOrd + PlusN> Iterator for MyIterator<T> {
+ type Item = T;
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.next >= self.range.first.plus_n(self.range.len) {
+ None
+ } else {
+ let res = Some(self.next);
+ self.next = self.next.plus_n(1);
+ res
+ }
+ }
+}
+
+//=============================================================================
+// Vectors where both the index and element types can be specified. At most
+// 2^32-1 elems can be stored; overflow is not currently checked (see the
+// FIXMEs below).
+
+pub struct TypedIxVec<TyIx, Ty> {
+ vek: Vec<Ty>,
+ ty_ix: PhantomData<TyIx>,
+}
+
+impl<TyIx, Ty> TypedIxVec<TyIx, Ty>
+where
+ Ty: Clone,
+ TyIx: Copy + Eq + Ord + Zero + PlusN + Into<u32>,
+{
+ pub fn new() -> Self {
+ Self {
+ vek: Vec::new(),
+ ty_ix: PhantomData::<TyIx>,
+ }
+ }
+ pub fn from_vec(vek: Vec<Ty>) -> Self {
+ Self {
+ vek,
+ ty_ix: PhantomData::<TyIx>,
+ }
+ }
+ pub fn append(&mut self, other: &mut TypedIxVec<TyIx, Ty>) {
+ // FIXME what if this overflows?
+ self.vek.append(&mut other.vek);
+ }
+ pub fn iter(&self) -> Iter<Ty> {
+ self.vek.iter()
+ }
+ pub fn iter_mut(&mut self) -> IterMut<Ty> {
+ self.vek.iter_mut()
+ }
+ pub fn len(&self) -> u32 {
+ // FIXME what if this overflows?
+ self.vek.len() as u32
+ }
+ pub fn push(&mut self, item: Ty) {
+ // FIXME what if this overflows?
+ self.vek.push(item);
+ }
+ pub fn resize(&mut self, new_len: u32, value: Ty) {
+ self.vek.resize(new_len as usize, value);
+ }
+ pub fn reserve(&mut self, additional: usize) {
+ self.vek.reserve(additional);
+ }
+ pub fn elems(&self) -> &[Ty] {
+ &self.vek[..]
+ }
+ pub fn elems_mut(&mut self) -> &mut [Ty] {
+ &mut self.vek[..]
+ }
+ pub fn range(&self) -> Range<TyIx> {
+ Range::new(TyIx::zero(), self.len() as usize)
+ }
+ pub fn remove(&mut self, idx: TyIx) -> Ty {
+ self.vek.remove(idx.into() as usize)
+ }
+ pub fn sort_by<F: FnMut(&Ty, &Ty) -> Ordering>(&mut self, compare: F) {
+ self.vek.sort_by(compare)
+ }
+ pub fn sort_unstable_by<F: FnMut(&Ty, &Ty) -> Ordering>(&mut self, compare: F) {
+ self.vek.sort_unstable_by(compare)
+ }
+ pub fn clear(&mut self) {
+ self.vek.clear();
+ }
+}
+
+impl<TyIx, Ty> Index<TyIx> for TypedIxVec<TyIx, Ty>
+where
+ TyIx: Into<u32>,
+{
+ type Output = Ty;
+ fn index(&self, ix: TyIx) -> &Ty {
+ &self.vek[ix.into() as usize]
+ }
+}
+
+impl<TyIx, Ty> IndexMut<TyIx> for TypedIxVec<TyIx, Ty>
+where
+ TyIx: Into<u32>,
+{
+ fn index_mut(&mut self, ix: TyIx) -> &mut Ty {
+ &mut self.vek[ix.into() as usize]
+ }
+}
+
+impl<TyIx, Ty> Clone for TypedIxVec<TyIx, Ty>
+where
+ Ty: Clone,
+{
+ // This is only needed for debug printing.
+ fn clone(&self) -> Self {
+ Self {
+ vek: self.vek.clone(),
+ ty_ix: PhantomData::<TyIx>,
+ }
+ }
+}
+
+//=============================================================================
+
+macro_rules! generate_boilerplate {
+ ($TypeIx:ident, $Type:ident, $PrintingPrefix:expr) => {
+ #[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+ #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+ // Firstly, the indexing type (TypeIx)
+ pub enum $TypeIx {
+ $TypeIx(u32),
+ }
+ impl $TypeIx {
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn new(n: u32) -> Self {
+ debug_assert!(n != u32::max_value());
+ Self::$TypeIx(n)
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn max_value() -> Self {
+ Self::$TypeIx(u32::max_value() - 1)
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn min_value() -> Self {
+ Self::$TypeIx(u32::min_value())
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn invalid_value() -> Self {
+ Self::$TypeIx(u32::max_value())
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn is_valid(self) -> bool {
+ self != Self::invalid_value()
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn is_invalid(self) -> bool {
+ self == Self::invalid_value()
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn get(self) -> u32 {
+ debug_assert!(self.is_valid());
+ match self {
+ $TypeIx::$TypeIx(n) => n,
+ }
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn plus(self, delta: u32) -> $TypeIx {
+ debug_assert!(self.is_valid());
+ $TypeIx::$TypeIx(self.get() + delta)
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn minus(self, delta: u32) -> $TypeIx {
+ debug_assert!(self.is_valid());
+ $TypeIx::$TypeIx(self.get() - delta)
+ }
+ #[allow(dead_code)]
+ pub fn dotdot(&self, last_plus1: $TypeIx) -> Range<$TypeIx> {
+ debug_assert!(self.is_valid());
+ let len = (last_plus1.get() - self.get()) as usize;
+ Range::new(*self, len)
+ }
+ }
+ impl fmt::Debug for $TypeIx {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ if self.is_invalid() {
+ write!(fmt, "{}<NONE>", $PrintingPrefix)
+ } else {
+ write!(fmt, "{}{}", $PrintingPrefix, &self.get())
+ }
+ }
+ }
+ impl PlusN for $TypeIx {
+ #[inline(always)]
+ fn plus_n(&self, n: usize) -> Self {
+ debug_assert!(self.is_valid());
+ self.plus(n as u32)
+ }
+ }
+ impl Into<u32> for $TypeIx {
+ #[inline(always)]
+ fn into(self) -> u32 {
+ debug_assert!(self.is_valid());
+ self.get()
+ }
+ }
+ impl Zero for $TypeIx {
+ #[inline(always)]
+ fn zero() -> Self {
+ $TypeIx::new(0)
+ }
+ }
+ };
+}
+
+generate_boilerplate!(InstIx, Inst, "i");
+
+generate_boilerplate!(BlockIx, Block, "b");
+
+generate_boilerplate!(RangeFragIx, RangeFrag, "f");
+
+generate_boilerplate!(VirtualRangeIx, VirtualRange, "vr");
+
+generate_boilerplate!(RealRangeIx, RealRange, "rr");
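+
+// For example (a sketch, not part of the original source), the `dotdot` boilerplate above
+// lets index ranges be iterated without `trait Step`:
+//
+//     for iix in InstIx::new(2).dotdot(InstIx::new(5)) {
+//         // visits i2, i3, i4
+//     }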
+
+impl<TyIx, Ty: fmt::Debug> fmt::Debug for TypedIxVec<TyIx, Ty> {
+ // This is something of a hack in the sense that it doesn't show the
+ // indices, but oh well ..
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "{:?}", self.vek)
+ }
+}
+
+//=============================================================================
+// Definitions of register classes, registers and stack slots, and printing
+// thereof. Note that this register class definition is meant to be
+// architecture-independent: it simply captures common integer/float/vector
+// types that machines are likely to use. TODO: investigate whether we need a
+// more flexible register-class definition mechanism.
+
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum RegClass {
+ I32 = 0,
+ F32 = 1,
+ I64 = 2,
+ F64 = 3,
+ V128 = 4,
+ INVALID = 5,
+}
+
+/// The number of register classes that exist.
+/// N.B.: must be <= 7 (fit into 3 bits) for 32-bit VReg/RReg packed format!
+pub const NUM_REG_CLASSES: usize = 5;
+
+impl RegClass {
+ /// Convert a register class to a u32 index.
+ #[inline(always)]
+ pub fn rc_to_u32(self) -> u32 {
+ self as u32
+ }
+ /// Convert a register class to a usize index.
+ #[inline(always)]
+ pub fn rc_to_usize(self) -> usize {
+ self as usize
+ }
+ /// Construct a register class from a u32.
+ #[inline(always)]
+ pub fn rc_from_u32(rc: u32) -> RegClass {
+ match rc {
+ 0 => RegClass::I32,
+ 1 => RegClass::F32,
+ 2 => RegClass::I64,
+ 3 => RegClass::F64,
+ 4 => RegClass::V128,
+ _ => panic!("RegClass::rc_from_u32"),
+ }
+ }
+
+ pub fn short_name(self) -> &'static str {
+ match self {
+ RegClass::I32 => "I",
+ RegClass::I64 => "J",
+ RegClass::F32 => "F",
+ RegClass::F64 => "D",
+ RegClass::V128 => "V",
+ RegClass::INVALID => panic!("RegClass::short_name"),
+ }
+ }
+
+ pub fn long_name(self) -> &'static str {
+ match self {
+ RegClass::I32 => "I32",
+ RegClass::I64 => "I32",
+ RegClass::F32 => "F32",
+ RegClass::F64 => "F32",
+ RegClass::V128 => "V128",
+ RegClass::INVALID => panic!("RegClass::long_name"),
+ }
+ }
+}
+
+// Reg represents both real and virtual registers. For compactness and speed,
+// these fields are packed into a single u32. The format is:
+//
+// Virtual Reg: 1 rc:3 index:28
+// Real Reg: 0 rc:3 uu:12 enc:8 index:8
+//
+// `rc` is the register class. `uu` means "unused". `enc` is the hardware
+// encoding for the reg. `index` is a zero based index which has the
+// following meanings:
+//
+// * for a Virtual Reg, `index` is just the virtual register number.
+// * for a Real Reg, `index` is the entry number in the associated
+// `RealRegUniverse`.
+//
+// This scheme gives us:
+//
+// * a compact (32-bit) representation for registers
+// * fast equality tests for registers
+// * ability to handle up to 2^28 (268.4 million) virtual regs per function
+// * ability to handle up to 8 register classes
+// * ability to handle targets with up to 256 real registers
+// * ability to emit instructions containing real regs without having to
+// look up encodings in any side tables, since a real reg carries its
+// encoding
+// * efficient bitsets and arrays of virtual registers, since each has a
+// zero-based index baked in
+// * efficient bitsets and arrays of real registers, for the same reason
+//
+// This scheme makes it impossible to represent overlapping register classes,
+// but that doesn't seem important. AFAIK only ARM32 VFP/Neon has that.
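+//
+// A worked example (illustrative only): `Reg::new_virtual(RegClass::F64, 42)` packs as
+// (1 << 31) | (3 << 28) | 42 == 0xB000_002A; `get_class()` recovers F64 from bits 30:28
+// and `get_index()` recovers 42 from bits 27:0, as the accessors below show.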
+
+#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Reg {
+ bits: u32,
+}
+
+static INVALID_REG: u32 = 0xffffffff;
+
+impl Reg {
+ #[inline(always)]
+ pub fn is_virtual(self) -> bool {
+ self.is_valid() && (self.bits & 0x8000_0000) != 0
+ }
+ #[inline(always)]
+ pub fn is_real(self) -> bool {
+ self.is_valid() && (self.bits & 0x8000_0000) == 0
+ }
+ pub fn new_real(rc: RegClass, enc: u8, index: u8) -> Self {
+ let n = (0 << 31) | (rc.rc_to_u32() << 28) | ((enc as u32) << 8) | ((index as u32) << 0);
+ Reg { bits: n }
+ }
+ pub fn new_virtual(rc: RegClass, index: u32) -> Self {
+ if index >= (1 << 28) {
+ panic!("new_virtual(): index too large");
+ }
+ let n = (1 << 31) | (rc.rc_to_u32() << 28) | (index << 0);
+ Reg { bits: n }
+ }
+ pub fn invalid() -> Reg {
+ Reg { bits: INVALID_REG }
+ }
+ #[inline(always)]
+ pub fn is_invalid(self) -> bool {
+ self.bits == INVALID_REG
+ }
+ #[inline(always)]
+ pub fn is_valid(self) -> bool {
+ !self.is_invalid()
+ }
+ pub fn is_virtual_or_invalid(self) -> bool {
+ self.is_virtual() || self.is_invalid()
+ }
+ pub fn is_real_or_invalid(self) -> bool {
+ self.is_real() || self.is_invalid()
+ }
+ #[inline(always)]
+ pub fn get_class(self) -> RegClass {
+ debug_assert!(self.is_valid());
+ RegClass::rc_from_u32((self.bits >> 28) & 0x7)
+ }
+ #[inline(always)]
+ pub fn get_index(self) -> usize {
+ debug_assert!(self.is_valid());
+ // Return type is usize because typically we will want to use the
+ // result for indexing into a Vec
+ if self.is_virtual() {
+ (self.bits & ((1 << 28) - 1)) as usize
+ } else {
+ (self.bits & ((1 << 8) - 1)) as usize
+ }
+ }
+ #[inline(always)]
+ pub fn get_index_u32(self) -> u32 {
+ debug_assert!(self.is_valid());
+ if self.is_virtual() {
+ self.bits & ((1 << 28) - 1)
+ } else {
+ self.bits & ((1 << 8) - 1)
+ }
+ }
+ pub fn get_hw_encoding(self) -> u8 {
+ debug_assert!(self.is_valid());
+ if self.is_virtual() {
+ panic!("Virtual register does not have a hardware encoding")
+ } else {
+ ((self.bits >> 8) & ((1 << 8) - 1)) as u8
+ }
+ }
+ pub fn as_virtual_reg(self) -> Option<VirtualReg> {
+ // Allow invalid virtual regs as well.
+ if self.is_virtual_or_invalid() {
+ Some(VirtualReg { reg: self })
+ } else {
+ None
+ }
+ }
+ pub fn as_real_reg(self) -> Option<RealReg> {
+ // Allow invalid real regs as well.
+ if self.is_real_or_invalid() {
+ Some(RealReg { reg: self })
+ } else {
+ None
+ }
+ }
+ pub fn show_with_rru(self, univ: &RealRegUniverse) -> String {
+ if self.is_real() && self.get_index() < univ.regs.len() {
+ univ.regs[self.get_index()].1.clone()
+ } else if self.is_valid() {
+ format!("{:?}", self)
+ } else {
+ "rINVALID".to_string()
+ }
+ }
+}
+
+impl fmt::Debug for Reg {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ if self.is_valid() {
+ write!(
+ fmt,
+ "{}{}{}",
+ if self.is_virtual() { "v" } else { "r" },
+ self.get_index(),
+ self.get_class().short_name(),
+ )
+ } else {
+ write!(fmt, "rINVALID")
+ }
+ }
+}
+
+// RealReg and VirtualReg are merely wrappers around Reg, which try to
+// dynamically ensure that they are really wrapping the correct flavour of
+// register.
+
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct RealReg {
+ reg: Reg,
+}
+impl Reg /* !!not RealReg!! */ {
+ pub fn to_real_reg(self) -> RealReg {
+ if self.is_virtual() {
+ panic!("Reg::to_real_reg: this is a virtual register")
+ } else {
+ RealReg { reg: self }
+ }
+ }
+}
+impl RealReg {
+ pub fn get_class(self) -> RegClass {
+ self.reg.get_class()
+ }
+ #[inline(always)]
+ pub fn get_index(self) -> usize {
+ self.reg.get_index()
+ }
+ pub fn get_hw_encoding(self) -> usize {
+ self.reg.get_hw_encoding() as usize
+ }
+ #[inline(always)]
+ pub fn to_reg(self) -> Reg {
+ self.reg
+ }
+ pub fn invalid() -> RealReg {
+ RealReg {
+ reg: Reg::invalid(),
+ }
+ }
+ pub fn is_valid(self) -> bool {
+ self.reg.is_valid()
+ }
+ pub fn is_invalid(self) -> bool {
+ self.reg.is_invalid()
+ }
+ pub fn maybe_valid(self) -> Option<RealReg> {
+ if self == RealReg::invalid() {
+ None
+ } else {
+ Some(self)
+ }
+ }
+}
+impl fmt::Debug for RealReg {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "{:?}", self.reg)
+ }
+}
+
+#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct VirtualReg {
+ reg: Reg,
+}
+impl Reg /* !!not VirtualReg!! */ {
+ #[inline(always)]
+ pub fn to_virtual_reg(self) -> VirtualReg {
+ if self.is_virtual() {
+ VirtualReg { reg: self }
+ } else {
+ panic!("Reg::to_virtual_reg: this is a real register")
+ }
+ }
+}
+impl VirtualReg {
+ pub fn get_class(self) -> RegClass {
+ self.reg.get_class()
+ }
+ #[inline(always)]
+ pub fn get_index(self) -> usize {
+ self.reg.get_index()
+ }
+ #[inline(always)]
+ pub fn to_reg(self) -> Reg {
+ self.reg
+ }
+ pub fn invalid() -> VirtualReg {
+ VirtualReg {
+ reg: Reg::invalid(),
+ }
+ }
+ pub fn is_valid(self) -> bool {
+ self.reg.is_valid()
+ }
+ pub fn is_invalid(self) -> bool {
+ self.reg.is_invalid()
+ }
+ pub fn maybe_valid(self) -> Option<VirtualReg> {
+ if self == VirtualReg::invalid() {
+ None
+ } else {
+ Some(self)
+ }
+ }
+}
+impl fmt::Debug for VirtualReg {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "{:?}", self.reg)
+ }
+}
+
+impl Reg {
+ /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
+ /// a read-role.
+ pub fn apply_uses<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ self.apply(|vreg| mapper.get_use(vreg));
+ }
+
+ /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
+ /// a write-role.
+ pub fn apply_defs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ self.apply(|vreg| mapper.get_def(vreg));
+ }
+
+ /// Apply a vreg-rreg mapping to a Reg. This is used for registers used in
+ /// a modify-role.
+ pub fn apply_mods<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ self.apply(|vreg| mapper.get_mod(vreg));
+ }
+
+ fn apply<F: Fn(VirtualReg) -> Option<RealReg>>(&mut self, f: F) {
+ if let Some(vreg) = self.as_virtual_reg() {
+ if let Some(rreg) = f(vreg) {
+ debug_assert!(rreg.get_class() == vreg.get_class());
+ *self = rreg.to_reg();
+ } else {
+ panic!("Reg::apply: no mapping for {:?}", self);
+ }
+ }
+ }
+}
+
+/// A "writable register". This is a zero-cost wrapper that can be used to
+/// create a distinction, at the Rust type level, between a plain "register"
+/// and a "writable register".
+///
+/// Only structs that implement the `WritableBase` trait can be wrapped with
+/// `Writable`. These are the Reg, RealReg and VirtualReg data structures only,
+/// since `WritableBase` is not exposed to end users.
+///
+/// Writable<..> can be used by the client to ensure that, internally, it only
+/// generates instructions that write to registers that should be written. The
+/// `RegUsageCollector` below, through which every instruction reports its registers,
+/// requires a `Writable<Reg>` (not just `Reg`) for the registers it records as
+/// defined and modified. While we cannot hide the constructor for `Writable<..>`
+/// from certain parts of the client while exposing it to others, the client
+/// *can* adopt conventions to e.g. only ever call the Writable<..>
+/// constructor from its central vreg-management logic, and decide that any
+/// invocation of this constructor in a machine backend (for example) is an
+/// error.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Writable<R: WritableBase> {
+ reg: R,
+}
+
+/// Set of requirements for types that can be wrapped in Writable.
+pub trait WritableBase:
+ Copy + Clone + PartialEq + Eq + Hash + PartialOrd + Ord + fmt::Debug
+{
+}
+
+impl WritableBase for Reg {}
+impl WritableBase for RealReg {}
+impl WritableBase for VirtualReg {}
+
+impl<R: WritableBase> Writable<R> {
+ /// Create a Writable<R> from an R. The client should carefully audit where
+ /// it calls this constructor to ensure correctness (see `Writable<..>`
+ /// struct documentation).
+ #[inline(always)]
+ pub fn from_reg(reg: R) -> Writable<R> {
+ Writable { reg }
+ }
+
+ /// Get the inner Reg.
+ pub fn to_reg(&self) -> R {
+ self.reg
+ }
+
+ pub fn map<F, U>(&self, f: F) -> Writable<U>
+ where
+ F: Fn(R) -> U,
+ U: WritableBase,
+ {
+ Writable { reg: f(self.reg) }
+ }
+}
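+
+// For example (a sketch, not in the original source), a client's central vreg-management
+// code might be the only place that calls the constructor:
+//
+//     let v0 = Reg::new_virtual(RegClass::I64, 0);
+//     let wv0: Writable<Reg> = Writable::from_reg(v0);
+//     assert_eq!(wv0.to_reg(), v0);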
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct SpillSlot(u32);
+
+impl SpillSlot {
+ #[inline(always)]
+ pub fn new(n: u32) -> Self {
+ Self(n)
+ }
+ #[inline(always)]
+ pub fn get(self) -> u32 {
+ self.0
+ }
+ #[inline(always)]
+ pub fn get_usize(self) -> usize {
+ self.get() as usize
+ }
+ pub fn round_up(self, num_slots: u32) -> SpillSlot {
+ assert!(num_slots > 0);
+ SpillSlot::new((self.get() + num_slots - 1) / num_slots * num_slots)
+ }
+ pub fn inc(self, num_slots: u32) -> SpillSlot {
+ SpillSlot::new(self.get() + num_slots)
+ }
+}
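+
+// Worked examples (illustrative only): `SpillSlot::new(3).round_up(4)` yields S4 (the next
+// multiple of four slots), `SpillSlot::new(8).round_up(4)` stays at S8, and
+// `SpillSlot::new(3).inc(2)` yields S5.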
+
+impl fmt::Debug for SpillSlot {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "S{}", self.get())
+ }
+}
+
+//=============================================================================
+// Register uses: low level interface
+
+// This minimal struct is visible from outside the regalloc.rs interface. It
+// is intended to be a safe wrapper around `RegVecs`, which isn't externally
+// visible. It is used to collect unsanitized reg use info from client
+// instructions.
+pub struct RegUsageCollector<'a> {
+ pub reg_vecs: &'a mut RegVecs,
+}
+
+impl<'a> RegUsageCollector<'a> {
+ pub fn new(reg_vecs: &'a mut RegVecs) -> Self {
+ Self { reg_vecs }
+ }
+ pub fn add_use(&mut self, r: Reg) {
+ self.reg_vecs.uses.push(r);
+ }
+ pub fn add_uses(&mut self, regs: &[Reg]) {
+ self.reg_vecs.uses.extend(regs.iter());
+ }
+ pub fn add_def(&mut self, r: Writable<Reg>) {
+ self.reg_vecs.defs.push(r.to_reg());
+ }
+ pub fn add_defs(&mut self, regs: &[Writable<Reg>]) {
+ self.reg_vecs.defs.reserve(regs.len());
+ for r in regs {
+ self.reg_vecs.defs.push(r.to_reg());
+ }
+ }
+ pub fn add_mod(&mut self, r: Writable<Reg>) {
+ self.reg_vecs.mods.push(r.to_reg());
+ }
+ pub fn add_mods(&mut self, regs: &[Writable<Reg>]) {
+ self.reg_vecs.mods.reserve(regs.len());
+ for r in regs {
+ self.reg_vecs.mods.push(r.to_reg());
+ }
+ }
+
+ // The presence of the following two is a hack, needed to support fuzzing
+ // in the test framework. Real clients should not call them.
+ pub fn get_use_def_mod_vecs_test_framework_only(&self) -> (Vec<Reg>, Vec<Reg>, Vec<Reg>) {
+ (
+ self.reg_vecs.uses.clone(),
+ self.reg_vecs.defs.clone(),
+ self.reg_vecs.mods.clone(),
+ )
+ }
+
+ pub fn get_empty_reg_vecs_test_framework_only(sanitized: bool) -> RegVecs {
+ RegVecs::new(sanitized)
+ }
+}
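+
+// A sketch (not part of the original source) of how a client backend might report the
+// registers of a hypothetical three-address add `dst = src1 + src2`:
+//
+//     fn add_regs(dst: Writable<Reg>, src1: Reg, src2: Reg, collector: &mut RegUsageCollector) {
+//         collector.add_def(dst);
+//         collector.add_use(src1);
+//         collector.add_use(src2);
+//     }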
+
+// Everything else is not visible outside the regalloc.rs interface.
+
+// There is one of these per function. Note that `defs` and `mods` lose the
+// `Writable` constraint at this point. This is for convenience of having all
+// three vectors be the same type, but comes at the cost of the loss of being
+// able to differentiate readonly vs read/write registers in the Rust type
+// system.
+#[derive(Debug)]
+pub struct RegVecs {
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Reg>,
+ pub mods: Vec<Reg>,
+ sanitized: bool,
+}
+
+impl RegVecs {
+ pub fn new(sanitized: bool) -> Self {
+ Self {
+ uses: vec![],
+ defs: vec![],
+ mods: vec![],
+ sanitized,
+ }
+ }
+ pub fn is_sanitized(&self) -> bool {
+ self.sanitized
+ }
+ pub fn set_sanitized(&mut self, sanitized: bool) {
+ self.sanitized = sanitized;
+ }
+ pub fn clear(&mut self) {
+ self.uses.clear();
+ self.defs.clear();
+ self.mods.clear();
+ }
+}
+
+// There is one of these per insn, so try and keep it as compact as possible.
+// I think this should fit in 16 bytes.
+#[derive(Clone, Debug)]
+pub struct RegVecBounds {
+ // These are the group start indices in RegVecs.{uses, defs, mods}.
+ pub uses_start: u32,
+ pub defs_start: u32,
+ pub mods_start: u32,
+ // And these are the group lengths. This does limit each instruction to
+ // mentioning only 256 registers in any group, but that does not seem like a
+ // problem.
+ pub uses_len: u8,
+ pub defs_len: u8,
+ pub mods_len: u8,
+}
+
+impl RegVecBounds {
+ pub fn new() -> Self {
+ Self {
+ uses_start: 0,
+ defs_start: 0,
+ mods_start: 0,
+ uses_len: 0,
+ defs_len: 0,
+ mods_len: 0,
+ }
+ }
+}
+
+// This is the primary structure. We compute just one of these for an entire
+// function.
+pub struct RegVecsAndBounds {
+ // The three vectors of registers. These can be arbitrarily long.
+ pub vecs: RegVecs,
+ // Admin info which tells us the location, for each insn, of its register
+ // groups in `vecs`.
+ pub bounds: TypedIxVec<InstIx, RegVecBounds>,
+}
+
+impl RegVecsAndBounds {
+ pub fn new(vecs: RegVecs, bounds: TypedIxVec<InstIx, RegVecBounds>) -> Self {
+ Self { vecs, bounds }
+ }
+ pub fn is_sanitized(&self) -> bool {
+ self.vecs.sanitized
+ }
+ #[allow(dead_code)] // XXX for some reason, Rustc 1.43.1 thinks this is currently unused.
+ pub fn num_insns(&self) -> u32 {
+ self.bounds.len()
+ }
+}
+
+//=============================================================================
+// Register uses: convenience interface
+
+// Some call sites want to get reg use information as three Sets. This is a
+// "convenience facility" which is easier to use but much slower than working
+// with a whole-function `RegVecsAndBounds`. It shouldn't be used on critical
+// paths.
+#[derive(Debug)]
+pub struct RegSets {
+ pub uses: Set<Reg>, // registers that are read.
+ pub defs: Set<Reg>, // registers that are written.
+ pub mods: Set<Reg>, // registers that are modified.
+ sanitized: bool,
+}
+
+impl RegSets {
+ pub fn new(sanitized: bool) -> Self {
+ Self {
+ uses: Set::<Reg>::empty(),
+ defs: Set::<Reg>::empty(),
+ mods: Set::<Reg>::empty(),
+ sanitized,
+ }
+ }
+
+ pub fn is_sanitized(&self) -> bool {
+ self.sanitized
+ }
+}
+
+impl RegVecsAndBounds {
+ /* !!not RegSets!! */
+ #[inline(never)]
+ // Convenience function. Try to avoid using this.
+ pub fn get_reg_sets_for_iix(&self, iix: InstIx) -> RegSets {
+ let bounds = &self.bounds[iix];
+ let mut regsets = RegSets::new(self.vecs.sanitized);
+ for i in bounds.uses_start as usize..bounds.uses_start as usize + bounds.uses_len as usize {
+ regsets.uses.insert(self.vecs.uses[i]);
+ }
+ for i in bounds.defs_start as usize..bounds.defs_start as usize + bounds.defs_len as usize {
+ regsets.defs.insert(self.vecs.defs[i]);
+ }
+ for i in bounds.mods_start as usize..bounds.mods_start as usize + bounds.mods_len as usize {
+ regsets.mods.insert(self.vecs.mods[i]);
+ }
+ regsets
+ }
+}
+
+//=============================================================================
+// Definitions of the "real register universe".
+
+// A "Real Register Universe" is a read-only structure that contains all
+// information about real registers on a given host. It serves several
+// purposes:
+//
+// * defines the mapping from real register indices to the registers
+// themselves
+//
+// * defines the size of the initial section of that mapping that is available
+// to the register allocator for use, so that it can treat the registers
+// under its control as a zero based, contiguous array. This is important
+// for its efficiency.
+//
+// * gives meaning to Set<RealReg>, which otherwise would merely be a bunch of
+// bits.
+
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct RealRegUniverse {
+ // The registers themselves. All must be real registers, and all must
+ // have their index number (.get_index()) equal to the array index here,
+ // since this is the only place where we map index numbers to actual
+ // registers.
+ pub regs: Vec<(RealReg, String)>,
+
+ // This is the size of the initial section of `regs` that is available to
+    // the allocator. It must be <= `regs.len()`.
+ pub allocable: usize,
+
+ // Information about groups of allocable registers. Used to quickly address
+ // only a group of allocable registers belonging to the same register class.
+ // Indexes into `allocable_by_class` are RegClass values, such as
+ // RegClass::F32. If the resulting entry is `None` then there are no
+ // registers in that class. Otherwise the value is a `RegClassInfo`, which
+ // provides a register range and possibly information about fixed uses.
+ pub allocable_by_class: [Option<RegClassInfo>; NUM_REG_CLASSES],
+}
+
+/// Information about a single register class in the `RealRegUniverse`.
+#[derive(Clone, Copy, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct RegClassInfo {
+ // Range of allocatable registers in this register class, in terms of
+ // register indices.
+ //
+ // A range (first, last) specifies the range of entries in
+ // `RealRegUniverse.regs` corresponding to that class. The range includes
+ // both `first` and `last`.
+ //
+ // In all cases, `last` must be < `RealRegUniverse.allocable`. In other
+ // words, all ranges together in `allocable_by_class` must describe only the
+ // allocable prefix of `regs`.
+ //
+ // For example, let's say
+ // allocable_by_class[RegClass::F32] ==
+ // Some(RegClassInfo { first: 10, last: 14, .. })
+ // Then regs[10], regs[11], regs[12], regs[13], and regs[14] give all
+ // registers of register class RegClass::F32.
+ //
+ // The effect of the above is that registers in `regs` must form
+ // contiguous groups. This is checked by RealRegUniverse::check_is_sane().
+ pub first: usize,
+ pub last: usize,
+
+ // A register, if any, that is *guaranteed* not to be used as a fixed use
+ // in any code, and so that the register allocator can statically reserve
+ // for its own use as a temporary. Some register allocators may need such
+ // a register for various maneuvers, for example a spillslot-to-spillslot
+ // move when no (other) registers are free.
+ pub suggested_scratch: Option<usize>,
+}
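+
+// A minimal sketch (hypothetical two-register I64 target, not from the original source) of a
+// universe that satisfies `check_is_sane()` below:
+//
+//     let r0 = Reg::new_real(RegClass::I64, /*enc=*/0, /*index=*/0).to_real_reg();
+//     let r1 = Reg::new_real(RegClass::I64, /*enc=*/1, /*index=*/1).to_real_reg();
+//     let mut allocable_by_class = [None; NUM_REG_CLASSES];
+//     allocable_by_class[RegClass::I64.rc_to_usize()] =
+//         Some(RegClassInfo { first: 0, last: 1, suggested_scratch: None });
+//     let univ = RealRegUniverse {
+//         regs: vec![(r0, "r0".to_string()), (r1, "r1".to_string())],
+//         allocable: 2,
+//         allocable_by_class,
+//     };
+//     univ.check_is_sane();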
+
+impl RealRegUniverse {
+ /// Show it in a pretty way.
+ pub fn show(&self) -> Vec<String> {
+ let mut res = vec![];
+ // Show the allocables
+ for class_num in 0..NUM_REG_CLASSES {
+ let class_info = match &self.allocable_by_class[class_num] {
+ None => continue,
+ Some(info) => info,
+ };
+ let class = RegClass::rc_from_u32(class_num as u32);
+ let mut class_str = "class ".to_string()
+ + &class.long_name().to_string()
+ + &"(".to_string()
+ + &class.short_name().to_string()
+ + &") at ".to_string();
+ class_str = class_str + &format!("[{} .. {}]: ", class_info.first, class_info.last);
+ for ix in class_info.first..=class_info.last {
+ class_str = class_str + &self.regs[ix].1;
+ if let Some(suggested_ix) = class_info.suggested_scratch {
+ if ix == suggested_ix {
+ class_str = class_str + "*";
+ }
+ }
+ class_str = class_str + " ";
+ }
+ res.push(class_str);
+ }
+ // And the non-allocables
+ if self.allocable < self.regs.len() {
+ let mut stragglers = format!(
+ "not allocable at [{} .. {}]: ",
+ self.allocable,
+ self.regs.len() - 1
+ );
+ for ix in self.allocable..self.regs.len() {
+ stragglers = stragglers + &self.regs[ix].1 + &" ".to_string();
+ }
+ res.push(stragglers);
+ }
+ res
+ }
+
+ /// Check that the given universe satisfies various invariants, and panic
+ /// if not. All the invariants are important.
+ pub fn check_is_sane(&self) {
+ let regs_len = self.regs.len();
+ let regs_allocable = self.allocable;
+ // The universe must contain at most 256 registers. That's because
+ // `Reg` only has an 8-bit index value field, so if the universe
+ // contained more than 256 registers, we'd never be able to index into
+ // entries 256 and above. This is no limitation in practice since all
+ // targets we're interested in contain (many) fewer than 256 regs in
+ // total.
+ let mut ok = regs_len <= 256;
+ // The number of allocable registers must not exceed the number of
+ // `regs` presented. In general it will be less, since the universe
+ // will list some registers (stack pointer, etc) which are not
+ // available for allocation.
+ if ok {
+ ok = regs_allocable <= regs_len;
+ }
+ // All registers must have an index value which points back at the
+ // `regs` slot they are in. Also they really must be real regs.
+ if ok {
+ for i in 0..regs_len {
+ let (reg, _name) = &self.regs[i];
+ if ok && (reg.to_reg().is_virtual() || reg.get_index() != i) {
+ ok = false;
+ }
+ }
+ }
+        // The allocatable regclass groupings defined by the `first` and `last`
+        // fields of the `allocable_by_class` entries must be contiguous.
+ if ok {
+ let mut regclass_used = [false; NUM_REG_CLASSES];
+ for rc in 0..NUM_REG_CLASSES {
+ regclass_used[rc] = false;
+ }
+ for i in 0..regs_allocable {
+ let (reg, _name) = &self.regs[i];
+ let rc = reg.get_class().rc_to_u32() as usize;
+ regclass_used[rc] = true;
+ }
+ // Scan forward through each grouping, checking that the listed
+ // registers really are of the claimed class. Also count the
+ // total number visited. This seems a fairly reliable way to
+ // ensure that the groupings cover all allocated registers exactly
+ // once, and that all classes are contiguous groups.
+ let mut regs_visited = 0;
+ for rc in 0..NUM_REG_CLASSES {
+ match &self.allocable_by_class[rc] {
+ &None => {
+ if regclass_used[rc] {
+ ok = false;
+ }
+ }
+ &Some(RegClassInfo {
+ first,
+ last,
+ suggested_scratch,
+ }) => {
+ if !regclass_used[rc] {
+ ok = false;
+ }
+ if ok {
+ for i in first..last + 1 {
+ let (reg, _name) = &self.regs[i];
+ if ok && RegClass::rc_from_u32(rc as u32) != reg.get_class() {
+ ok = false;
+ }
+ regs_visited += 1;
+ }
+ }
+ if ok {
+ if let Some(s) = suggested_scratch {
+ if s < first || s > last {
+ ok = false;
+ }
+ }
+ }
+ }
+ }
+ }
+ if ok && regs_visited != regs_allocable {
+ ok = false;
+ }
+ }
+ // So finally ..
+ if !ok {
+ panic!("RealRegUniverse::check_is_sane: invalid RealRegUniverse");
+ }
+ }
+}
+
+//=============================================================================
+// Representing and printing of live range fragments.
+
+#[derive(Copy, Clone, Hash, PartialEq, Eq, Ord)]
+// There are four "points" within an instruction that are of interest, and
+// these have a total ordering: R < U < D < S. They are:
+//
+// * R(eload): this is where any reload insns for the insn itself are
+// considered to live.
+//
+// * U(se): this is where the insn is considered to use values from those of
+// its register operands that appear in a Read or Modify role.
+//
+// * D(ef): this is where the insn is considered to define new values for
+// those of its register operands that appear in a Write or Modify role.
+//
+// * S(pill): this is where any spill insns for the insn itself are considered
+// to live.
+//
+// Instructions in the incoming Func may only exist at the U and D points,
+// and so their associated live range fragments will only mention the U and D
+// points. However, when adding spill code, we need a way to represent live
+// ranges involving the added spill and reload insns, in which case R and S
+// come into play:
+//
+// * A reload for instruction i is considered to be live from i.R to i.U.
+//
+// * A spill for instruction i is considered to be live from i.D to i.S.
+
+pub enum Point {
+ // The values here are important. Don't change them.
+ Reload = 0,
+ Use = 1,
+ Def = 2,
+ Spill = 3,
+}
+
+impl Point {
+ #[inline(always)]
+ pub fn is_reload(self) -> bool {
+ match self {
+ Point::Reload => true,
+ _ => false,
+ }
+ }
+ #[inline(always)]
+ pub fn is_use(self) -> bool {
+ match self {
+ Point::Use => true,
+ _ => false,
+ }
+ }
+ #[inline(always)]
+ pub fn is_def(self) -> bool {
+ match self {
+ Point::Def => true,
+ _ => false,
+ }
+ }
+ #[inline(always)]
+ pub fn is_spill(self) -> bool {
+ match self {
+ Point::Spill => true,
+ _ => false,
+ }
+ }
+ #[inline(always)]
+ pub fn is_use_or_def(self) -> bool {
+ self.is_use() || self.is_def()
+ }
+}
+
+impl PartialOrd for Point {
+ // In short .. R < U < D < S. This is probably what would be #derive'd
+ // anyway, but we need to be sure.
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ (*self as u32).partial_cmp(&(*other as u32))
+ }
+}
+
+// See comments below on `RangeFrag` for the meaning of `InstPoint`.
+#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct InstPoint {
+ /// This is conceptually:
+ /// pub iix: InstIx,
+ /// pub pt: Point,
+ ///
+ /// but packed into a single 32 bit word, so as
+ /// (1) to ensure it is only 32 bits (and hence to guarantee that `RangeFrag`
+ /// is 64 bits), and
+ /// (2) to make it possible to implement `PartialOrd` using `PartialOrd`
+ /// directly on 32 bit words (and hence we let it be derived).
+ ///
+ /// This has the format:
+ /// InstIx as bits 31:2, Point as bits 1:0.
+ ///
+ /// It does give the slight limitation that all InstIxs must be < 2^30, but
+ /// that's hardly a big deal: the analysis module rejects any input with 2^24
+ /// or more Insns.
+ ///
+ /// Do not access this directly:
+ bits: u32,
+}
+
+impl InstPoint {
+ #[inline(always)]
+ pub fn new(iix: InstIx, pt: Point) -> Self {
+ let iix_n = iix.get();
+ assert!(iix_n < 0x4000_0000u32);
+ let pt_n = pt as u32;
+ InstPoint {
+ bits: (iix_n << 2) | pt_n,
+ }
+ }
+ #[inline(always)]
+ pub fn iix(self) -> InstIx {
+ InstIx::new(self.bits >> 2)
+ }
+ #[inline(always)]
+ pub fn pt(self) -> Point {
+ match self.bits & 3 {
+ 0 => Point::Reload,
+ 1 => Point::Use,
+ 2 => Point::Def,
+ 3 => Point::Spill,
+ // This can never happen, but rustc doesn't seem to know that.
+ _ => panic!("InstPt::pt: unreachable case"),
+ }
+ }
+ #[inline(always)]
+ pub fn set_iix(&mut self, iix: InstIx) {
+ let iix_n = iix.get();
+ assert!(iix_n < 0x4000_0000u32);
+ self.bits = (iix_n << 2) | (self.bits & 3);
+ }
+ #[inline(always)]
+ pub fn set_pt(&mut self, pt: Point) {
+ self.bits = (self.bits & 0xFFFF_FFFCu32) | pt as u32;
+ }
+ #[inline(always)]
+ pub fn new_reload(iix: InstIx) -> Self {
+ InstPoint::new(iix, Point::Reload)
+ }
+ #[inline(always)]
+ pub fn new_use(iix: InstIx) -> Self {
+ InstPoint::new(iix, Point::Use)
+ }
+ #[inline(always)]
+ pub fn new_def(iix: InstIx) -> Self {
+ InstPoint::new(iix, Point::Def)
+ }
+ #[inline(always)]
+ pub fn new_spill(iix: InstIx) -> Self {
+ InstPoint::new(iix, Point::Spill)
+ }
+ #[inline(always)]
+ pub fn invalid_value() -> Self {
+ Self {
+ bits: 0xFFFF_FFFFu32,
+ }
+ }
+ #[inline(always)]
+ pub fn max_value() -> Self {
+ Self {
+ bits: 0xFFFF_FFFFu32,
+ }
+ }
+ #[inline(always)]
+ pub fn min_value() -> Self {
+ Self { bits: 0u32 }
+ }
+}
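+
+// As an illustrative sketch (indices invented for this example): the packing
+// above round-trips, and gives the R < U < D < S order within one instruction
+// and instruction-index-major order across instructions:
+//
+//     let ip = InstPoint::new(InstIx::new(7), Point::Def);
+//     assert!(ip.iix() == InstIx::new(7) && ip.pt().is_def());
+//     assert!(InstPoint::new_use(InstIx::new(7)) < ip);
+//     assert!(ip < InstPoint::new_spill(InstIx::new(7)));
+//     assert!(ip < InstPoint::new_reload(InstIx::new(8)));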
+
+impl fmt::Debug for InstPoint {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "{:?}{}",
+ self.iix(),
+ match self.pt() {
+ Point::Reload => ".r",
+ Point::Use => ".u",
+ Point::Def => ".d",
+ Point::Spill => ".s",
+ }
+ )
+ }
+}
+
+//=============================================================================
+// Live Range Fragments, and their metrics
+
+// A Live Range Fragment (RangeFrag) describes a consecutive sequence of one or
+// more instructions, in which a Reg is "live". The sequence must exist
+// entirely inside only one basic block.
+//
+// However, merely indicating the start and end instruction numbers isn't
+// enough: we must also include a "Use or Def" indication. These indicate two
+// different "points" within each instruction: the Use position, where
+// incoming registers are read, and the Def position, where outgoing registers
+// are written. The Use position is considered to come before the Def
+// position, as described for `Point` above.
+//
+// When we come to generate spill/restore live ranges, Point::S and Point::R
+// also come into play. Live ranges (and hence, RangeFrags) that do not perform
+// spills or restores should not use either of Point::S or Point::R.
+//
+// The set of positions denoted by
+//
+// {0 .. #insns-1} x {Reload point, Use point, Def point, Spill point}
+//
+// is exactly the set of positions that we need to keep track of when mapping
+// live ranges to registers. This is the reason for the type InstPoint. Note
+// that InstPoint values have a total ordering, at least within a single basic
+// block: the insn number is used as the primary key, and the Point part is
+// the secondary key, with Reload < Use < Def < Spill.
+#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct RangeFrag {
+ pub first: InstPoint,
+ pub last: InstPoint,
+}
+
+impl fmt::Debug for RangeFrag {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "(RF: {:?}-{:?})", self.first, self.last)
+ }
+}
+
+impl RangeFrag {
+ #[allow(dead_code)] // XXX for some reason, Rustc 1.43.1 thinks this is unused.
+ pub fn new(first: InstPoint, last: InstPoint) -> Self {
+ debug_assert!(first <= last);
+ RangeFrag { first, last }
+ }
+
+ pub fn invalid_value() -> Self {
+ Self {
+ first: InstPoint::invalid_value(),
+ last: InstPoint::invalid_value(),
+ }
+ }
+
+ pub fn new_with_metrics<F: Function>(
+ f: &F,
+ bix: BlockIx,
+ first: InstPoint,
+ last: InstPoint,
+ count: u16,
+ ) -> (Self, RangeFragMetrics) {
+ debug_assert!(f.block_insns(bix).len() >= 1);
+ debug_assert!(f.block_insns(bix).contains(first.iix()));
+ debug_assert!(f.block_insns(bix).contains(last.iix()));
+ debug_assert!(first <= last);
+ if first == last {
+ debug_assert!(count == 1);
+ }
+ let first_iix_in_block = f.block_insns(bix).first();
+ let last_iix_in_block = f.block_insns(bix).last();
+ let first_pt_in_block = InstPoint::new_use(first_iix_in_block);
+ let last_pt_in_block = InstPoint::new_def(last_iix_in_block);
+ let kind = match (first == first_pt_in_block, last == last_pt_in_block) {
+ (false, false) => RangeFragKind::Local,
+ (false, true) => RangeFragKind::LiveOut,
+ (true, false) => RangeFragKind::LiveIn,
+ (true, true) => RangeFragKind::Thru,
+ };
+ (
+ RangeFrag { first, last },
+ RangeFragMetrics { bix, kind, count },
+ )
+ }
+}
+
+// Comparison of RangeFrags. They form a partial order.
+
+pub fn cmp_range_frags(f1: &RangeFrag, f2: &RangeFrag) -> Option<Ordering> {
+ if f1.last < f2.first {
+ return Some(Ordering::Less);
+ }
+ if f1.first > f2.last {
+ return Some(Ordering::Greater);
+ }
+ if f1.first == f2.first && f1.last == f2.last {
+ return Some(Ordering::Equal);
+ }
+ None
+}
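+
+// As an illustrative sketch of that partial order (indices invented for this
+// example):
+//
+//     let (i3, i5) = (InstIx::new(3), InstIx::new(5));
+//     let f_3u_3d = RangeFrag::new(InstPoint::new_use(i3), InstPoint::new_def(i3));
+//     let f_5u_5d = RangeFrag::new(InstPoint::new_use(i5), InstPoint::new_def(i5));
+//     let f_3d_5u = RangeFrag::new(InstPoint::new_def(i3), InstPoint::new_use(i5));
+//
+//     assert!(cmp_range_frags(&f_3u_3d, &f_5u_5d) == Some(Ordering::Less));
+//     assert!(cmp_range_frags(&f_5u_5d, &f_3u_3d) == Some(Ordering::Greater));
+//     assert!(cmp_range_frags(&f_3u_3d, &f_3u_3d) == Some(Ordering::Equal));
+//     assert!(cmp_range_frags(&f_3u_3d, &f_3d_5u) == None); // overlap: incomparable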
+
+impl RangeFrag {
+ pub fn contains(&self, ipt: &InstPoint) -> bool {
+ self.first <= *ipt && *ipt <= self.last
+ }
+}
+
+// A handy summary hint for a RangeFrag. Note that none of these are correct
+// if the RangeFrag has been extended so as to cover multiple basic blocks.
+// But that ("RangeFrag compression") is something done locally within each
+// algorithm (BT and LSRA). The analysis-phase output will not include any
+// such compressed RangeFrags.
+#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub enum RangeFragKind {
+ Local, // Fragment exists entirely inside one block
+ LiveIn, // Fragment is live in to a block, but ends inside it
+ LiveOut, // Fragment is live out of a block, but starts inside it
+ Thru, // Fragment is live through the block (starts and ends outside it)
+}
+
+impl fmt::Debug for RangeFragKind {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ RangeFragKind::Local => write!(fmt, "Local"),
+ RangeFragKind::LiveIn => write!(fmt, "LiveIn"),
+ RangeFragKind::LiveOut => write!(fmt, "LiveOut"),
+ RangeFragKind::Thru => write!(fmt, "Thru"),
+ }
+ }
+}
+
+// `RangeFrags` resulting from the initial analysis phase (analysis_data_flow.rs)
+// exist only within single basic blocks, and therefore have some associated
+// metrics, held by `RangeFragMetrics`:
+//
+// * a `count` field, which is a u16 indicating how often the associated storage
+// unit (Reg) is mentioned inside the RangeFrag. It is assumed that the RangeFrag
+// is associated with some Reg. If not, the `count` field is meaningless. This
+// field has no effect on the correctness of the resulting allocation. It is used
+// however in the estimation of `VirtualRange` spill costs, which are important
+// for prioritising which `VirtualRange`s get assigned a register vs which have
+// to be spilled.
+//
+// * `bix` field, which indicates which `Block` the fragment exists in. This
+// field is actually redundant, since the containing `Block` can be inferred,
+// laboriously, from the associated `RangeFrag`'s `first` and `last` fields,
+// providing you have an `InstIxToBlockIx` mapping table to hand. It is included
+// here for convenience.
+//
+// * `kind` is another convenience field, indicating how the range is included
+// within its owning block.
+//
+// The analysis phase (fn `deref_and_compress_sorted_range_frag_ixs`)
+// compresses ranges and as a result breaks the invariant that a `RangeFrag`
+// exists only within a single `Block`. For a `RangeFrag` spanning multiple
+// `Block`s, all three `RangeFragMetrics` fields are meaningless. This is the
+// reason for separating `RangeFrag` and `RangeFragMetrics` -- so that it is
+// possible to merge `RangeFrag`s without being forced to create fake values
+// for the metrics fields.
+#[derive(Clone, PartialEq)]
+pub struct RangeFragMetrics {
+ pub bix: BlockIx,
+ pub kind: RangeFragKind,
+ pub count: u16,
+}
+
+impl fmt::Debug for RangeFragMetrics {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "(RFM: {:?}, count={}, {:?})",
+ self.kind, self.count, self.bix
+ )
+ }
+}
+
+//=============================================================================
+// Vectors of RangeFragIxs, sorted so that the associated RangeFrags are in
+// ascending order, per their InstPoint fields. The associated RangeFrags may
+// not overlap.
+//
+// The "fragment environment" (usually called "frag_env"), to which the
+// RangeFragIxs refer, is not stored here.
+
+#[derive(Clone)]
+pub struct SortedRangeFragIxs {
+ pub frag_ixs: SmallVec<[RangeFragIx; 4]>,
+}
+
+impl fmt::Debug for SortedRangeFragIxs {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ self.frag_ixs.fmt(fmt)
+ }
+}
+
+impl SortedRangeFragIxs {
+ pub(crate) fn check(&self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) {
+ for i in 1..self.frag_ixs.len() {
+ let prev_frag = &fenv[self.frag_ixs[i - 1]];
+ let this_frag = &fenv[self.frag_ixs[i]];
+ if cmp_range_frags(prev_frag, this_frag) != Some(Ordering::Less) {
+ panic!("SortedRangeFragIxs::check: vector not ok");
+ }
+ }
+ }
+
+ pub fn sort(&mut self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) {
+ self.frag_ixs.sort_unstable_by(|fix_a, fix_b| {
+ match cmp_range_frags(&fenv[*fix_a], &fenv[*fix_b]) {
+ Some(Ordering::Less) => Ordering::Less,
+ Some(Ordering::Greater) => Ordering::Greater,
+ Some(Ordering::Equal) | None => {
+ panic!("SortedRangeFragIxs::sort: overlapping Frags!")
+ }
+ }
+ });
+ }
+
+ pub fn new(
+ frag_ixs: SmallVec<[RangeFragIx; 4]>,
+ fenv: &TypedIxVec<RangeFragIx, RangeFrag>,
+ ) -> Self {
+ let mut res = SortedRangeFragIxs { frag_ixs };
+ // Sort the supplied indices and check that the result is non-overlapping.
+ res.sort(fenv);
+ res.check(fenv);
+ res
+ }
+
+ pub fn unit(fix: RangeFragIx, fenv: &TypedIxVec<RangeFragIx, RangeFrag>) -> Self {
+ let mut res = SortedRangeFragIxs {
+ frag_ixs: SmallVec::<[RangeFragIx; 4]>::new(),
+ };
+ res.frag_ixs.push(fix);
+ res.check(fenv);
+ res
+ }
+
+ /// Does this sorted list of range fragments contain the given instruction point?
+ pub fn contains_pt(&self, fenv: &TypedIxVec<RangeFragIx, RangeFrag>, pt: InstPoint) -> bool {
+ self.frag_ixs
+ .binary_search_by(|&ix| {
+ let frag = &fenv[ix];
+ if pt < frag.first {
+ Ordering::Greater
+ } else if pt >= frag.first && pt <= frag.last {
+ Ordering::Equal
+ } else {
+ Ordering::Less
+ }
+ })
+ .is_ok()
+ }
+}
+
+//=============================================================================
+// Vectors of RangeFrags, sorted so that they are in ascending order, per
+// their InstPoint fields. The RangeFrags may not overlap.
+
+#[derive(Clone)]
+pub struct SortedRangeFrags {
+ pub frags: SmallVec<[RangeFrag; 4]>,
+}
+
+impl fmt::Debug for SortedRangeFrags {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ self.frags.fmt(fmt)
+ }
+}
+
+impl SortedRangeFrags {
+ pub fn unit(frag: RangeFrag) -> Self {
+ let mut res = SortedRangeFrags {
+ frags: SmallVec::<[RangeFrag; 4]>::new(),
+ };
+ res.frags.push(frag);
+ res
+ }
+
+ pub fn empty() -> Self {
+ Self {
+ frags: SmallVec::<[RangeFrag; 4]>::new(),
+ }
+ }
+
+ pub fn overlaps(&self, other: &Self) -> bool {
+ // Since both vectors are sorted and individually non-overlapping, we
+ // can establish that they are mutually non-overlapping by walking
+ // them simultaneously and checking, at each step, that there is a
+ // unique "next lowest" frag available.
+ let frags1 = &self.frags;
+ let frags2 = &other.frags;
+ let n1 = frags1.len();
+ let n2 = frags2.len();
+ let mut c1 = 0;
+ let mut c2 = 0;
+ loop {
+ if c1 >= n1 || c2 >= n2 {
+ // We made it to the end of one (or both) vectors without
+ // finding any conflicts.
+ return false; // "no overlaps"
+ }
+ let f1 = &frags1[c1];
+ let f2 = &frags2[c2];
+ match cmp_range_frags(f1, f2) {
+ Some(Ordering::Less) => c1 += 1,
+ Some(Ordering::Greater) => c2 += 1,
+ _ => {
+ // There's no unique "next frag" -- either they are
+ // identical, or they overlap. So we're done.
+ return true; // "there's an overlap"
+ }
+ }
+ }
+ }
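+
+ // As an illustrative sketch of the above (indices invented for this
+ // example): the single-fragment sets [3.u .. 3.d] and [5.u .. 7.d] do not
+ // overlap, whereas [3.u .. 5.d] and [5.u .. 7.d] do:
+ //
+ //     let frag = |a, b| RangeFrag::new(InstPoint::new_use(InstIx::new(a)),
+ //                                      InstPoint::new_def(InstIx::new(b)));
+ //     let s3_3 = SortedRangeFrags::unit(frag(3, 3));
+ //     let s5_7 = SortedRangeFrags::unit(frag(5, 7));
+ //     let s3_5 = SortedRangeFrags::unit(frag(3, 5));
+ //     assert!(!s3_3.overlaps(&s5_7));
+ //     assert!(s3_5.overlaps(&s5_7));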
+
+ /// Does this sorted list of range fragments contain the given instruction point?
+ pub fn contains_pt(&self, pt: InstPoint) -> bool {
+ self.frags
+ .binary_search_by(|frag| {
+ if pt < frag.first {
+ Ordering::Greater
+ } else if pt >= frag.first && pt <= frag.last {
+ Ordering::Equal
+ } else {
+ Ordering::Less
+ }
+ })
+ .is_ok()
+ }
+}
+
+//=============================================================================
+// Representing spill costs. A spill cost can either be infinite, in which
+// case the associated VirtualRange may not be spilled, because it's already a
+// spill/reload range. Or it can be finite, in which case it must be a 32-bit
+// floating point number, which is (in the IEEE754 meaning of the terms)
+// non-infinite, non-NaN and it must be non negative. In fact it's
+// meaningless for a VLR to have a zero spill cost (how could that really be
+// the case?) but we allow it here for convenience.
+
+#[derive(Copy, Clone)]
+pub enum SpillCost {
+ Infinite, // Infinite, positive
+ Finite(f32), // Finite, non-negative
+}
+
+impl fmt::Debug for SpillCost {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ SpillCost::Infinite => write!(fmt, "INFINITY"),
+ SpillCost::Finite(c) => write!(fmt, "{:<.3}", c),
+ }
+ }
+}
+
+impl SpillCost {
+ #[inline(always)]
+ pub fn zero() -> Self {
+ SpillCost::Finite(0.0)
+ }
+ #[inline(always)]
+ pub fn infinite() -> Self {
+ SpillCost::Infinite
+ }
+ #[inline(always)]
+ pub fn finite(cost: f32) -> Self {
+ // "`is_normal` returns true if the number is neither zero, infinite,
+ // subnormal, or NaN."
+ assert!(cost.is_normal() || cost == 0.0);
+ // And also it can't be negative.
+ assert!(cost >= 0.0);
+ // Somewhat arbitrarily ..
+ assert!(cost < 1e18);
+ SpillCost::Finite(cost)
+ }
+ #[inline(always)]
+ pub fn is_zero(&self) -> bool {
+ match self {
+ SpillCost::Infinite => false,
+ SpillCost::Finite(c) => *c == 0.0,
+ }
+ }
+ #[inline(always)]
+ pub fn is_infinite(&self) -> bool {
+ match self {
+ SpillCost::Infinite => true,
+ SpillCost::Finite(_) => false,
+ }
+ }
+ #[inline(always)]
+ pub fn is_finite(&self) -> bool {
+ !self.is_infinite()
+ }
+ #[inline(always)]
+ pub fn is_less_than(&self, other: &Self) -> bool {
+ match (self, other) {
+ // Dubious .. both are infinity
+ (SpillCost::Infinite, SpillCost::Infinite) => false,
+ // finite < inf
+ (SpillCost::Finite(_), SpillCost::Infinite) => true,
+ // inf is not < finite
+ (SpillCost::Infinite, SpillCost::Finite(_)) => false,
+ // straightforward
+ (SpillCost::Finite(c1), SpillCost::Finite(c2)) => c1 < c2,
+ }
+ }
+ #[inline(always)]
+ pub fn add(&mut self, other: &Self) {
+ match (*self, other) {
+ (SpillCost::Finite(c1), SpillCost::Finite(c2)) => {
+ // The 10^18 limit above gives us a lot of headroom here, since the
+ // maximum f32 is around 3.4 * 10^38.
+ *self = SpillCost::Finite(c1 + c2);
+ }
+ (_, _) => {
+ // All other cases produce an infinity.
+ *self = SpillCost::Infinite;
+ }
+ }
+ }
+}
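+
+// As an illustrative sketch (costs invented for this example): costs add
+// pointwise while finite, and saturate to Infinite as soon as either operand
+// is infinite:
+//
+//     let mut c = SpillCost::finite(3.0);
+//     c.add(&SpillCost::finite(1.5));
+//     assert!(c.is_finite() && !c.is_zero());
+//     assert!(c.is_less_than(&SpillCost::infinite()));
+//     c.add(&SpillCost::infinite());
+//     assert!(c.is_infinite());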
+
+//=============================================================================
+// Representing and printing live ranges. These are represented by two
+// different but closely related types, RealRange and VirtualRange.
+
+// RealRanges are live ranges for real regs (RealRegs). VirtualRanges are
+// live ranges for virtual regs (VirtualRegs). VirtualRanges are the
+// fundamental unit of allocation.
+//
+// A RealRange pairs a RealReg with a vector of RangeFragIxs in which it is
+// live. The RangeFragIxs are indices into some vector of RangeFrags (a
+// "fragment environment", 'fenv'), which is not specified here. They are
+// sorted so as to give ascending order to the RangeFrags which they refer to.
+//
+// A VirtualRange pairs a VirtualReg with a vector of RangeFrags in which it
+// is live. Same scheme as for a RealRange, except it avoids the overhead of
+// having to indirect into the fragment environment.
+//
+// VirtualRanges also contain metrics:
+//
+// * `size` is the number of instructions in total spanned by the LR. It must
+// not be zero.
+//
+// * `total cost` is an abstractified measure of the cost of the LR. Each
+// basic block in which the range exists gives a contribution to the `total
+// cost`, which is the number of times the register is mentioned in this
+// block, multiplied by the estimated execution frequency for the block.
+//
+// * `spill_cost` is an abstractified measure of the cost of spilling the LR,
+// and is the `total cost` divided by the `size`. The only constraint
+// (w.r.t. correctness) is that normal LRs have a `Some` value, whilst
+// `None` is reserved for live ranges created for spills and reloads and
+// interpreted to mean "infinity". This is needed to guarantee that
+// allocation can always succeed in the worst case, in which all of the
+// original live ranges of the program are spilled.
+//
+// RealRanges don't carry any metrics info since we are not trying to allocate
+// them. We merely need to work around them.
+//
+// I find it helpful to think of a live range, both RealRange and
+// VirtualRange, as a "renaming equivalence class". That is, if you rename
+// `reg` at some point inside `sorted_frags`, then you must rename *all*
+// occurrences of `reg` inside `sorted_frags`, since otherwise the program will
+// no longer work.
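+//
+// As a worked example of those metrics (numbers invented for illustration):
+// suppose a VirtualRange spans 8 instructions in total (`size` = 8), and its
+// register is mentioned 3 times in a block with estimated execution frequency
+// 10 and 2 times in a block with frequency 1. Then
+//
+//     total_cost = 3 * 10 + 2 * 1 = 32
+//     spill_cost = total_cost / size = 32 / 8 = 4.0
+//
+// so, all else being equal, this range would be chosen for spilling in
+// preference to a range with a higher `spill_cost`.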
+
+#[derive(Clone)]
+pub struct RealRange {
+ pub rreg: RealReg,
+ pub sorted_frags: SortedRangeFragIxs,
+ pub is_ref: bool,
+}
+
+impl fmt::Debug for RealRange {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "(RR: {:?}{}, {:?})",
+ self.rreg,
+ if self.is_ref { " REF" } else { "" },
+ self.sorted_frags
+ )
+ }
+}
+
+impl RealRange {
+ pub fn show_with_rru(&self, univ: &RealRegUniverse) -> String {
+ format!(
+ "(RR: {}{}, {:?})",
+ self.rreg.to_reg().show_with_rru(univ),
+ if self.is_ref { " REF" } else { "" },
+ self.sorted_frags
+ )
+ }
+}
+
+// VirtualRanges are live ranges for virtual regs (VirtualRegs). This does
+// carry metrics info and also the identity of the RealReg to which it
+// eventually got allocated. (Or in the backtracking allocator, the identity
+// of the RealReg to which it is *currently* assigned; that may be undone at
+// some later point.)
+
+#[derive(Clone)]
+pub struct VirtualRange {
+ pub vreg: VirtualReg,
+ pub rreg: Option<RealReg>,
+ pub sorted_frags: SortedRangeFrags,
+ pub is_ref: bool,
+ pub size: u16,
+ pub total_cost: u32,
+ pub spill_cost: SpillCost, // == total_cost / size
+}
+
+impl VirtualRange {
+ pub fn overlaps(&self, other: &Self) -> bool {
+ self.sorted_frags.overlaps(&other.sorted_frags)
+ }
+}
+
+impl fmt::Debug for VirtualRange {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ fmt,
+ "(VR: {:?}{},",
+ self.vreg,
+ if self.is_ref { " REF" } else { "" }
+ )?;
+ if self.rreg.is_some() {
+ write!(fmt, " -> {:?}", self.rreg.unwrap())?;
+ }
+ write!(
+ fmt,
+ " sz={}, tc={}, sc={:?}, {:?})",
+ self.size, self.total_cost, self.spill_cost, self.sorted_frags
+ )
+ }
+}
+
+//=============================================================================
+// Some auxiliary/miscellaneous data structures that are useful: RegToRangesMaps
+
+// Mappings from RealRegs and VirtualRegs to the sets of RealRanges and VirtualRanges that
+// belong to them. These are needed for BT's coalescing analysis and for the dataflow analysis
+// that supports reftype handling.
+
+pub struct RegToRangesMaps {
+ // This maps RealReg indices to the set of RealRangeIxs for that RealReg. Valid indices are
+ // real register indices for all non-sanitised real regs; that is,
+ // 0 .. RealRegUniverse::allocable, for ".." having the Rust meaning. The Vecs of
+ // RealRangeIxs are duplicate-free. The SmallVec capacity of 6 was chosen after quite
+ // some profiling, of CL/x64/newBE compiling ZenGarden.wasm -- a huge input, with many
+ // relatively small functions. Profiling was performed in August 2020, using Valgrind/DHAT.
+ pub rreg_to_rlrs_map: Vec</*real reg ix, */ SmallVec<[RealRangeIx; 6]>>,
+
+ // This maps VirtualReg indices to the set of VirtualRangeIxs for that VirtualReg. Valid
+ // indices are 0 .. Function::get_num_vregs(). For functions mostly translated from SSA,
+ // most VirtualRegs will have just one VirtualRange, and there are a lot of VirtualRegs in
+ // general. So SmallVec is a definite benefit here.
+ pub vreg_to_vlrs_map: Vec</*virtual reg ix, */ SmallVec<[VirtualRangeIx; 3]>>,
+
+ // As an optimisation heuristic for BT's coalescing analysis, these indicate which
+ // real/virtual registers have "many" `RangeFrag`s in their live ranges. For some
+ // definition of "many", perhaps "200 or more". This is not important for overall
+ // allocation result or correctness: it merely allows the coalescing analysis to switch
+ // between two search strategies, one of which is fast for regs with few `RangeFrag`s (the
+ // vast majority) and the other of which has better asymptotic behaviour for regs with many
+ // `RangeFrag`s (in order to keep out of trouble on some pathological inputs). These
+ // vectors are duplicate-free but the elements may be in an arbitrary order.
+ pub rregs_with_many_frags: Vec<u32 /*RealReg index*/>,
+ pub vregs_with_many_frags: Vec<u32 /*VirtualReg index*/>,
+
+ // And this indicates what the threshold is actually set to. A reg will be in
+ // `r/vregs_with_many_frags` if it has `many_frags_thresh` or more RangeFrags.
+ pub many_frags_thresh: usize,
+}
+
+//=============================================================================
+// Some auxiliary/miscellaneous data structures that are useful: MoveInfo
+
+// `MoveInfoElem` holds info about the two registers connected by a move: the source and destination
+// of the move, the insn performing the move, and the estimated execution frequency of the
+// containing block. In `MoveInfo`, the moves are not presented in any particular order, but
+// they are duplicate-free in that each such instruction will be listed only once.
+
+pub struct MoveInfoElem {
+ pub dst: Reg,
+ pub src: Reg,
+ pub iix: InstIx,
+ pub est_freq: u32,
+}
+
+pub struct MoveInfo {
+ pub moves: Vec<MoveInfoElem>,
+}
+
+// Something that can be either a VirtualRangeIx or a RealRangeIx, whilst still being 32 bits
+// (by stealing one bit from those spaces). Note that the resulting thing no longer denotes a
+// contiguous index space, and so it has a name that indicates it is an identifier rather than
+// an index.
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+pub struct RangeId {
+ // 1 X--(31)--X is a RealRangeIx with value X--(31)--X
+ // 0 X--(31)--X is a VirtualRangeIx with value X--(31)--X
+ bits: u32,
+}
+
+impl RangeId {
+ #[inline(always)]
+ pub fn new_real(rlrix: RealRangeIx) -> Self {
+ let n = rlrix.get();
+ assert!(n <= 0x7FFF_FFFF);
+ Self {
+ bits: n | 0x8000_0000,
+ }
+ }
+ #[inline(always)]
+ pub fn new_virtual(vlrix: VirtualRangeIx) -> Self {
+ let n = vlrix.get();
+ assert!(n <= 0x7FFF_FFFF);
+ Self { bits: n }
+ }
+ #[inline(always)]
+ pub fn is_real(self) -> bool {
+ self.bits & 0x8000_0000 != 0
+ }
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn is_virtual(self) -> bool {
+ self.bits & 0x8000_0000 == 0
+ }
+ #[inline(always)]
+ pub fn to_real(self) -> RealRangeIx {
+ assert!(self.bits & 0x8000_0000 != 0);
+ RealRangeIx::new(self.bits & 0x7FFF_FFFF)
+ }
+ #[inline(always)]
+ pub fn to_virtual(self) -> VirtualRangeIx {
+ assert!(self.bits & 0x8000_0000 == 0);
+ VirtualRangeIx::new(self.bits)
+ }
+ #[inline(always)]
+ pub fn invalid_value() -> Self {
+ // Real, and implausibly huge
+ Self { bits: 0xFFFF_FFFF }
+ }
+}
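+
+// As an illustrative sketch (indices invented for this example): the tag bit
+// round-trips in both directions:
+//
+//     let r = RangeId::new_real(RealRangeIx::new(5));
+//     let v = RangeId::new_virtual(VirtualRangeIx::new(5));
+//     assert!(r.is_real() && v.is_virtual());
+//     assert!(r.to_real() == RealRangeIx::new(5));
+//     assert!(v.to_virtual() == VirtualRangeIx::new(5));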
+
+impl fmt::Debug for RangeId {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ if self.is_real() {
+ self.to_real().fmt(fmt)
+ } else {
+ self.to_virtual().fmt(fmt)
+ }
+ }
+}
+
+//=============================================================================
+// Test cases
+
+// sewardj 2020Mar04: these are commented out for now, as they no longer
+// compile. They may be useful later though, once BT acquires an interval
+// tree implementation for its CommitmentMap.
+
+/*
+#[test]
+fn test_sorted_frag_ranges() {
+ // Create a RangeFrag and RangeFragIx from two InstPoints.
+ fn gen_fix(
+ fenv: &mut TypedIxVec<RangeFragIx, RangeFrag>, first: InstPoint,
+ last: InstPoint,
+ ) -> RangeFragIx {
+ assert!(first <= last);
+ let res = RangeFragIx::new(fenv.len() as u32);
+ let frag = RangeFrag {
+ bix: BlockIx::new(123),
+ kind: RangeFragKind::Local,
+ first,
+ last,
+ count: 0,
+ };
+ fenv.push(frag);
+ res
+ }
+
+ fn get_range_frag(
+ fenv: &TypedIxVec<RangeFragIx, RangeFrag>, fix: RangeFragIx,
+ ) -> &RangeFrag {
+ &fenv[fix]
+ }
+
+ // Structural equality, at least. Not equality in the sense of
+ // deferencing the contained RangeFragIxes.
+ fn sorted_range_eq(
+ fixs1: &SortedRangeFragIxs, fixs2: &SortedRangeFragIxs,
+ ) -> bool {
+ if fixs1.frag_ixs.len() != fixs2.frag_ixs.len() {
+ return false;
+ }
+ for (mf1, mf2) in fixs1.frag_ixs.iter().zip(&fixs2.frag_ixs) {
+ if mf1 != mf2 {
+ return false;
+ }
+ }
+ true
+ }
+
+ let iix3 = InstIx::new(3);
+ let iix4 = InstIx::new(4);
+ let iix5 = InstIx::new(5);
+ let iix6 = InstIx::new(6);
+ let iix7 = InstIx::new(7);
+ let iix10 = InstIx::new(10);
+ let iix12 = InstIx::new(12);
+
+ let fp_3u = InstPoint::new_use(iix3);
+ let fp_3d = InstPoint::new_def(iix3);
+
+ let fp_4u = InstPoint::new_use(iix4);
+
+ let fp_5u = InstPoint::new_use(iix5);
+ let fp_5d = InstPoint::new_def(iix5);
+
+ let fp_6u = InstPoint::new_use(iix6);
+ let fp_6d = InstPoint::new_def(iix6);
+
+ let fp_7u = InstPoint::new_use(iix7);
+ let fp_7d = InstPoint::new_def(iix7);
+
+ let fp_10u = InstPoint::new_use(iix10);
+ let fp_12u = InstPoint::new_use(iix12);
+
+ let mut fenv = TypedIxVec::<RangeFragIx, RangeFrag>::new();
+
+ let fix_3u = gen_fix(&mut fenv, fp_3u, fp_3u);
+ let fix_3d = gen_fix(&mut fenv, fp_3d, fp_3d);
+ let fix_4u = gen_fix(&mut fenv, fp_4u, fp_4u);
+ let fix_3u_5u = gen_fix(&mut fenv, fp_3u, fp_5u);
+ let fix_3d_5d = gen_fix(&mut fenv, fp_3d, fp_5d);
+ let fix_3d_5u = gen_fix(&mut fenv, fp_3d, fp_5u);
+ let fix_3u_5d = gen_fix(&mut fenv, fp_3u, fp_5d);
+ let fix_6u_6d = gen_fix(&mut fenv, fp_6u, fp_6d);
+ let fix_7u_7d = gen_fix(&mut fenv, fp_7u, fp_7d);
+ let fix_10u = gen_fix(&mut fenv, fp_10u, fp_10u);
+ let fix_12u = gen_fix(&mut fenv, fp_12u, fp_12u);
+
+ // Boundary checks for point ranges, 3u vs 3d
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3u),
+ get_range_frag(&fenv, fix_3u)
+ ) == Some(Ordering::Equal)
+ );
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3u),
+ get_range_frag(&fenv, fix_3d)
+ ) == Some(Ordering::Less)
+ );
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3d),
+ get_range_frag(&fenv, fix_3u)
+ ) == Some(Ordering::Greater)
+ );
+
+ // Boundary checks for point ranges, 3d vs 4u
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3d),
+ get_range_frag(&fenv, fix_3d)
+ ) == Some(Ordering::Equal)
+ );
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3d),
+ get_range_frag(&fenv, fix_4u)
+ ) == Some(Ordering::Less)
+ );
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_4u),
+ get_range_frag(&fenv, fix_3d)
+ ) == Some(Ordering::Greater)
+ );
+
+ // Partially overlapping
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3d_5d),
+ get_range_frag(&fenv, fix_3u_5u)
+ ) == None
+ );
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3u_5u),
+ get_range_frag(&fenv, fix_3d_5d)
+ ) == None
+ );
+
+ // Completely overlapping: one contained within the other
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3d_5u),
+ get_range_frag(&fenv, fix_3u_5d)
+ ) == None
+ );
+ assert!(
+ cmp_range_frags(
+ get_range_frag(&fenv, fix_3u_5d),
+ get_range_frag(&fenv, fix_3d_5u)
+ ) == None
+ );
+
+ // Create a SortedRangeFragIxs from a bunch of RangeFrag indices
+ fn new_sorted_frag_ranges(
+ fenv: &TypedIxVec<RangeFragIx, RangeFrag>, frags: &Vec<RangeFragIx>,
+ ) -> SortedRangeFragIxs {
+ SortedRangeFragIxs::new(&frags, fenv)
+ }
+
+ // Construction tests
+ // These fail due to overlap
+ //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_3u_3u, fix_3u_3u]);
+ //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_3u_5u, fix_3d_5d]);
+
+ // These fail due to not being in order
+ //let _ = new_sorted_frag_ranges(&fenv, &vec![fix_4u_4u, fix_3u_3u]);
+
+ // Simple non-overlap tests for add()
+
+ let smf_empty = new_sorted_frag_ranges(&fenv, &vec![]);
+ let smf_6_7_10 =
+ new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_7u_7d, fix_10u]);
+ let smf_3_12 = new_sorted_frag_ranges(&fenv, &vec![fix_3u, fix_12u]);
+ let smf_3_6_7_10_12 = new_sorted_frag_ranges(
+ &fenv,
+ &vec![fix_3u, fix_6u_6d, fix_7u_7d, fix_10u, fix_12u],
+ );
+ let mut tmp;
+
+ tmp = smf_empty.clone();
+ tmp.add(&smf_empty, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_empty));
+
+ tmp = smf_3_12.clone();
+ tmp.add(&smf_empty, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_3_12));
+
+ tmp = smf_empty.clone();
+ tmp.add(&smf_3_12, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_3_12));
+
+ tmp = smf_6_7_10.clone();
+ tmp.add(&smf_3_12, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_3_6_7_10_12));
+
+ tmp = smf_3_12.clone();
+ tmp.add(&smf_6_7_10, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_3_6_7_10_12));
+
+ // Tests for can_add()
+ assert!(true == smf_empty.can_add(&smf_empty, &fenv));
+ assert!(true == smf_empty.can_add(&smf_3_12, &fenv));
+ assert!(true == smf_3_12.can_add(&smf_empty, &fenv));
+ assert!(false == smf_3_12.can_add(&smf_3_12, &fenv));
+
+ assert!(true == smf_6_7_10.can_add(&smf_3_12, &fenv));
+
+ assert!(true == smf_3_12.can_add(&smf_6_7_10, &fenv));
+
+ // Tests for del()
+ let smf_6_7 = new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_7u_7d]);
+ let smf_6_10 = new_sorted_frag_ranges(&fenv, &vec![fix_6u_6d, fix_10u]);
+ let smf_7 = new_sorted_frag_ranges(&fenv, &vec![fix_7u_7d]);
+ let smf_10 = new_sorted_frag_ranges(&fenv, &vec![fix_10u]);
+
+ tmp = smf_empty.clone();
+ tmp.del(&smf_empty, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_empty));
+
+ tmp = smf_3_12.clone();
+ tmp.del(&smf_empty, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_3_12));
+
+ tmp = smf_empty.clone();
+ tmp.del(&smf_3_12, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_empty));
+
+ tmp = smf_6_7_10.clone();
+ tmp.del(&smf_3_12, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_6_7_10));
+
+ tmp = smf_3_12.clone();
+ tmp.del(&smf_6_7_10, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_3_12));
+
+ tmp = smf_6_7_10.clone();
+ tmp.del(&smf_6_7, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_10));
+
+ tmp = smf_6_7_10.clone();
+ tmp.del(&smf_10, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_6_7));
+
+ tmp = smf_6_7_10.clone();
+ tmp.del(&smf_7, &fenv);
+ assert!(sorted_range_eq(&tmp, &smf_6_10));
+
+ // Tests for can_add_if_we_first_del()
+ let smf_10_12 = new_sorted_frag_ranges(&fenv, &vec![fix_10u, fix_12u]);
+
+ assert!(
+ true
+ == smf_6_7_10
+ .can_add_if_we_first_del(/*d=*/ &smf_10_12, /*a=*/ &smf_3_12, &fenv)
+ );
+
+ assert!(
+ false
+ == smf_6_7_10
+ .can_add_if_we_first_del(/*d=*/ &smf_10_12, /*a=*/ &smf_7, &fenv)
+ );
+}
+*/
diff --git a/third_party/rust/regalloc/src/inst_stream.rs b/third_party/rust/regalloc/src/inst_stream.rs
new file mode 100644
index 0000000000..620b9f8cf8
--- /dev/null
+++ b/third_party/rust/regalloc/src/inst_stream.rs
@@ -0,0 +1,664 @@
+use crate::checker::Inst as CheckerInst;
+use crate::checker::{CheckerContext, CheckerErrors};
+use crate::data_structures::{
+ BlockIx, InstIx, InstPoint, Point, RangeFrag, RealReg, RealRegUniverse, Reg, SpillSlot,
+ TypedIxVec, VirtualReg, Writable,
+};
+use crate::{reg_maps::VrangeRegUsageMapper, Function, RegAllocError};
+use log::trace;
+
+use std::result::Result;
+
+//=============================================================================
+// InstToInsert and InstToInsertAndPoint
+
+#[derive(Clone, Debug)]
+pub(crate) enum InstToInsert {
+ Spill {
+ to_slot: SpillSlot,
+ from_reg: RealReg,
+ for_vreg: Option<VirtualReg>,
+ },
+ Reload {
+ to_reg: Writable<RealReg>,
+ from_slot: SpillSlot,
+ for_vreg: Option<VirtualReg>,
+ },
+ Move {
+ to_reg: Writable<RealReg>,
+ from_reg: RealReg,
+ for_vreg: VirtualReg,
+ },
+ /// A spillslot reassignment (to another vreg). In the edited instruction
+ /// stream, this is a nop, but this is needed for the checker to properly
+ /// track the symbolic values in slots. Always originates from a move
+ /// in the original user program whose source and dest vregs are both
+ /// spilled.
+ ChangeSpillSlotOwnership {
+ inst_ix: InstIx,
+ slot: SpillSlot,
+ from_reg: Reg,
+ to_reg: Reg,
+ },
+}
+
+impl InstToInsert {
+ pub(crate) fn construct<F: Function>(&self, f: &F) -> Option<F::Inst> {
+ match self {
+ &InstToInsert::Spill {
+ to_slot,
+ from_reg,
+ for_vreg,
+ } => Some(f.gen_spill(to_slot, from_reg, for_vreg)),
+ &InstToInsert::Reload {
+ to_reg,
+ from_slot,
+ for_vreg,
+ } => Some(f.gen_reload(to_reg, from_slot, for_vreg)),
+ &InstToInsert::Move {
+ to_reg,
+ from_reg,
+ for_vreg,
+ } => Some(f.gen_move(to_reg, from_reg, for_vreg)),
+ &InstToInsert::ChangeSpillSlotOwnership { .. } => None,
+ }
+ }
+
+ pub(crate) fn to_checker_inst(&self) -> CheckerInst {
+ match self {
+ &InstToInsert::Spill {
+ to_slot, from_reg, ..
+ } => CheckerInst::Spill {
+ into: to_slot,
+ from: from_reg,
+ },
+ &InstToInsert::Reload {
+ to_reg, from_slot, ..
+ } => CheckerInst::Reload {
+ into: to_reg,
+ from: from_slot,
+ },
+ &InstToInsert::Move {
+ to_reg, from_reg, ..
+ } => CheckerInst::Move {
+ into: to_reg,
+ from: from_reg,
+ },
+ &InstToInsert::ChangeSpillSlotOwnership {
+ inst_ix,
+ slot,
+ from_reg,
+ to_reg,
+ } => CheckerInst::ChangeSpillSlotOwnership {
+ inst_ix,
+ slot,
+ from_reg,
+ to_reg,
+ },
+ }
+ }
+}
+
+// ExtPoint is an extended version of Point. It plays no role in dataflow analysis or in the
+// specification of live ranges. It exists only to describe where to place the "extra"
+// spill/reload instructions required to make stackmap/reftype support work. If there was no
+// need to support stackmaps/reftypes, ExtPoint would not be needed, and Point would be
+// adequate.
+//
+// Recall that Point can denote 4 places within an instruction, with R < U < D < S:
+//
+// * R(eload): this is where any reload insns for the insn itself are
+// considered to live.
+//
+// * U(se): this is where the insn is considered to use values from those of
+// its register operands that appear in a Read or Modify role.
+//
+// * D(ef): this is where the insn is considered to define new values for
+// those of its register operands that appear in a Write or Modify role.
+//
+// * S(pill): this is where any spill insns for the insn itself are considered
+// to live.
+//
+// ExtPoint extends that to six places, by adding a new point in between Reload and Use, and one
+// between Def and Spill, giving: R < SB < U < D < RA < S:
+//
+// * (R)eload: unchanged
+//
+// * SB (Spill before): at this point, reftyped regs will be spilled, if this insn is a safepoint
+//
+// * (U)se: unchanged
+//
+// * (D)ef: unchanged
+//
+// * RA (Reload after): at this point, reftyped regs spilled at SB will be reloaded, if needed,
+// and if this insn is a safepoint
+//
+// * (S)pill: unchanged
+//
+// From this it can be seen that the SB and RA points are closest to the instruction "core" --
+// the U and D points. SB and RA describe places where reftyped regs must be spilled/reloaded
+// around the core. Because the SB-RA range falls inside the R-S range, it means that the
+// safepoint spill/reload instructions can be added after "normal" spill/reload instructions
+// have been created, and it doesn't interact with the logic to create those "normal"
+// spill/reload instructions.
+//
+// In the worst case scenario, a value could be reloaded at R, immediately spilled at SB, then
+// possibly modified in memory at the safepoint proper, reloaded at RA, and spilled at S. That
+// is considered to be an unlikely scenario, though.
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum ExtPoint {
+ Reload = 0,
+ SpillBefore = 1,
+ Use = 2,
+ Def = 3,
+ ReloadAfter = 4,
+ Spill = 5,
+}
+
+impl ExtPoint {
+ // Promote a Point to an ExtPoint
+ #[inline(always)]
+ pub fn from_point(pt: Point) -> Self {
+ match pt {
+ Point::Reload => ExtPoint::Reload,
+ Point::Use => ExtPoint::Use,
+ Point::Def => ExtPoint::Def,
+ Point::Spill => ExtPoint::Spill,
+ }
+ }
+}
+
+// As the direct analogue of InstPoint, an InstExtPoint pairs an InstIx with an ExtPoint. In
+// contrast to InstPoint, these aren't so performance critical, so there's no fancy bit-packed
+// representation as there is for InstPoint.
+
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct InstExtPoint {
+ pub iix: InstIx,
+ pub extpt: ExtPoint,
+}
+
+impl InstExtPoint {
+ #[inline(always)]
+ pub fn new(iix: InstIx, extpt: ExtPoint) -> Self {
+ Self { iix, extpt }
+ }
+ // Promote an InstPoint to an InstExtPoint
+ #[inline(always)]
+ pub fn from_inst_point(inst_pt: InstPoint) -> Self {
+ InstExtPoint {
+ iix: inst_pt.iix(),
+ extpt: ExtPoint::from_point(inst_pt.pt()),
+ }
+ }
+}
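+
+// As an illustrative sketch (indices invented for this example): the numeric
+// values above give the intended R < SB < U < D < RA < S order, promotion from
+// `Point` preserves position, and `InstExtPoint`s order first by instruction
+// index and then by `ExtPoint`:
+//
+//     assert!(ExtPoint::SpillBefore < ExtPoint::Use);
+//     assert!(ExtPoint::Def < ExtPoint::ReloadAfter);
+//     assert!(ExtPoint::from_point(Point::Use) == ExtPoint::Use);
+//     assert!(InstExtPoint::new(InstIx::new(4), ExtPoint::ReloadAfter)
+//             < InstExtPoint::new(InstIx::new(4), ExtPoint::Spill));
+//     assert!(InstExtPoint::new(InstIx::new(4), ExtPoint::Spill)
+//             < InstExtPoint::new(InstIx::new(5), ExtPoint::Reload));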
+
+// So, finally, we can specify what we want: an instruction to insert, and a place to insert it.
+#[derive(Debug)]
+pub(crate) struct InstToInsertAndExtPoint {
+ pub(crate) inst: InstToInsert,
+ pub(crate) iep: InstExtPoint,
+}
+
+impl InstToInsertAndExtPoint {
+ #[inline(always)]
+ pub(crate) fn new(inst: InstToInsert, iep: InstExtPoint) -> Self {
+ Self { inst, iep }
+ }
+}
+
+//=============================================================================
+// Apply all vreg->rreg mappings for the function's instructions, and run
+// the checker if required. This also removes instructions that the core
+// algorithm wants removed, by nop-ing them out.
+
+#[inline(never)]
+fn map_vregs_to_rregs<F: Function>(
+ func: &mut F,
+ frag_map: Vec<(RangeFrag, VirtualReg, RealReg)>,
+ insts_to_add: &Vec<InstToInsertAndExtPoint>,
+ iixs_to_nop_out: &Vec<InstIx>,
+ reg_universe: &RealRegUniverse,
+ use_checker: bool,
+ safepoint_insns: &[InstIx],
+ stackmaps: &[Vec<SpillSlot>],
+ reftyped_vregs: &[VirtualReg],
+) -> Result<(), CheckerErrors> {
+ // Set up checker state, if indicated by our configuration.
+ let mut checker: Option<CheckerContext> = None;
+ let mut insn_blocks: Vec<BlockIx> = vec![];
+ if use_checker {
+ checker = Some(CheckerContext::new(
+ func,
+ reg_universe,
+ insts_to_add,
+ safepoint_insns,
+ stackmaps,
+ reftyped_vregs,
+ ));
+ insn_blocks.resize(func.insns().len(), BlockIx::new(0));
+ for block_ix in func.blocks() {
+ for insn_ix in func.block_insns(block_ix) {
+ insn_blocks[insn_ix.get() as usize] = block_ix;
+ }
+ }
+ }
+
+ // Sort the insn nop-out index list, so we can advance through it
+ // during the main loop.
+ let mut iixs_to_nop_out = iixs_to_nop_out.clone();
+ iixs_to_nop_out.sort();
+
+ // Make two copies of the fragment mapping, one sorted by the fragment start
+ // points (just the InstIx numbers, ignoring the Point), and one sorted by
+ // fragment end points.
+ let mut frag_maps_by_start = frag_map.clone();
+ let mut frag_maps_by_end = frag_map;
+
+ // -------- Edit the instruction stream --------
+ frag_maps_by_start.sort_unstable_by(|(frag, _, _), (other_frag, _, _)| {
+ frag.first
+ .iix()
+ .partial_cmp(&other_frag.first.iix())
+ .unwrap()
+ });
+
+ frag_maps_by_end.sort_unstable_by(|(frag, _, _), (other_frag, _, _)| {
+ frag.last.iix().partial_cmp(&other_frag.last.iix()).unwrap()
+ });
+
+ let mut cursor_starts = 0;
+ let mut cursor_ends = 0;
+ let mut cursor_nop = 0;
+
+ // Allocate the "mapper" data structure that we update incrementally and
+ // pass to instruction reg-mapping routines to query.
+ let mut mapper = VrangeRegUsageMapper::new(func.get_num_vregs());
+
+ fn is_sane(frag: &RangeFrag) -> bool {
+ // "Normal" frag (unrelated to spilling). No normal frag may start or
+ // end at a .s or a .r point.
+ if frag.first.pt().is_use_or_def()
+ && frag.last.pt().is_use_or_def()
+ && frag.first.iix() <= frag.last.iix()
+ {
+ return true;
+ }
+ // A spill-related ("bridge") frag. There are three possibilities,
+ // and they correspond exactly to `BridgeKind`.
+ if frag.first.pt().is_reload()
+ && frag.last.pt().is_use()
+ && frag.last.iix() == frag.first.iix()
+ {
+ // BridgeKind::RtoU
+ return true;
+ }
+ if frag.first.pt().is_reload()
+ && frag.last.pt().is_spill()
+ && frag.last.iix() == frag.first.iix()
+ {
+ // BridgeKind::RtoS
+ return true;
+ }
+ if frag.first.pt().is_def()
+ && frag.last.pt().is_spill()
+ && frag.last.iix() == frag.first.iix()
+ {
+ // BridgeKind::DtoS
+ return true;
+ }
+ // None of the above apply. This RangeFrag is insane \o/
+ false
+ }
+
+ let mut last_insn_ix = -1;
+ for insn_ix in func.insn_indices() {
+ // Ensure instruction indices are in order. Logic below requires this.
+ assert!(insn_ix.get() as i32 > last_insn_ix);
+ last_insn_ix = insn_ix.get() as i32;
+
+ // advance [cursor_starts, +num_starts) to the group for insn_ix
+ while cursor_starts < frag_maps_by_start.len()
+ && frag_maps_by_start[cursor_starts].0.first.iix() < insn_ix
+ {
+ cursor_starts += 1;
+ }
+ let mut num_starts = 0;
+ while cursor_starts + num_starts < frag_maps_by_start.len()
+ && frag_maps_by_start[cursor_starts + num_starts].0.first.iix() == insn_ix
+ {
+ num_starts += 1;
+ }
+
+ // advance [cursor_ends, +num_ends) to the group for insn_ix
+ while cursor_ends < frag_maps_by_end.len()
+ && frag_maps_by_end[cursor_ends].0.last.iix() < insn_ix
+ {
+ cursor_ends += 1;
+ }
+ let mut num_ends = 0;
+ while cursor_ends + num_ends < frag_maps_by_end.len()
+ && frag_maps_by_end[cursor_ends + num_ends].0.last.iix() == insn_ix
+ {
+ num_ends += 1;
+ }
+
+ // advance cursor_nop in the iixs_to_nop_out list.
+ while cursor_nop < iixs_to_nop_out.len() && iixs_to_nop_out[cursor_nop] < insn_ix {
+ cursor_nop += 1;
+ }
+
+ let nop_this_insn =
+ cursor_nop < iixs_to_nop_out.len() && iixs_to_nop_out[cursor_nop] == insn_ix;
+
+ // So now, frag_maps_by_start[cursor_starts, +num_starts) are the mappings
+ // for fragments that begin at this instruction, in no particular
+ // order. And frag_maps_by_end[cursor_ends, +num_ends) are the mappings
+ // for fragments that end at this instruction.
+
+ // Sanity check all frags. In particular, reload and spill frags are
+ // heavily constrained. No functional effect.
+ for j in cursor_starts..cursor_starts + num_starts {
+ let frag = &frag_maps_by_start[j].0;
+ // "It really starts here, as claimed."
+ debug_assert!(frag.first.iix() == insn_ix);
+ debug_assert!(is_sane(&frag));
+ }
+ for j in cursor_ends..cursor_ends + num_ends {
+ let frag = &frag_maps_by_end[j].0;
+ // "It really ends here, as claimed."
+ debug_assert!(frag.last.iix() == insn_ix);
+ debug_assert!(is_sane(frag));
+ }
+
+ // Here's the plan, conceptually (we don't actually clone the map):
+ // Update map for I.r:
+ // add frags starting at I.r
+ // no frags should end at I.r (it's a reload insn)
+ // Update map for I.u:
+ // add frags starting at I.u
+ // map_uses := map
+ // remove frags ending at I.u
+ // Update map for I.d:
+ // add frags starting at I.d
+ // map_defs := map
+ // remove frags ending at I.d
+ // Update map for I.s:
+ // no frags should start at I.s (it's a spill insn)
+ // remove frags ending at I.s
+ // apply map_uses/map_defs to I
+
+ // To update the running mapper, we:
+ // - call `mapper.set_direct(vreg, Some(rreg))` with pre-insn starts.
+ // ("use"-map snapshot conceptually happens here)
+ // - call `mapper.set_overlay(vreg, None)` with pre-insn, post-reload ends.
+ // - call `mapper.set_overlay(vreg, Some(rreg))` with post-insn, pre-spill starts.
+ // ("post"-map snapshot conceptually happens here)
+ // - call `mapper.finish_overlay()`.
+ //
+ // - Use the map. `pre` and `post` are correct wrt the instruction.
+ //
+ // - call `mapper.merge_overlay()` to merge post-updates to main map.
+ // - call `mapper.set_direct(vreg, None)` with post-insn, post-spill
+ // ends.
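+ //
+ // As a concrete (invented) example: for an instruction that reads v1,
+ // currently mapped to r8, and writes v2, mapped to r9, where v1's fragment
+ // ends at I.u and v2's fragment starts at I.d, the calls are:
+ //
+ //     mapper.set_direct(v1, Some(r8)); // v1 visible at the use point
+ //     mapper.set_overlay(v1, None); // v1 dead after I.u
+ //     mapper.set_overlay(v2, Some(r9)); // v2 live from I.d
+ //     mapper.finish_overlay();
+ //     // F::map_regs(..) now sees v1->r8 for uses and v2->r9 for defs
+ //     mapper.merge_overlay();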
+
+ trace!("current mapper {:?}", mapper);
+
+ // Update map for I.r:
+ // add frags starting at I.r
+ // no frags should end at I.r (it's a reload insn)
+ for j in cursor_starts..cursor_starts + num_starts {
+ let frag = &frag_maps_by_start[j].0;
+ if frag.first.pt().is_reload() {
+ //////// STARTS at I.r
+ mapper.set_direct(frag_maps_by_start[j].1, Some(frag_maps_by_start[j].2));
+ }
+ }
+
+ // Update map for I.u:
+ // add frags starting at I.u
+ // map_uses := map
+ // remove frags ending at I.u
+ for j in cursor_starts..cursor_starts + num_starts {
+ let frag = &frag_maps_by_start[j].0;
+ if frag.first.pt().is_use() {
+ //////// STARTS at I.u
+ mapper.set_direct(frag_maps_by_start[j].1, Some(frag_maps_by_start[j].2));
+ }
+ }
+ for j in cursor_ends..cursor_ends + num_ends {
+ let frag = &frag_maps_by_end[j].0;
+ if frag.last.pt().is_use() {
+ //////// ENDS at I.U
+ mapper.set_overlay(frag_maps_by_end[j].1, None);
+ }
+ }
+
+ trace!("maps after I.u {:?}", mapper);
+
+ // Update map for I.d:
+ // add frags starting at I.d
+ // map_defs := map
+ // remove frags ending at I.d
+ for j in cursor_starts..cursor_starts + num_starts {
+ let frag = &frag_maps_by_start[j].0;
+ if frag.first.pt().is_def() {
+ //////// STARTS at I.d
+ mapper.set_overlay(frag_maps_by_start[j].1, Some(frag_maps_by_start[j].2));
+ }
+ }
+
+ mapper.finish_overlay();
+
+ trace!("maps after I.d {:?}", mapper);
+
+ // If we have a checker, update it with spills, reloads, moves, and this
+ // instruction, while we have `map_uses` and `map_defs` available.
+ if let &mut Some(ref mut checker) = &mut checker {
+ let block_ix = insn_blocks[insn_ix.get() as usize];
+ checker
+ .handle_insn(reg_universe, func, block_ix, insn_ix, &mapper)
+ .unwrap();
+ }
+
+ // Finally, we have map_uses/map_defs set correctly for this instruction.
+ // Apply it.
+ if !nop_this_insn {
+ trace!("map_regs for {:?}", insn_ix);
+ let mut insn = func.get_insn_mut(insn_ix);
+ F::map_regs(&mut insn, &mapper);
+ trace!("mapped instruction: {:?}", insn);
+ } else {
+ // N.B. We nop out instructions as requested only *here*, after the
+ // checker call, because the checker must observe even elided moves
+ // (they may carry useful information about a move between two virtual
+ // locations mapped to the same physical location).
+ trace!("nop'ing out {:?}", insn_ix);
+ let nop = func.gen_zero_len_nop();
+ let insn = func.get_insn_mut(insn_ix);
+ *insn = nop;
+ }
+
+ mapper.merge_overlay();
+ for j in cursor_ends..cursor_ends + num_ends {
+ let frag = &frag_maps_by_end[j].0;
+ if frag.last.pt().is_def() {
+ //////// ENDS at I.d
+ mapper.set_direct(frag_maps_by_end[j].1, None);
+ }
+ }
+
+ // Update map for I.s:
+ // no frags should start at I.s (it's a spill insn)
+ // remove frags ending at I.s
+ for j in cursor_ends..cursor_ends + num_ends {
+ let frag = &frag_maps_by_end[j].0;
+ if frag.last.pt().is_spill() {
+ //////// ENDS at I.s
+ mapper.set_direct(frag_maps_by_end[j].1, None);
+ }
+ }
+
+ // Update cursor_starts and cursor_ends for the next iteration
+ cursor_starts += num_starts;
+ cursor_ends += num_ends;
+ }
+
+ debug_assert!(mapper.is_empty());
+
+ if use_checker {
+ checker.unwrap().run()
+ } else {
+ Ok(())
+ }
+}
+
+//=============================================================================
+// Take the real-register-only code created by `map_vregs_to_rregs` and
+// interleave extra instructions (spills, reloads and moves) that the core
+// algorithm has asked us to add.
+
+#[inline(never)]
+pub(crate) fn add_spills_reloads_and_moves<F: Function>(
+ func: &mut F,
+ safepoint_insns: &Vec<InstIx>,
+ mut insts_to_add: Vec<InstToInsertAndExtPoint>,
+) -> Result<
+ (
+ Vec<F::Inst>,
+ TypedIxVec<BlockIx, InstIx>,
+ TypedIxVec<InstIx, InstIx>,
+ Vec<InstIx>,
+ ),
+ String,
+> {
+ // Construct the final code by interleaving the mapped code with the
+ // spills, reloads and moves that we have been requested to insert. To do
+ // that requires having the latter sorted by InstPoint.
+ //
+ // We also need to examine and update Func::blocks. This is assumed to
+ // be arranged in ascending order of the Block::start fields.
+ //
+ // Also, if the client requested stackmap creation, then `safepoint_insns` will be
+ // non-empty, and we will have to return a vector of the same length, that indicates the
+ // location of each safepoint insn in the final code. `safepoint_insns` is assumed to be
+ // sorted in ascending order and duplicate-free.
+ //
+ // Linear scan relies on the sort being stable here, so make sure to not
+ // use an unstable sort. See the comment in `resolve_moves_across_blocks`
+ // in linear scan's code.
+
+ insts_to_add.sort_by_key(|to_add| to_add.iep.clone());
+
+ let mut cur_inst_to_add = 0;
+ let mut cur_block = BlockIx::new(0);
+
+ let mut insns: Vec<F::Inst> = vec![];
+ let mut target_map: TypedIxVec<BlockIx, InstIx> = TypedIxVec::new();
+
+ let mut new_to_old_insn_map: TypedIxVec<InstIx, InstIx> = TypedIxVec::new();
+ target_map.reserve(func.blocks().len());
+ new_to_old_insn_map.reserve(func.insn_indices().len() + insts_to_add.len());
+
+ // Index in `safepoint_insns` of the next safepoint insn we will encounter
+ let mut next_safepoint_insn_index = 0;
+ let mut new_safepoint_insns = Vec::<InstIx>::new();
+ new_safepoint_insns.reserve(safepoint_insns.len());
+
+ for iix in func.insn_indices() {
+ // Is `iix` the first instruction in a block? Meaning, are we
+ // starting a new block?
+ debug_assert!(cur_block.get() < func.blocks().len() as u32);
+ if func.block_insns(cur_block).start() == iix {
+ assert!(cur_block.get() == target_map.len());
+ target_map.push(InstIx::new(insns.len() as u32));
+ }
+
+ // Copy to the output vector, first, the extra insts that are to be placed at the
+ // reload point of `iix`, and then the extras for the spill-before point of `iix`.
+ while cur_inst_to_add < insts_to_add.len()
+ && insts_to_add[cur_inst_to_add].iep <= InstExtPoint::new(iix, ExtPoint::SpillBefore)
+ {
+ if let Some(inst) = insts_to_add[cur_inst_to_add].inst.construct(func) {
+ insns.push(inst);
+ new_to_old_insn_map.push(InstIx::invalid_value());
+ }
+ cur_inst_to_add += 1;
+ }
+
+ // Copy the inst at `iix` itself
+ if next_safepoint_insn_index < safepoint_insns.len()
+ && iix == safepoint_insns[next_safepoint_insn_index]
+ {
+ new_safepoint_insns.push(InstIx::new(insns.len() as u32));
+ next_safepoint_insn_index += 1;
+ }
+ new_to_old_insn_map.push(iix);
+ insns.push(func.get_insn(iix).clone());
+
+ // Then copy the extra insts that are to be placed at the reload-after point
+ // of `iix`, followed by those to be placed at the spill point of `iix`.
+ while cur_inst_to_add < insts_to_add.len()
+ && insts_to_add[cur_inst_to_add].iep <= InstExtPoint::new(iix, ExtPoint::Spill)
+ {
+ if let Some(inst) = insts_to_add[cur_inst_to_add].inst.construct(func) {
+ insns.push(inst);
+ new_to_old_insn_map.push(InstIx::invalid_value());
+ }
+ cur_inst_to_add += 1;
+ }
+
+ // Is `iix` the last instruction in a block?
+ if iix == func.block_insns(cur_block).last() {
+ debug_assert!(cur_block.get() < func.blocks().len() as u32);
+ cur_block = cur_block.plus(1);
+ }
+ }
+
+ debug_assert!(cur_inst_to_add == insts_to_add.len());
+ debug_assert!(cur_block.get() == func.blocks().len() as u32);
+ debug_assert!(next_safepoint_insn_index == safepoint_insns.len());
+ debug_assert!(new_safepoint_insns.len() == safepoint_insns.len());
+
+ Ok((insns, target_map, new_to_old_insn_map, new_safepoint_insns))
+}
+
+//=============================================================================
+// Main function
+
+#[inline(never)]
+pub(crate) fn edit_inst_stream<F: Function>(
+ func: &mut F,
+ safepoint_insns: &Vec<InstIx>,
+ insts_to_add: Vec<InstToInsertAndExtPoint>,
+ iixs_to_nop_out: &Vec<InstIx>,
+ frag_map: Vec<(RangeFrag, VirtualReg, RealReg)>,
+ reg_universe: &RealRegUniverse,
+ use_checker: bool,
+ stackmaps: &[Vec<SpillSlot>],
+ reftyped_vregs: &[VirtualReg],
+) -> Result<
+ (
+ Vec<F::Inst>,
+ TypedIxVec<BlockIx, InstIx>,
+ TypedIxVec<InstIx, InstIx>,
+ Vec<InstIx>,
+ ),
+ RegAllocError,
+> {
+ map_vregs_to_rregs(
+ func,
+ frag_map,
+ &insts_to_add,
+ iixs_to_nop_out,
+ reg_universe,
+ use_checker,
+ &safepoint_insns[..],
+ stackmaps,
+ reftyped_vregs,
+ )
+ .map_err(|e| RegAllocError::RegChecker(e))?;
+ add_spills_reloads_and_moves(func, safepoint_insns, insts_to_add)
+ .map_err(|e| RegAllocError::Other(e))
+}
diff --git a/third_party/rust/regalloc/src/lib.rs b/third_party/rust/regalloc/src/lib.rs
new file mode 100644
index 0000000000..66216eb7e4
--- /dev/null
+++ b/third_party/rust/regalloc/src/lib.rs
@@ -0,0 +1,637 @@
+//! Main file / top-level module for regalloc library.
+//!
+//! We have tried hard to make the library's interface as simple as possible,
+//! yet flexible enough that the allocators it implements can provide good
+//! quality allocations in reasonable time. Nevertheless, there is still
+//! significant semantic complexity in parts of the interface. If you intend
+//! to use this library in your own code, you would be well advised to read
+//! the comments in this file very carefully.
+
+// Make the analysis module public for fuzzing.
+#[cfg(feature = "fuzzing")]
+pub mod analysis_main;
+#[cfg(not(feature = "fuzzing"))]
+mod analysis_main;
+
+mod analysis_control_flow;
+mod analysis_data_flow;
+mod analysis_reftypes;
+mod avl_tree;
+mod bt_coalescing_analysis;
+mod bt_commitment_map;
+mod bt_main;
+mod bt_spillslot_allocator;
+mod bt_vlr_priority_queue;
+mod checker;
+mod data_structures;
+mod inst_stream;
+mod linear_scan;
+mod pretty_print;
+mod reg_maps;
+mod snapshot;
+mod sparse_set;
+mod union_find;
+
+use log::{info, log_enabled, Level};
+use std::default;
+use std::{borrow::Cow, fmt};
+
+// Stuff that is defined by the library
+
+// Pretty-printing utilities.
+pub use crate::pretty_print::*;
+
+// Sets and maps of things. We can refine these later; but for now the
+// interface needs some way to speak about them, so let's use the
+// library-provided versions.
+
+pub use crate::data_structures::Map;
+pub use crate::data_structures::Set;
+
+// Register classes
+
+pub use crate::data_structures::RegClass;
+
+// Registers, both real and virtual, and ways to create them
+
+pub use crate::data_structures::Reg;
+
+pub use crate::data_structures::RealReg;
+pub use crate::data_structures::VirtualReg;
+
+pub use crate::data_structures::Writable;
+
+pub use crate::data_structures::NUM_REG_CLASSES;
+
+// Spill slots
+
+pub use crate::data_structures::SpillSlot;
+
+// The "register universe". This describes the registers available to the
+// allocator. There are very strict requirements on the structure of the
+// universe. If you fail to observe these requirements, either the allocator
+// itself, or the resulting code, will fail in mysterious ways, and your life
+// will be miserable while you try to figure out what happened. There are
+// lower level details on the definition of RealRegUniverse which you also
+// need to take note of. The overall contract is as follows.
+//
+// === (1) === Basic structure ===
+//
+// A "register universe" is a read-only structure that contains all
+// information about real registers on a given host. For each register class
+// (RegClass) supported by the target, the universe must provide a vector of
+// registers that the allocator may use.
+//
+// The universe may also list other registers that the incoming
+// virtual-registerised code may use, but which are not available for use by
+// the allocator. Indeed, the universe *must* list *all* registers that will
+// ever be mentioned in the incoming code. Failure to do so will cause the
+// allocator's analysis phase to return an error.
+//
+// === (2) === Ordering of registers within each class
+//
+// The ordering of available registers within these vectors does not affect
+// the correctness of the final allocation. However, it will affect the
+// quality of final allocation. Clients are recommended to list, for each
+// class, the callee-saved registers first, and the caller-saved registers
+// after that. The currently supported allocation algorithms (Backtracking
+// and LinearScan) will try to use the first available registers in each
+// class, that is to say, callee-saved ones first. The purpose of this is to
+// try and minimise spilling around calls by avoiding use of caller-saved ones
+// if possible.
+//
+// There is a twist here, however. The abovementioned heuristic works well
+// for non-leaf functions (functions that contain at least one call). But for
+// leaf functions, we would prefer to use the caller-saved registers first,
+// since doing so has the potential to minimise the number of registers that must
+// be saved/restored in the prologue and epilogue. Presently there is no way
+// to tell this interface that the function is a leaf function, and so the
+// only way to get optimal code in this case is to present a universe with the
+// registers listed in the opposite order.
+//
+// This is of course inconvenient for the caller, since it requires
+// maintenance of two separate universes. In the future we will add a boolean
+// parameter to the top level function `allocate_registers` that indicates
+// whether or not the function is a leaf function.
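+//
+// For example (illustrative only, for the x86-64 SysV ABI): an integer-class
+// vector might list the callee-saved registers rbx, r12, r13, r14 and r15
+// before the caller-saved rax, rcx, rdx, rsi, rdi and r8..r11, so that both
+// allocators prefer callee-saved registers; a leaf-function-optimised universe
+// would present the same registers in the reverse order.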
+//
+// === (3) === The "suggested scratch register" ===
+//
+// Some allocation algorithms, particularly linear-scan, may need to have a
+// scratch register available for their own use. The register universe must
+// nominate a scratch register in each class, specified in
+// the `suggested_scratch` field of the relevant
+// `RealRegUniverse::allocable_by_class[..]` entry. The choice of scratch
+// register is influenced by the architecture, the ABI, and client-side
+// fixed-use register conventions. The rules are as follows:
+//
+// (1) For each class, the universe must offer a reserved register.
+//
+// (2) The reserved register may not have any implied-by-the-architecture
+// reads/modifies/writes for any instruction in the vcode. Unfortunately
+// there is no easy way for this library to check that.
+//
+// (3) The reserved register must not have any reads or modifies by any
+// instruction in the vcode. In other words, it must not be handed to
+// either the `add_use` or `add_mod` function of the `RegUsageCollector`
+// that is presented to the client's `get_regs` function. If any such
+// mention is detected, the library will return an error.
+//
+// (4) The reserved reg may be mentioned as written by instructions in the
+// vcode, though -- in other words it may be handed to `add_def`. The
+// library will tolerate and correctly handle that. However, because no
+// vcode instruction may read or modify the reserved register, all such
+// writes are "dead". This in turn guarantees that the allocator can, if
+// it wants, change the value in it at any time, without changing the
+// behaviour of the final generated code.
+//
+// Currently, the LinearScan algorithm may use the reserved registers. The
+// Backtracking algorithm will ignore the hints and treat them as "normal"
+// allocatable registers.
+
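+/// As a sketch (assuming a `universe: RealRegUniverse` already built by the
+/// embedder), the per-class register ranges are consulted like this, which
+/// mirrors what the allocators themselves do internally:
+///
+/// ```ignore
+/// let class_index = RegClass::I64 as usize;
+/// if let Some(ref info) = universe.allocable_by_class[class_index] {
+///     for reg in &universe.regs[info.first..=info.last] {
+///         // `reg.0` is the RealReg; callee-saved registers should appear
+///         // first within this slice.
+///         let _rreg: RealReg = reg.0;
+///     }
+/// }
+/// ```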
+pub use crate::data_structures::RealRegUniverse;
+pub use crate::data_structures::RegClassInfo;
+
+// A structure for collecting information about which registers each
+// instruction uses.
+
+pub use crate::data_structures::RegUsageCollector;
+
+/// A trait for providing mapping results for a given instruction.
+///
+/// This provides virtual to real register mappings for every mention in an instruction: use, mod
+/// or def. The main purpose of this trait is to be used when rewriting the instruction stream
+/// after register allocation has happened; see also `Function::map_regs`.
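+///
+/// As a minimal illustrative sketch (not part of this crate), a client could
+/// back the three queries with simple association lists; the `TrivialMapper`
+/// type below is hypothetical:
+///
+/// ```ignore
+/// #[derive(Debug)]
+/// struct TrivialMapper {
+///     pre: Vec<(VirtualReg, RealReg)>,  // mappings applying to uses
+///     post: Vec<(VirtualReg, RealReg)>, // mappings applying to defs
+/// }
+///
+/// impl RegUsageMapper for TrivialMapper {
+///     fn get_use(&self, vreg: VirtualReg) -> Option<RealReg> {
+///         self.pre.iter().find(|(v, _)| *v == vreg).map(|(_, r)| *r)
+///     }
+///     fn get_def(&self, vreg: VirtualReg) -> Option<RealReg> {
+///         self.post.iter().find(|(v, _)| *v == vreg).map(|(_, r)| *r)
+///     }
+///     fn get_mod(&self, vreg: VirtualReg) -> Option<RealReg> {
+///         // For a "mod", the pre- and post-mappings must agree, so either will do.
+///         self.get_use(vreg)
+///     }
+/// }
+/// ```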
+pub trait RegUsageMapper: fmt::Debug {
+    /// Return the `RealReg` if mapped, or `None`, for `vreg` occurring as a use
+ /// on the current instruction.
+ fn get_use(&self, vreg: VirtualReg) -> Option<RealReg>;
+
+    /// Return the `RealReg` if mapped, or `None`, for `vreg` occurring as a def
+ /// on the current instruction.
+ fn get_def(&self, vreg: VirtualReg) -> Option<RealReg>;
+
+    /// Return the `RealReg` if mapped, or `None`, for a `vreg` occurring as a
+ /// mod on the current instruction.
+ fn get_mod(&self, vreg: VirtualReg) -> Option<RealReg>;
+}
+
+// TypedIxVec, so that the interface can speak about vectors of blocks and
+// instructions.
+
+pub use crate::data_structures::TypedIxVec;
+pub use crate::data_structures::{BlockIx, InstIx, Range};
+
+/// A trait defined by the regalloc client to provide access to its
+/// machine-instruction / CFG representation.
+pub trait Function {
+ /// Regalloc is parameterized on F: Function and so can use the projected
+ /// type F::Inst.
+ type Inst: Clone + fmt::Debug;
+
+ // -------------
+ // CFG traversal
+ // -------------
+
+ /// Allow access to the underlying vector of instructions.
+ fn insns(&self) -> &[Self::Inst];
+
+ /// Get all instruction indices as an iterable range.
+ fn insn_indices(&self) -> Range<InstIx> {
+ Range::new(InstIx::new(0), self.insns().len())
+ }
+
+ /// Allow mutable access to the underlying vector of instructions.
+ fn insns_mut(&mut self) -> &mut [Self::Inst];
+
+ /// Get an instruction with a type-safe InstIx index.
+ fn get_insn(&self, insn: InstIx) -> &Self::Inst;
+
+ /// Get a mutable borrow of an instruction with the given type-safe InstIx
+ /// index.
+ fn get_insn_mut(&mut self, insn: InstIx) -> &mut Self::Inst;
+
+ /// Allow iteration over basic blocks (in instruction order).
+ fn blocks(&self) -> Range<BlockIx>;
+
+ /// Get the index of the entry block.
+ fn entry_block(&self) -> BlockIx;
+
+ /// Provide the range of instruction indices contained in each block.
+ fn block_insns(&self, block: BlockIx) -> Range<InstIx>;
+
+ /// Get CFG successors for a given block.
+ fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]>;
+
+ /// Determine whether an instruction is a return instruction.
+ fn is_ret(&self, insn: InstIx) -> bool;
+
+ /// Determine whether an instruction should be considered while computing
+ /// the set of registers that need to be saved/restored in the function's
+ /// prologue/epilogue, that is, the registers returned in
+    /// `clobbered_registers` in `RegAllocResult`. Only
+ /// instructions for which this function returns `true` will be used to
+ /// compute that set.
+ ///
+ /// One reason that a client might *not* want an instruction to be included
+ /// would be if it can handle the clobbers some other way: for example,
+ /// ABI-support code might exclude call instructions' defs and mods from the
+ /// clobber set, because (given the callee has same ABI as the caller) the
+ /// registers possibly written by the callee are all registers that the
+ /// caller is also allowed to clobber (not save/restore in
+ /// prologue/epilogue).
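+    ///
+    /// As an illustrative sketch (the `is_call` helper on the client's
+    /// instruction type is hypothetical), such an override could look like:
+    ///
+    /// ```ignore
+    /// fn is_included_in_clobbers(&self, insn: &Self::Inst) -> bool {
+    ///     // ABI code already accounts for call clobbers, so skip calls here.
+    ///     !insn.is_call()
+    /// }
+    /// ```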
+ fn is_included_in_clobbers(&self, _insn: &Self::Inst) -> bool {
+ // Default impl includes all instructions.
+ true
+ }
+
+ // --------------------------
+ // Instruction register slots
+ // --------------------------
+
+ /// Add to `collector` the used, defined, and modified registers for an
+ /// instruction.
+ fn get_regs(insn: &Self::Inst, collector: &mut RegUsageCollector);
+
+    /// Map each register slot through a virtual-to-real mapping indexed
+    /// by virtual register. The mapper provides separate mappings for uses
+    /// (via `get_use`; these semantically occur just prior to the
+    /// instruction's effect) and defs (via `get_def`; these semantically
+    /// occur just after the instruction's effect). Regs that were "modified"
+    /// (queried via `get_mod`) can use either mapping; it should be the same
+    /// in both.
+ ///
+ /// Note that this does not take a `self`, because we want to allow the
+ /// regalloc to have a mutable borrow of an insn (which borrows the whole
+ /// Function in turn) outstanding while calling this.
+ fn map_regs<RUM: RegUsageMapper>(insn: &mut Self::Inst, maps: &RUM);
+
+ /// Allow the regalloc to query whether this is a move. Returns (dst, src).
+ fn is_move(&self, insn: &Self::Inst) -> Option<(Writable<Reg>, Reg)>;
+
+ /// Get the precise number of `VirtualReg` in use in this function, to allow preallocating data
+ /// structures. This number *must* be a correct lower-bound, otherwise invalid index failures
+ /// may happen; it is of course better if it is exact.
+ fn get_num_vregs(&self) -> usize;
+
+ // --------------
+ // Spills/reloads
+ // --------------
+
+ /// How many logical spill slots does the given regclass require? E.g., on a
+ /// 64-bit machine, spill slots may nominally be 64-bit words, but a 128-bit
+ /// vector value will require two slots. The regalloc will always align on
+ /// this size.
+ ///
+ /// This passes the associated virtual register to the client as well,
+ /// because the way in which we spill a real register may depend on the
+ /// value that we are using it for. E.g., if a machine has V128 registers
+ /// but we also use them for F32 and F64 values, we may use a different
+ /// store-slot size and smaller-operand store/load instructions for an F64
+ /// than for a true V128.
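+    ///
+    /// A sketch for a hypothetical 64-bit target whose V128 values need two
+    /// 64-bit slots (the exact policy is the client's choice):
+    ///
+    /// ```ignore
+    /// fn get_spillslot_size(&self, regclass: RegClass, _for_vreg: VirtualReg) -> u32 {
+    ///     match regclass {
+    ///         RegClass::V128 => 2, // two 64-bit words
+    ///         _ => 1,              // everything else fits in one word
+    ///     }
+    /// }
+    /// ```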
+ fn get_spillslot_size(&self, regclass: RegClass, for_vreg: VirtualReg) -> u32;
+
+ /// Generate a spill instruction for insertion into the instruction
+ /// sequence. The associated virtual register (whose value is being spilled)
+ /// is passed, if it exists, so that the client may make decisions about the
+ /// instruction to generate based on the type of value in question. Because
+ /// the register allocator will insert spill instructions at arbitrary points,
+ /// the returned instruction here must not modify the machine's condition codes.
+ fn gen_spill(
+ &self,
+ to_slot: SpillSlot,
+ from_reg: RealReg,
+ for_vreg: Option<VirtualReg>,
+ ) -> Self::Inst;
+
+ /// Generate a reload instruction for insertion into the instruction
+ /// sequence. The associated virtual register (whose value is being loaded)
+ /// is passed as well, if it exists. The returned instruction must not modify
+ /// the machine's condition codes.
+ fn gen_reload(
+ &self,
+ to_reg: Writable<RealReg>,
+ from_slot: SpillSlot,
+ for_vreg: Option<VirtualReg>,
+ ) -> Self::Inst;
+
+ /// Generate a register-to-register move for insertion into the instruction
+ /// sequence. The associated virtual register is passed as well. The
+ /// returned instruction must not modify the machine's condition codes.
+ fn gen_move(
+ &self,
+ to_reg: Writable<RealReg>,
+ from_reg: RealReg,
+ for_vreg: VirtualReg,
+ ) -> Self::Inst;
+
+ /// Generate an instruction which is a no-op and has zero length.
+ fn gen_zero_len_nop(&self) -> Self::Inst;
+
+ /// Try to alter an existing instruction to use a value directly in a
+ /// spillslot (accessing memory directly) instead of the given register. May
+ /// be useful on ISAs that have mem/reg ops, like x86.
+ ///
+ /// Note that this is not *quite* just fusing a load with the op; if the
+ /// value is def'd or modified, it should be written back to the spill slot
+ /// as well. In other words, it is just using the spillslot as if it were a
+ /// real register, for reads and/or writes.
+ ///
+ /// FIXME JRS 2020Feb06: state precisely the constraints on condition code
+ /// changes.
+ fn maybe_direct_reload(
+ &self,
+ insn: &Self::Inst,
+ reg: VirtualReg,
+ slot: SpillSlot,
+ ) -> Option<Self::Inst>;
+
+ // ----------------------------------------------------------
+ // Function liveins, liveouts, and direct-mode real registers
+ // ----------------------------------------------------------
+
+ /// Return the set of registers that should be considered live at the
+ /// beginning of the function. This is semantically equivalent to an
+ /// instruction at the top of the entry block def'ing all registers in this
+ /// set.
+ fn func_liveins(&self) -> Set<RealReg>;
+
+ /// Return the set of registers that should be considered live at the
+ /// end of the function (after every return instruction). This is
+ /// semantically equivalent to an instruction at each block with no successors
+ /// that uses each of these registers.
+ fn func_liveouts(&self) -> Set<RealReg>;
+}
+
+/// The result of register allocation. Note that allocation can fail!
+pub struct RegAllocResult<F: Function> {
+ /// A new sequence of instructions with all register slots filled with real
+ /// registers, and spills/fills/moves possibly inserted (and unneeded moves
+ /// elided).
+ pub insns: Vec<F::Inst>,
+
+ /// Basic-block start indices for the new instruction list, indexed by the
+ /// original basic block indices. May be used by the client to, e.g., remap
+ /// branch targets appropriately.
+ pub target_map: TypedIxVec<BlockIx, InstIx>,
+
+ /// Full mapping from new instruction indices to original instruction
+ /// indices. May be needed by the client to, for example, update metadata
+ /// such as debug/source-location info as the instructions are spliced
+ /// and reordered.
+ ///
+ /// Each entry is an `InstIx`, but may be `InstIx::invalid_value()` if the
+ /// new instruction at this new index was inserted by the allocator
+ /// (i.e., if it is a load, spill or move instruction).
+ pub orig_insn_map: TypedIxVec</* new */ InstIx, /* orig */ InstIx>,
+
+ /// Which real registers were overwritten? This will contain all real regs
+ /// that appear as defs or modifies in register slots of the output
+ /// instruction list. This will only list registers that are available to
+ /// the allocator. If one of the instructions clobbers a register which
+ /// isn't available to the allocator, it won't be mentioned here.
+ pub clobbered_registers: Set<RealReg>,
+
+ /// How many spill slots were used?
+ pub num_spill_slots: u32,
+
+    /// Block annotation strings, for debugging. They are only produced if
+    /// requested in the call to `allocate_registers`. Creating these
+    /// annotations is potentially expensive, so don't request them if you
+    /// don't need them.
+ pub block_annotations: Option<TypedIxVec<BlockIx, Vec<String>>>,
+
+ /// If stackmap support was requested: one stackmap for each of the safepoint instructions
+ /// declared. Otherwise empty.
+ pub stackmaps: Vec<Vec<SpillSlot>>,
+
+ /// If stackmap support was requested: one InstIx for each safepoint instruction declared,
+ /// indicating the corresponding location in the final instruction stream. Otherwise empty.
+ pub new_safepoint_insns: Vec<InstIx>,
+}
+
+/// A choice of register allocation algorithm to run.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum AlgorithmWithDefaults {
+ Backtracking,
+ LinearScan,
+}
+
+pub use crate::analysis_main::AnalysisError;
+pub use crate::checker::{CheckerError, CheckerErrors};
+
+/// An error from the register allocator.
+#[derive(Clone, Debug)]
+pub enum RegAllocError {
+ OutOfRegisters(RegClass),
+ MissingSuggestedScratchReg(RegClass),
+ Analysis(AnalysisError),
+ RegChecker(CheckerErrors),
+ Other(String),
+}
+
+impl fmt::Display for RegAllocError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{:?}", self)
+ }
+}
+
+pub use crate::bt_main::BacktrackingOptions;
+pub use crate::linear_scan::LinearScanOptions;
+
+#[derive(Clone)]
+pub enum Algorithm {
+ LinearScan(LinearScanOptions),
+ Backtracking(BacktrackingOptions),
+}
+
+impl fmt::Debug for Algorithm {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ Algorithm::LinearScan(opts) => write!(fmt, "{:?}", opts),
+ Algorithm::Backtracking(opts) => write!(fmt, "{:?}", opts),
+ }
+ }
+}
+
+/// Tweakable options shared by all the allocators.
+#[derive(Clone)]
+pub struct Options {
+    /// Should the register allocator check that its results are valid? This adds
+    /// to compilation time, so it is disabled by default.
+ pub run_checker: bool,
+
+    /// Which algorithm should be used for register allocation? By default, this selects
+    /// backtracking, which makes compilation slower but produces code of better quality.
+ pub algorithm: Algorithm,
+}
+
+impl default::Default for Options {
+ fn default() -> Self {
+ Self {
+ run_checker: false,
+ algorithm: Algorithm::Backtracking(Default::default()),
+ }
+ }
+}
+
+impl fmt::Debug for Options {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "checker: {:?}, algorithm: {:?}",
+ self.run_checker, self.algorithm
+ )
+ }
+}
+
+/// A structure with which callers can request stackmap information.
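+///
+/// For example (a sketch; the vectors' contents are entirely up to the
+/// embedder and are elided here), a 64-bit client might build:
+///
+/// ```ignore
+/// let stackmap_request = StackmapRequestInfo {
+///     reftype_class: RegClass::I64,
+///     // Reftyped vregs, in ascending index order, all of class I64.
+///     reftyped_vregs: vec![/* ... */],
+///     // Safepoint instruction indices, in ascending order.
+///     safepoint_insns: vec![/* ... */],
+/// };
+/// ```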
+pub struct StackmapRequestInfo {
+    /// The register class that holds reftypes. This may only be RegClass::I32 or
+    /// RegClass::I64, and it must match the word size of the target architecture.
+ pub reftype_class: RegClass,
+
+ /// The virtual regs that hold reftyped values. These must be provided in ascending order
+ /// of register index and be duplicate-free. They must have class `reftype_class`.
+ pub reftyped_vregs: Vec<VirtualReg>,
+
+ /// The indices of instructions for which the allocator will construct stackmaps. These
+ /// must be provided in ascending order and be duplicate-free. The specified instructions
+ /// may not be coalescable move instructions (as the allocator may remove those) and they
+ /// may not modify any register carrying a reftyped value (they may "def" or "use" them,
+ /// though). The reason is that, at a safepoint, the client's garbage collector may change
+ /// the values of all live references, so it would be meaningless for a safepoint
+ /// instruction also to attempt to do that -- we'd end up with two competing new values.
+ pub safepoint_insns: Vec<InstIx>,
+}
+
+/// Allocate registers for a function's code, given a universe of real registers that we are
+/// allowed to use. Optionally, stackmap support may be requested.
+///
+/// The control flow graph must not contain any critical edges, that is, any edge coming from a
+/// block with multiple successors must not flow into a block with multiple predecessors. The
+/// embedder must have split critical edges before handing over the function to this function.
+/// Otherwise, an error will be returned.
+///
+/// Allocation may succeed, returning a `RegAllocResult` with the new instruction sequence, or
+/// it may fail, returning an error.
+///
+/// Runtime options can be passed to the allocators, through the use of [Options] for options
+/// common to all the backends. The choice of algorithm is done by passing a given [Algorithm]
+/// instance, with options tailored for each algorithm.
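+///
+/// A sketch of a typical call (assuming `func` and `universe` are provided by
+/// the embedder):
+///
+/// ```ignore
+/// let opts = Options {
+///     run_checker: false,
+///     algorithm: Algorithm::Backtracking(Default::default()),
+/// };
+/// let result = allocate_registers_with_opts(&mut func, &universe, None, opts)?;
+/// ```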
+#[inline(never)]
+pub fn allocate_registers_with_opts<F: Function>(
+ func: &mut F,
+ rreg_universe: &RealRegUniverse,
+ stackmap_info: Option<&StackmapRequestInfo>,
+ opts: Options,
+) -> Result<RegAllocResult<F>, RegAllocError> {
+ info!("");
+ info!("================ regalloc.rs: BEGIN function ================");
+ if log_enabled!(Level::Info) {
+ info!("with options: {:?}", opts);
+ let strs = rreg_universe.show();
+ info!("using RealRegUniverse:");
+ for s in strs {
+ info!(" {}", s);
+ }
+ }
+ // If stackmap support has been requested, perform some initial sanity checks.
+ if let Some(&StackmapRequestInfo {
+ reftype_class,
+ ref reftyped_vregs,
+ ref safepoint_insns,
+ }) = stackmap_info
+ {
+ if let Algorithm::LinearScan(_) = opts.algorithm {
+ return Err(RegAllocError::Other(
+ "stackmap request: not currently available for Linear Scan".to_string(),
+ ));
+ }
+ if reftype_class != RegClass::I64 && reftype_class != RegClass::I32 {
+ return Err(RegAllocError::Other(
+ "stackmap request: invalid reftype_class".to_string(),
+ ));
+ }
+ let num_avail_vregs = func.get_num_vregs();
+ for i in 0..reftyped_vregs.len() {
+ let vreg = &reftyped_vregs[i];
+ if vreg.get_class() != reftype_class {
+ return Err(RegAllocError::Other(
+ "stackmap request: invalid vreg class".to_string(),
+ ));
+ }
+ if vreg.get_index() >= num_avail_vregs {
+ return Err(RegAllocError::Other(
+ "stackmap request: out of range vreg".to_string(),
+ ));
+ }
+ if i > 0 && reftyped_vregs[i - 1].get_index() >= vreg.get_index() {
+ return Err(RegAllocError::Other(
+ "stackmap request: non-ascending vregs".to_string(),
+ ));
+ }
+ }
+ let num_avail_insns = func.insns().len();
+ for i in 0..safepoint_insns.len() {
+ let safepoint_iix = safepoint_insns[i];
+ if safepoint_iix.get() as usize >= num_avail_insns {
+ return Err(RegAllocError::Other(
+ "stackmap request: out of range safepoint insn".to_string(),
+ ));
+ }
+ if i > 0 && safepoint_insns[i - 1].get() >= safepoint_iix.get() {
+ return Err(RegAllocError::Other(
+ "stackmap request: non-ascending safepoint insns".to_string(),
+ ));
+ }
+ if func.is_move(func.get_insn(safepoint_iix)).is_some() {
+ return Err(RegAllocError::Other(
+ "stackmap request: safepoint insn is a move insn".to_string(),
+ ));
+ }
+ }
+ // We can't check here that reftyped regs are not changed by safepoint insns. That is
+ // done deep in the stackmap creation logic, for BT in `get_stackmap_artefacts_at`.
+ }
+
+ let run_checker = opts.run_checker;
+ let res = match &opts.algorithm {
+ Algorithm::Backtracking(opts) => {
+ bt_main::alloc_main(func, rreg_universe, stackmap_info, run_checker, opts)
+ }
+ Algorithm::LinearScan(opts) => linear_scan::run(func, rreg_universe, run_checker, opts),
+ };
+ info!("================ regalloc.rs: END function ================");
+ res
+}
+
+/// Allocate registers for a function's code, given a universe of real registers that we are
+/// allowed to use.
+///
+/// The control flow graph must not contain any critical edges, that is, any edge coming from a
+/// block with multiple successors must not flow into a block with multiple predecessors. The
+/// embedder must have split critical edges before handing over the function to this function.
+/// Otherwise, an error will be returned.
+///
+/// Allocation may succeed, returning a `RegAllocResult` with the new instruction sequence, or it may
+/// fail, returning an error.
+///
+/// This is a convenience function that uses the default options for the
+/// selected algorithm.
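+///
+/// The same call as above through this convenience entry point, using the
+/// default backtracking options (sketch; `func` and `universe` come from the
+/// embedder):
+///
+/// ```ignore
+/// let result = allocate_registers(
+///     &mut func,
+///     &universe,
+///     None, // no stackmap request
+///     AlgorithmWithDefaults::Backtracking,
+/// )?;
+/// ```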
+#[inline(never)]
+pub fn allocate_registers<F: Function>(
+ func: &mut F,
+ rreg_universe: &RealRegUniverse,
+ stackmap_info: Option<&StackmapRequestInfo>,
+ algorithm: AlgorithmWithDefaults,
+) -> Result<RegAllocResult<F>, RegAllocError> {
+ let algorithm = match algorithm {
+ AlgorithmWithDefaults::Backtracking => Algorithm::Backtracking(Default::default()),
+ AlgorithmWithDefaults::LinearScan => Algorithm::LinearScan(Default::default()),
+ };
+ let opts = Options {
+ algorithm,
+ ..Default::default()
+ };
+ allocate_registers_with_opts(func, rreg_universe, stackmap_info, opts)
+}
+
+// Facilities to snapshot regalloc inputs and reproduce them in regalloc.rs.
+pub use crate::snapshot::IRSnapshot;
diff --git a/third_party/rust/regalloc/src/linear_scan/analysis.rs b/third_party/rust/regalloc/src/linear_scan/analysis.rs
new file mode 100644
index 0000000000..9e109ef681
--- /dev/null
+++ b/third_party/rust/regalloc/src/linear_scan/analysis.rs
@@ -0,0 +1,853 @@
+use super::{FixedInterval, IntId, Intervals, Mention, MentionMap, VirtualInterval};
+use crate::{
+ analysis_control_flow::{CFGInfo, InstIxToBlockIxMap},
+ analysis_data_flow::{
+ calc_def_and_use, calc_livein_and_liveout, get_sanitized_reg_uses_for_func, reg_ix_to_reg,
+ reg_to_reg_ix,
+ },
+ data_structures::{BlockIx, InstPoint, RangeFragIx, RangeFragKind, Reg, RegVecsAndBounds},
+ sparse_set::SparseSet,
+ union_find::UnionFind,
+ AnalysisError, Function, RealRegUniverse, RegClass, TypedIxVec,
+};
+use log::{debug, info, log_enabled, Level};
+use smallvec::{smallvec, SmallVec};
+use std::{fmt, mem};
+
+#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub(crate) struct RangeFrag {
+ pub(crate) first: InstPoint,
+ pub(crate) last: InstPoint,
+ pub(crate) mentions: MentionMap,
+}
+
+impl fmt::Debug for RangeFrag {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "[{:?}; {:?}]", self.first, self.last)
+ }
+}
+
+impl RangeFrag {
+ fn new<F: Function>(
+ func: &F,
+ bix: BlockIx,
+ first: InstPoint,
+ last: InstPoint,
+ mentions: MentionMap,
+ ) -> (Self, RangeFragMetrics) {
+ debug_assert!(func.block_insns(bix).len() >= 1);
+ debug_assert!(func.block_insns(bix).contains(first.iix()));
+ debug_assert!(func.block_insns(bix).contains(last.iix()));
+ debug_assert!(first <= last);
+
+ let first_in_block = InstPoint::new_use(func.block_insns(bix).first());
+ let last_in_block = InstPoint::new_def(func.block_insns(bix).last());
+ let kind = match (first == first_in_block, last == last_in_block) {
+ (false, false) => RangeFragKind::Local,
+ (false, true) => RangeFragKind::LiveOut,
+ (true, false) => RangeFragKind::LiveIn,
+ (true, true) => RangeFragKind::Thru,
+ };
+
+ (
+ RangeFrag {
+ first,
+ last,
+ mentions,
+ },
+ RangeFragMetrics { bix, kind },
+ )
+ }
+
+ #[inline(always)]
+ #[cfg(debug_assertions)]
+ pub(crate) fn contains(&self, inst: &InstPoint) -> bool {
+ self.first <= *inst && *inst <= self.last
+ }
+}
+
+struct RangeFragMetrics {
+ bix: BlockIx,
+ kind: RangeFragKind,
+}
+
+pub(crate) struct AnalysisInfo {
+ /// The sanitized per-insn reg-use info.
+ pub(crate) reg_vecs_and_bounds: RegVecsAndBounds,
+ /// All the intervals, fixed or virtual.
+ pub(crate) intervals: Intervals,
+ /// Liveins per block.
+ pub(crate) liveins: TypedIxVec<BlockIx, SparseSet<Reg>>,
+ /// Liveouts per block.
+ pub(crate) liveouts: TypedIxVec<BlockIx, SparseSet<Reg>>,
+    /// Per-block loop depths.
+ pub(crate) _loop_depth: TypedIxVec<BlockIx, u32>,
+ /// Maps InstIxs to BlockIxs.
+ pub(crate) _inst_to_block_map: InstIxToBlockIxMap,
+}
+
+#[inline(never)]
+pub(crate) fn run<F: Function>(
+ func: &F,
+ reg_universe: &RealRegUniverse,
+) -> Result<AnalysisInfo, AnalysisError> {
+ info!(
+ "run_analysis: begin: {} blocks, {} insns",
+ func.blocks().len(),
+ func.insns().len()
+ );
+
+ // First do control flow analysis. This is (relatively) simple. Note that this can fail, for
+ // various reasons; we propagate the failure if so. Also create the InstIx-to-BlockIx map;
+ // this isn't really control-flow analysis, but needs to be done at some point.
+
+ info!(" run_analysis: begin control flow analysis");
+ let cfg_info = CFGInfo::create(func)?;
+ let inst_to_block_map = InstIxToBlockIxMap::new(func);
+ info!(" run_analysis: end control flow analysis");
+
+ info!(" run_analysis: begin data flow analysis");
+
+ // See `get_sanitized_reg_uses_for_func` for the meaning of "sanitized".
+ let reg_vecs_and_bounds = get_sanitized_reg_uses_for_func(func, reg_universe)
+ .map_err(|reg| AnalysisError::IllegalRealReg(reg))?;
+ assert!(reg_vecs_and_bounds.is_sanitized());
+
+ // Calculate block-local def/use sets.
+ let (def_sets_per_block, use_sets_per_block) =
+ calc_def_and_use(func, &reg_vecs_and_bounds, &reg_universe);
+ debug_assert!(def_sets_per_block.len() == func.blocks().len() as u32);
+ debug_assert!(use_sets_per_block.len() == func.blocks().len() as u32);
+
+ // Calculate live-in and live-out sets per block, using the traditional
+ // iterate-to-a-fixed-point scheme.
+ // `liveout_sets_per_block` is amended below for return blocks, hence `mut`.
+
+ let (livein_sets_per_block, mut liveout_sets_per_block) = calc_livein_and_liveout(
+ func,
+ &def_sets_per_block,
+ &use_sets_per_block,
+ &cfg_info,
+ &reg_universe,
+ );
+ debug_assert!(livein_sets_per_block.len() == func.blocks().len() as u32);
+ debug_assert!(liveout_sets_per_block.len() == func.blocks().len() as u32);
+
+ // Verify livein set of entry block against liveins specified by function (e.g., ABI params).
+ let func_liveins = SparseSet::from_vec(
+ func.func_liveins()
+ .to_vec()
+ .into_iter()
+ .map(|rreg| rreg.to_reg())
+ .collect(),
+ );
+ if !livein_sets_per_block[func.entry_block()].is_subset_of(&func_liveins) {
+ let mut regs = livein_sets_per_block[func.entry_block()].clone();
+ regs.remove(&func_liveins);
+ return Err(AnalysisError::EntryLiveinValues(regs.to_vec()));
+ }
+
+ // Add function liveouts to every block ending in a return.
+ let func_liveouts = SparseSet::from_vec(
+ func.func_liveouts()
+ .to_vec()
+ .into_iter()
+ .map(|rreg| rreg.to_reg())
+ .collect(),
+ );
+ for block in func.blocks() {
+ let last_iix = func.block_insns(block).last();
+ if func.is_ret(last_iix) {
+ liveout_sets_per_block[block].union(&func_liveouts);
+ }
+ }
+
+ info!(" run_analysis: end data flow analysis");
+
+ info!(" run_analysis: begin liveness analysis");
+ let (frag_ixs_per_reg, mut frag_env, frag_metrics_env, vreg_classes) = get_range_frags(
+ func,
+ &reg_vecs_and_bounds,
+ &reg_universe,
+ &livein_sets_per_block,
+ &liveout_sets_per_block,
+ );
+
+ let (mut fixed_intervals, virtual_intervals) = merge_range_frags(
+ &reg_universe,
+ &frag_ixs_per_reg,
+ &mut frag_env,
+ &frag_metrics_env,
+ &cfg_info,
+ &vreg_classes,
+ );
+ info!(" run_analysis: end liveness analysis");
+
+ // Finalize interval construction by doing some last minute sort of the fixed intervals.
+ for fixed in fixed_intervals.iter_mut() {
+ fixed.frags.sort_unstable_by_key(|frag| frag.first);
+ }
+ let intervals = Intervals {
+ virtuals: virtual_intervals,
+ fixeds: fixed_intervals,
+ };
+
+ info!("run_analysis: end");
+
+ Ok(AnalysisInfo {
+ reg_vecs_and_bounds,
+ intervals,
+ liveins: livein_sets_per_block,
+ liveouts: liveout_sets_per_block,
+ _loop_depth: cfg_info.depth_map,
+ _inst_to_block_map: inst_to_block_map,
+ })
+}
+
+/// Calculate all the RangeFrags for `bix`. Add them to `out_frags` and
+/// corresponding metrics data to `out_frag_metrics`. Add to `out_map` the
+/// associated RangeFragIxs, segregated by Reg. `bix`, `livein`, `liveout` and
+/// `rvb` are expected to be valid in the context of the Func `f` (duh!).
+#[inline(never)]
+fn get_range_frags_for_block<F: Function>(
+ func: &F,
+ rvb: &RegVecsAndBounds,
+ reg_universe: &RealRegUniverse,
+ vreg_classes: &Vec<RegClass>,
+ bix: BlockIx,
+ livein: &SparseSet<Reg>,
+ liveout: &SparseSet<Reg>,
+ // Temporary state reusable across function calls.
+ visited: &mut Vec<u32>,
+ state: &mut Vec</*rreg index, then vreg index, */ Option<RangeFrag>>,
+ // Effectively results.
+ out_map: &mut Vec<SmallVec<[RangeFragIx; 8]>>,
+ out_frags: &mut Vec<RangeFrag>,
+ out_frag_metrics: &mut Vec<RangeFragMetrics>,
+) {
+ let mut emit_range_frag =
+ |r: Reg, frag: RangeFrag, frag_metrics: RangeFragMetrics, num_real_regs: u32| {
+ let fix = RangeFragIx::new(out_frags.len() as u32);
+ out_frags.push(frag);
+ out_frag_metrics.push(frag_metrics);
+
+ let out_map_index = reg_to_reg_ix(num_real_regs, r) as usize;
+ out_map[out_map_index].push(fix);
+ };
+
+ // Some handy constants.
+ debug_assert!(func.block_insns(bix).len() >= 1);
+ let first_pt_in_block = InstPoint::new_use(func.block_insns(bix).first());
+ let last_pt_in_block = InstPoint::new_def(func.block_insns(bix).last());
+
+ // Clear the running state.
+ visited.clear();
+
+ let num_real_regs = reg_universe.regs.len() as u32;
+
+ // First, set up `state` as if all of `livein` had been written just prior to the block.
+ for r in livein.iter() {
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+ debug_assert!(state[r_state_ix].is_none());
+ state[r_state_ix] = Some(RangeFrag {
+ mentions: MentionMap::new(),
+ first: first_pt_in_block,
+ last: first_pt_in_block,
+ });
+ visited.push(r_state_ix as u32);
+ }
+
+ // Now visit each instruction in turn, examining first the registers it reads, then those it
+ // modifies, and finally those it writes.
+ for iix in func.block_insns(bix) {
+ let bounds_for_iix = &rvb.bounds[iix];
+
+ // Examine reads: they extend an existing RangeFrag to the U point of the reading
+ // insn.
+ for i in bounds_for_iix.uses_start as usize
+ ..bounds_for_iix.uses_start as usize + bounds_for_iix.uses_len as usize
+ {
+ let r = &rvb.vecs.uses[i];
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+
+ // There has to be an entry, otherwise we'd do a read of a register not listed in
+ // liveins.
+ let pf = match &mut state[r_state_ix] {
+ None => panic!("get_range_frags_for_block: fail #1"),
+ Some(ref mut pf) => pf,
+ };
+
+            // This is the first or subsequent read after a write. Note that the "write" can be
+ // either a real write, or due to the fact that `r` is listed in `livein`. We don't
+ // care here.
+ let new_last = InstPoint::new_use(iix);
+ debug_assert!(pf.last <= new_last);
+ pf.last = new_last;
+
+ // This first loop iterates over all the uses for the first time, so there shouldn't be
+ // any duplicates.
+ debug_assert!(!pf.mentions.iter().any(|tuple| tuple.0 == iix));
+ let mut mention_set = Mention::new();
+ mention_set.add_use();
+ pf.mentions.push((iix, mention_set));
+ }
+
+ // Examine modifies. These are handled almost identically to
+ // reads, except that they extend an existing RangeFrag down to
+ // the D point of the modifying insn.
+ for i in bounds_for_iix.mods_start as usize
+ ..bounds_for_iix.mods_start as usize + bounds_for_iix.mods_len as usize
+ {
+ let r = &rvb.vecs.mods[i];
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+
+ // There has to be an entry here too.
+ let pf = match &mut state[r_state_ix] {
+ None => panic!("get_range_frags_for_block: fail #2"),
+ Some(ref mut pf) => pf,
+ };
+
+            // This is the first or subsequent modify after a write.
+ let new_last = InstPoint::new_def(iix);
+ debug_assert!(pf.last <= new_last);
+ pf.last = new_last;
+
+ pf.mentions.push((iix, {
+ let mut mention_set = Mention::new();
+ mention_set.add_mod();
+ mention_set
+ }));
+ }
+
+ // Examine writes (but not writes implied by modifies). The general idea is that a write
+ // causes us to terminate the existing RangeFrag, if any, add it to the results,
+ // and start a new frag.
+ for i in bounds_for_iix.defs_start as usize
+ ..bounds_for_iix.defs_start as usize + bounds_for_iix.defs_len as usize
+ {
+ let r = &rvb.vecs.defs[i];
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+
+ match &mut state[r_state_ix] {
+ // First mention of a Reg we've never heard of before.
+ // Start a new RangeFrag for it and keep going.
+ None => {
+ let new_pt = InstPoint::new_def(iix);
+ let mut mention_set = Mention::new();
+ mention_set.add_def();
+ state[r_state_ix] = Some(RangeFrag {
+ first: new_pt,
+ last: new_pt,
+ mentions: smallvec![(iix, mention_set)],
+ })
+ }
+
+ // There's already a RangeFrag for `r`. This write will start a new one, so
+ // flush the existing one and note this write.
+ Some(RangeFrag {
+ ref mut first,
+ ref mut last,
+ ref mut mentions,
+ }) => {
+ // Steal the mentions and replace the mutable ref by an empty vector for reuse.
+ let stolen_mentions = mem::replace(mentions, MentionMap::new());
+
+ let (frag, frag_metrics) =
+ RangeFrag::new(func, bix, *first, *last, stolen_mentions);
+ emit_range_frag(*r, frag, frag_metrics, num_real_regs);
+
+ let mut mention_set = Mention::new();
+ mention_set.add_def();
+ mentions.push((iix, mention_set));
+
+ // Reuse the previous entry for this new definition of the same vreg.
+ let new_pt = InstPoint::new_def(iix);
+ *first = new_pt;
+ *last = new_pt;
+ }
+ }
+
+ visited.push(r_state_ix as u32);
+ }
+ }
+
+ // We are at the end of the block. We still have to deal with live-out Regs. We must also
+ // deal with RangeFrag in `state` that are for registers not listed as live-out.
+
+ // Deal with live-out Regs. Treat each one as if it is read just after the block.
+ for r in liveout.iter() {
+ // Remove the entry from `state` so that the following loop doesn't process it again.
+ let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
+ let entry = mem::replace(&mut state[r_state_ix], None);
+ match entry {
+ None => panic!("get_range_frags_for_block: fail #3"),
+ Some(pf) => {
+ let (frag, frag_metrics) =
+ RangeFrag::new(func, bix, pf.first, last_pt_in_block, pf.mentions);
+ emit_range_frag(*r, frag, frag_metrics, num_real_regs);
+ }
+ }
+ }
+
+ // Finally, round up any remaining RangeFrag left in `state`.
+ for r_state_ix in visited {
+ if let Some(pf) = &mut state[*r_state_ix as usize] {
+ let r = reg_ix_to_reg(reg_universe, vreg_classes, *r_state_ix);
+ let (frag, frag_metrics) = RangeFrag::new(
+ func,
+ bix,
+ pf.first,
+ pf.last,
+ mem::replace(&mut pf.mentions, MentionMap::new()),
+ );
+ emit_range_frag(r, frag, frag_metrics, num_real_regs);
+ state[*r_state_ix as usize] = None;
+ }
+ }
+}
+
+#[inline(never)]
+fn get_range_frags<F: Function>(
+ func: &F,
+ rvb: &RegVecsAndBounds,
+ reg_universe: &RealRegUniverse,
+ liveins: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ liveouts: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+) -> (
+ Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
+ Vec<RangeFrag>,
+ Vec<RangeFragMetrics>,
+ Vec</*vreg index,*/ RegClass>,
+) {
+ info!(" get_range_frags: begin");
+ debug_assert!(liveins.len() == func.blocks().len() as u32);
+ debug_assert!(liveouts.len() == func.blocks().len() as u32);
+ debug_assert!(rvb.is_sanitized());
+
+ let mut vreg_classes = vec![RegClass::INVALID; func.get_num_vregs()];
+ for r in rvb
+ .vecs
+ .uses
+ .iter()
+ .chain(rvb.vecs.defs.iter())
+ .chain(rvb.vecs.mods.iter())
+ {
+ if r.is_real() {
+ continue;
+ }
+ let r_ix = r.get_index();
+ let vreg_classes_ptr = &mut vreg_classes[r_ix];
+ if *vreg_classes_ptr == RegClass::INVALID {
+ *vreg_classes_ptr = r.get_class();
+ } else {
+ debug_assert_eq!(*vreg_classes_ptr, r.get_class());
+ }
+ }
+
+ let num_real_regs = reg_universe.regs.len();
+ let num_virtual_regs = vreg_classes.len();
+ let num_regs = num_real_regs + num_virtual_regs;
+
+ // Reused by the function below.
+ let mut tmp_state = vec![None; num_regs];
+ let mut tmp_visited = Vec::with_capacity(32);
+
+ let mut result_map = vec![SmallVec::new(); num_regs];
+ let mut result_frags = Vec::new();
+ let mut result_frag_metrics = Vec::new();
+ for bix in func.blocks() {
+ get_range_frags_for_block(
+ func,
+ &rvb,
+ reg_universe,
+ &vreg_classes,
+ bix,
+ &liveins[bix],
+ &liveouts[bix],
+ &mut tmp_visited,
+ &mut tmp_state,
+ &mut result_map,
+ &mut result_frags,
+ &mut result_frag_metrics,
+ );
+ }
+
+ assert!(tmp_state.len() == num_regs);
+ assert!(result_map.len() == num_regs);
+ assert!(vreg_classes.len() == num_virtual_regs);
+ // This is pretty cheap (once per fn) and any failure will be catastrophic since it means we
+ // may have forgotten some live range fragments. Hence `assert!` and not `debug_assert!`.
+ for state_elem in &tmp_state {
+ assert!(state_elem.is_none());
+ }
+
+ if log_enabled!(Level::Debug) {
+ debug!("");
+ let mut n = 0;
+ for frag in result_frags.iter() {
+ debug!("{:<3?} {:?}", RangeFragIx::new(n), frag);
+ n += 1;
+ }
+
+ debug!("");
+ for (reg_ix, frag_ixs) in result_map.iter().enumerate() {
+ if frag_ixs.len() == 0 {
+ continue;
+ }
+ let reg = reg_ix_to_reg(reg_universe, &vreg_classes, reg_ix as u32);
+ debug!(
+ "frags for {} {:?}",
+ reg.show_with_rru(reg_universe),
+ frag_ixs
+ );
+ }
+ }
+
+ info!(" get_range_frags: end");
+ assert!(result_frags.len() == result_frag_metrics.len());
+
+ (result_map, result_frags, result_frag_metrics, vreg_classes)
+}
+
+#[inline(never)]
+fn merge_range_frags(
+ reg_universe: &RealRegUniverse,
+ frag_ix_vec_per_reg: &[SmallVec<[RangeFragIx; 8]>],
+ frag_env: &mut Vec<RangeFrag>,
+ frag_metrics_env: &Vec<RangeFragMetrics>,
+ cfg_info: &CFGInfo,
+ vreg_classes: &Vec</*vreg index,*/ RegClass>,
+) -> (Vec<FixedInterval>, Vec<VirtualInterval>) {
+ info!(" merge_range_frags: begin");
+ if log_enabled!(Level::Info) {
+ let mut stats_num_total_incoming_frags = 0;
+ for all_frag_ixs_for_reg in frag_ix_vec_per_reg.iter() {
+ stats_num_total_incoming_frags += all_frag_ixs_for_reg.len();
+ }
+ info!(" in: {} in frag_env", frag_env.len());
+ info!(
+ " in: {} regs containing in total {} frags",
+ frag_ix_vec_per_reg.len(),
+ stats_num_total_incoming_frags
+ );
+ }
+
+ debug_assert!(frag_env.len() == frag_metrics_env.len());
+
+ // Prefill fixed intervals, one per real register.
+ let mut result_fixed = Vec::with_capacity(reg_universe.regs.len() as usize);
+ for rreg in reg_universe.regs.iter() {
+ result_fixed.push(FixedInterval {
+ reg: rreg.0,
+ frags: Vec::new(),
+ });
+ }
+
+ let mut result_virtual = Vec::new();
+
+ let mut triples = Vec::<(RangeFragIx, RangeFragKind, BlockIx)>::new();
+
+ // BEGIN per_reg_loop
+ for (reg_ix, all_frag_ixs_for_reg) in frag_ix_vec_per_reg.iter().enumerate() {
+ let reg = reg_ix_to_reg(reg_universe, vreg_classes, reg_ix as u32);
+
+ let num_reg_frags = all_frag_ixs_for_reg.len();
+
+ // The reg might never have been mentioned at all, especially if it's a real reg.
+ if num_reg_frags == 0 {
+ continue;
+ }
+
+ // Do some shortcutting. First off, if there's only one frag for this reg, we can directly
+ // give it its own live range, and have done.
+ if num_reg_frags == 1 {
+ flush_interval(
+ &mut result_fixed,
+ &mut result_virtual,
+ reg,
+ all_frag_ixs_for_reg,
+ frag_env,
+ );
+ continue;
+ }
+
+ // BEGIN merge `all_frag_ixs_for_reg` entries as much as possible.
+        // But if we come across independents (RangeFragKind::Local), pull them
+        // out immediately.
+ triples.clear();
+
+ // Create `triples`. We will use it to guide the merging phase, but it is immutable there.
+ for fix in all_frag_ixs_for_reg {
+ let frag_metrics = &frag_metrics_env[fix.get() as usize];
+
+ if frag_metrics.kind == RangeFragKind::Local {
+ // This frag is Local (standalone). Give it its own Range and move on. This is an
+ // optimisation, but it's also necessary: the main fragment-merging logic below
+ // relies on the fact that the fragments it is presented with are all either
+ // LiveIn, LiveOut or Thru.
+ flush_interval(
+ &mut result_fixed,
+ &mut result_virtual,
+ reg,
+ &[*fix],
+ frag_env,
+ );
+ continue;
+ }
+
+ // This frag isn't Local (standalone) so we have to process it the slow way.
+ triples.push((*fix, frag_metrics.kind, frag_metrics.bix));
+ }
+
+ let triples_len = triples.len();
+
+ // This is the core of the merging algorithm.
+ //
+ // For each ix@(fix, kind, bix) in `triples` (order unimportant):
+ //
+ // (1) "Merge with blocks that are live 'downstream' from here":
+ // if fix is live-out or live-through:
+ // for b in succs[bix]
+ // for each ix2@(fix2, kind2, bix2) in `triples`
+ // if bix2 == b && kind2 is live-in or live-through:
+ // merge(ix, ix2)
+ //
+ // (2) "Merge with blocks that are live 'upstream' from here":
+ // if fix is live-in or live-through:
+ // for b in preds[bix]
+ // for each ix2@(fix2, kind2, bix2) in `triples`
+ // if bix2 == b && kind2 is live-out or live-through:
+ // merge(ix, ix2)
+ //
+ // `triples` remains unchanged. The equivalence class info is accumulated
+ // in `eclasses_uf` instead. `eclasses_uf` entries are indices into
+ // `triples`.
+ //
+ // Now, you might think it necessary to do both (1) and (2). But no, they
+ // are mutually redundant, since if two blocks are connected by a live
+ // flow from one to the other, then they are also connected in the other
+ // direction. Hence checking one of the directions is enough.
+ let mut eclasses_uf = UnionFind::<usize>::new(triples_len);
+
+ // We have two schemes for group merging, one of which is N^2 in the
+ // length of triples, the other is N-log-N, but with higher constant
+ // factors. Some experimentation with the bz2 test on a Cortex A57 puts
+ // the optimal crossover point between 200 and 300; it's not critical.
+ // Having this protects us against bad behaviour for huge inputs whilst
+ // still being fast for small inputs.
+ if triples_len <= 250 {
+ // The simple way, which is N^2 in the length of `triples`.
+ for (ix, (_fix, kind, bix)) in triples.iter().enumerate() {
+ // Deal with liveness flows outbound from `fix`. Meaning, (1) above.
+ if *kind == RangeFragKind::LiveOut || *kind == RangeFragKind::Thru {
+ for b in cfg_info.succ_map[*bix].iter() {
+ // Visit all entries in `triples` that are for `b`.
+ for (ix2, (_fix2, kind2, bix2)) in triples.iter().enumerate() {
+ if *bix2 != *b || *kind2 == RangeFragKind::LiveOut {
+ continue;
+ }
+ debug_assert!(
+ *kind2 == RangeFragKind::LiveIn || *kind2 == RangeFragKind::Thru
+ );
+ // Now we know that liveness for this reg "flows" from `triples[ix]` to
+ // `triples[ix2]`. So those two frags must be part of the same live
+ // range. Note this.
+ if ix != ix2 {
+ eclasses_uf.union(ix, ix2); // Order of args irrelevant
+ }
+ }
+ }
+ }
+ } // outermost iteration over `triples`
+ } else {
+ // The more complex way, which is N-log-N in the length of `triples`. This is the same
+ // as the simple way, except that the innermost loop, which is a linear search in
+ // `triples` to find entries for some block `b`, is replaced by a binary search. This
+ // means that `triples` first needs to be sorted by block index.
+ triples.sort_unstable_by_key(|(_, _, bix)| *bix);
+
+ for (ix, (_fix, kind, bix)) in triples.iter().enumerate() {
+ // Deal with liveness flows outbound from `fix`. Meaning, (1) above.
+ if *kind == RangeFragKind::LiveOut || *kind == RangeFragKind::Thru {
+ for b in cfg_info.succ_map[*bix].iter() {
+ // Visit all entries in `triples` that are for `b`. Binary search
+ // `triples` to find the lowest-indexed entry for `b`.
+ let mut ix_left = 0;
+ let mut ix_right = triples_len;
+ while ix_left < ix_right {
+ let m = (ix_left + ix_right) >> 1;
+ if triples[m].2 < *b {
+ ix_left = m + 1;
+ } else {
+ ix_right = m;
+ }
+ }
+
+ // It might be that there is no block for `b` in the sequence. That's
+ // legit; it just means that block `bix` jumps to a successor where the
+ // associated register isn't live-in/thru. A failure to find `b` can be
+                    // indicated in one of two ways:
+                    //
+                    // * ix_left == triples_len
+                    // * ix_left < triples_len and b < triples[ix_left].2
+                    //
+                    // In both cases I *think* the loop over entries for `b` below will not do
+ // anything. But this is all a bit hairy, so let's convert the second
+ // variant into the first, so as to make it obvious that the loop won't do
+ // anything.
+
+ // ix_left now holds the lowest index of any `triples` entry for block `b`.
+ // Assert this.
+ if ix_left < triples_len && *b < triples[ix_left].2 {
+ ix_left = triples_len;
+ }
+ if ix_left < triples_len {
+ assert!(ix_left == 0 || triples[ix_left - 1].2 < *b);
+ }
+
+ // ix2 plays the same role as in the quadratic version. ix_left and
+ // ix_right are not used after this point.
+ let mut ix2 = ix_left;
+ loop {
+ let (_fix2, kind2, bix2) = match triples.get(ix2) {
+ None => break,
+ Some(triple) => *triple,
+ };
+ if *b < bix2 {
+ // We've come to the end of the sequence of `b`-blocks.
+ break;
+ }
+ debug_assert!(*b == bix2);
+ if kind2 == RangeFragKind::LiveOut {
+ ix2 += 1;
+ continue;
+ }
+ // Now we know that liveness for this reg "flows" from `triples[ix]` to
+ // `triples[ix2]`. So those two frags must be part of the same live
+ // range. Note this.
+ eclasses_uf.union(ix, ix2);
+ ix2 += 1;
+ }
+
+ if ix2 + 1 < triples_len {
+ debug_assert!(*b < triples[ix2 + 1].2);
+ }
+ }
+ }
+ }
+ }
+
+ // Now `eclasses_uf` contains the results of the merging-search. Visit each of its
+ // equivalence classes in turn, and convert each into a virtual or real live range as
+ // appropriate.
+ let eclasses = eclasses_uf.get_equiv_classes();
+ for leader_triple_ix in eclasses.equiv_class_leaders_iter() {
+ // `leader_triple_ix` is an eclass leader. Enumerate the whole eclass.
+ let mut frag_ixs = SmallVec::<[RangeFragIx; 4]>::new();
+ for triple_ix in eclasses.equiv_class_elems_iter(leader_triple_ix) {
+ frag_ixs.push(triples[triple_ix].0 /*first field is frag ix*/);
+ }
+ flush_interval(
+ &mut result_fixed,
+ &mut result_virtual,
+ reg,
+ &frag_ixs,
+ frag_env,
+ );
+ }
+ // END merge `all_frag_ixs_for_reg` entries as much as possible
+ } // END per reg loop
+
+ info!(" merge_range_frags: end");
+
+ (result_fixed, result_virtual)
+}
+
+#[inline(never)]
+fn flush_interval(
+ result_real: &mut Vec<FixedInterval>,
+ result_virtual: &mut Vec<VirtualInterval>,
+ reg: Reg,
+ frag_ixs: &[RangeFragIx],
+ frags: &mut Vec<RangeFrag>,
+) {
+ if reg.is_real() {
+ // Append all the RangeFrags to this fixed interval. They'll get sorted later.
+ result_real[reg.to_real_reg().get_index()]
+ .frags
+ .extend(frag_ixs.iter().map(|&i| {
+ let frag = &mut frags[i.get() as usize];
+ RangeFrag {
+ first: frag.first,
+ last: frag.last,
+ mentions: mem::replace(&mut frag.mentions, MentionMap::new()),
+ }
+ }));
+ return;
+ }
+
+ debug_assert!(reg.is_virtual());
+
+ let (start, end, mentions) = {
+ // Merge all the mentions together.
+ let capacity = frag_ixs
+ .iter()
+ .map(|fix| frags[fix.get() as usize].mentions.len())
+ .fold(0, |a, b| a + b);
+
+ let mut start = InstPoint::max_value();
+ let mut end = InstPoint::min_value();
+
+ // TODO rework this!
+ let mut mentions = MentionMap::with_capacity(capacity);
+ for frag in frag_ixs.iter().map(|fix| &frags[fix.get() as usize]) {
+ mentions.extend(frag.mentions.iter().cloned());
+ start = InstPoint::min(start, frag.first);
+ end = InstPoint::max(end, frag.last);
+ }
+ mentions.sort_unstable_by_key(|tuple| tuple.0);
+
+        // Merge mention sets that are at the same instruction.
+ let mut s = 0;
+ let mut e;
+ let mut to_remove = Vec::new();
+ while s < mentions.len() {
+ e = s;
+ while e + 1 < mentions.len() && mentions[s].0 == mentions[e + 1].0 {
+ e += 1;
+ }
+ if s != e {
+ let mut i = s + 1;
+ while i <= e {
+ if mentions[i].1.is_use() {
+ mentions[s].1.add_use();
+ }
+ if mentions[i].1.is_mod() {
+ mentions[s].1.add_mod();
+ }
+ if mentions[i].1.is_def() {
+ mentions[s].1.add_def();
+ }
+ i += 1;
+ }
+ for i in s + 1..=e {
+ to_remove.push(i);
+ }
+ }
+ s = e + 1;
+ }
+
+ for &i in to_remove.iter().rev() {
+ // TODO not efficient.
+ mentions.remove(i);
+ }
+
+ (start, end, mentions)
+ };
+
+ let id = IntId(result_virtual.len());
+ let mut int = VirtualInterval::new(id, reg.to_virtual_reg(), start, end, mentions);
+ int.ancestor = Some(id);
+
+ result_virtual.push(int);
+}
diff --git a/third_party/rust/regalloc/src/linear_scan/assign_registers.rs b/third_party/rust/regalloc/src/linear_scan/assign_registers.rs
new file mode 100644
index 0000000000..dd57ffed48
--- /dev/null
+++ b/third_party/rust/regalloc/src/linear_scan/assign_registers.rs
@@ -0,0 +1,1248 @@
+use super::{
+ last_use, next_use, IntId, Intervals, Mention, MentionMap, OptimalSplitStrategy, RegUses,
+ Statistics, VirtualInterval,
+};
+use crate::{
+ data_structures::{InstPoint, Point, RegVecsAndBounds},
+ Function, InstIx, LinearScanOptions, RealReg, RealRegUniverse, Reg, RegAllocError, SpillSlot,
+ VirtualReg, NUM_REG_CLASSES,
+};
+
+use log::{debug, info, log_enabled, trace, Level};
+use rustc_hash::FxHashMap as HashMap;
+use smallvec::SmallVec;
+use std::collections::BinaryHeap;
+use std::{cmp, cmp::Ordering, fmt};
+
+macro_rules! lsra_assert {
+ ($arg:expr) => {
+ #[cfg(debug_assertions)]
+ debug_assert!($arg);
+ };
+
+ ($arg:expr, $text:expr) => {
+ #[cfg(debug_assertions)]
+ debug_assert!($arg, $text);
+ };
+}
+
+#[derive(Clone, Copy, PartialEq)]
+enum ActiveInt {
+ Virtual(IntId),
+ Fixed((RealReg, usize)),
+}
+
+impl fmt::Debug for ActiveInt {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ ActiveInt::Virtual(id) => write!(fmt, "virtual({:?})", id),
+ ActiveInt::Fixed((rreg, _)) => write!(fmt, "real({:?})", rreg),
+ }
+ }
+}
+
+struct ActivityTracker {
+ /// Intervals that are covering the current interval's start position.
+ /// TODO Invariant: they always have a register attached to them.
+ active: Vec<ActiveInt>,
+
+ /// Intervals that are not covering but end after the current interval's start position.
+ /// None means that the interval may have fragments, but they all live after the current
+ /// position.
+ /// TODO Invariant: they're all fixed registers, so they must have a register attached to them.
+ inactive: Vec<(RealReg, usize)>,
+}
+
+impl ActivityTracker {
+ fn new(intervals: &Intervals) -> Self {
+ let mut inactive = Vec::with_capacity(intervals.fixeds.len());
+ for fixed in &intervals.fixeds {
+ if !fixed.frags.is_empty() {
+ inactive.push((fixed.reg, 0))
+ }
+ }
+
+ Self {
+ active: Vec::new(),
+ inactive,
+ }
+ }
+
+ fn set_active(&mut self, id: IntId) {
+ self.active.push(ActiveInt::Virtual(id));
+ }
+
+ fn update(&mut self, start: InstPoint, stats: &mut Option<Statistics>, intervals: &Intervals) {
+ // From active, only possible transitions are to active or expired.
+ // From inactive, only possible transitions are to inactive, active or expired.
+ // => active has an upper bound.
+ // => inactive only shrinks.
+ let mut to_delete: SmallVec<[usize; 16]> = SmallVec::new();
+ let mut new_inactive: SmallVec<[(RealReg, usize); 16]> = SmallVec::new();
+
+ for (i, id) in self.active.iter_mut().enumerate() {
+ match id {
+ ActiveInt::Virtual(int_id) => {
+ let int = intervals.get(*int_id);
+
+ if int.location.spill().is_some() {
+ // TODO these shouldn't appear here...
+ to_delete.push(i);
+ continue;
+ }
+ //debug_assert!(int.location.spill().is_none(), "active int must have a reg");
+
+ if int.end < start {
+ // It's expired, forget about it.
+ to_delete.push(i);
+ } else {
+ // Stays active.
+ lsra_assert!(int.covers(start), "no active to inactive transition");
+ }
+ }
+
+ ActiveInt::Fixed((rreg, ref mut fix)) => {
+ // Possible transitions: active => { active, inactive, expired }.
+ let frags = &intervals.fixeds[rreg.get_index()].frags;
+
+ // Fast-forward to the first fragment that contains or is after start.
+ while *fix < frags.len() && start > frags[*fix].last {
+ *fix += 1;
+ }
+
+ if *fix == frags.len() {
+ // It expired, remove it from the active list.
+ to_delete.push(i);
+ } else if start < frags[*fix].first {
+ // It is now inactive.
+ lsra_assert!(!frags[*fix].contains(&start));
+ new_inactive.push((*rreg, *fix));
+ to_delete.push(i);
+ } else {
+ // Otherwise, it's still active.
+ lsra_assert!(frags[*fix].contains(&start));
+ }
+ }
+ }
+ }
+
+ for &i in to_delete.iter().rev() {
+ self.active.swap_remove(i);
+ }
+ to_delete.clear();
+
+ for (i, (rreg, fix)) in self.inactive.iter_mut().enumerate() {
+ // Possible transitions: inactive => { active, inactive, expired }.
+ let frags = &intervals.fixeds[rreg.get_index()].frags;
+
+ // Fast-forward to the first fragment that contains or is after start.
+ while *fix < frags.len() && start > frags[*fix].last {
+ *fix += 1;
+ }
+
+ if *fix == frags.len() {
+ // It expired, remove it from the inactive list.
+ to_delete.push(i);
+ } else if start >= frags[*fix].first {
+ // It is now active.
+ lsra_assert!(frags[*fix].contains(&start));
+ self.active.push(ActiveInt::Fixed((*rreg, *fix)));
+ to_delete.push(i);
+ } else {
+ // Otherwise it remains inactive.
+ lsra_assert!(!frags[*fix].contains(&start));
+ }
+ }
+
+ for &i in to_delete.iter().rev() {
+ self.inactive.swap_remove(i);
+ }
+ self.inactive.extend(new_inactive.into_vec());
+
+ trace!("active:");
+ for aid in &self.active {
+ match aid {
+ ActiveInt::Virtual(id) => {
+ trace!(" {}", intervals.get(*id));
+ }
+ ActiveInt::Fixed((real_reg, _frag)) => {
+ trace!(" {}", intervals.fixeds[real_reg.get_index()]);
+ }
+ }
+ }
+ trace!("inactive:");
+ for &(rreg, fix) in &self.inactive {
+ trace!(
+ " {:?} {:?}",
+ rreg,
+ intervals.fixeds[rreg.get_index()].frags[fix]
+ );
+ }
+ trace!("end update state");
+
+ stats.as_mut().map(|stats| {
+ stats.peak_active = usize::max(stats.peak_active, self.active.len());
+ stats.peak_inactive = usize::max(stats.peak_inactive, self.inactive.len());
+ });
+ }
+}
+
+pub(crate) fn run<F: Function>(
+ opts: &LinearScanOptions,
+ func: &F,
+ reg_uses: &RegVecsAndBounds,
+ reg_universe: &RealRegUniverse,
+ scratches_by_rc: &Vec<Option<RealReg>>,
+ intervals: Intervals,
+ stats: Option<Statistics>,
+) -> Result<(Intervals, u32), RegAllocError> {
+ let mut state = State::new(opts, func, &reg_uses, intervals, stats);
+ let mut reusable = ReusableState::new(reg_universe, &scratches_by_rc);
+
+ #[cfg(debug_assertions)]
+ let mut prev_start = InstPoint::min_value();
+
+ while let Some(id) = state.next_unhandled() {
+ info!("main loop: allocating {}", state.intervals.get(id));
+
+ #[cfg(debug_assertions)]
+ {
+ let int = state.intervals.get(id);
+ debug_assert!(prev_start <= int.start, "main loop must make progress");
+ prev_start = int.start;
+ }
+
+ if state.intervals.get(id).location.is_none() {
+ let int = state.intervals.get(id);
+
+ state
+ .activity
+ .update(int.start, &mut state.stats, &state.intervals);
+
+ let ok = try_allocate_reg(&mut reusable, id, &mut state);
+ if !ok {
+ allocate_blocked_reg(&mut reusable, id, &mut state)?;
+ }
+
+ if state.intervals.get(id).location.reg().is_some() {
+ state.activity.set_active(id);
+ }
+
+ // Reset reusable state.
+ reusable.computed_inactive = false;
+ }
+
+ debug!("");
+ }
+
+ if log_enabled!(Level::Debug) {
+ debug!("allocation results (in order):");
+ for int in state.intervals.virtuals.iter() {
+ debug!("{}", int);
+ }
+ debug!("");
+ }
+
+ Ok((state.intervals, state.next_spill_slot.get()))
+}
+
+/// A mapping from real reg to some T.
+#[derive(Clone)]
+struct RegisterMapping<T> {
+ offset: usize,
+ regs: Vec<(RealReg, T)>,
+ scratch: Option<RealReg>,
+ initial_value: T,
+ reg_class_index: usize,
+}
+
+impl<T: Copy> RegisterMapping<T> {
+ fn with_default(
+ reg_class_index: usize,
+ reg_universe: &RealRegUniverse,
+ scratch: Option<RealReg>,
+ initial_value: T,
+ ) -> Self {
+ let mut regs = Vec::new();
+ let mut offset = 0;
+ // Collect all the registers for the current class.
+ if let Some(ref info) = reg_universe.allocable_by_class[reg_class_index] {
+ lsra_assert!(info.first <= info.last);
+ offset = info.first;
+ for reg in &reg_universe.regs[info.first..=info.last] {
+ lsra_assert!(regs.len() == reg.0.get_index() - offset);
+ regs.push((reg.0, initial_value));
+ }
+ };
+ Self {
+ offset,
+ regs,
+ scratch,
+ initial_value,
+ reg_class_index,
+ }
+ }
+
+ fn clear(&mut self) {
+ for reg in self.regs.iter_mut() {
+ reg.1 = self.initial_value;
+ }
+ }
+
+ fn iter<'a>(&'a self) -> RegisterMappingIter<T> {
+ RegisterMappingIter {
+ iter: self.regs.iter(),
+ scratch: self.scratch,
+ }
+ }
+}
+
+struct RegisterMappingIter<'a, T: Copy> {
+ iter: std::slice::Iter<'a, (RealReg, T)>,
+ scratch: Option<RealReg>,
+}
+
+impl<'a, T: Copy> std::iter::Iterator for RegisterMappingIter<'a, T> {
+ type Item = &'a (RealReg, T);
+ fn next(&mut self) -> Option<Self::Item> {
+ match self.iter.next() {
+ Some(pair) => {
+ if Some(pair.0) == self.scratch {
+ // Skip to the next one.
+ self.iter.next()
+ } else {
+ Some(pair)
+ }
+ }
+ None => None,
+ }
+ }
+}
+
+impl<T> std::ops::Index<RealReg> for RegisterMapping<T> {
+ type Output = T;
+ fn index(&self, rreg: RealReg) -> &Self::Output {
+ lsra_assert!(
+ rreg.get_class() as usize == self.reg_class_index,
+ "trying to index a reg from the wrong class"
+ );
+ lsra_assert!(Some(rreg) != self.scratch, "trying to use the scratch");
+ &self.regs[rreg.get_index() - self.offset].1
+ }
+}
+
+impl<T> std::ops::IndexMut<RealReg> for RegisterMapping<T> {
+ fn index_mut(&mut self, rreg: RealReg) -> &mut Self::Output {
+ lsra_assert!(
+ rreg.get_class() as usize == self.reg_class_index,
+ "trying to index a reg from the wrong class"
+ );
+ lsra_assert!(Some(rreg) != self.scratch, "trying to use the scratch");
+ &mut self.regs[rreg.get_index() - self.offset].1
+ }
+}
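+
+// For illustration of `RegisterMapping` (hypothetical numbers): if a class's allocable
+// registers occupy universe indices 8..=15, then `offset` is 8, and indexing the mapping
+// with the real register at universe index 11 reads or writes `regs[3].1`. The class's
+// scratch register is skipped by `iter()` and asserted against in `index`/`index_mut`.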
+
+// State management.
+
+/// Parts of state just reused for recycling memory.
+struct ReusableState {
+ inactive_intersecting: Vec<(RealReg, InstPoint)>,
+ computed_inactive: bool,
+ reg_to_instpoint_1: Vec<RegisterMapping<InstPoint>>,
+ reg_to_instpoint_2: Vec<RegisterMapping<InstPoint>>,
+}
+
+impl ReusableState {
+ fn new(reg_universe: &RealRegUniverse, scratches: &[Option<RealReg>]) -> Self {
+ let mut reg_to_instpoint_1 = Vec::with_capacity(NUM_REG_CLASSES);
+
+ for i in 0..NUM_REG_CLASSES {
+ let scratch = scratches[i];
+ reg_to_instpoint_1.push(RegisterMapping::with_default(
+ i,
+ reg_universe,
+ scratch,
+ InstPoint::max_value(),
+ ));
+ }
+
+ let reg_to_instpoint_2 = reg_to_instpoint_1.clone();
+
+ Self {
+ inactive_intersecting: Vec::new(),
+ computed_inactive: false,
+ reg_to_instpoint_1,
+ reg_to_instpoint_2,
+ }
+ }
+}
+
+/// A small pair containing the interval id and the start point of an interval that is still
+/// to be allocated, stored in the unhandled list of intervals.
+struct IntervalStart(IntId, InstPoint);
+
+impl cmp::PartialEq for IntervalStart {
+ #[inline(always)]
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+}
+impl cmp::Eq for IntervalStart {}
+
+impl cmp::PartialOrd for IntervalStart {
+ #[inline(always)]
+ fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+ // Note: we want a reverse ordering on start positions, so that we have a MinHeap and not a
+ // MaxHeap in UnhandledIntervals.
+ other.1.partial_cmp(&self.1)
+ }
+}
+
+impl cmp::Ord for IntervalStart {
+ #[inline(always)]
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.partial_cmp(other).unwrap()
+ }
+}
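+
+// For illustration: `BinaryHeap` is a max-heap, so comparing `other.1` against `self.1`
+// above (rather than the reverse) turns it into a min-heap on start points: an interval
+// starting at, say, instruction 3 is popped before one starting at instruction 7.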
+
+struct UnhandledIntervals {
+ heap: BinaryHeap<IntervalStart>,
+}
+
+impl UnhandledIntervals {
+ fn new() -> Self {
+ Self {
+ heap: BinaryHeap::with_capacity(16),
+ }
+ }
+
+ /// Insert a virtual interval that's unallocated in the list of unhandled intervals.
+ ///
+    /// This relies on the fact that unhandled intervals' start positions can't change over time.
+ fn insert(&mut self, id: IntId, intervals: &Intervals) {
+ self.heap.push(IntervalStart(id, intervals.get(id).start))
+ }
+
+    /// Get the next unhandled interval, in start order.
+ fn next_unhandled(&mut self, _intervals: &Intervals) -> Option<IntId> {
+ self.heap.pop().map(|entry| {
+ let ret = entry.0;
+ lsra_assert!(_intervals.get(ret).start == entry.1);
+ ret
+ })
+ }
+}
+
+/// State structure, which can be cleared between different calls to register allocation.
+/// TODO: split this into clearable fields and non-clearable fields.
+struct State<'a, F: Function> {
+ func: &'a F,
+ reg_uses: &'a RegUses,
+ opts: &'a LinearScanOptions,
+
+ intervals: Intervals,
+
+    /// Intervals that start after the current interval's start position.
+ unhandled: UnhandledIntervals,
+
+ /// Next available spill slot.
+ next_spill_slot: SpillSlot,
+
+ /// Maps given virtual registers to the spill slots they should be assigned
+ /// to.
+ spill_map: HashMap<VirtualReg, SpillSlot>,
+
+ activity: ActivityTracker,
+ stats: Option<Statistics>,
+}
+
+impl<'a, F: Function> State<'a, F> {
+ fn new(
+ opts: &'a LinearScanOptions,
+ func: &'a F,
+ reg_uses: &'a RegUses,
+ intervals: Intervals,
+ stats: Option<Statistics>,
+ ) -> Self {
+ let mut unhandled = UnhandledIntervals::new();
+ for int in intervals.virtuals.iter() {
+ unhandled.insert(int.id, &intervals);
+ }
+
+ let activity = ActivityTracker::new(&intervals);
+
+ Self {
+ func,
+ reg_uses,
+ opts,
+ intervals,
+ unhandled,
+ next_spill_slot: SpillSlot::new(0),
+ spill_map: HashMap::default(),
+ stats,
+ activity,
+ }
+ }
+
+ fn next_unhandled(&mut self) -> Option<IntId> {
+ self.unhandled.next_unhandled(&self.intervals)
+ }
+ fn insert_unhandled(&mut self, id: IntId) {
+ self.unhandled.insert(id, &self.intervals);
+ }
+
+ fn spill(&mut self, id: IntId) {
+ let int = self.intervals.get(id);
+ debug_assert!(int.location.spill().is_none(), "already spilled");
+ debug!("spilling {:?}", id);
+
+ let vreg = int.vreg;
+ let spill_slot = if let Some(spill_slot) = self.spill_map.get(&vreg) {
+ *spill_slot
+ } else {
+ let size_slot = self.func.get_spillslot_size(vreg.get_class(), vreg);
+ let spill_slot = self.next_spill_slot.round_up(size_slot);
+ self.next_spill_slot = self.next_spill_slot.inc(1);
+ self.spill_map.insert(vreg, spill_slot);
+ spill_slot
+ };
+
+ self.intervals.set_spill(id, spill_slot);
+ }
+}
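+
+// For illustration of `State::spill` above: the slot is keyed on the vreg, not the
+// interval, so all split children of the same virtual register reuse one spill slot; the
+// first spill rounds `next_spill_slot` up for the vreg's slot size and records the result
+// in `spill_map`.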
+
+#[inline(never)]
+fn lazy_compute_inactive(
+ intervals: &Intervals,
+ activity: &ActivityTracker,
+ cur_id: IntId,
+ inactive_intersecting: &mut Vec<(RealReg, InstPoint)>,
+ computed_inactive: &mut bool,
+) {
+ if *computed_inactive {
+ return;
+ }
+ inactive_intersecting.clear();
+
+ let int = intervals.get(cur_id);
+ let reg_class = int.vreg.get_class();
+
+ for &(rreg, fix) in &activity.inactive {
+ if rreg.get_class() != reg_class {
+ continue;
+ }
+
+ let frags = &intervals.fixeds[rreg.get_index()].frags;
+ let mut i = fix;
+ while let Some(ref frag) = frags.get(i) {
+ if frag.first > int.end {
+ break;
+ }
+ if frag.first >= int.start {
+ inactive_intersecting.push((rreg, frag.first));
+ break;
+ }
+ i += 1;
+ }
+ }
+
+ *computed_inactive = true;
+}
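+
+// For illustration of `lazy_compute_inactive` (hypothetical bounds): if the current
+// interval covers [i10.u; i20.d] and the fixed interval of some real register has
+// fragments starting at i2 and i15, the first fragment is skipped and (reg, i15) is
+// recorded, so callers treat that register as free only up to i15.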
+
+/// Transitions intervals from active/inactive into active/inactive/handled.
+///
+/// An interval tree is stored in the state, containing all the active and
+/// inactive intervals. The comparison key is the interval's start point.
+///
+/// A state update consists of the following. We consider the next interval to
+/// allocate, and in particular its start point S.
+///
+/// 1. remove all the active/inactive intervals that have expired, i.e. their
+/// end point is before S.
+/// 2. reconsider active/inactive intervals:
+/// - if they contain S, they become (or stay) active.
+/// - otherwise, they become (or stay) inactive.
+///
+/// Item 1 is easy to implement, and fast enough.
+///
+/// Item 2 is a bit trickier. While we could just call `Intervals::covers` for
+/// each interval on S, this is quite expensive. In addition, most intervals turn
+/// out to be inactive: linear scan can create large intervals when a value is
+/// used long after it has been defined, *according to the block ordering*.
+///
+/// For each interval, we remember the last active fragment, or the first
+/// inactive fragment that starts after S. This makes search really fast:
+///
+/// - if the considered (active or inactive) interval start is before S, then we
+/// should look more precisely if it's active or inactive. This might include
+/// seeking to the next fragment that contains S.
+/// - otherwise, if the considered interval start is *after* S, then this
+/// interval, as well as all the remaining ones in the interval tree (since
+/// they're sorted by starting position), is inactive, and we can exit the
+/// loop early.
+///
+/// This early exit for inactive intervals makes the function cheap overall.
+
+/// Naive heuristic to select a register when we're not aware of any conflict.
+/// Currently, it chooses the register with the furthest next use.
+#[inline(never)]
+fn select_naive_reg<F: Function>(
+ reusable: &mut ReusableState,
+ state: &mut State<F>,
+ id: IntId,
+) -> Option<(RealReg, InstPoint)> {
+ let reg_class = state.intervals.get(id).vreg.get_class();
+ let free_until_pos = &mut reusable.reg_to_instpoint_1[reg_class as usize];
+ free_until_pos.clear();
+
+ let mut num_free = usize::max(1, free_until_pos.regs.len()) - 1;
+
+ // All registers currently in use are blocked.
+ for &aid in &state.activity.active {
+ let reg = match aid {
+ ActiveInt::Virtual(int_id) => {
+ if let Some(reg) = state.intervals.get(int_id).location.reg() {
+ reg
+ } else {
+ continue;
+ }
+ }
+ ActiveInt::Fixed((real_reg, _)) => real_reg,
+ };
+
+ if reg.get_class() == reg_class {
+ free_until_pos[reg] = InstPoint::min_value();
+ num_free -= 1;
+ }
+ }
+
+ // Shortcut: if all the registers are taken, don't even bother.
+ if num_free == 0 {
+ lsra_assert!(!free_until_pos
+ .iter()
+ .any(|pair| pair.1 != InstPoint::min_value()));
+ return None;
+ }
+
+ // All registers that would be used at the same time as the current interval
+ // are partially blocked, up to the point when they start being used.
+ lazy_compute_inactive(
+ &state.intervals,
+ &state.activity,
+ id,
+ &mut reusable.inactive_intersecting,
+ &mut reusable.computed_inactive,
+ );
+
+ for &(reg, intersect_at) in reusable.inactive_intersecting.iter() {
+ if intersect_at < free_until_pos[reg] {
+ free_until_pos[reg] = intersect_at;
+ }
+ }
+
+ // Find the register with the furthest next use, if there's any.
+ let mut best_reg = None;
+ let mut best_pos = InstPoint::min_value();
+ for &(reg, pos) in free_until_pos.iter() {
+ if pos > best_pos {
+ best_pos = pos;
+ best_reg = Some(reg);
+ }
+ }
+
+ best_reg.and_then(|reg| Some((reg, best_pos)))
+}
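+
+// For illustration: the pair returned by `select_naive_reg` reads as "this register is
+// free until that position"; `try_allocate_reg` below then checks whether that position
+// lies beyond the current interval's end, and otherwise considers a partial split.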
+
+#[inline(never)]
+fn try_allocate_reg<F: Function>(
+ reusable: &mut ReusableState,
+ id: IntId,
+ state: &mut State<F>,
+) -> bool {
+ state
+ .stats
+ .as_mut()
+ .map(|stats| stats.num_try_allocate_reg += 1);
+
+ let (best_reg, best_pos) = if let Some(solution) = select_naive_reg(reusable, state, id) {
+ solution
+ } else {
+ debug!("try_allocate_reg: all registers taken, need to spill.");
+ return false;
+ };
+ debug!(
+ "try_allocate_reg: best register {:?} has next use at {:?}",
+ best_reg, best_pos
+ );
+
+ if best_pos <= state.intervals.get(id).end {
+ if !state.opts.partial_split || !try_split_regs(state, id, best_pos) {
+ return false;
+ }
+ }
+
+ // At least a partial match: allocate.
+ debug!(
+ "{:?}: {:?} <- {:?}",
+ id,
+ state.intervals.get(id).vreg,
+ best_reg
+ );
+ state.intervals.set_reg(id, best_reg);
+
+ state
+ .stats
+ .as_mut()
+ .map(|stats| stats.num_try_allocate_reg_success += 1);
+
+ true
+}
+
+#[inline(never)]
+fn allocate_blocked_reg<F: Function>(
+ reusable: &mut ReusableState,
+ cur_id: IntId,
+ state: &mut State<F>,
+) -> Result<(), RegAllocError> {
+ // If the current interval has no uses, spill it directly.
+ let first_use = match next_use(
+ &state.intervals.get(cur_id),
+ InstPoint::min_value(),
+ &state.reg_uses,
+ ) {
+ Some(u) => u,
+ None => {
+ state.spill(cur_id);
+ return Ok(());
+ }
+ };
+
+ let (start_pos, reg_class) = {
+ let int = state.intervals.get(cur_id);
+ (int.start, int.vreg.get_class())
+ };
+
+ // Note: in this function, "use" isn't just a use as in use-def; it really
+ // means a mention, so either a use or a definition.
+ //
+ // 1. Compute all the positions of next uses for registers of active intervals
+ // and inactive intervals that might intersect with the current one.
+    // 2. Then use this to select the register with the furthest next use.
+ // 3. Spill either the current interval or active/inactive intervals with the
+ // selected register.
+ // 4. Make sure that the current interval doesn't intersect with the fixed
+ // interval for the selected register.
+
+ // Step 1: compute all the next use positions.
+ let next_use_pos = &mut reusable.reg_to_instpoint_1[reg_class as usize];
+ next_use_pos.clear();
+
+ let block_pos = &mut reusable.reg_to_instpoint_2[reg_class as usize];
+ block_pos.clear();
+
+ trace!(
+ "allocate_blocked_reg: searching reg with next use after {:?}",
+ start_pos
+ );
+
+ for &aid in &state.activity.active {
+ match aid {
+ ActiveInt::Virtual(int_id) => {
+ let int = state.intervals.get(int_id);
+ if int.vreg.get_class() != reg_class {
+ continue;
+ }
+ if let Some(reg) = int.location.reg() {
+ if next_use_pos[reg] != InstPoint::min_value() {
+ if let Some(next_use) =
+ next_use(&state.intervals.get(int_id), start_pos, &state.reg_uses)
+ {
+ next_use_pos[reg] = InstPoint::min(next_use_pos[reg], next_use);
+ }
+ }
+ }
+ }
+
+ ActiveInt::Fixed((reg, _frag)) => {
+ if reg.get_class() == reg_class {
+ block_pos[reg] = InstPoint::min_value();
+ next_use_pos[reg] = InstPoint::min_value();
+ }
+ }
+ }
+ }
+
+ lazy_compute_inactive(
+ &state.intervals,
+ &state.activity,
+ cur_id,
+ &mut reusable.inactive_intersecting,
+ &mut reusable.computed_inactive,
+ );
+
+ for &(reg, intersect_pos) in &reusable.inactive_intersecting {
+ debug_assert!(reg.get_class() == reg_class);
+ if block_pos[reg] == InstPoint::min_value() {
+ // This register is already blocked.
+ debug_assert!(next_use_pos[reg] == InstPoint::min_value());
+ continue;
+ }
+ block_pos[reg] = InstPoint::min(block_pos[reg], intersect_pos);
+ next_use_pos[reg] = InstPoint::min(next_use_pos[reg], intersect_pos);
+ }
+
+ // Step 2: find the register with the furthest next use.
+ let best_reg = {
+ let mut best = None;
+ for (reg, pos) in next_use_pos.iter() {
+ trace!("allocate_blocked_reg: {:?} has next use at {:?}", reg, pos);
+ match best {
+ None => best = Some((reg, pos)),
+ Some((ref mut best_reg, ref mut best_pos)) => {
+ if *best_pos < pos {
+ *best_pos = pos;
+ *best_reg = reg;
+ }
+ }
+ }
+ }
+ match best {
+ Some(best) => *best.0,
+ None => {
+ return Err(RegAllocError::Other(format!(
+ "the {:?} register class has no registers!",
+ reg_class
+ )));
+ }
+ }
+ };
+ debug!(
+ "selecting blocked register {:?} with furthest next use at {:?}",
+ best_reg, next_use_pos[best_reg]
+ );
+
+ // Step 3: if the next use of the current interval is after the furthest use
+ // of the selected register, then we should spill the current interval.
+ // Otherwise, spill other intervals.
+ debug!(
+ "current first used at {:?}, next use of best reg at {:?}",
+ first_use, next_use_pos[best_reg]
+ );
+
+ if first_use >= next_use_pos[best_reg] {
+ if first_use == start_pos {
+ return Err(RegAllocError::OutOfRegisters(reg_class));
+ }
+ debug!("spill current interval");
+ let new_int = split(state, cur_id, first_use);
+ state.insert_unhandled(new_int);
+ state.spill(cur_id);
+ } else {
+ debug!("taking over register, spilling intersecting intervals");
+
+ // Spill intervals that currently block the selected register.
+ state.intervals.set_reg(cur_id, best_reg);
+
+ // If there's an interference with a fixed interval, split at the
+ // intersection.
+ let int_end = state.intervals.get(cur_id).end;
+ if block_pos[best_reg] <= int_end {
+ debug!(
+ "allocate_blocked_reg: fixed conflict! blocked at {:?}, while ending at {:?}",
+ block_pos[best_reg], int_end
+ );
+
+ if !state.opts.partial_split || !try_split_regs(state, cur_id, block_pos[best_reg]) {
+ split_and_spill(state, cur_id, block_pos[best_reg]);
+ }
+ }
+
+ for &aid in &state.activity.active {
+ match aid {
+ ActiveInt::Virtual(int_id) => {
+ let int = state.intervals.get(int_id);
+ if int.vreg.get_class() != reg_class {
+ continue;
+ }
+ if let Some(reg) = int.location.reg() {
+ if reg == best_reg {
+ // spill it!
+ debug!("allocate_blocked_reg: split and spill active stolen reg");
+ split_and_spill(state, int_id, start_pos);
+ break;
+ }
+ }
+ }
+
+ ActiveInt::Fixed((_reg, _fix)) => {
+ lsra_assert!(
+ _reg != best_reg
+ || state.intervals.get(cur_id).end
+ < state.intervals.fixeds[_reg.get_index()].frags[_fix].first,
+ "can't split fixed active interval"
+ );
+ }
+ }
+ }
+
+ // Inactive virtual intervals would need to be split and spilled here too, but we can't
+ // have inactive virtual intervals.
+ #[cfg(debug_assertions)]
+ for &(reg, intersect_pos) in &reusable.inactive_intersecting {
+ debug_assert!(
+ reg != best_reg || state.intervals.get(cur_id).end < intersect_pos,
+ "can't split fixed inactive interval"
+ );
+ }
+ }
+
+ Ok(())
+}
+
+/// Finds an optimal split position, whenever we're given a range of possible
+/// positions where to split.
+fn find_optimal_split_pos<F: Function>(
+ state: &State<F>,
+ id: IntId,
+ from: InstPoint,
+ to: InstPoint,
+) -> InstPoint {
+ trace!("find_optimal_split_pos between {:?} and {:?}", from, to);
+
+ debug_assert!(from <= to, "split between positions are inconsistent");
+ let int = state.intervals.get(id);
+ debug_assert!(from >= int.start, "split should happen after the start");
+ debug_assert!(to <= int.end, "split should happen before the end");
+
+ if from == to {
+ return from;
+ }
+
+ let candidate = match state.opts.split_strategy {
+ OptimalSplitStrategy::To => Some(to),
+ OptimalSplitStrategy::NextFrom => Some(next_pos(from)),
+ OptimalSplitStrategy::NextNextFrom => Some(next_pos(next_pos(from))),
+ OptimalSplitStrategy::From => {
+ // This is the general setting, so win some time and eagerly return here.
+ return from;
+ }
+ OptimalSplitStrategy::PrevTo => Some(prev_pos(to)),
+ OptimalSplitStrategy::PrevPrevTo => Some(prev_pos(prev_pos(to))),
+ OptimalSplitStrategy::Mid => Some(InstPoint::new_use(InstIx::new(
+ (from.iix().get() + to.iix().get()) / 2,
+ ))),
+ };
+
+ if let Some(pos) = candidate {
+ if pos >= from && pos <= to && state.intervals.get(id).covers(pos) {
+ return pos;
+ }
+ }
+
+ from
+}
+
+fn prev_pos(mut pos: InstPoint) -> InstPoint {
+ match pos.pt() {
+ Point::Def => {
+ pos.set_pt(Point::Use);
+ pos
+ }
+ Point::Use => {
+ pos.set_iix(pos.iix().minus(1));
+ pos.set_pt(Point::Def);
+ pos
+ }
+ _ => unreachable!(),
+ }
+}
+
+fn next_pos(mut pos: InstPoint) -> InstPoint {
+ match pos.pt() {
+ Point::Use => pos.set_pt(Point::Def),
+ Point::Def => {
+ pos.set_pt(Point::Use);
+ pos.set_iix(pos.iix().plus(1));
+ }
+ _ => unreachable!(),
+ };
+ pos
+}
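+
+// For illustration of `prev_pos`/`next_pos` above: points alternate Use/Def, so
+// next_pos(i7.u) == i7.d and next_pos(i7.d) == i8.u, while prev_pos steps the other way
+// (prev_pos(i8.u) == i7.d).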
+
+/// Splits the given interval between the last use before `split_pos` and
+/// `split_pos`.
+///
+/// In case of a two-way split (i.e. the only place to split is precisely `split_pos`),
+/// returns the live interval id for the middle child, to be added back to the
+/// list of active/inactive intervals after iterating on these.
+fn split_and_spill<F: Function>(state: &mut State<F>, id: IntId, split_pos: InstPoint) {
+ let child = match last_use(&state.intervals.get(id), split_pos, &state.reg_uses) {
+ Some(last_use) => {
+ debug!(
+ "split_and_spill {:?}: spill between {:?} and {:?}",
+ id, last_use, split_pos
+ );
+
+ // Maintain ascending order between the min and max positions.
+ let min_pos = InstPoint::min(next_pos(last_use), split_pos);
+
+ // Make sure that if the two positions are the same, we'll be splitting in
+ // a position that's in the current interval.
+ let optimal_pos = find_optimal_split_pos(state, id, min_pos, split_pos);
+
+ let child = split(state, id, optimal_pos);
+ state.spill(child);
+ child
+ }
+
+ None => {
+ // The current interval has no uses before the split position, it can
+ // safely be spilled.
+ debug!(
+ "split_and_spill {:?}: spilling it since no uses before split position",
+ id
+ );
+ state.spill(id);
+ id
+ }
+ };
+
+ // Split until the next register use.
+ match next_use(&state.intervals.get(child), split_pos, &state.reg_uses) {
+ Some(next_use_pos) => {
+ debug!(
+ "split spilled interval before next use @ {:?}",
+ next_use_pos
+ );
+ let child = split(state, child, next_use_pos);
+ state.insert_unhandled(child);
+ }
+ None => {
+ // Let it be spilled for the rest of its lifetime.
+ }
+ }
+
+ // In both cases, the spilled child interval can remain on the stack.
+ debug!("spilled split child {:?} silently expires", child);
+}
+
+/// Try to find a (use) position at which to split the interval, before the point at which the
+/// register becomes unavailable, and put the new child back into the queue of intervals to
+/// allocate later on. Returns true if such a position was found, false otherwise.
+fn try_split_regs<F: Function>(
+ state: &mut State<F>,
+ id: IntId,
+ available_until: InstPoint,
+) -> bool {
+ state.stats.as_mut().map(|stats| stats.num_reg_splits += 1);
+
+ // Find a position for the split: we'll iterate backwards from the point until the register is
+ // available, down to the previous use of the current interval.
+ let prev_use = match last_use(&state.intervals.get(id), available_until, &state.reg_uses) {
+ Some(prev_use) => prev_use,
+ None => state.intervals.get(id).start,
+ };
+
+ let split_pos = if state.opts.partial_split_near_end {
+ // Split at the position closest to the available_until position.
+ let pos = match available_until.pt() {
+ Point::Use => prev_pos(prev_pos(available_until)),
+ Point::Def => prev_pos(available_until),
+ _ => unreachable!(),
+ };
+ if pos <= prev_use {
+ return false;
+ }
+ pos
+ } else {
+ // Split at the position closest to the prev_use position. If it was a def, we can split
+ // just thereafter, if it was at a use, go to the next use.
+ let pos = match prev_use.pt() {
+ Point::Use => next_pos(next_pos(prev_use)),
+ Point::Def => next_pos(prev_use),
+ _ => unreachable!(),
+ };
+ if pos >= available_until {
+ return false;
+ }
+ pos
+ };
+
+ let child = split(state, id, split_pos);
+ state.insert_unhandled(child);
+
+ state
+ .stats
+ .as_mut()
+ .map(|stats| stats.num_reg_splits_success += 1);
+
+ true
+}
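+
+// For illustration of `try_split_regs` (hypothetical positions): with the previous use at
+// i4.u and the register available until i9.u, the default strategy splits just after the
+// use, at i5.u, whereas `partial_split_near_end` splits as late as possible, at i8.u.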
+
+/// Splits the interval at the given position.
+///
+/// The split position must either be a Def of the current vreg, or it must be
+/// at a Use position (otherwise there's no place to put the moves created by
+/// the split).
+///
+/// The id of the new interval is returned, while the parent interval is mutated
+/// in place. The child interval starts at `at_pos` (inclusive).
+#[inline(never)]
+fn split<F: Function>(state: &mut State<F>, id: IntId, at_pos: InstPoint) -> IntId {
+ debug!("split {:?} at {:?}", id, at_pos);
+ trace!("interval: {}", state.intervals.get(id));
+
+ let int = state.intervals.get(id);
+ debug_assert!(int.start <= at_pos, "must split after the start");
+ debug_assert!(at_pos <= int.end, "must split before the end");
+
+ // We're splitting in the middle of a fragment: [L, R].
+ // Split it into two fragments: parent [L, pos[ + child [pos, R].
+ debug_assert!(int.start < int.end, "trying to split unit fragment");
+ debug_assert!(int.start <= at_pos, "no space to split fragment");
+
+ let parent_start = int.start;
+ let parent_end = prev_pos(at_pos);
+ let child_start = at_pos;
+ let child_end = int.end;
+
+ trace!(
+ "split fragment [{:?}; {:?}] into two parts: [{:?}; {:?}] to [{:?}; {:?}]",
+ int.start,
+ int.end,
+ parent_start,
+ parent_end,
+ child_start,
+ child_end
+ );
+
+ debug_assert!(parent_start <= parent_end);
+ debug_assert!(parent_end <= child_start);
+ debug_assert!(child_start <= child_end);
+
+ let vreg = int.vreg;
+ let ancestor = int.ancestor;
+
+ let parent_mentions = state.intervals.get_mut(id).mentions_mut();
+ let index = parent_mentions.binary_search_by(|mention| {
+ // The comparator function returns the position of the argument compared to the target.
+
+ // Search by index first.
+ let iix = mention.0;
+ if iix < at_pos.iix() {
+ return Ordering::Less;
+ }
+ if iix > at_pos.iix() {
+ return Ordering::Greater;
+ }
+
+        // The instruction index is the same. Now consider the point (use or def) and compare it
+        // with the mention set. For the purposes of LSRA, a mod counts as both a use and a def.
+ let set = mention.1;
+ if at_pos.pt() == Point::Use {
+ if set.is_use_or_mod() {
+ Ordering::Equal
+ } else {
+                // It has to be Mod or Def. We need to look further to the right in the searched
+                // array, so indicate that this mention is after the target.
+ Ordering::Greater
+ }
+ } else {
+ debug_assert!(at_pos.pt() == Point::Def);
+ if set.is_mod_or_def() {
+ Ordering::Equal
+ } else {
+ // Look to the left.
+ Ordering::Less
+ }
+ }
+ });
+
+ let (index, may_need_fixup) = match index {
+ Ok(index) => (index, true),
+ Err(index) => (index, false),
+ };
+
+ // Emulate split_off for SmallVec here.
+ let mut child_mentions = MentionMap::with_capacity(parent_mentions.len() - index);
+ for mention in parent_mentions.iter().skip(index) {
+ child_mentions.push(mention.clone());
+ }
+ parent_mentions.truncate(index);
+
+ // In the situation where we split at the def point of an instruction, and the mention set
+ // contains the use point, we need to refine the sets:
+ // - the parent must still contain the use point (and the modified point if present)
+ // - the child must only contain the def point (and the modified point if present).
+ // Note that if we split at the use point of an instruction, and the mention set contains the
+ // def point, it is fine: we're not splitting between the two of them.
+ if may_need_fixup && at_pos.pt() == Point::Def && child_mentions.first().unwrap().1.is_use() {
+ let first_child_mention = child_mentions.first_mut().unwrap();
+ first_child_mention.1.remove_use();
+
+ let last_parent_mention = parent_mentions.last_mut().unwrap();
+ last_parent_mention.1.add_use();
+
+ if first_child_mention.1.is_mod() {
+ last_parent_mention.1.add_mod();
+ }
+ }
+
+ let child_id = IntId(state.intervals.num_virtual_intervals());
+ let mut child_int =
+ VirtualInterval::new(child_id, vreg, child_start, child_end, child_mentions);
+ child_int.parent = Some(id);
+ child_int.ancestor = ancestor;
+
+ state.intervals.push_interval(child_int);
+
+ state.intervals.get_mut(id).end = parent_end;
+ state.intervals.set_child(id, child_id);
+
+ if log_enabled!(Level::Trace) {
+ trace!("split results:");
+ trace!("- {}", state.intervals.get(id));
+ trace!("- {}", state.intervals.get(child_id));
+ }
+
+ child_id
+}
+
+fn _build_mention_map(reg_uses: &RegUses) -> HashMap<Reg, MentionMap> {
+ // Maps reg to its mentions.
+ let mut reg_mentions: HashMap<Reg, MentionMap> = HashMap::default();
+
+ // Collect all the mentions.
+ for i in 0..reg_uses.num_insns() {
+ let iix = InstIx::new(i as u32);
+ let regsets = reg_uses.get_reg_sets_for_iix(iix);
+ debug_assert!(regsets.is_sanitized());
+
+ for reg in regsets.uses.iter() {
+ let mentions = reg_mentions.entry(*reg).or_default();
+ if mentions.is_empty() || mentions.last().unwrap().0 != iix {
+ mentions.push((iix, Mention::new()));
+ }
+ mentions.last_mut().unwrap().1.add_use();
+ }
+
+ for reg in regsets.mods.iter() {
+ let mentions = reg_mentions.entry(*reg).or_default();
+ if mentions.is_empty() || mentions.last().unwrap().0 != iix {
+ mentions.push((iix, Mention::new()));
+ }
+ mentions.last_mut().unwrap().1.add_mod();
+ }
+
+ for reg in regsets.defs.iter() {
+ let mentions = reg_mentions.entry(*reg).or_default();
+ if mentions.is_empty() || mentions.last().unwrap().0 != iix {
+ mentions.push((iix, Mention::new()));
+ }
+ mentions.last_mut().unwrap().1.add_def();
+ }
+ }
+
+ reg_mentions
+}
diff --git a/third_party/rust/regalloc/src/linear_scan/mod.rs b/third_party/rust/regalloc/src/linear_scan/mod.rs
new file mode 100644
index 0000000000..44c92e2e7a
--- /dev/null
+++ b/third_party/rust/regalloc/src/linear_scan/mod.rs
@@ -0,0 +1,807 @@
+//! Implementation of the linear scan allocator algorithm.
+//!
+//! This tries to follow the implementation as suggested by:
+//! Optimized Interval Splitting in a Linear Scan Register Allocator,
+//! by Wimmer et al., 2005
+
+use log::{info, log_enabled, trace, Level};
+
+use std::default;
+use std::env;
+use std::fmt;
+
+use crate::data_structures::{BlockIx, InstIx, InstPoint, Point, RealReg, RegVecsAndBounds};
+use crate::inst_stream::{add_spills_reloads_and_moves, InstToInsertAndExtPoint};
+use crate::{
+ checker::CheckerContext, reg_maps::MentionRegUsageMapper, Function, RealRegUniverse,
+ RegAllocError, RegAllocResult, RegClass, Set, SpillSlot, VirtualReg, NUM_REG_CLASSES,
+};
+
+use analysis::{AnalysisInfo, RangeFrag};
+use smallvec::SmallVec;
+
+mod analysis;
+mod assign_registers;
+mod resolve_moves;
+
+#[derive(Default)]
+pub(crate) struct Statistics {
+ only_large: bool,
+
+ num_fixed: usize,
+ num_vregs: usize,
+ num_virtual_ranges: usize,
+
+ peak_active: usize,
+ peak_inactive: usize,
+
+ num_try_allocate_reg: usize,
+ num_try_allocate_reg_success: usize,
+
+ num_reg_splits: usize,
+ num_reg_splits_success: usize,
+}
+
+impl Drop for Statistics {
+ fn drop(&mut self) {
+ if self.only_large && self.num_vregs < 1000 {
+ return;
+ }
+ println!(
+ "stats: {} fixed; {} vreg; {} vranges; {} peak-active; {} peak-inactive, {} direct-alloc; {} total-alloc; {} partial-splits; {} partial-splits-attempts",
+ self.num_fixed,
+ self.num_vregs,
+ self.num_virtual_ranges,
+ self.peak_active,
+ self.peak_inactive,
+ self.num_try_allocate_reg_success,
+ self.num_try_allocate_reg,
+ self.num_reg_splits_success,
+ self.num_reg_splits,
+ );
+ }
+}
+
+/// Which strategy should we use when trying to find the best split position?
+/// TODO Consider loop depth to avoid splitting in the middle of a loop
+/// whenever possible.
+#[derive(Copy, Clone, Debug)]
+enum OptimalSplitStrategy {
+ From,
+ To,
+ NextFrom,
+ NextNextFrom,
+ PrevTo,
+ PrevPrevTo,
+ Mid,
+}
+
+#[derive(Clone)]
+pub struct LinearScanOptions {
+ split_strategy: OptimalSplitStrategy,
+ partial_split: bool,
+ partial_split_near_end: bool,
+ stats: bool,
+ large_stats: bool,
+}
+
+impl default::Default for LinearScanOptions {
+ fn default() -> Self {
+ // Useful for debugging.
+ let optimal_split_strategy = match env::var("LSRA_SPLIT") {
+ Ok(s) => match s.as_str() {
+ "t" | "to" => OptimalSplitStrategy::To,
+ "n" => OptimalSplitStrategy::NextFrom,
+ "nn" => OptimalSplitStrategy::NextNextFrom,
+ "p" => OptimalSplitStrategy::PrevTo,
+ "pp" => OptimalSplitStrategy::PrevPrevTo,
+ "m" | "mid" => OptimalSplitStrategy::Mid,
+ _ => OptimalSplitStrategy::From,
+ },
+ Err(_) => OptimalSplitStrategy::From,
+ };
+
+ let large_stats = env::var("LSRA_LARGE_STATS").is_ok();
+ let stats = env::var("LSRA_STATS").is_ok() || large_stats;
+
+ let partial_split = env::var("LSRA_PARTIAL").is_ok();
+ let partial_split_near_end = env::var("LSRA_PARTIAL_END").is_ok();
+
+ Self {
+ split_strategy: optimal_split_strategy,
+ partial_split,
+ partial_split_near_end,
+ stats,
+ large_stats,
+ }
+ }
+}
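+
+// For illustration: these options are read from the environment, e.g. running with
+// LSRA_SPLIT=mid and LSRA_STATS=1 selects the Mid split strategy and prints the
+// statistics summary when the `Statistics` value is dropped.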
+
+impl fmt::Debug for LinearScanOptions {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ writeln!(fmt, "linear scan")?;
+ write!(fmt, " split: {:?}", self.split_strategy)
+ }
+}
+
+// Local shorthands.
+type RegUses = RegVecsAndBounds;
+
+/// A unique identifier for an interval.
+#[derive(Clone, Copy, PartialEq, Eq)]
+struct IntId(pub(crate) usize);
+
+impl fmt::Debug for IntId {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "int{}", self.0)
+ }
+}
+
+#[derive(Clone)]
+struct FixedInterval {
+ reg: RealReg,
+ frags: Vec<RangeFrag>,
+}
+
+impl fmt::Display for FixedInterval {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "fixed {:?} [", self.reg)?;
+ for (i, frag) in self.frags.iter().enumerate() {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ write!(f, "({:?}, {:?})", frag.first, frag.last)?;
+ }
+ write!(f, "]")
+ }
+}
+
+#[derive(Clone)]
+pub(crate) struct VirtualInterval {
+ id: IntId,
+ vreg: VirtualReg,
+
+ /// Parent interval in the split tree.
+ parent: Option<IntId>,
+ ancestor: Option<IntId>,
+ /// Child interval, if it has one, in the split tree.
+ child: Option<IntId>,
+
+ /// Location assigned to this live interval.
+ location: Location,
+
+ mentions: MentionMap,
+ start: InstPoint,
+ end: InstPoint,
+}
+
+impl fmt::Display for VirtualInterval {
+ fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(fmt, "virtual {:?}", self.id)?;
+ if let Some(ref p) = self.parent {
+ write!(fmt, " (parent={:?})", p)?;
+ }
+ write!(
+ fmt,
+ ": {:?} {} [{:?}; {:?}]",
+ self.vreg, self.location, self.start, self.end
+ )
+ }
+}
+
+impl VirtualInterval {
+ fn new(
+ id: IntId,
+ vreg: VirtualReg,
+ start: InstPoint,
+ end: InstPoint,
+ mentions: MentionMap,
+ ) -> Self {
+ Self {
+ id,
+ vreg,
+ parent: None,
+ ancestor: None,
+ child: None,
+ location: Location::None,
+ mentions,
+ start,
+ end,
+ }
+ }
+ fn mentions(&self) -> &MentionMap {
+ &self.mentions
+ }
+ fn mentions_mut(&mut self) -> &mut MentionMap {
+ &mut self.mentions
+ }
+ fn covers(&self, pos: InstPoint) -> bool {
+ self.start <= pos && pos <= self.end
+ }
+}
+
+/// This data structure tracks the mentions of a register (virtual or real) at a precise
+/// instruction point. It's a set encoded as three flags, one for each of use/mod/def.
+#[derive(Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
+pub struct Mention(u8);
+
+impl fmt::Debug for Mention {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let mut comma = false;
+ if self.0 & 1 == 1 {
+ write!(fmt, "use")?;
+ comma = true;
+ }
+ if (self.0 >> 1) & 1 == 1 {
+ if comma {
+ write!(fmt, ",")?;
+ }
+ write!(fmt, "mod")?;
+ comma = true;
+ }
+ if (self.0 >> 2) & 1 == 1 {
+ if comma {
+ write!(fmt, ",")?;
+ }
+ write!(fmt, "def")?;
+ }
+ Ok(())
+ }
+}
+
+impl Mention {
+ fn new() -> Self {
+ Self(0)
+ }
+
+ // Setters.
+ fn add_use(&mut self) {
+ self.0 |= 1 << 0;
+ }
+ fn add_mod(&mut self) {
+ self.0 |= 1 << 1;
+ }
+ fn add_def(&mut self) {
+ self.0 |= 1 << 2;
+ }
+
+ fn remove_use(&mut self) {
+ self.0 &= !(1 << 0);
+ }
+
+ // Getters.
+ fn is_use(&self) -> bool {
+ (self.0 & 0b001) != 0
+ }
+ fn is_mod(&self) -> bool {
+ (self.0 & 0b010) != 0
+ }
+ fn is_def(&self) -> bool {
+ (self.0 & 0b100) != 0
+ }
+ fn is_use_or_mod(&self) -> bool {
+ (self.0 & 0b011) != 0
+ }
+ fn is_mod_or_def(&self) -> bool {
+ (self.0 & 0b110) != 0
+ }
+}
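+
+// For illustration: the three low bits of `Mention` encode use/mod/def respectively, so a
+// value of 0b101 prints as "use,def" and answers true to both `is_use_or_mod()` and
+// `is_mod_or_def()`.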
+
+pub type MentionMap = SmallVec<[(InstIx, Mention); 2]>;
+
+#[derive(Debug, Clone, Copy)]
+pub(crate) enum Location {
+ None,
+ Reg(RealReg),
+ Stack(SpillSlot),
+}
+
+impl Location {
+ pub(crate) fn reg(&self) -> Option<RealReg> {
+ match self {
+ Location::Reg(reg) => Some(*reg),
+ _ => None,
+ }
+ }
+ pub(crate) fn spill(&self) -> Option<SpillSlot> {
+ match self {
+ Location::Stack(slot) => Some(*slot),
+ _ => None,
+ }
+ }
+ pub(crate) fn is_none(&self) -> bool {
+ match self {
+ Location::None => true,
+ _ => false,
+ }
+ }
+}
+
+impl fmt::Display for Location {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ Location::None => write!(fmt, "none"),
+ Location::Reg(reg) => write!(fmt, "{:?}", reg),
+ Location::Stack(slot) => write!(fmt, "{:?}", slot),
+ }
+ }
+}
+
+/// A group of live intervals.
+pub struct Intervals {
+ virtuals: Vec<VirtualInterval>,
+ fixeds: Vec<FixedInterval>,
+}
+
+impl Intervals {
+ fn get(&self, int_id: IntId) -> &VirtualInterval {
+ &self.virtuals[int_id.0]
+ }
+ fn get_mut(&mut self, int_id: IntId) -> &mut VirtualInterval {
+ &mut self.virtuals[int_id.0]
+ }
+ fn num_virtual_intervals(&self) -> usize {
+ self.virtuals.len()
+ }
+
+ // Mutators.
+ fn set_reg(&mut self, int_id: IntId, reg: RealReg) {
+ let int = self.get_mut(int_id);
+ debug_assert!(int.location.is_none());
+ int.location = Location::Reg(reg);
+ }
+ fn set_spill(&mut self, int_id: IntId, slot: SpillSlot) {
+ let int = self.get_mut(int_id);
+ debug_assert!(int.location.spill().is_none());
+ int.location = Location::Stack(slot);
+ }
+ fn push_interval(&mut self, int: VirtualInterval) {
+ debug_assert!(int.id.0 == self.virtuals.len());
+ self.virtuals.push(int);
+ }
+ fn set_child(&mut self, int_id: IntId, child_id: IntId) {
+ if let Some(prev_child) = self.virtuals[int_id.0].child.clone() {
+ self.virtuals[child_id.0].child = Some(prev_child);
+ self.virtuals[prev_child.0].parent = Some(child_id);
+ }
+ self.virtuals[int_id.0].child = Some(child_id);
+ }
+}
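+
+// For illustration (sketch): `set_child` threads split children into a list. If int0
+// already has child int5 and is split again producing int9, the chain becomes
+// int0 -> int9 -> int5, and int5's parent pointer is updated to int9.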
+
+/// Finds the first use of the current interval located at or after the given
+/// `pos` (inclusive), in a broad sense of "use" (any of use, def or mod).
+///
+/// Extends to the left, that is, "modified" counts as "used".
+#[inline(never)]
+fn next_use(interval: &VirtualInterval, pos: InstPoint, _reg_uses: &RegUses) -> Option<InstPoint> {
+ if log_enabled!(Level::Trace) {
+ trace!("find next use of {} after {:?}", interval, pos);
+ }
+
+ let mentions = interval.mentions();
+ let target = InstPoint::max(pos, interval.start);
+
+ let ret = match mentions.binary_search_by_key(&target.iix(), |mention| mention.0) {
+ Ok(index) => {
+ // Either the selected index is a perfect match, or the next mention is
+ // the correct answer.
+ let mention = &mentions[index];
+ if target.pt() == Point::Use {
+ if mention.1.is_use_or_mod() {
+ Some(InstPoint::new_use(mention.0))
+ } else {
+ Some(InstPoint::new_def(mention.0))
+ }
+ } else if target.pt() == Point::Def && mention.1.is_mod_or_def() {
+ Some(target)
+ } else if index == mentions.len() - 1 {
+ None
+ } else {
+ let mention = &mentions[index + 1];
+ if mention.1.is_use_or_mod() {
+ Some(InstPoint::new_use(mention.0))
+ } else {
+ Some(InstPoint::new_def(mention.0))
+ }
+ }
+ }
+
+ Err(index) => {
+ if index == mentions.len() {
+ None
+ } else {
+ let mention = &mentions[index];
+ if mention.1.is_use_or_mod() {
+ Some(InstPoint::new_use(mention.0))
+ } else {
+ Some(InstPoint::new_def(mention.0))
+ }
+ }
+ }
+ };
+
+ // TODO once the mentions are properly split, this could be removed, in
+ // theory.
+ let ret = match ret {
+ Some(pos) => {
+ if pos <= interval.end {
+ Some(pos)
+ } else {
+ None
+ }
+ }
+ None => None,
+ };
+
+ ret
+}
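+
+// For illustration of `next_use` (hypothetical mentions): with mentions
+// [(i3, use), (i8, def)] and pos == i5.u, the binary search misses and lands on index 1,
+// so the next use is reported as i8.d (a def point, since that mention has neither a use
+// nor a mod bit set).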
+
+/// Finds the last use of a vreg at or before a given target position (the target itself is a
+/// possible return value).
+/// Extends to the right, that is, "modified" counts as a "def".
+#[inline(never)]
+fn last_use(interval: &VirtualInterval, pos: InstPoint, _reg_uses: &RegUses) -> Option<InstPoint> {
+ if log_enabled!(Level::Trace) {
+ trace!("searching last use of {} before {:?}", interval, pos,);
+ }
+
+ let mentions = interval.mentions();
+
+ let target = InstPoint::min(pos, interval.end);
+
+ let ret = match mentions.binary_search_by_key(&target.iix(), |mention| mention.0) {
+ Ok(index) => {
+ // Either the selected index is a perfect match, or the previous mention
+ // is the correct answer.
+ let mention = &mentions[index];
+ if target.pt() == Point::Def {
+ if mention.1.is_mod_or_def() {
+ Some(InstPoint::new_def(mention.0))
+ } else {
+ Some(InstPoint::new_use(mention.0))
+ }
+ } else if target.pt() == Point::Use && mention.1.is_use() {
+ Some(target)
+ } else if index == 0 {
+ None
+ } else {
+ let mention = &mentions[index - 1];
+ if mention.1.is_mod_or_def() {
+ Some(InstPoint::new_def(mention.0))
+ } else {
+ Some(InstPoint::new_use(mention.0))
+ }
+ }
+ }
+
+ Err(index) => {
+ if index == 0 {
+ None
+ } else {
+ let mention = &mentions[index - 1];
+ if mention.1.is_mod_or_def() {
+ Some(InstPoint::new_def(mention.0))
+ } else {
+ Some(InstPoint::new_use(mention.0))
+ }
+ }
+ }
+ };
+
+ // TODO once the mentions are properly split, this could be removed, in
+ // theory.
+ let ret = match ret {
+ Some(pos) => {
+ if pos >= interval.start {
+ Some(pos)
+ } else {
+ None
+ }
+ }
+ None => None,
+ };
+
+ trace!("mentions: {:?}", mentions);
+ trace!("found: {:?}", ret);
+
+ ret
+}
+
+/// Checks that each register class has its own scratch register in addition to one available
+/// register, and creates a mapping of register class -> scratch register.
+fn compute_scratches(
+ reg_universe: &RealRegUniverse,
+) -> Result<Vec<Option<RealReg>>, RegAllocError> {
+ let mut scratches_by_rc = vec![None; NUM_REG_CLASSES];
+ for i in 0..NUM_REG_CLASSES {
+ if let Some(info) = &reg_universe.allocable_by_class[i] {
+ if info.first == info.last {
+ return Err(RegAllocError::Other(
+ "at least 2 registers required for linear scan".into(),
+ ));
+ }
+ let scratch = if let Some(suggested_reg) = info.suggested_scratch {
+ reg_universe.regs[suggested_reg].0
+ } else {
+ return Err(RegAllocError::MissingSuggestedScratchReg(
+ RegClass::rc_from_u32(i as u32),
+ ));
+ };
+ scratches_by_rc[i] = Some(scratch);
+ }
+ }
+ Ok(scratches_by_rc)
+}
+
+/// Allocator top level.
+///
+/// `func` is modified so that, when this function returns, it will contain no VirtualReg uses.
+///
+/// Allocation can fail if there are insufficient registers to even generate spill/reload code, or
+/// if the function appears to have any undefined VirtualReg/RealReg uses.
+#[inline(never)]
+pub(crate) fn run<F: Function>(
+ func: &mut F,
+ reg_universe: &RealRegUniverse,
+ use_checker: bool,
+ opts: &LinearScanOptions,
+) -> Result<RegAllocResult<F>, RegAllocError> {
+ let AnalysisInfo {
+ reg_vecs_and_bounds: reg_uses,
+ intervals,
+ liveins,
+ liveouts,
+ ..
+ } = analysis::run(func, reg_universe).map_err(|err| RegAllocError::Analysis(err))?;
+
+ let scratches_by_rc = compute_scratches(reg_universe)?;
+
+ let stats = if opts.stats {
+ let mut stats = Statistics::default();
+ stats.num_fixed = intervals.fixeds.len();
+ stats.num_virtual_ranges = intervals.virtuals.len();
+ stats.num_vregs = intervals
+ .virtuals
+ .iter()
+ .map(|virt| virt.vreg.get_index())
+ .fold(0, |a, b| usize::max(a, b));
+ stats.only_large = opts.large_stats;
+ Some(stats)
+ } else {
+ None
+ };
+
+ if log_enabled!(Level::Trace) {
+ trace!("fixed intervals:");
+ for int in &intervals.fixeds {
+ trace!("{}", int);
+ }
+ trace!("");
+ trace!("unassigned intervals:");
+ for int in &intervals.virtuals {
+ trace!("{}", int);
+ for mention in &int.mentions {
+ trace!(" mention @ {:?}: {:?}", mention.0, mention.1);
+ }
+ }
+ trace!("");
+ }
+
+ let (intervals, mut num_spill_slots) = assign_registers::run(
+ opts,
+ func,
+ &reg_uses,
+ reg_universe,
+ &scratches_by_rc,
+ intervals,
+ stats,
+ )?;
+
+ let virtuals = &intervals.virtuals;
+
+ let memory_moves = resolve_moves::run(
+ func,
+ &reg_uses,
+ virtuals,
+ &liveins,
+ &liveouts,
+ &mut num_spill_slots,
+ &scratches_by_rc,
+ );
+
+ apply_registers(
+ func,
+ virtuals,
+ memory_moves,
+ reg_universe,
+ num_spill_slots,
+ use_checker,
+ )
+}
+
+#[inline(never)]
+fn set_registers<F: Function>(
+ func: &mut F,
+ virtual_intervals: &Vec<VirtualInterval>,
+ reg_universe: &RealRegUniverse,
+ use_checker: bool,
+ memory_moves: &Vec<InstToInsertAndExtPoint>,
+) -> Set<RealReg> {
+ info!("set_registers");
+
+ // Set up checker state, if indicated by our configuration.
+ let mut checker: Option<CheckerContext> = None;
+ let mut insn_blocks: Vec<BlockIx> = vec![];
+ if use_checker {
+ checker = Some(CheckerContext::new(
+ func,
+ reg_universe,
+ memory_moves,
+ &[],
+ &[],
+ &[],
+ ));
+ insn_blocks.resize(func.insns().len(), BlockIx::new(0));
+ for block_ix in func.blocks() {
+ for insn_ix in func.block_insns(block_ix) {
+ insn_blocks[insn_ix.get() as usize] = block_ix;
+ }
+ }
+ }
+
+ let mut clobbered_registers = Set::empty();
+
+ // Collect all the regs per instruction and mention set.
+ let capacity = virtual_intervals
+ .iter()
+ .map(|int| int.mentions.len())
+ .fold(0, |a, b| a + b);
+
+ if capacity == 0 {
+ // No virtual registers have been allocated, exit early.
+ return clobbered_registers;
+ }
+
+ let mut mention_map = Vec::with_capacity(capacity);
+
+ for int in virtual_intervals {
+ let rreg = match int.location.reg() {
+ Some(rreg) => rreg,
+ _ => continue,
+ };
+ trace!("int: {}", int);
+ trace!(" {:?}", int.mentions);
+ for &mention in &int.mentions {
+ mention_map.push((mention.0, mention.1, int.vreg, rreg));
+ }
+ }
+
+ // Sort by instruction index.
+ mention_map.sort_unstable_by_key(|quad| quad.0);
+
+ // Iterate over all the mentions.
+ let mut mapper = MentionRegUsageMapper::new();
+
+ let flush_inst = |func: &mut F,
+ mapper: &mut MentionRegUsageMapper,
+ iix: InstIx,
+ checker: Option<&mut CheckerContext>| {
+ trace!("map_regs for {:?}", iix);
+ let mut inst = func.get_insn_mut(iix);
+ F::map_regs(&mut inst, mapper);
+
+ if let Some(checker) = checker {
+ let block_ix = insn_blocks[iix.get() as usize];
+ checker
+ .handle_insn(reg_universe, func, block_ix, iix, mapper)
+ .unwrap();
+ }
+
+ mapper.clear();
+ };
+
+ let mut prev_iix = mention_map[0].0;
+ for (iix, mention_set, vreg, rreg) in mention_map {
+ if prev_iix != iix {
+ // Flush previous instruction.
+ flush_inst(func, &mut mapper, prev_iix, checker.as_mut());
+ prev_iix = iix;
+ }
+
+ trace!(
+ "{:?}: {:?} is in {:?} at {:?}",
+ iix,
+ vreg,
+ rreg,
+ mention_set
+ );
+
+ // Fill in new information at the given index.
+ if mention_set.is_use() {
+ if let Some(prev_rreg) = mapper.lookup_use(vreg) {
+ debug_assert_eq!(prev_rreg, rreg, "different use allocs for {:?}", vreg);
+ }
+ mapper.set_use(vreg, rreg);
+ }
+
+ let included_in_clobbers = func.is_included_in_clobbers(func.get_insn(iix));
+ if mention_set.is_mod() {
+ if let Some(prev_rreg) = mapper.lookup_use(vreg) {
+ debug_assert_eq!(prev_rreg, rreg, "different use allocs for {:?}", vreg);
+ }
+ if let Some(prev_rreg) = mapper.lookup_def(vreg) {
+ debug_assert_eq!(prev_rreg, rreg, "different def allocs for {:?}", vreg);
+ }
+
+ mapper.set_use(vreg, rreg);
+ mapper.set_def(vreg, rreg);
+ if included_in_clobbers {
+ clobbered_registers.insert(rreg);
+ }
+ }
+
+ if mention_set.is_def() {
+ if let Some(prev_rreg) = mapper.lookup_def(vreg) {
+ debug_assert_eq!(prev_rreg, rreg, "different def allocs for {:?}", vreg);
+ }
+
+ mapper.set_def(vreg, rreg);
+ if included_in_clobbers {
+ clobbered_registers.insert(rreg);
+ }
+ }
+ }
+
+ // Flush last instruction.
+ flush_inst(func, &mut mapper, prev_iix, checker.as_mut());
+
+ clobbered_registers
+}
+
+/// Fills in the register assignments into instructions.
+#[inline(never)]
+fn apply_registers<F: Function>(
+ func: &mut F,
+ virtual_intervals: &Vec<VirtualInterval>,
+ memory_moves: Vec<InstToInsertAndExtPoint>,
+ reg_universe: &RealRegUniverse,
+ num_spill_slots: u32,
+ use_checker: bool,
+) -> Result<RegAllocResult<F>, RegAllocError> {
+ info!("apply_registers");
+
+ let clobbered_registers = set_registers(
+ func,
+ virtual_intervals,
+ reg_universe,
+ use_checker,
+ &memory_moves,
+ );
+
+ let safepoint_insns = vec![];
+ let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
+ add_spills_reloads_and_moves(func, &safepoint_insns, memory_moves)
+ .map_err(|e| RegAllocError::Other(e))?;
+ assert!(new_safepoint_insns.is_empty()); // because `safepoint_insns` is also empty.
+
+    // And now remove, from the clobbered registers set, all those not available to the allocator.
+    // But don't remove the reserved regs, since we might have modified those.
+ clobbered_registers.filter_map(|&reg| {
+ if reg.get_index() >= reg_universe.allocable {
+ None
+ } else {
+ Some(reg)
+ }
+ });
+
+ Ok(RegAllocResult {
+ insns: final_insns,
+ target_map,
+ orig_insn_map: new_to_old_insn_map,
+ clobbered_registers,
+ num_spill_slots,
+ block_annotations: None,
+ stackmaps: vec![],
+ new_safepoint_insns,
+ })
+}
diff --git a/third_party/rust/regalloc/src/linear_scan/resolve_moves.rs b/third_party/rust/regalloc/src/linear_scan/resolve_moves.rs
new file mode 100644
index 0000000000..8012404a86
--- /dev/null
+++ b/third_party/rust/regalloc/src/linear_scan/resolve_moves.rs
@@ -0,0 +1,889 @@
+use super::{next_use, IntId, Location, RegUses, VirtualInterval};
+use crate::{
+ data_structures::{BlockIx, InstPoint, Point},
+ inst_stream::{InstExtPoint, InstToInsert, InstToInsertAndExtPoint},
+ sparse_set::SparseSet,
+ Function, RealReg, Reg, SpillSlot, TypedIxVec, VirtualReg, Writable,
+};
+
+use log::{debug, info, trace};
+use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
+use smallvec::SmallVec;
+use std::fmt;
+
+fn resolve_moves_in_block<F: Function>(
+ func: &F,
+ intervals: &Vec<VirtualInterval>,
+ reg_uses: &RegUses,
+ scratches_by_rc: &[Option<RealReg>],
+ spill_slot: &mut u32,
+ moves_in_blocks: &mut Vec<InstToInsertAndExtPoint>,
+ tmp_ordered_moves: &mut Vec<MoveOp>,
+ tmp_stack: &mut Vec<MoveOp>,
+) {
+ let mut block_ends = HashSet::default();
+ let mut block_starts = HashSet::default();
+ for bix in func.blocks() {
+ let insts = func.block_insns(bix);
+ block_ends.insert(insts.last());
+ block_starts.insert(insts.first());
+ }
+
+ let mut reloads_at_inst = HashMap::default();
+ let mut spills_at_inst = Vec::new();
+
+ for interval in intervals {
+ let parent_id = match interval.parent {
+ Some(pid) => pid,
+ None => {
+                // In unreachable code, it's possible that a given interval has no
+                // parent and is assigned to a stack location for its whole lifetime.
+                //
+                // In reachable code, the analysis only creates intervals for virtual
+                // registers with at least one register use, so a parentless interval
+                // (i.e. one that has never been split) can't live in a stack slot.
+ #[cfg(debug_assertions)]
+ debug_assert!(
+ interval.location.spill().is_none()
+ || (next_use(interval, InstPoint::min_value(), reg_uses,).is_none())
+ );
+ continue;
+ }
+ };
+
+ let parent = &intervals[parent_id.0];
+
+ // If this is a move between blocks, handle it as such.
+ if parent.end.pt() == Point::Def
+ && interval.start.pt() == Point::Use
+ && block_ends.contains(&parent.end.iix())
+ && block_starts.contains(&interval.start.iix())
+ {
+ continue;
+ }
+
+ let child_start = interval.start;
+ let vreg = interval.vreg;
+
+ match interval.location {
+ Location::None => panic!("interval has no location after regalloc!"),
+
+ Location::Reg(rreg) => {
+ // Reconnect with the parent location, by adding a move if needed.
+ if let Some(next_use) = next_use(interval, child_start, reg_uses) {
+ // No need to reload before a new definition.
+ if next_use.pt() == Point::Def {
+ continue;
+ }
+ };
+
+ let mut at_inst = child_start;
+ match at_inst.pt() {
+ Point::Use => {
+ at_inst.set_pt(Point::Reload);
+ }
+ Point::Def => {
+ at_inst.set_pt(Point::Spill);
+ }
+ _ => unreachable!(),
+ }
+
+ let entry = reloads_at_inst.entry(at_inst).or_insert_with(|| Vec::new());
+
+ match parent.location {
+ Location::None => unreachable!(),
+
+ Location::Reg(from_rreg) => {
+ if from_rreg != rreg {
+ debug!(
+ "inblock fixup: {:?} move {:?} -> {:?} at {:?}",
+ interval.id, from_rreg, rreg, at_inst
+ );
+ entry.push(MoveOp::new_move(from_rreg, rreg, vreg));
+ }
+ }
+
+ Location::Stack(spill) => {
+ debug!(
+ "inblock fixup: {:?} reload {:?} -> {:?} at {:?}",
+ interval.id, spill, rreg, at_inst
+ );
+ entry.push(MoveOp::new_reload(spill, rreg, vreg));
+ }
+ }
+ }
+
+ Location::Stack(spill) => {
+ // This interval has been spilled (i.e. split). Spill after the last def or before
+ // the last use.
+ let mut at_inst = parent.end;
+ at_inst.set_pt(if at_inst.pt() == Point::Use {
+ Point::Reload
+ } else {
+ debug_assert!(at_inst.pt() == Point::Def);
+ Point::Spill
+ });
+
+ match parent.location {
+ Location::None => unreachable!(),
+
+ Location::Reg(rreg) => {
+ debug!(
+ "inblock fixup: {:?} spill {:?} -> {:?} at {:?}",
+ interval.id, rreg, spill, at_inst
+ );
+ spills_at_inst.push(InstToInsertAndExtPoint::new(
+ InstToInsert::Spill {
+ to_slot: spill,
+ from_reg: rreg,
+ for_vreg: Some(vreg),
+ },
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ }
+
+ Location::Stack(parent_spill) => {
+ debug_assert_eq!(parent_spill, spill);
+ }
+ }
+ }
+ }
+ }
+
+ // Flush the memory moves caused by in-block fixups. Conceptually, the spills
+ // must happen after the right locations have been set, that is, after the
+ // reloads. Reloads may include several moves that must happen in parallel
+ // (e.g. if two real regs must be swapped), so process them first. Once all
+ // the parallel assignments have been done, push forward all the spills.
+ for (at_inst, mut pending_moves) in reloads_at_inst {
+ schedule_moves(&mut pending_moves, tmp_ordered_moves, tmp_stack);
+ emit_moves(
+ at_inst,
+ &tmp_ordered_moves,
+ spill_slot,
+ scratches_by_rc,
+ moves_in_blocks,
+ );
+ }
+
+ moves_in_blocks.append(&mut spills_at_inst);
+}
+
+#[derive(Clone, Copy)]
+enum BlockPos {
+ Start,
+ End,
+}
+
+#[derive(Default, Clone)]
+struct BlockInfo {
+ start: SmallVec<[(VirtualReg, IntId); 4]>,
+ end: SmallVec<[(VirtualReg, IntId); 4]>,
+}
+
+static UNSORTED_THRESHOLD: usize = 8;
+
+impl BlockInfo {
+ #[inline(never)]
+ fn insert(&mut self, pos: BlockPos, vreg: VirtualReg, id: IntId) {
+ match pos {
+ BlockPos::Start => {
+ #[cfg(debug_assertions)]
+ debug_assert!(self.start.iter().find(|prev| prev.0 == vreg).is_none());
+ self.start.push((vreg, id));
+ }
+ BlockPos::End => {
+ #[cfg(debug_assertions)]
+ debug_assert!(self.end.iter().find(|prev| prev.0 == vreg).is_none());
+ self.end.push((vreg, id));
+ }
+ }
+ }
+
+ #[inline(never)]
+ fn finish(&mut self) {
+ if self.start.len() >= UNSORTED_THRESHOLD {
+ self.start.sort_unstable_by_key(|pair| pair.0);
+ }
+ if self.end.len() >= UNSORTED_THRESHOLD {
+ self.end.sort_unstable_by_key(|pair| pair.0);
+ }
+ }
+
+ #[inline(never)]
+ fn lookup(&self, pos: BlockPos, vreg: &VirtualReg) -> IntId {
+ let array = match pos {
+ BlockPos::Start => &self.start,
+ BlockPos::End => &self.end,
+ };
+ if array.len() >= UNSORTED_THRESHOLD {
+ array[array.binary_search_by_key(vreg, |pair| pair.0).unwrap()].1
+ } else {
+ array
+ .iter()
+ .find(|el| el.0 == *vreg)
+ .expect("should have found target reg")
+ .1
+ }
+ }
+}
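+
+// For illustration: UNSORTED_THRESHOLD switches lookup strategies: below 8 entries a
+// linear scan of the SmallVec is cheaper, while at 8 or more entries `finish()` sorts the
+// vectors by vreg so that `lookup()` can binary-search them.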
+
+/// For each block, collect a mapping of block_{start, end} -> actual location, to make the
+/// across-blocks fixup phase fast.
+#[inline(never)]
+fn collect_block_infos<F: Function>(
+ func: &F,
+ intervals: &Vec<VirtualInterval>,
+ liveins: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ liveouts: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+) -> Vec<BlockInfo> {
+ // First, collect the first and last instructions of each block.
+ let mut block_start_and_ends = Vec::with_capacity(2 * func.blocks().len());
+ for bix in func.blocks() {
+ let insts = func.block_insns(bix);
+ block_start_and_ends.push((InstPoint::new_use(insts.first()), BlockPos::Start, bix));
+ block_start_and_ends.push((InstPoint::new_def(insts.last()), BlockPos::End, bix));
+ }
+
+ // Sort this array by instruction point, to be able to do binary search later.
+ block_start_and_ends.sort_unstable_by_key(|pair| pair.0);
+
+ // Preallocate the block information, with the final size of each vector.
+ let mut infos = Vec::with_capacity(func.blocks().len());
+ for bix in func.blocks() {
+ infos.push(BlockInfo {
+ start: SmallVec::with_capacity(liveins[bix].card()),
+ end: SmallVec::with_capacity(liveouts[bix].card()),
+ });
+ }
+
+ // For each interval:
+ // - find the first block start or end instruction that's in the interval, with a binary search
+ // on the previous array.
+    // - add an entry for each livein or liveout variable in the block info.
+ for int in intervals {
+ let mut i = match block_start_and_ends.binary_search_by_key(&int.start, |pair| pair.0) {
+ Ok(i) => i,
+ Err(i) => i,
+ };
+
+ let vreg = int.vreg;
+ let id = int.id;
+
+ while let Some(&(inst, pos, bix)) = block_start_and_ends.get(i) {
+ if inst > int.end {
+ break;
+ }
+
+ #[cfg(debug_assertions)]
+ debug_assert!(int.covers(inst));
+
+ // Skip virtual registers that are not live-in (at start) or live-out (at end).
+ match pos {
+ BlockPos::Start => {
+ if !liveins[bix].contains(vreg.to_reg()) {
+ i += 1;
+ continue;
+ }
+ }
+ BlockPos::End => {
+ if !liveouts[bix].contains(vreg.to_reg()) {
+ i += 1;
+ continue;
+ }
+ }
+ }
+
+ infos[bix.get() as usize].insert(pos, vreg, id);
+ i += 1;
+ }
+ }
+
+ for info in infos.iter_mut() {
+ info.finish();
+ }
+
+ infos
+}
+
+/// Figure out the sequence of parallel moves to insert at block boundaries:
+/// - for each block
+/// - for each liveout vreg in this block
+/// - for each successor of this block
+/// - if the locations allocated in the block and its successor don't
+/// match, insert a pending move from one location to the other.
+///
+/// Once that's done:
+/// - resolve cycles in the pending moves
+/// - generate real moves from the pending moves.
+#[inline(never)]
+fn resolve_moves_across_blocks<F: Function>(
+ func: &F,
+ liveins: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ liveouts: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ intervals: &Vec<VirtualInterval>,
+ scratches_by_rc: &[Option<RealReg>],
+ spill_slot: &mut u32,
+ moves_at_block_starts: &mut Vec<InstToInsertAndExtPoint>,
+ moves_at_block_ends: &mut Vec<InstToInsertAndExtPoint>,
+ tmp_ordered_moves: &mut Vec<MoveOp>,
+ tmp_stack: &mut Vec<MoveOp>,
+) {
+ let mut parallel_move_map = HashMap::default();
+
+ let block_info = collect_block_infos(func, intervals, liveins, liveouts);
+
+ let mut seen_successors = HashSet::default();
+ for block in func.blocks() {
+ let successors = func.block_succs(block);
+
+ // Where to insert the fixup move, if needed? If there's more than one
+ // successor to the current block, inserting in the current block will
+ // impact all the successors.
+ //
+ // We assume critical edges have been split, so
+ // if the current block has more than one successor, then its successors
+ // have at most one predecessor.
+ let cur_has_one_succ = successors.len() == 1;
+
+ for &reg in liveouts[block].iter() {
+ let vreg = if let Some(vreg) = reg.as_virtual_reg() {
+ vreg
+ } else {
+ continue;
+ };
+
+ seen_successors.clear();
+
+ let cur_id = block_info[block.get() as usize].lookup(BlockPos::End, &vreg);
+ let cur_int = &intervals[cur_id.0];
+ let loc_at_cur_end = cur_int.location;
+
+ for &succ in successors.iter() {
+ if !liveins[succ].contains(reg) {
+ // This variable isn't live in this block.
+ continue;
+ }
+ if !seen_successors.insert(succ) {
+ continue;
+ }
+
+ let succ_id = block_info[succ.get() as usize].lookup(BlockPos::Start, &vreg);
+ let succ_int = &intervals[succ_id.0];
+
+ // If the two intervals aren't related to the same virtual range, then the move is
+ // not required.
+ if cur_int.ancestor != succ_int.ancestor {
+ continue;
+ }
+
+ let loc_at_succ_start = succ_int.location;
+
+ let (at_inst, block_pos) = if cur_has_one_succ {
+ // Before the control flow instruction.
+ let pos = InstPoint::new_reload(func.block_insns(block).last());
+ (pos, BlockPos::End)
+ } else {
+ let pos = InstPoint::new_reload(func.block_insns(succ).first());
+ (pos, BlockPos::Start)
+ };
+
+ let pending_moves = parallel_move_map
+ .entry(at_inst)
+ .or_insert_with(|| (Vec::new(), block_pos));
+
+ match (loc_at_cur_end, loc_at_succ_start) {
+ (Location::Reg(cur_rreg), Location::Reg(succ_rreg)) => {
+ if cur_rreg == succ_rreg {
+ continue;
+ }
+ debug!(
+ "boundary fixup: move {:?} -> {:?} at {:?} for {:?} between {:?} and {:?}",
+ cur_rreg,
+ succ_rreg,
+ at_inst,
+ vreg,
+ block,
+ succ
+ );
+ pending_moves
+ .0
+ .push(MoveOp::new_move(cur_rreg, succ_rreg, vreg));
+ }
+
+ (Location::Reg(cur_rreg), Location::Stack(spillslot)) => {
+ debug!(
+ "boundary fixup: spill {:?} -> {:?} at {:?} for {:?} between {:?} and {:?}",
+ cur_rreg,
+ spillslot,
+ at_inst,
+ vreg,
+ block,
+ succ
+ );
+ pending_moves
+ .0
+ .push(MoveOp::new_spill(cur_rreg, spillslot, vreg));
+ }
+
+ (Location::Stack(spillslot), Location::Reg(rreg)) => {
+ debug!(
+ "boundary fixup: reload {:?} -> {:?} at {:?} for {:?} between {:?} and {:?}",
+ spillslot,
+ rreg,
+ at_inst,
+ vreg,
+ block,
+ succ
+ );
+ pending_moves
+ .0
+ .push(MoveOp::new_reload(spillslot, rreg, vreg));
+ }
+
+ (Location::Stack(left_spill_slot), Location::Stack(right_spill_slot)) => {
+ // A stack-to-stack move should not happen here: two ranges for the
+ // same vreg can't intersect, so the same stack slot ought to be
+ // reused in this case.
+ debug_assert_eq!(
+ left_spill_slot, right_spill_slot,
+ "Moves from stack to stack only happen on the same vreg, thus the same stack slot"
+ );
+ continue;
+ }
+
+ (_, _) => {
+ panic!("register or stack slots must have been allocated.");
+ }
+ };
+ }
+ }
+
+ // Flush the memory moves caused by block fixups for this block.
+ for (at_inst, (move_insts, block_pos)) in parallel_move_map.iter_mut() {
+ schedule_moves(move_insts, tmp_ordered_moves, tmp_stack);
+
+ match block_pos {
+ BlockPos::Start => {
+ emit_moves(
+ *at_inst,
+ &tmp_ordered_moves,
+ spill_slot,
+ scratches_by_rc,
+ moves_at_block_starts,
+ );
+ }
+ BlockPos::End => {
+ emit_moves(
+ *at_inst,
+ &tmp_ordered_moves,
+ spill_slot,
+ scratches_by_rc,
+ moves_at_block_ends,
+ );
+ }
+ };
+ }
+
+ parallel_move_map.clear();
+ }
+
+ debug!("");
+}
+
+#[inline(never)]
+pub(crate) fn run<F: Function>(
+ func: &F,
+ reg_uses: &RegUses,
+ intervals: &Vec<VirtualInterval>,
+ liveins: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ liveouts: &TypedIxVec<BlockIx, SparseSet<Reg>>,
+ spill_slot: &mut u32,
+ scratches_by_rc: &[Option<RealReg>],
+) -> Vec<InstToInsertAndExtPoint> {
+ info!("resolve_moves");
+
+ // Keep three lists of moves to insert:
+ // - moves across blocks, that must happen at the start of blocks,
+ // - moves within a given block,
+ // - moves across blocks, that must happen at the end of blocks.
+ //
+ // To maintain the property that these moves are eventually sorted at the end, we'll compute
+ // the final array of moves by concatenating these three arrays. `inst_stream` uses a stable
+ // sort, making sure the at-block-start/within-block/at-block-end ordering will be respected.
+ let mut moves_at_block_starts = Vec::new();
+ let mut moves_at_block_ends = Vec::new();
+ let mut moves_in_blocks = Vec::new();
+
+ let mut tmp_stack = Vec::new();
+ let mut tmp_ordered_moves = Vec::new();
+ resolve_moves_in_block(
+ func,
+ intervals,
+ reg_uses,
+ scratches_by_rc,
+ spill_slot,
+ &mut moves_in_blocks,
+ &mut tmp_ordered_moves,
+ &mut tmp_stack,
+ );
+
+ resolve_moves_across_blocks(
+ func,
+ liveins,
+ liveouts,
+ intervals,
+ scratches_by_rc,
+ spill_slot,
+ &mut moves_at_block_starts,
+ &mut moves_at_block_ends,
+ &mut tmp_ordered_moves,
+ &mut tmp_stack,
+ );
+
+ let mut insts_and_points = moves_at_block_starts;
+ insts_and_points.reserve(moves_in_blocks.len() + moves_at_block_ends.len());
+ insts_and_points.append(&mut moves_in_blocks);
+ insts_and_points.append(&mut moves_at_block_ends);
+
+ insts_and_points
+}
+
+#[derive(PartialEq, Debug)]
+enum MoveOperand {
+ Reg(RealReg),
+ Stack(SpillSlot),
+}
+
+impl MoveOperand {
+ fn aliases(&self, other: &Self) -> bool {
+ self == other
+ }
+}
+
+struct MoveOp {
+ from: MoveOperand,
+ to: MoveOperand,
+ vreg: VirtualReg,
+ cycle_begin: Option<usize>,
+ cycle_end: Option<usize>,
+}
+
+impl fmt::Debug for MoveOp {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "{:?}: {:?} -> {:?}", self.vreg, self.from, self.to)?;
+ if let Some(ref begin) = self.cycle_begin {
+ write!(fmt, ", start of cycle #{}", begin)?;
+ }
+ if let Some(ref end) = self.cycle_end {
+ write!(fmt, ", end of cycle #{}", end)?;
+ }
+ Ok(())
+ }
+}
+
+impl MoveOp {
+ fn new_move(from: RealReg, to: RealReg, vreg: VirtualReg) -> Self {
+ Self {
+ from: MoveOperand::Reg(from),
+ to: MoveOperand::Reg(to),
+ vreg,
+ cycle_begin: None,
+ cycle_end: None,
+ }
+ }
+
+ fn new_spill(from: RealReg, to: SpillSlot, vreg: VirtualReg) -> Self {
+ Self {
+ from: MoveOperand::Reg(from),
+ to: MoveOperand::Stack(to),
+ vreg,
+ cycle_begin: None,
+ cycle_end: None,
+ }
+ }
+
+ fn new_reload(from: SpillSlot, to: RealReg, vreg: VirtualReg) -> Self {
+ Self {
+ from: MoveOperand::Stack(from),
+ to: MoveOperand::Reg(to),
+ vreg,
+ cycle_begin: None,
+ cycle_end: None,
+ }
+ }
+
+ fn gen_inst(&self) -> InstToInsert {
+ match self.from {
+ MoveOperand::Reg(from) => match self.to {
+ MoveOperand::Reg(to) => InstToInsert::Move {
+ to_reg: Writable::from_reg(to),
+ from_reg: from,
+ for_vreg: self.vreg,
+ },
+ MoveOperand::Stack(to) => InstToInsert::Spill {
+ to_slot: to,
+ from_reg: from,
+ for_vreg: Some(self.vreg),
+ },
+ },
+ MoveOperand::Stack(from) => match self.to {
+ MoveOperand::Reg(to) => InstToInsert::Reload {
+ to_reg: Writable::from_reg(to),
+ from_slot: from,
+ for_vreg: Some(self.vreg),
+ },
+ MoveOperand::Stack(_to) => unreachable!("stack to stack move"),
+ },
+ }
+ }
+}
+
+fn find_blocking_move<'a>(
+ pending: &'a mut Vec<MoveOp>,
+ last: &MoveOp,
+) -> Option<(usize, &'a mut MoveOp)> {
+ for (i, other) in pending.iter_mut().enumerate() {
+ if other.from.aliases(&last.to) {
+ return Some((i, other));
+ }
+ }
+ None
+}
+
+fn find_cycled_move<'a>(
+ stack: &'a mut Vec<MoveOp>,
+ from: &mut usize,
+ last: &MoveOp,
+) -> Option<&'a mut MoveOp> {
+ for i in *from..stack.len() {
+ *from += 1;
+ let other = &stack[i];
+ if other.from.aliases(&last.to) {
+ return Some(&mut stack[i]);
+ }
+ }
+ None
+}
+
+/// Given a pending list of moves, fills `ordered_moves` with the same moves ordered in a correct
+/// way, i.e., such that no move clobbers the source of a later one. (A small illustration lives
+/// in the test module at the end of this file.)
+#[inline(never)]
+fn schedule_moves(
+ pending: &mut Vec<MoveOp>,
+ ordered_moves: &mut Vec<MoveOp>,
+ stack: &mut Vec<MoveOp>,
+) {
+ ordered_moves.clear();
+
+ let mut num_cycles = 0;
+ let mut cur_cycles = 0;
+
+ trace!("pending moves: {:#?}", pending);
+
+ while let Some(pm) = pending.pop() {
+ trace!("handling pending move {:?}", pm);
+ debug_assert!(
+ pm.from != pm.to,
+ "spurious moves should not have been inserted"
+ );
+
+ stack.clear();
+ stack.push(pm);
+
+ while !stack.is_empty() {
+ let blocking_pair = find_blocking_move(pending, stack.last().unwrap());
+
+ if let Some((blocking_idx, blocking)) = blocking_pair {
+ trace!("found blocker: {:?}", blocking);
+ let mut stack_cur = 0;
+
+ let has_cycles =
+ if let Some(mut cycled) = find_cycled_move(stack, &mut stack_cur, blocking) {
+ trace!("found cycle: {:?}", cycled);
+ debug_assert!(cycled.cycle_end.is_none());
+ cycled.cycle_end = Some(cur_cycles);
+ true
+ } else {
+ false
+ };
+
+ if has_cycles {
+ loop {
+ match find_cycled_move(stack, &mut stack_cur, blocking) {
+ Some(ref mut cycled) => {
+ trace!("found more cycles ending on blocker: {:?}", cycled);
+ debug_assert!(cycled.cycle_end.is_none());
+ cycled.cycle_end = Some(cur_cycles);
+ }
+ None => break,
+ }
+ }
+
+ debug_assert!(blocking.cycle_begin.is_none());
+ blocking.cycle_begin = Some(cur_cycles);
+ cur_cycles += 1;
+ }
+
+ let blocking = pending.remove(blocking_idx);
+ stack.push(blocking);
+ } else {
+ // There's no blocking move! We can push this in the ordered list of
+ // moves.
+ // TODO IonMonkey has more optimizations for this case.
+ let last = stack.pop().unwrap();
+ ordered_moves.push(last);
+ }
+ }
+
+ if num_cycles < cur_cycles {
+ num_cycles = cur_cycles;
+ }
+ cur_cycles = 0;
+ }
+}
+
+#[inline(never)]
+fn emit_moves(
+ at_inst: InstPoint,
+ ordered_moves: &Vec<MoveOp>,
+ num_spill_slots: &mut u32,
+ scratches_by_rc: &[Option<RealReg>],
+ moves_in_blocks: &mut Vec<InstToInsertAndExtPoint>,
+) {
+ let mut spill_slot = None;
+ let mut in_cycle = false;
+
+ trace!("emit_moves");
+
+ for mov in ordered_moves {
+ if let Some(_) = &mov.cycle_end {
+ debug_assert!(in_cycle);
+
+ // We are inside a cycle of the form:
+ // (A -> B)
+ // (B -> A)
+ // This case handles (B -> A), which we reach last. We emit a move from
+ // the saved value of B to A.
+ match mov.to {
+ MoveOperand::Reg(dst_reg) => {
+ let inst = InstToInsert::Reload {
+ to_reg: Writable::from_reg(dst_reg),
+ from_slot: spill_slot.expect("should have a cycle spill slot"),
+ for_vreg: Some(mov.vreg),
+ };
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ trace!(
+ "finishing cycle: {:?} -> {:?}",
+ spill_slot.unwrap(),
+ dst_reg
+ );
+ }
+ MoveOperand::Stack(dst_spill) => {
+ let scratch = scratches_by_rc[mov.vreg.get_class() as usize]
+ .expect("missing scratch reg");
+ let inst = InstToInsert::Reload {
+ to_reg: Writable::from_reg(scratch),
+ from_slot: spill_slot.expect("should have a cycle spill slot"),
+ for_vreg: Some(mov.vreg),
+ };
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ let inst = InstToInsert::Spill {
+ to_slot: dst_spill,
+ from_reg: scratch,
+ for_vreg: Some(mov.vreg),
+ };
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ trace!(
+ "finishing cycle: {:?} -> {:?} -> {:?}",
+ spill_slot.unwrap(),
+ scratch,
+ dst_spill
+ );
+ }
+ };
+
+ in_cycle = false;
+ continue;
+ }
+
+ if let Some(_) = &mov.cycle_begin {
+ debug_assert!(!in_cycle);
+
+ // We are entering a cycle of the form:
+ // (A -> B)
+ // (B -> A)
+ // This case handles (A -> B), which we reach first. We save B, then allow
+ // the original move to continue.
+ match spill_slot {
+ Some(_) => {}
+ None => {
+ spill_slot = Some(SpillSlot::new(*num_spill_slots));
+ *num_spill_slots += 1;
+ }
+ }
+
+ match mov.to {
+ MoveOperand::Reg(src_reg) => {
+ let inst = InstToInsert::Spill {
+ to_slot: spill_slot.unwrap(),
+ from_reg: src_reg,
+ for_vreg: Some(mov.vreg),
+ };
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ trace!("starting cycle: {:?} -> {:?}", src_reg, spill_slot.unwrap());
+ }
+ MoveOperand::Stack(src_spill) => {
+ let scratch = scratches_by_rc[mov.vreg.get_class() as usize]
+ .expect("missing scratch reg");
+ let inst = InstToInsert::Reload {
+ to_reg: Writable::from_reg(scratch),
+ from_slot: src_spill,
+ for_vreg: Some(mov.vreg),
+ };
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ let inst = InstToInsert::Spill {
+ to_slot: spill_slot.expect("should have a cycle spill slot"),
+ from_reg: scratch,
+ for_vreg: Some(mov.vreg),
+ };
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ trace!(
+ "starting cycle: {:?} -> {:?} -> {:?}",
+ src_spill,
+ scratch,
+ spill_slot.unwrap()
+ );
+ }
+ };
+
+ in_cycle = true;
+ }
+
+ // A normal move which is not part of a cycle.
+ let inst = mov.gen_inst();
+ moves_in_blocks.push(InstToInsertAndExtPoint::new(
+ inst,
+ InstExtPoint::from_inst_point(at_inst),
+ ));
+ trace!("moving {:?} -> {:?}", mov.from, mov.to);
+ }
+}
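+
+// NOTE: the following test module is an added illustration, not part of the upstream
+// crate. It is a minimal sketch that exercises `schedule_moves` directly, to make the
+// ordering and cycle-marking behaviour described above concrete. The `vreg`/`rreg`
+// helpers mirror the ones used by the test module in `reg_maps.rs`.
+#[cfg(test)]
+mod schedule_moves_sketch {
+    use super::*;
+    use crate::{Reg, RegClass};
+
+    fn vreg(idx: u32) -> VirtualReg {
+        Reg::new_virtual(RegClass::I64, idx).to_virtual_reg()
+    }
+    fn rreg(idx: u8) -> RealReg {
+        Reg::new_real(RegClass::I64, /* enc = */ 0, /* index = */ idx).to_real_reg()
+    }
+
+    #[test]
+    fn overlapping_moves_are_ordered() {
+        // r1 -> r2 and r2 -> r3: the move out of r2 must be emitted first,
+        // otherwise r2 would be clobbered before being read.
+        let mut pending = vec![
+            MoveOp::new_move(rreg(1), rreg(2), vreg(0)),
+            MoveOp::new_move(rreg(2), rreg(3), vreg(1)),
+        ];
+        let (mut ordered, mut stack) = (Vec::new(), Vec::new());
+        schedule_moves(&mut pending, &mut ordered, &mut stack);
+        assert_eq!(ordered.len(), 2);
+        assert_eq!(ordered[0].from, MoveOperand::Reg(rreg(2)));
+        assert_eq!(ordered[1].from, MoveOperand::Reg(rreg(1)));
+    }
+
+    #[test]
+    fn cycles_are_marked() {
+        // r1 -> r2 and r2 -> r1 form a cycle; the scheduler marks its begin and end
+        // so that `emit_moves` can break it with a spill slot.
+        let mut pending = vec![
+            MoveOp::new_move(rreg(1), rreg(2), vreg(0)),
+            MoveOp::new_move(rreg(2), rreg(1), vreg(1)),
+        ];
+        let (mut ordered, mut stack) = (Vec::new(), Vec::new());
+        schedule_moves(&mut pending, &mut ordered, &mut stack);
+        assert_eq!(ordered.len(), 2);
+        assert!(ordered[0].cycle_begin.is_some());
+        assert!(ordered[1].cycle_end.is_some());
+    }
+}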
diff --git a/third_party/rust/regalloc/src/pretty_print.rs b/third_party/rust/regalloc/src/pretty_print.rs
new file mode 100644
index 0000000000..8f01c0766e
--- /dev/null
+++ b/third_party/rust/regalloc/src/pretty_print.rs
@@ -0,0 +1,56 @@
+//! Pretty-printing for the main data structures.
+
+use crate::data_structures::WritableBase;
+use crate::{RealRegUniverse, Reg, Writable};
+
+/// A trait for printing instruction bits and pieces, with the ability to take a
+/// contextualising `RealRegUniverse` that is used to give proper names to registers.
+pub trait PrettyPrint {
+ /// Return a string that shows the implementing object in context of the given
+ /// `RealRegUniverse`, if provided.
+ fn show_rru(&self, maybe_reg_universe: Option<&RealRegUniverse>) -> String;
+}
+
+/// Same as `PrettyPrint`, but can also take a size hint into account to specialize the displayed
+/// string.
+pub trait PrettyPrintSized: PrettyPrint {
+ /// The same as `show_rru`, but with an optional hint giving a size in bytes. Its
+ /// interpretation is object-dependent, and it is intended to pass around enough information to
+ /// facilitate printing sub-parts of real registers correctly. Objects may ignore size hints
+ /// that are irrelevant to them.
+ ///
+ /// The default implementation ignores the size hint.
+ fn show_rru_sized(&self, maybe_reg_universe: Option<&RealRegUniverse>, _size: u8) -> String {
+ self.show_rru(maybe_reg_universe)
+ }
+}
+
+impl PrettyPrint for Reg {
+ fn show_rru(&self, maybe_reg_universe: Option<&RealRegUniverse>) -> String {
+ if self.is_real() {
+ if let Some(rru) = maybe_reg_universe {
+ let reg_ix = self.get_index();
+ assert!(
+ reg_ix < rru.regs.len(),
+ "unknown real register with index {:?}",
+ reg_ix
+ );
+ return rru.regs[reg_ix].1.to_string();
+ }
+ }
+ // The reg is virtual, or we have no universe. Be generic.
+ format!("%{:?}", self)
+ }
+}
+
+impl<R: PrettyPrint + WritableBase> PrettyPrint for Writable<R> {
+ fn show_rru(&self, maybe_reg_universe: Option<&RealRegUniverse>) -> String {
+ self.to_reg().show_rru(maybe_reg_universe)
+ }
+}
+
+impl<R: PrettyPrintSized + WritableBase> PrettyPrintSized for Writable<R> {
+ fn show_rru_sized(&self, maybe_reg_universe: Option<&RealRegUniverse>, size: u8) -> String {
+ self.to_reg().show_rru_sized(maybe_reg_universe, size)
+ }
+}
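+
+// NOTE: added sketch, not part of the upstream crate. It only illustrates the fallback
+// behaviour of `show_rru` when no `RealRegUniverse` is supplied: the register is shown
+// using the generic `%{:?}` rendering.
+#[cfg(test)]
+mod sketch_test {
+    use super::*;
+    use crate::RegClass;
+
+    #[test]
+    fn show_rru_without_universe_is_generic() {
+        let vreg = Reg::new_virtual(RegClass::I64, 0);
+        assert!(vreg.show_rru(None).starts_with('%'));
+    }
+}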
diff --git a/third_party/rust/regalloc/src/reg_maps.rs b/third_party/rust/regalloc/src/reg_maps.rs
new file mode 100644
index 0000000000..f65ea372a3
--- /dev/null
+++ b/third_party/rust/regalloc/src/reg_maps.rs
@@ -0,0 +1,347 @@
+use crate::{RealReg, RegUsageMapper, VirtualReg};
+use smallvec::SmallVec;
+use std::mem;
+
+/// This data structure holds the mappings needed to map an instruction's uses, mods and defs from
+/// virtual to real registers.
+///
+/// It remembers the sets of mappings (of a virtual register to a real register) over time, based
+/// on precise virtual ranges and their allocations.
+///
+/// This is the right implementation to use when a register allocation algorithm keeps track of
+/// precise virtual ranges, and maintains them over time.
+#[derive(Debug)]
+pub struct VrangeRegUsageMapper {
+ /// Dense vector-map indexed by virtual register number. This is consulted
+ /// directly for use-queries and augmented with the overlay for def-queries.
+ slots: Vec<RealReg>,
+
+ /// Overlay for def-queries. This is a set of updates that occurs "during"
+ /// the instruction in question, and will be applied to the slots array
+ /// once we are done processing this instruction (in preparation for
+ /// the next one).
+ overlay: SmallVec<[(VirtualReg, RealReg); 16]>,
+}
+
+impl VrangeRegUsageMapper {
+ /// Allocate a reg-usage mapper with the given predicted vreg capacity.
+ pub(crate) fn new(vreg_capacity: usize) -> VrangeRegUsageMapper {
+ VrangeRegUsageMapper {
+ slots: Vec::with_capacity(vreg_capacity),
+ overlay: SmallVec::new(),
+ }
+ }
+
+ /// Is the overlay past the sorted-size threshold?
+ fn is_overlay_large_enough_to_sort(&self) -> bool {
+ // Use the SmallVec spill-to-heap threshold as a threshold for "large
+ // enough to sort"; this has the effect of amortizing the cost of
+ // sorting along with the cost of copying out to heap memory, and also
+ // ensures that when we access heap (more likely to miss in cache), we
+ // do it with O(log N) accesses instead of O(N).
+ self.overlay.spilled()
+ }
+
+ /// Update the overlay.
+ pub(crate) fn set_overlay(&mut self, vreg: VirtualReg, rreg: Option<RealReg>) {
+ let rreg = rreg.unwrap_or(RealReg::invalid());
+ self.overlay.push((vreg, rreg));
+ }
+
+ /// Finish updates to the overlay, sorting if necessary.
+ pub(crate) fn finish_overlay(&mut self) {
+ if self.overlay.len() == 0 || !self.is_overlay_large_enough_to_sort() {
+ return;
+ }
+
+ // Sort stably, so that later updates continue to come after earlier
+ // ones.
+ self.overlay.sort_by_key(|pair| pair.0);
+ // Remove duplicates by collapsing runs of same-vreg pairs down to
+ // the last one.
+ let mut last_vreg = self.overlay[0].0;
+ let mut out = 0;
+ for i in 1..self.overlay.len() {
+ let this_vreg = self.overlay[i].0;
+ if this_vreg != last_vreg {
+ out += 1;
+ }
+ if i != out {
+ self.overlay[out] = self.overlay[i];
+ }
+ last_vreg = this_vreg;
+ }
+ let new_len = out + 1;
+ self.overlay.truncate(new_len);
+ }
+
+ /// Merge the overlay into the main map.
+ pub(crate) fn merge_overlay(&mut self) {
+ // Take the SmallVec and swap with empty to allow `&mut self` method
+ // call below.
+ let mappings = mem::replace(&mut self.overlay, SmallVec::new());
+ for (vreg, rreg) in mappings.into_iter() {
+ self.set_direct_internal(vreg, rreg);
+ }
+ }
+
+ /// Make a direct update to the mapping. Only usable when the overlay
+ /// is empty.
+ pub(crate) fn set_direct(&mut self, vreg: VirtualReg, rreg: Option<RealReg>) {
+ debug_assert!(self.overlay.is_empty());
+ let rreg = rreg.unwrap_or(RealReg::invalid());
+ self.set_direct_internal(vreg, rreg);
+ }
+
+ fn set_direct_internal(&mut self, vreg: VirtualReg, rreg: RealReg) {
+ let idx = vreg.get_index();
+ if idx >= self.slots.len() {
+ self.slots.resize(idx + 1, RealReg::invalid());
+ }
+ self.slots[idx] = rreg;
+ }
+
+ /// Perform a lookup directly in the main map. Returns `None` for
+ /// not-present.
+ fn lookup_direct(&self, vreg: VirtualReg) -> Option<RealReg> {
+ let idx = vreg.get_index();
+ if idx >= self.slots.len() {
+ None
+ } else {
+ Some(self.slots[idx])
+ }
+ }
+
+ /// Perform a lookup in the overlay. Returns `None` for not-present. No
+ /// fallback to main map (that happens in callers). Returns `Some` even
+ /// if mapped to `RealReg::invalid()`, because this is a tombstone
+ /// (represents deletion) in the overlay.
+ fn lookup_overlay(&self, vreg: VirtualReg) -> Option<RealReg> {
+ if self.is_overlay_large_enough_to_sort() {
+ // Do a binary search; we are guaranteed to have at most one
+ // matching because duplicates were collapsed after sorting.
+ if let Ok(idx) = self.overlay.binary_search_by_key(&vreg, |pair| pair.0) {
+ return Some(self.overlay[idx].1);
+ }
+ } else {
+ // Search in reverse order to find later updates first.
+ for &(this_vreg, this_rreg) in self.overlay.iter().rev() {
+ if this_vreg == vreg {
+ return Some(this_rreg);
+ }
+ }
+ }
+ None
+ }
+
+ /// Sanity check: check that all slots are empty. Typically for use at the
+ /// end of processing as a debug-assert.
+ pub(crate) fn is_empty(&self) -> bool {
+ self.overlay.iter().all(|pair| pair.1.is_invalid())
+ && self.slots.iter().all(|rreg| rreg.is_invalid())
+ }
+}
+
+impl RegUsageMapper for VrangeRegUsageMapper {
+ /// Return the `RealReg` if mapped, or `None`, for `vreg` occurring as a use
+ /// on the current instruction.
+ fn get_use(&self, vreg: VirtualReg) -> Option<RealReg> {
+ self.lookup_direct(vreg)
+ // Convert Some(RealReg::invalid()) to None.
+ .and_then(|reg| reg.maybe_valid())
+ }
+
+ /// Return the `RealReg` if mapped, or `None`, for `vreg` occurring as a def
+ /// on the current instruction.
+ fn get_def(&self, vreg: VirtualReg) -> Option<RealReg> {
+ self.lookup_overlay(vreg)
+ .or_else(|| self.lookup_direct(vreg))
+ // Convert Some(RealReg::invalid()) to None.
+ .and_then(|reg| reg.maybe_valid())
+ }
+
+ /// Return the `RealReg` if mapped, or `None`, for a `vreg` occurring as a
+ /// mod on the current instruction.
+ fn get_mod(&self, vreg: VirtualReg) -> Option<RealReg> {
+ let result = self.get_use(vreg);
+ debug_assert_eq!(result, self.get_def(vreg));
+ result
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::{Reg, RegClass, VirtualReg};
+
+ fn vreg(idx: u32) -> VirtualReg {
+ Reg::new_virtual(RegClass::I64, idx).to_virtual_reg()
+ }
+ fn rreg(idx: u8) -> RealReg {
+ Reg::new_real(RegClass::I64, /* enc = */ 0, /* index = */ idx).to_real_reg()
+ }
+
+ #[test]
+ fn test_reg_use_mapper() {
+ let mut mapper = VrangeRegUsageMapper::new(/* estimated vregs = */ 16);
+ assert_eq!(None, mapper.get_use(vreg(0)));
+ assert_eq!(None, mapper.get_def(vreg(0)));
+ assert_eq!(None, mapper.get_mod(vreg(0)));
+
+ mapper.set_direct(vreg(0), Some(rreg(1)));
+ mapper.set_direct(vreg(1), Some(rreg(2)));
+
+ assert_eq!(Some(rreg(1)), mapper.get_use(vreg(0)));
+ assert_eq!(Some(rreg(1)), mapper.get_def(vreg(0)));
+ assert_eq!(Some(rreg(1)), mapper.get_mod(vreg(0)));
+ assert_eq!(Some(rreg(2)), mapper.get_use(vreg(1)));
+ assert_eq!(Some(rreg(2)), mapper.get_def(vreg(1)));
+ assert_eq!(Some(rreg(2)), mapper.get_mod(vreg(1)));
+
+ mapper.set_overlay(vreg(0), Some(rreg(3)));
+ mapper.set_overlay(vreg(2), Some(rreg(4)));
+ mapper.finish_overlay();
+
+ assert_eq!(Some(rreg(1)), mapper.get_use(vreg(0)));
+ assert_eq!(Some(rreg(3)), mapper.get_def(vreg(0)));
+ // vreg 0 not valid for mod (use and def differ).
+ assert_eq!(Some(rreg(2)), mapper.get_use(vreg(1)));
+ assert_eq!(Some(rreg(2)), mapper.get_def(vreg(1)));
+ assert_eq!(Some(rreg(2)), mapper.get_mod(vreg(1)));
+ assert_eq!(None, mapper.get_use(vreg(2)));
+ assert_eq!(Some(rreg(4)), mapper.get_def(vreg(2)));
+ // vreg 2 not valid for mod (use and def differ).
+
+ mapper.merge_overlay();
+ assert_eq!(Some(rreg(3)), mapper.get_use(vreg(0)));
+ assert_eq!(Some(rreg(2)), mapper.get_use(vreg(1)));
+ assert_eq!(Some(rreg(4)), mapper.get_use(vreg(2)));
+ assert_eq!(None, mapper.get_use(vreg(3)));
+
+ // Check tombstoning behavior.
+ mapper.set_overlay(vreg(0), None);
+ mapper.finish_overlay();
+ assert_eq!(Some(rreg(3)), mapper.get_use(vreg(0)));
+ assert_eq!(None, mapper.get_def(vreg(0)));
+ mapper.merge_overlay();
+
+ // Check large (sorted) overlay mode.
+ for i in (2..50).rev() {
+ mapper.set_overlay(vreg(i), Some(rreg((i + 100) as u8)));
+ }
+ mapper.finish_overlay();
+ assert_eq!(None, mapper.get_use(vreg(0)));
+ assert_eq!(Some(rreg(2)), mapper.get_use(vreg(1)));
+ assert_eq!(Some(rreg(4)), mapper.get_use(vreg(2)));
+ for i in 2..50 {
+ assert_eq!(Some(rreg((i + 100) as u8)), mapper.get_def(vreg(i)));
+ }
+ mapper.merge_overlay();
+
+ for i in (0..100).rev() {
+ mapper.set_overlay(vreg(i), None);
+ }
+ mapper.finish_overlay();
+ for i in 0..100 {
+ assert_eq!(None, mapper.get_def(vreg(i)));
+ }
+ assert_eq!(false, mapper.is_empty());
+ mapper.merge_overlay();
+ assert_eq!(true, mapper.is_empty());
+
+ // Check multiple-update behavior in small mode.
+ mapper.set_overlay(vreg(1), Some(rreg(1)));
+ mapper.set_overlay(vreg(1), Some(rreg(2)));
+ mapper.finish_overlay();
+ assert_eq!(Some(rreg(2)), mapper.get_def(vreg(1)));
+ mapper.merge_overlay();
+ assert_eq!(Some(rreg(2)), mapper.get_use(vreg(1)));
+
+ mapper.set_overlay(vreg(1), Some(rreg(2)));
+ mapper.set_overlay(vreg(1), None);
+ mapper.finish_overlay();
+ assert_eq!(None, mapper.get_def(vreg(1)));
+ mapper.merge_overlay();
+ assert_eq!(None, mapper.get_use(vreg(1)));
+
+ // Check multiple-update behavior in sorted mode.
+ for i in 0..100 {
+ mapper.set_overlay(vreg(2), Some(rreg(i)));
+ }
+ for i in 0..100 {
+ mapper.set_overlay(vreg(2), Some(rreg(2 * i)));
+ }
+ mapper.finish_overlay();
+ assert_eq!(Some(rreg(198)), mapper.get_def(vreg(2)));
+ mapper.merge_overlay();
+ assert_eq!(Some(rreg(198)), mapper.get_use(vreg(2)));
+
+ for i in 0..100 {
+ mapper.set_overlay(vreg(2), Some(rreg(i)));
+ }
+ for _ in 0..100 {
+ mapper.set_overlay(vreg(2), None);
+ }
+ mapper.finish_overlay();
+ assert_eq!(None, mapper.get_def(vreg(50)));
+ mapper.merge_overlay();
+ assert_eq!(None, mapper.get_use(vreg(50)));
+ }
+}
+
+/// This implementation of RegUsageMapper relies on explicit mentions of vregs in instructions. The
+/// caller must keep track of these mentions, and for each instruction:
+///
+/// - clear the previous mappings, using `clear()`,
+/// - feed the mappings from vregs to rregs for uses and defs, with `set_use`/`set_def`,
+/// - then call the `Function::map_regs` function with this structure.
+///
+/// This avoids a lot of resizes, and makes it possible for algorithms that don't have precise live
+/// ranges to fill in vreg -> rreg mappings. A small usage sketch appears at the end of this file.
+#[derive(Debug)]
+pub struct MentionRegUsageMapper {
+ /// Sparse vector-map keyed by virtual register. This is consulted for use-queries.
+ uses: SmallVec<[(VirtualReg, RealReg); 8]>,
+
+ /// Sparse vector-map keyed by virtual register. This is consulted for def-queries.
+ defs: SmallVec<[(VirtualReg, RealReg); 8]>,
+}
+
+impl MentionRegUsageMapper {
+ pub(crate) fn new() -> Self {
+ Self {
+ uses: SmallVec::new(),
+ defs: SmallVec::new(),
+ }
+ }
+ pub(crate) fn clear(&mut self) {
+ self.uses.clear();
+ self.defs.clear();
+ }
+ pub(crate) fn lookup_use(&self, vreg: VirtualReg) -> Option<RealReg> {
+ self.uses.iter().find(|&pair| pair.0 == vreg).map(|x| x.1)
+ }
+ pub(crate) fn lookup_def(&self, vreg: VirtualReg) -> Option<RealReg> {
+ self.defs.iter().find(|&pair| pair.0 == vreg).map(|x| x.1)
+ }
+ pub(crate) fn set_use(&mut self, vreg: VirtualReg, rreg: RealReg) {
+ self.uses.push((vreg, rreg));
+ }
+ pub(crate) fn set_def(&mut self, vreg: VirtualReg, rreg: RealReg) {
+ self.defs.push((vreg, rreg));
+ }
+}
+
+impl RegUsageMapper for MentionRegUsageMapper {
+ fn get_use(&self, vreg: VirtualReg) -> Option<RealReg> {
+ return self.lookup_use(vreg);
+ }
+ fn get_def(&self, vreg: VirtualReg) -> Option<RealReg> {
+ return self.lookup_def(vreg);
+ }
+ fn get_mod(&self, vreg: VirtualReg) -> Option<RealReg> {
+ let result = self.lookup_use(vreg);
+ debug_assert_eq!(result, self.lookup_def(vreg));
+ return result;
+ }
+}
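+
+// NOTE: added sketch, not part of the upstream crate. It walks through the per-instruction
+// protocol described in the `MentionRegUsageMapper` doc comment: `clear`, then `set_use` /
+// `set_def`, then queries through the `RegUsageMapper` trait (as `Function::map_regs` would do).
+#[cfg(test)]
+mod mention_mapper_sketch {
+    use super::*;
+    use crate::{Reg, RegClass};
+
+    fn vreg(idx: u32) -> VirtualReg {
+        Reg::new_virtual(RegClass::I64, idx).to_virtual_reg()
+    }
+    fn rreg(idx: u8) -> RealReg {
+        Reg::new_real(RegClass::I64, /* enc = */ 0, /* index = */ idx).to_real_reg()
+    }
+
+    #[test]
+    fn per_instruction_protocol() {
+        let mut mapper = MentionRegUsageMapper::new();
+
+        // For one instruction: start from a clean slate, then record its mappings.
+        mapper.clear();
+        mapper.set_use(vreg(0), rreg(1));
+        mapper.set_def(vreg(0), rreg(2));
+        assert_eq!(Some(rreg(1)), mapper.get_use(vreg(0)));
+        assert_eq!(Some(rreg(2)), mapper.get_def(vreg(0)));
+        assert_eq!(None, mapper.get_use(vreg(1)));
+
+        // The next instruction clears the previous mappings.
+        mapper.clear();
+        assert_eq!(None, mapper.get_use(vreg(0)));
+        assert_eq!(None, mapper.get_def(vreg(0)));
+    }
+}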
diff --git a/third_party/rust/regalloc/src/snapshot.rs b/third_party/rust/regalloc/src/snapshot.rs
new file mode 100644
index 0000000000..7442805ebb
--- /dev/null
+++ b/third_party/rust/regalloc/src/snapshot.rs
@@ -0,0 +1,320 @@
+//! Snapshotting facilities.
+//!
+//! This makes it possible to save one entire IR input in a generic form that encapsulates all the
+//! constraints, so as to be replayed only in the regalloc.rs environment. The main structure,
+//! `GenericFunction`, can be created from any type implementing `Function`, acting as a generic
+//! Function wrapper. Its layout is simple enough that it can be optionally serialized and
+//! deserialized, making it easy to transfer test cases from regalloc.rs users to the crate's
+//! maintainers.
+
+use crate::data_structures::RegVecs;
+use crate::*;
+use std::borrow::Cow;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+enum IRInstKind {
+ Spill { vreg: Option<VirtualReg> },
+ Reload { vreg: Option<VirtualReg> },
+ Move { vreg: VirtualReg },
+ ZeroLenNop,
+ UserReturn,
+ UserMove,
+ UserOther,
+}
+
+#[derive(Clone, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct IRInst {
+ reg_uses: Vec<Reg>,
+ reg_mods: Vec<Writable<Reg>>,
+ reg_defs: Vec<Writable<Reg>>,
+ kind: IRInstKind,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct IRFunction {
+ instructions: Vec<IRInst>,
+ block_ranges: Vec<Range<InstIx>>,
+ block_succs: Vec<Vec<BlockIx>>,
+ entry_block: BlockIx,
+ liveins: Set<RealReg>,
+ liveouts: Set<RealReg>,
+ vreg_spill_slot_sizes: Vec<Option<(u32, RegClass)>>,
+ num_vregs: usize,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct IRSnapshot {
+ reg_universe: RealRegUniverse,
+ func: IRFunction,
+}
+
+impl IRSnapshot {
+ fn new_inst<F: Function>(func: &F, ix: InstIx, inst: &F::Inst) -> IRInst {
+ let mut reg_vecs = RegVecs::new(/* sanitized */ false);
+
+ let mut collector = RegUsageCollector::new(&mut reg_vecs);
+ F::get_regs(inst, &mut collector);
+
+ let kind = if let Some((_wreg, _reg)) = func.is_move(inst) {
+ IRInstKind::UserMove
+ } else if func.is_ret(ix) {
+ IRInstKind::UserReturn
+ } else {
+ IRInstKind::UserOther
+ };
+
+ IRInst {
+ reg_uses: reg_vecs.uses,
+ reg_mods: reg_vecs
+ .mods
+ .into_iter()
+ .map(|reg| Writable::from_reg(reg))
+ .collect(),
+ reg_defs: reg_vecs
+ .defs
+ .into_iter()
+ .map(|reg| Writable::from_reg(reg))
+ .collect(),
+ kind,
+ }
+ }
+
+ pub fn from_function<F: Function>(func: &F, reg_universe: &RealRegUniverse) -> Self {
+ let instructions: Vec<IRInst> = func
+ .insns()
+ .iter()
+ .enumerate()
+ .map(|(ix, inst)| IRSnapshot::new_inst(func, InstIx::new(ix as u32), inst))
+ .collect();
+
+ let mut block_ranges = Vec::new();
+ let mut block_succs = Vec::new();
+ for block in func.blocks() {
+ block_ranges.push(func.block_insns(block));
+ block_succs.push(func.block_succs(block).into());
+ }
+
+ let vreg_spill_slot_sizes = {
+ let mut array: Vec<Option<(u32, RegClass)>> = Vec::new();
+
+ let mut handle_reg = |reg: &Reg| {
+ if let Some(vreg) = reg.as_virtual_reg() {
+ let rc = vreg.get_class();
+ let spill_slot_size = func.get_spillslot_size(rc, vreg);
+ let index = vreg.get_index();
+ if index >= array.len() {
+ array.resize(index + 1, None);
+ }
+ let entry = &mut array[vreg.get_index()];
+ match entry {
+ None => *entry = Some((spill_slot_size, rc)),
+ Some((prev_size, prev_rc)) => {
+ assert_eq!(*prev_rc, rc);
+ assert_eq!(*prev_size, spill_slot_size);
+ }
+ }
+ }
+ };
+
+ for inst in &instructions {
+ for reg in &inst.reg_uses {
+ handle_reg(reg);
+ }
+ for reg in &inst.reg_mods {
+ handle_reg(&reg.to_reg());
+ }
+ for reg in &inst.reg_defs {
+ handle_reg(&reg.to_reg());
+ }
+ }
+
+ array
+ };
+
+ let entry_block = func.entry_block();
+ let liveins = func.func_liveins();
+ let liveouts = func.func_liveouts();
+
+ Self {
+ reg_universe: reg_universe.clone(),
+ func: IRFunction {
+ instructions,
+ block_ranges,
+ block_succs,
+ entry_block,
+ liveins,
+ liveouts,
+ vreg_spill_slot_sizes,
+ num_vregs: func.get_num_vregs(),
+ },
+ }
+ }
+
+ pub fn allocate(&mut self, opts: Options) -> Result<RegAllocResult<IRFunction>, RegAllocError> {
+ allocate_registers_with_opts(
+ &mut self.func,
+ &self.reg_universe,
+ None, /*no stackmap request*/
+ opts,
+ )
+ }
+}
+
+impl Function for IRFunction {
+ type Inst = IRInst;
+
+ // Liveins, liveouts.
+ fn func_liveins(&self) -> Set<RealReg> {
+ self.liveins.clone()
+ }
+ fn func_liveouts(&self) -> Set<RealReg> {
+ self.liveouts.clone()
+ }
+ fn get_num_vregs(&self) -> usize {
+ self.num_vregs
+ }
+
+ // Instructions.
+ fn insns(&self) -> &[Self::Inst] {
+ &self.instructions
+ }
+ fn insns_mut(&mut self) -> &mut [Self::Inst] {
+ &mut self.instructions
+ }
+ fn get_insn(&self, insn: InstIx) -> &Self::Inst {
+ &self.instructions[insn.get() as usize]
+ }
+ fn get_insn_mut(&mut self, insn: InstIx) -> &mut Self::Inst {
+ &mut self.instructions[insn.get() as usize]
+ }
+
+ fn is_ret(&self, insn: InstIx) -> bool {
+ let inst = &self.instructions[insn.get() as usize];
+ if let IRInstKind::UserReturn = inst.kind {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_move(&self, insn: &Self::Inst) -> Option<(Writable<Reg>, Reg)> {
+ if let IRInstKind::UserMove = insn.kind {
+ let from = insn.reg_uses[0];
+ let to = insn.reg_defs[0];
+ Some((to, from))
+ } else {
+ None
+ }
+ }
+
+ // Blocks.
+ fn blocks(&self) -> Range<BlockIx> {
+ Range::new(BlockIx::new(0), self.block_ranges.len())
+ }
+ fn entry_block(&self) -> BlockIx {
+ self.entry_block
+ }
+ fn block_insns(&self, block: BlockIx) -> Range<InstIx> {
+ self.block_ranges[block.get() as usize]
+ }
+ fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> {
+ Cow::Borrowed(&self.block_succs[block.get() as usize])
+ }
+
+ fn get_regs(insn: &Self::Inst, collector: &mut RegUsageCollector) {
+ collector.add_uses(&insn.reg_uses);
+ collector.add_mods(&insn.reg_mods);
+ collector.add_defs(&insn.reg_defs);
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(insn: &mut Self::Inst, maps: &RUM) {
+ for reg_use in insn.reg_uses.iter_mut() {
+ if let Some(vreg) = reg_use.as_virtual_reg() {
+ *reg_use = maps.get_use(vreg).expect("missing alloc for use").to_reg();
+ }
+ }
+ for reg_mod in insn.reg_mods.iter_mut() {
+ if let Some(vreg) = reg_mod.to_reg().as_virtual_reg() {
+ *reg_mod =
+ Writable::from_reg(maps.get_mod(vreg).expect("missing alloc for mod").to_reg());
+ }
+ }
+ for reg_def in insn.reg_defs.iter_mut() {
+ if let Some(vreg) = reg_def.to_reg().as_virtual_reg() {
+ *reg_def =
+ Writable::from_reg(maps.get_def(vreg).expect("missing alloc for def").to_reg());
+ }
+ }
+ }
+
+ fn gen_spill(
+ &self,
+ _to_slot: SpillSlot,
+ from_reg: RealReg,
+ for_vreg: Option<VirtualReg>,
+ ) -> Self::Inst {
+ IRInst {
+ reg_uses: vec![from_reg.to_reg()],
+ reg_mods: vec![],
+ reg_defs: vec![],
+ kind: IRInstKind::Spill { vreg: for_vreg },
+ }
+ }
+ fn gen_reload(
+ &self,
+ to_reg: Writable<RealReg>,
+ _from_slot: SpillSlot,
+ for_vreg: Option<VirtualReg>,
+ ) -> Self::Inst {
+ IRInst {
+ reg_uses: vec![],
+ reg_mods: vec![],
+ reg_defs: vec![Writable::from_reg(to_reg.to_reg().to_reg())],
+ kind: IRInstKind::Reload { vreg: for_vreg },
+ }
+ }
+ fn gen_move(
+ &self,
+ to_reg: Writable<RealReg>,
+ from_reg: RealReg,
+ for_vreg: VirtualReg,
+ ) -> Self::Inst {
+ IRInst {
+ reg_uses: vec![from_reg.to_reg()],
+ reg_mods: vec![],
+ reg_defs: vec![Writable::from_reg(to_reg.to_reg().to_reg())],
+ kind: IRInstKind::Move { vreg: for_vreg },
+ }
+ }
+ fn gen_zero_len_nop(&self) -> Self::Inst {
+ IRInst {
+ reg_uses: vec![],
+ reg_mods: vec![],
+ reg_defs: vec![],
+ kind: IRInstKind::ZeroLenNop,
+ }
+ }
+
+ fn get_spillslot_size(&self, regclass: RegClass, for_vreg: VirtualReg) -> u32 {
+ let entry =
+ self.vreg_spill_slot_sizes[for_vreg.get_index()].expect("missing spillslot info");
+ assert_eq!(entry.1, regclass);
+ return entry.0;
+ }
+
+ fn maybe_direct_reload(
+ &self,
+ _insn: &Self::Inst,
+ _reg: VirtualReg,
+ _slot: SpillSlot,
+ ) -> Option<Self::Inst> {
+ unimplemented!();
+ }
+}
diff --git a/third_party/rust/regalloc/src/sparse_set.rs b/third_party/rust/regalloc/src/sparse_set.rs
new file mode 100644
index 0000000000..f07fab8792
--- /dev/null
+++ b/third_party/rust/regalloc/src/sparse_set.rs
@@ -0,0 +1,881 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+//! An implementation of sets which aims to be fast for both large sets and
+//! very small sets, even if the elements are sparse relative to the universe.
+
+use rustc_hash::FxHashSet;
+use std::fmt;
+use std::hash::Hash;
+
+//=============================================================================
+// SparseSet
+
+// Handy wrappers around `SparseSetU`, if you don't want to have to guess at an "optimal"
+// in-line size.
+pub type SparseSet<T> = SparseSetU<[T; 12]>;
+//pub type SparseSetIter<'a, T> = SparseSetUIter<'a, [T; 12]>; // No use case yet
+
+// Implementation: the small variant stores its elements inline, unordered but duplicate-free.
+
+use core::mem::MaybeUninit;
+use core::ptr::{read, write};
+
+// Types that can be used as the backing store for a SparseSet.
+pub trait Array {
+ // The type of the array's elements.
+ type Item;
+ // Returns the number of items the array can hold.
+ fn size() -> usize;
+}
+macro_rules! impl_array(
+ ($($size:expr),+) => {
+ $(
+ impl<T> Array for [T; $size] {
+ type Item = T;
+ fn size() -> usize { $size }
+ }
+ )+
+ }
+);
+impl_array!(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 24, 28, 32);
+
+// The U here stands for "unordered". It refers to the fact that the elements
+// in `Small::arr` are in no particular order, although they are
+// duplicate-free.
+pub enum SparseSetU<A: Array> {
+ Large { set: FxHashSet<A::Item> },
+ Small { card: usize, arr: MaybeUninit<A> },
+}
+
+// ================ Admin (private) methods ================
+
+impl<A> SparseSetU<A>
+where
+ A: Array + Eq + Ord + Hash + Copy + fmt::Debug,
+ A::Item: Eq + Ord + Hash + Copy + fmt::Debug,
+{
+ #[cfg(test)]
+ fn is_small(&self) -> bool {
+ match self {
+ SparseSetU::Large { .. } => false,
+ SparseSetU::Small { .. } => true,
+ }
+ }
+ #[cfg(test)]
+ fn is_large(&self) -> bool {
+ !self.is_small()
+ }
+ #[inline(never)]
+ fn upgrade(&mut self) {
+ match self {
+ SparseSetU::Large { .. } => panic!("SparseSetU: upgrade"),
+ SparseSetU::Small { card, arr } => {
+ assert!(*card == A::size());
+ let mut set = FxHashSet::<A::Item>::default();
+ set.reserve(A::size());
+ // Could this be done faster?
+ let arr_p = arr.as_mut_ptr() as *mut A::Item;
+ for i in 0..*card {
+ set.insert(unsafe { read(arr_p.add(i)) });
+ }
+ *self = SparseSetU::Large { set }
+ }
+ }
+ }
+ // A large set is only downgradeable if its card does not exceed this value.
+ #[inline(always)]
+ fn small_halfmax_card(&self) -> usize {
+ let limit = A::size();
+ //if limit >= 4 {
+ // limit / 2
+ //} else {
+ // limit - 1
+ //}
+ if false {
+ // Set the transition point as roughly half of the inline size
+ match limit {
+ 0 | 1 => panic!("SparseSetU: small_halfmax_card"),
+ 2 => 1,
+ 3 => 2,
+ 4 => 2,
+ 5 => 3,
+ 6 => 3,
+ _ => limit / 2,
+ }
+ } else {
+ // Set the transition point as roughly two thirds of the inline size
+ match limit {
+ 0 | 1 => panic!("SparseSetU: small_halfmax_card"),
+ 2 => 1,
+ 3 => 2,
+ 4 => 3,
+ 5 => 4,
+ 6 => 4,
+ // FIXME JRS 2020Apr10 avoid possible integer overflow here:
+ _ => (2 * limit) / 3,
+ }
+ }
+ }
+ // If we have a large-format set, but the cardinality has fallen below half
+ // the size of a small format set, convert it to the small format. This
+ // isn't done at the point when the cardinality falls to the max capacity of
+ // a small set in order to give some hysteresis -- we don't want to be
+ // constantly converting back and forth for a set whose size repeatedly
+ // crosses the border.
+ #[inline(never)]
+ fn maybe_downgrade(&mut self) {
+ let small_halfmax_card = self.small_halfmax_card();
+ match self {
+ SparseSetU::Large { set } => {
+ if set.len() <= small_halfmax_card {
+ let mut arr = MaybeUninit::<A>::uninit();
+ let arr_p = arr.as_mut_ptr() as *mut A::Item;
+ let mut i = 0;
+ for e in set.iter() {
+ unsafe { write(arr_p.add(i), *e) };
+ i += 1;
+ }
+ assert!(i <= small_halfmax_card);
+ *self = SparseSetU::Small { card: i, arr };
+ }
+ }
+ SparseSetU::Small { .. } => {
+ panic!("SparseSetU::maybe_downgrade: is already small");
+ }
+ }
+ }
+ #[inline(always)]
+ fn insert_no_dup_check(&mut self, item: A::Item) {
+ match self {
+ SparseSetU::Large { set } => {
+ set.insert(item);
+ }
+ SparseSetU::Small { card, arr } => {
+ assert!(*card <= A::size());
+ if *card < A::size() {
+ // Stay small
+ let arr_p = arr.as_mut_ptr() as *mut A::Item;
+ unsafe {
+ write(arr_p.add(*card), item);
+ }
+ *card += 1;
+ } else {
+ // Transition up
+ self.upgrade();
+ match self {
+ SparseSetU::Large { set } => {
+ let _ = set.insert(item);
+ }
+ SparseSetU::Small { .. } => {
+ // Err, what? Still Small after upgrade?
+ panic!("SparseSetU::insert_no_dup_check")
+ }
+ }
+ }
+ }
+ }
+ }
+}
+#[inline(always)]
+fn small_contains<A>(card: usize, arr: &MaybeUninit<A>, item: A::Item) -> bool
+where
+ A: Array,
+ A::Item: Eq,
+{
+ let arr_p = arr.as_ptr() as *const A::Item;
+ for i in 0..card {
+ if unsafe { read(arr_p.add(i)) } == item {
+ return true;
+ }
+ }
+ false
+}
+
+// ================ Public methods ================
+
+impl<A> SparseSetU<A>
+where
+ A: Array + Eq + Ord + Hash + Copy + fmt::Debug,
+ A::Item: Eq + Ord + Hash + Copy + fmt::Debug,
+{
+ #[inline(always)]
+ pub fn empty() -> Self {
+ SparseSetU::Small {
+ card: 0,
+ arr: MaybeUninit::uninit(),
+ }
+ }
+
+ #[inline(always)]
+ pub fn is_empty(&self) -> bool {
+ match self {
+ SparseSetU::Small { card, .. } => *card == 0,
+ SparseSetU::Large { set } => {
+ // This holds because `maybe_downgrade` will always convert a
+ // zero-sized large variant into a small variant.
+ assert!(set.len() > 0);
+ false
+ }
+ }
+ }
+
+ #[inline(never)]
+ pub fn card(&self) -> usize {
+ match self {
+ SparseSetU::Large { set } => set.len(),
+ SparseSetU::Small { card, .. } => *card,
+ }
+ }
+
+ #[inline(never)]
+ pub fn insert(&mut self, item: A::Item) {
+ match self {
+ SparseSetU::Large { set } => {
+ set.insert(item);
+ }
+ SparseSetU::Small { card, arr } => {
+ assert!(*card <= A::size());
+ // Do we already have it?
+ if small_contains(*card, arr, item) {
+ return;
+ }
+ // No.
+ let arr_p = arr.as_mut_ptr() as *mut A::Item;
+ if *card < A::size() {
+ // Stay small
+ unsafe {
+ write(arr_p.add(*card), item);
+ }
+ *card += 1;
+ } else {
+ // Transition up
+ self.upgrade();
+ self.insert(item);
+ }
+ }
+ }
+ }
+
+ #[inline(always)]
+ pub fn contains(&self, item: A::Item) -> bool {
+ match self {
+ SparseSetU::Large { set } => set.contains(&item),
+ SparseSetU::Small { card, arr } => small_contains(*card, arr, item),
+ }
+ }
+
+ #[inline(never)]
+ pub fn union(&mut self, other: &Self) {
+ match self {
+ SparseSetU::Large { set: set1 } => match other {
+ SparseSetU::Large { set: set2 } => {
+ for item in set2.iter() {
+ set1.insert(*item);
+ }
+ }
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ } => {
+ let arr2_p = arr2.as_ptr() as *const A::Item;
+ for i in 0..*card2 {
+ let item = unsafe { read(arr2_p.add(i)) };
+ set1.insert(item);
+ }
+ }
+ },
+ SparseSetU::Small {
+ card: card1,
+ arr: arr1,
+ } => {
+ let arr1_p = arr1.as_mut_ptr() as *mut A::Item;
+ match other {
+ SparseSetU::Large { set: set2 } => {
+ let mut set2c = set2.clone();
+ for i in 0..*card1 {
+ let item = unsafe { read(arr1_p.add(i)) };
+ set2c.insert(item);
+ }
+ *self = SparseSetU::Large { set: set2c };
+ }
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ } => {
+ let mut extras: MaybeUninit<A> = MaybeUninit::uninit();
+ let mut n_extras = 0;
+ let extras_p = extras.as_mut_ptr() as *mut A::Item;
+ let arr2_p = arr2.as_ptr() as *const A::Item;
+ // Iterate through the second set. Add every item not in the
+ // first set to `extras`.
+ for i in 0..*card2 {
+ let item2 = unsafe { read(arr2_p.add(i)) };
+ let mut in1 = false;
+ for j in 0..*card1 {
+ let item1 = unsafe { read(arr1_p.add(j)) };
+ if item1 == item2 {
+ in1 = true;
+ break;
+ }
+ }
+ if !in1 {
+ debug_assert!(n_extras < A::size());
+ unsafe {
+ write(extras_p.add(n_extras), item2);
+ }
+ n_extras += 1;
+ }
+ }
+ // The result is the concatenation of arr1 and extras.
+ for i in 0..n_extras {
+ let item = unsafe { read(extras_p.add(i)) };
+ self.insert_no_dup_check(item);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ #[inline(never)]
+ pub fn remove(&mut self, other: &Self) {
+ match self {
+ SparseSetU::Large { set: set1 } => {
+ match other {
+ SparseSetU::Large { set: set2 } => {
+ for item in set2.iter() {
+ set1.remove(item);
+ }
+ }
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ } => {
+ let arr2_p = arr2.as_ptr() as *const A::Item;
+ for i in 0..*card2 {
+ let item = unsafe { read(arr2_p.add(i)) };
+ set1.remove(&item);
+ }
+ }
+ }
+ self.maybe_downgrade();
+ }
+ SparseSetU::Small {
+ card: card1,
+ arr: arr1,
+ } => {
+ let arr1_p = arr1.as_mut_ptr() as *mut A::Item;
+ match other {
+ SparseSetU::Large { set: set2 } => {
+ let mut w = 0;
+ for r in 0..*card1 {
+ let item = unsafe { read(arr1_p.add(r)) };
+ let is_in2 = set2.contains(&item);
+ if !is_in2 {
+ // Keep it.
+ if r != w {
+ unsafe {
+ write(arr1_p.add(w), item);
+ }
+ }
+ w += 1;
+ }
+ }
+ *card1 = w;
+ }
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ } => {
+ let arr2_p = arr2.as_ptr() as *const A::Item;
+ let mut w = 0;
+ for r in 0..*card1 {
+ let item = unsafe { read(arr1_p.add(r)) };
+ let mut is_in2 = false;
+ for i in 0..*card2 {
+ if unsafe { read(arr2_p.add(i)) } == item {
+ is_in2 = true;
+ break;
+ }
+ }
+ if !is_in2 {
+ // Keep it.
+ if r != w {
+ unsafe {
+ write(arr1_p.add(w), item);
+ }
+ }
+ w += 1;
+ }
+ }
+ *card1 = w;
+ }
+ }
+ }
+ }
+ }
+
+ // return true if `self` is a subset of `other`
+ #[inline(never)]
+ pub fn is_subset_of(&self, other: &Self) -> bool {
+ if self.card() > other.card() {
+ return false;
+ }
+ // Visit all items in `self` and check that each one is present in
+ // `other`; if any is missing, return false.
+ match self {
+ SparseSetU::Large { set: set1 } => match other {
+ SparseSetU::Large { set: set2 } => set1.is_subset(set2),
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ } => {
+ for item in set1.iter() {
+ if !small_contains(*card2, arr2, *item) {
+ return false;
+ }
+ }
+ true
+ }
+ },
+ SparseSetU::Small {
+ card: card1,
+ arr: arr1,
+ } => {
+ let arr1_p = arr1.as_ptr() as *const A::Item;
+ match other {
+ SparseSetU::Large { set: set2 } => {
+ for i in 0..*card1 {
+ let item = unsafe { read(arr1_p.add(i)) };
+ if !set2.contains(&item) {
+ return false;
+ }
+ }
+ true
+ }
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ } => {
+ for i in 0..*card1 {
+ let item = unsafe { read(arr1_p.add(i)) };
+ if !small_contains(*card2, arr2, item) {
+ return false;
+ }
+ }
+ true
+ }
+ }
+ }
+ }
+ }
+
+ #[inline(never)]
+ pub fn to_vec(&self) -> Vec<A::Item> {
+ let mut res = Vec::<A::Item>::new();
+ match self {
+ SparseSetU::Large { set } => {
+ for item in set.iter() {
+ res.push(*item);
+ }
+ }
+ SparseSetU::Small { card, arr } => {
+ let arr_p = arr.as_ptr() as *const A::Item;
+ for i in 0..*card {
+ res.push(unsafe { read(arr_p.add(i)) });
+ }
+ }
+ }
+ // Don't delete this sort: it makes `to_vec` deterministic, which the `Debug` impl and the tests rely on.
+ res.sort_unstable();
+ res
+ }
+
+ #[inline(never)]
+ pub fn from_vec(vec: Vec<A::Item>) -> Self {
+ let vec_len = vec.len();
+ if vec_len <= A::size() {
+ let mut card = 0;
+ let mut arr: MaybeUninit<A> = MaybeUninit::uninit();
+ for i in 0..vec_len {
+ let item = vec[i];
+ if small_contains(card, &arr, item) {
+ continue;
+ }
+ let arr_p = arr.as_mut_ptr() as *mut A::Item;
+ unsafe { write(arr_p.add(card), item) }
+ card += 1;
+ }
+ SparseSetU::Small { card, arr }
+ } else {
+ let mut set = FxHashSet::<A::Item>::default();
+ for i in 0..vec_len {
+ set.insert(vec[i]);
+ }
+ SparseSetU::Large { set }
+ }
+ }
+
+ #[inline(never)]
+ pub fn equals(&self, other: &Self) -> bool {
+ if self.card() != other.card() {
+ return false;
+ }
+ match (self, other) {
+ (SparseSetU::Large { set: set1 }, SparseSetU::Large { set: set2 }) => set1 == set2,
+ (
+ SparseSetU::Small {
+ card: card1,
+ arr: arr1,
+ },
+ SparseSetU::Small {
+ card: card2,
+ arr: arr2,
+ },
+ ) => {
+ assert!(*card1 == *card2);
+ // Check to see that all items in arr1 are present in arr2. Since the
+ // arrays have the same length and are duplicate free, although
+ // unordered, this is a sufficient equality test.
+ let arr1_p = arr1.as_ptr() as *const A::Item;
+ let arr2_p = arr2.as_ptr() as *const A::Item;
+ for i1 in 0..*card1 {
+ let item1 = unsafe { read(arr1_p.add(i1)) };
+ let mut found1 = false;
+ for i2 in 0..*card2 {
+ let item2 = unsafe { read(arr2_p.add(i2)) };
+ if item1 == item2 {
+ found1 = true;
+ break;
+ }
+ }
+ if !found1 {
+ return false;
+ }
+ }
+ true
+ }
+ (SparseSetU::Small { card, arr }, SparseSetU::Large { set })
+ | (SparseSetU::Large { set }, SparseSetU::Small { card, arr }) => {
+ // Same rationale as above as to why this is a sufficient test.
+ let arr_p = arr.as_ptr() as *const A::Item;
+ for i in 0..*card {
+ let item = unsafe { read(arr_p.add(i)) };
+ if !set.contains(&item) {
+ return false;
+ }
+ }
+ true
+ }
+ }
+ }
+}
+
+impl<A> fmt::Debug for SparseSetU<A>
+where
+ A: Array + Eq + Ord + Hash + Copy + fmt::Debug,
+ A::Item: Eq + Ord + Hash + Copy + fmt::Debug,
+{
+ #[inline(never)]
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ // Print the elements in some way which depends only on what is
+ // present in the set, and not on any other factor. In particular,
+ // <Debug for FxHashSet> has been observed to print the elements
+ // of a two element set in both orders on different occasions.
+ let sorted_vec = self.to_vec();
+ let mut s = "{".to_string();
+ for i in 0..sorted_vec.len() {
+ if i > 0 {
+ s = s + &", ".to_string();
+ }
+ s = s + &format!("{:?}", &sorted_vec[i]);
+ }
+ s = s + &"}".to_string();
+ write!(fmt, "{}", s)
+ }
+}
+
+impl<A> Clone for SparseSetU<A>
+where
+ A: Array + Eq + Ord + Hash + Copy + Clone + fmt::Debug,
+ A::Item: Eq + Ord + Hash + Copy + Clone + fmt::Debug,
+{
+ #[inline(never)]
+ fn clone(&self) -> Self {
+ match self {
+ SparseSetU::Large { set } => SparseSetU::Large { set: set.clone() },
+ SparseSetU::Small { card, arr } => {
+ let arr2 = arr.clone();
+ SparseSetU::Small {
+ card: *card,
+ arr: arr2,
+ }
+ }
+ }
+ }
+}
+
+pub enum SparseSetUIter<'a, A: Array> {
+ Large {
+ set_iter: std::collections::hash_set::Iter<'a, A::Item>,
+ },
+ Small {
+ card: usize,
+ arr: &'a MaybeUninit<A>,
+ next: usize,
+ },
+}
+impl<A: Array> SparseSetU<A> {
+ pub fn iter(&self) -> SparseSetUIter<A> {
+ match self {
+ SparseSetU::Large { set } => SparseSetUIter::Large {
+ set_iter: set.iter(),
+ },
+ SparseSetU::Small { card, arr } => SparseSetUIter::Small {
+ card: *card,
+ arr,
+ next: 0,
+ },
+ }
+ }
+}
+impl<'a, A: Array> Iterator for SparseSetUIter<'a, A> {
+ type Item = &'a A::Item;
+ fn next(&mut self) -> Option<Self::Item> {
+ match self {
+ SparseSetUIter::Large { set_iter } => set_iter.next(),
+ SparseSetUIter::Small { card, arr, next } => {
+ if next < card {
+ let arr_p = arr.as_ptr() as *const A::Item;
+ let item_p = unsafe { arr_p.add(*next) };
+ *next += 1;
+ Some(unsafe { &*item_p })
+ } else {
+ None
+ }
+ }
+ }
+ }
+}
+
+// ================ Testing machinery for SparseSetU ================
+
+#[cfg(test)]
+mod sparse_set_test_utils {
+ // As currently set up, each number (from rand, not rand_base) has a 1-in-4
+ // chance of being a dup of the last 8 numbers produced.
+ pub struct RNGwithDups {
+ seed: u32,
+ circ: [u32; 8],
+ circC: usize, // the cursor for `circ`
+ }
+ impl RNGwithDups {
+ pub fn new() -> Self {
+ Self {
+ seed: 0,
+ circ: [0; 8],
+ circC: 0,
+ }
+ }
+ fn rand_base(&mut self) -> u32 {
+ self.seed = self.seed.wrapping_mul(1103515245).wrapping_add(12345);
+ self.seed
+ }
+ pub fn rand(&mut self) -> u32 {
+ let r = self.rand_base();
+ let rlo = r & 0xFFFF;
+ let rhi = (r >> 16) & 0xFF;
+ if rhi < 64 {
+ self.circ[(rhi % 8) as usize]
+ } else {
+ self.circ[self.circC as usize] = rlo;
+ self.circC += 1;
+ if self.circC == 8 {
+ self.circC = 0
+ };
+ rlo
+ }
+ }
+ pub fn rand_vec(&mut self, len: usize) -> Vec<u32> {
+ let mut res = vec![];
+ for _ in 0..len {
+ res.push(self.rand());
+ }
+ res
+ }
+ }
+}
+
+#[test]
+fn test_sparse_set() {
+ use crate::data_structures::Set;
+ let mut set = SparseSetU::<[u32; 3]>::empty();
+ assert!(set.is_small());
+ set.insert(3);
+ assert!(set.is_small());
+ set.insert(1);
+ assert!(set.is_small());
+ set.insert(4);
+ assert!(set.is_small());
+ set.insert(7);
+ assert!(set.is_large());
+
+ let iters = 20;
+ let mut rng = sparse_set_test_utils::RNGwithDups::new();
+
+ // empty
+ {
+ let spa = SparseSetU::<[u32; 10]>::empty();
+ assert!(spa.card() == 0);
+ }
+
+ // card, is_empty
+ for _ in 0..iters * 3 {
+ for n1 in 0..100 {
+ let size1 = n1 % 25;
+ let vec1a = rng.rand_vec(size1);
+ let vec1b = vec1a.clone(); // This is very stupid.
+ let spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let std1 = Set::<u32>::from_vec(vec1b);
+ assert!(spa1.card() == std1.card());
+ assert!(spa1.is_empty() == (size1 == 0));
+ }
+ }
+
+ // insert
+ for _ in 0..iters * 3 {
+ for n1 in 0..100 {
+ let size1 = n1 % 25;
+ let vec1a = rng.rand_vec(size1);
+ let vec1b = vec1a.clone();
+ let tmp = if size1 == 0 { 0 } else { vec1a[0] };
+ let mut spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let mut std1 = Set::<u32>::from_vec(vec1b);
+ // Insert an item which is almost certainly not in the set.
+ let n = rng.rand();
+ spa1.insert(n);
+ std1.insert(n);
+ assert!(spa1.card() == std1.card());
+ assert!(spa1.to_vec() == std1.to_vec());
+ // Insert an item which is already in the set.
+ if n1 > 0 {
+ spa1.insert(tmp);
+ std1.insert(tmp);
+ assert!(spa1.card() == std1.card());
+ assert!(spa1.to_vec() == std1.to_vec());
+ }
+ }
+ }
+
+ // contains
+ for _ in 0..iters * 2 {
+ for n1 in 0..100 {
+ let size1 = n1 % 25;
+ let vec1a = rng.rand_vec(size1);
+ let vec1b = vec1a.clone();
+ let tmp = if size1 == 0 { 0 } else { vec1a[0] };
+ let spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let std1 = Set::<u32>::from_vec(vec1b);
+ // Check for an item which is almost certainly not in the set.
+ let n = rng.rand();
+ assert!(spa1.contains(n) == std1.contains(n));
+ // Check for an item which is already in the set.
+ if n1 > 0 {
+ assert!(spa1.contains(tmp) == std1.contains(tmp));
+ }
+ }
+ }
+
+ // union
+ for _ in 0..iters * 2 {
+ for size1 in 0..25 {
+ for size2 in 0..25 {
+ let vec1a = rng.rand_vec(size1);
+ let vec2a = rng.rand_vec(size2);
+ let vec1b = vec1a.clone();
+ let vec2b = vec2a.clone();
+ let mut spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let spa2 = SparseSetU::<[u32; 10]>::from_vec(vec2a);
+ let mut std1 = Set::<u32>::from_vec(vec1b);
+ let std2 = Set::<u32>::from_vec(vec2b);
+ spa1.union(&spa2);
+ std1.union(&std2);
+ assert!(spa1.to_vec() == std1.to_vec());
+ }
+ }
+ }
+
+ // remove
+ for _ in 0..iters * 2 {
+ for size1 in 0..25 {
+ for size2 in 0..25 {
+ let vec1a = rng.rand_vec(size1);
+ let vec2a = rng.rand_vec(size2);
+ let vec1b = vec1a.clone();
+ let vec2b = vec2a.clone();
+ let mut spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let spa2 = SparseSetU::<[u32; 10]>::from_vec(vec2a);
+ let mut std1 = Set::<u32>::from_vec(vec1b);
+ let std2 = Set::<u32>::from_vec(vec2b);
+ spa1.remove(&spa2);
+ std1.remove(&std2);
+ assert!(spa1.to_vec() == std1.to_vec());
+ }
+ }
+ }
+
+ // is_subset_of
+ for _ in 0..iters * 2 {
+ for size1 in 0..25 {
+ for size2 in 0..25 {
+ let vec1a = rng.rand_vec(size1);
+ let vec2a = rng.rand_vec(size2);
+ let vec1b = vec1a.clone();
+ let vec2b = vec2a.clone();
+ let spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let spa2 = SparseSetU::<[u32; 10]>::from_vec(vec2a);
+ let std1 = Set::<u32>::from_vec(vec1b);
+ let std2 = Set::<u32>::from_vec(vec2b);
+ assert!(spa1.is_subset_of(&spa2) == std1.is_subset_of(&std2));
+ }
+ }
+ }
+
+ // to_vec and from_vec are implicitly tested by the above; there's no way
+ // they could be wrong and still have the above tests succeed.
+ // (Famous last words!)
+
+ // equals
+ for _ in 0..iters * 2 {
+ for size1 in 0..25 {
+ for size2 in 0..25 {
+ let vec1a = rng.rand_vec(size1);
+ let vec2a = rng.rand_vec(size2);
+ let vec1b = vec1a.clone();
+ let vec2b = vec2a.clone();
+ let spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let spa2 = SparseSetU::<[u32; 10]>::from_vec(vec2a);
+ let std1 = Set::<u32>::from_vec(vec1b);
+ let std2 = Set::<u32>::from_vec(vec2b);
+ assert!(std1.equals(&std1)); // obviously
+ assert!(std2.equals(&std2)); // obviously
+ assert!(spa1.equals(&spa1)); // obviously
+ assert!(spa2.equals(&spa2)); // obviously
+ // More seriously
+ assert!(spa1.equals(&spa2) == std1.equals(&std2));
+ }
+ }
+ }
+
+ // clone
+ for _ in 0..iters * 3 {
+ for n1 in 0..100 {
+ let size1 = n1 % 25;
+ let vec1a = rng.rand_vec(size1);
+ let spa1 = SparseSetU::<[u32; 10]>::from_vec(vec1a);
+ let spa2 = spa1.clone();
+ assert!(spa1.equals(&spa2));
+ }
+ }
+}
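+
+// NOTE: added sketch, not part of the upstream crate. It illustrates the downgrade
+// hysteresis implemented in `maybe_downgrade`: a large-format set is only converted back
+// to the small format once its cardinality drops to roughly two thirds of the inline
+// capacity, not as soon as it would fit inline again.
+#[test]
+fn test_sparse_set_downgrade_hysteresis() {
+    // Inline capacity of 4; the two-thirds threshold is 3.
+    let mut set = SparseSetU::<[u32; 4]>::from_vec(vec![1, 2, 3, 4, 5, 6]);
+    assert!(set.is_large());
+
+    // Drop to cardinality 4: that would fit inline, but there is no downgrade yet.
+    set.remove(&SparseSetU::<[u32; 4]>::from_vec(vec![5, 6]));
+    assert_eq!(set.card(), 4);
+    assert!(set.is_large());
+
+    // Drop to cardinality 3: now at the threshold, so the set goes back to the small format.
+    set.remove(&SparseSetU::<[u32; 4]>::from_vec(vec![4]));
+    assert_eq!(set.card(), 3);
+    assert!(set.is_small());
+}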
diff --git a/third_party/rust/regalloc/src/union_find.rs b/third_party/rust/regalloc/src/union_find.rs
new file mode 100644
index 0000000000..bb5347abe3
--- /dev/null
+++ b/third_party/rust/regalloc/src/union_find.rs
@@ -0,0 +1,749 @@
+#![allow(non_snake_case)]
+#![allow(non_camel_case_types)]
+
+//! A fast union-find implementation for "T: ToFromU32" items
+//! in some dense range [0, N-1].
+
+use std::marker::PhantomData;
+
+//=============================================================================
+// ToFromU32
+
+// First, we need this. You can store anything you like in this union-find
+// mechanism, so long as it is really a u32. Reminds me of that old joke
+// about the Model T Ford being available in any colour you want, so long as
+// it is black.
+pub trait ToFromU32<T: Sized = Self> {
+ fn to_u32(x: Self) -> u32;
+ fn from_u32(x: u32) -> Self;
+}
+//impl ToFromU32 for i32 {
+// fn to_u32(x: i32) -> u32 {
+// x as u32
+// }
+// fn from_u32(x: u32) -> i32 {
+// x as i32
+// }
+//}
+impl ToFromU32 for u32 {
+ fn to_u32(x: u32) -> u32 {
+ x
+ }
+ fn from_u32(x: u32) -> u32 {
+ x
+ }
+}
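+//
+// As a sketch of how a caller-side index newtype might satisfy this trait
+// (the type `MyIx` below is hypothetical and not part of this crate):
+//
+//#[derive(Clone, Copy)]
+//struct MyIx(u32);
+//impl ToFromU32 for MyIx {
+// fn to_u32(x: MyIx) -> u32 {
+// x.0
+// }
+// fn from_u32(x: u32) -> MyIx {
+// MyIx(x)
+// }
+//}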
+
+//=============================================================================
+// UnionFind
+
+// This is a fast union-find implementation for "T: ToFromU32" items in some
+// dense range [0, N-1]. The allowed operations are:
+//
+// (1) create a new `UnionFind`er
+//
+// (2) mark two elements as being in the same equivalence class
+//
+// (3) get the equivalence classes wrapped up in an opaque structure
+// `UnionFindEquivClasses`, which makes it possible to cheaply find and
+// iterate through the equivalence class of any item.
+//
+// (4) get an iterator over the "equivalence class leaders". Iterating this
+// produces one value from each equivalence class. By presenting each of
+// these to (3), it is possible to enumerate all the equivalence classes
+// exactly once.
+//
+// `UnionFind` and the operations `union` and `find` are loosely based on the
+// discussion in Chapter 8 of "Data Structures and Algorithm Analysis in C"
+// (Mark Allen Weiss, 1993). `UnionFindEquivClasses` and the algorithm to
+// construct it are home-grown, although I'm sure the same idea has been
+// implemented many times before.
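+//
+// For orientation, a minimal usage sketch (using the `u32` instance of
+// `ToFromU32`; the values are illustrative only):
+//
+//let mut uf = UnionFind::<u32>::new(4); // universe {0, 1, 2, 3}
+//uf.union(0, 2); // classes are now {0, 2}, {1}, {3}
+//let classes = uf.get_equiv_classes();
+//assert!(classes.in_same_equivalence_class(0, 2) == Some(true));
+//assert!(classes.in_same_equivalence_class(1, 3) == Some(false));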
+
+pub struct UnionFind<T: ToFromU32> {
+ // These are the trees that we are building. A value that is negative means
+ // that this node is a tree root, and the negation of its value is the size
+ // of the tree. A value that is positive (which must be in the range [0,
+ // N-1]) indicates that this tree is a subtree and that its parent has the
+ // given index.
+ //
+ // One consequence of this representation is that at most 2^31-1 values can
+ // be supported. Doesn't seem like much of a limitation in practice, given
+ // that all of this allocator's data structures are limited to 2^32 entries.
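+ //
+ // As a concrete illustration of the encoding (values chosen arbitrarily):
+ // parent_or_size == [-3, 0, 0, -1] describes two trees: element 0 is a
+ // root whose tree has 3 members (0, 1 and 2, since elements 1 and 2 both
+ // record 0 as their parent), and element 3 is the root of a tree of size 1
+ // containing only itself.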
+ /*priv*/
+ parent_or_size: Vec<i32>,
+
+ // Keep the typechecker happy
+ /*priv*/
+ anchor: PhantomData<T>,
+}
+
+/*priv*/
+const UF_MAX_SIZE: u32 = 0x7FFF_FFF0;
+
+impl<T: ToFromU32> UnionFind<T> {
+ pub fn new(size: usize) -> Self {
+ // Test a slightly conservative limit to avoid any off-by-one errors.
+ if size > UF_MAX_SIZE as usize {
+ panic!("UnionFind::new: too many elements; max = 2^31 - 16.");
+ }
+ let mut parent_or_size = Vec::<i32>::new();
+ parent_or_size.resize(size, -1);
+ Self {
+ parent_or_size,
+ anchor: PhantomData,
+ }
+ }
+
+ // Find, with path compression. Returns the index of the tree root for the
+ // given element. This is not for external use. There's no boundary
+ // checking since Rust will do that anyway.
+ //
+ // This was initially implemented using a recursive function. However,
+ // this function gets called a lot, and the recursion led to significant
+ // expense. Attempts to replace the recursion with an explicit stack
+ // didn't give much speedup. Hence the following scheme, which retains
+ // the recursion but unrolls the function. To avoid performance problems
+ // caused by the interaction of inlining and recursion, it is split into
+ // two functions: `find` and `find_slow`.
+ //
+ // This is the main function. It is hot, so it is unrolled 4 times. If
+ // those 4 iterations don't complete the traversal back to the root, it
+ // calls onwards to `find_slow`, which recurses. The idea is that `find`
+ // handles the majority of the cases and can always be inlined, and we
+ // hand off the remaining cases to `find_slow` which will never be inlined
+ // (and hence will not interfere with the inlining of this function).
+ //
+ // As a reminder of the comments above:
+ //
+ // * A `parent_or_size` value that is negative means that this node is a
+ // tree root.
+ //
+ // * A `parent_or_size` that is non-negative indicates that this tree is a
+ // subtree, and its parent has the given index in `parent_or_size`.
+ #[inline(always)]
+ fn find(&mut self, elem0: u32) -> u32 {
+ // Handle up to 4 steps up the tree in-line.
+ let elem0_parent_or_size: i32 = self.parent_or_size[elem0 as usize];
+ if elem0_parent_or_size < 0 {
+ // We're at a tree root.
+ return elem0;
+ }
+
+ let elem1 = elem0_parent_or_size as u32;
+ let elem1_parent_or_size: i32 = self.parent_or_size[elem1 as usize];
+ if elem1_parent_or_size < 0 {
+ self.parent_or_size[elem0 as usize] = elem1 as i32;
+ return elem1;
+ }
+
+ let elem2 = elem1_parent_or_size as u32;
+ let elem2_parent_or_size: i32 = self.parent_or_size[elem2 as usize];
+ if elem2_parent_or_size < 0 {
+ self.parent_or_size[elem1 as usize] = elem2 as i32;
+ self.parent_or_size[elem0 as usize] = elem2 as i32;
+ return elem2;
+ }
+
+ let elem3 = elem2_parent_or_size as u32;
+ let elem3_parent_or_size: i32 = self.parent_or_size[elem3 as usize];
+ if elem3_parent_or_size < 0 {
+ self.parent_or_size[elem2 as usize] = elem3 as i32;
+ self.parent_or_size[elem1 as usize] = elem3 as i32;
+ self.parent_or_size[elem0 as usize] = elem3 as i32;
+ return elem3;
+ }
+
+ // Hand off to `find_slow` to deal with all the remaining steps.
+ let elem4 = elem3_parent_or_size as u32;
+ let root = self.find_slow(elem4);
+ assert!(root < UF_MAX_SIZE);
+ self.parent_or_size[elem3 as usize] = root as i32;
+ self.parent_or_size[elem2 as usize] = root as i32;
+ self.parent_or_size[elem1 as usize] = root as i32;
+ self.parent_or_size[elem0 as usize] = root as i32;
+ return root;
+ }
+
+ // This is the same as `find`, except with an unroll factor of 2 rather
+ // than 4, and it is self-recursive. Don't call it directly; it is
+ // intended only as a fallback for `find`.
+ #[inline(never)]
+ fn find_slow(&mut self, elem0: u32) -> u32 {
+ // Recurse up to the root. On the way back out, make all nodes point
+ // directly at the root index.
+
+ let elem0_parent_or_size: i32 = self.parent_or_size[elem0 as usize];
+ if elem0_parent_or_size < 0 {
+ // We're at a tree root.
+ return elem0;
+ }
+
+ let elem1 = elem0_parent_or_size as u32;
+ let elem1_parent_or_size: i32 = self.parent_or_size[elem1 as usize];
+ if elem1_parent_or_size < 0 {
+ self.parent_or_size[elem0 as usize] = elem1 as i32;
+ return elem1;
+ }
+
+ let elem2 = elem1_parent_or_size as u32;
+ let root = self.find_slow(elem2);
+ assert!(root < UF_MAX_SIZE);
+ self.parent_or_size[elem1 as usize] = root as i32;
+ self.parent_or_size[elem0 as usize] = root as i32;
+ return root;
+ }
+
+ // Union, by size (weight). This is publicly visible.
+ pub fn union(&mut self, elem1t: T, elem2t: T) {
+ let elem1 = ToFromU32::to_u32(elem1t);
+ let elem2 = ToFromU32::to_u32(elem2t);
+ if elem1 == elem2 {
+ // Ideally, we'd alert the caller that they are mistakenly calling `union`
+ // on identical values, but fuzzing hits this case often enough that we
+ // silently tolerate it.
+ return;
+ }
+ let root1: u32 = self.find(elem1);
+ let root2: u32 = self.find(elem2);
+ if root1 == root2 {
+ // `elem1` and `elem2` are already in the same tree. Do nothing.
+ return;
+ }
+ let size1: i32 = self.parent_or_size[root1 as usize];
+ let size2: i32 = self.parent_or_size[root2 as usize];
+ // "They are both roots"
+ assert!(size1 < 0 && size2 < 0);
+ // Make the root of the smaller tree point at the root of the bigger tree.
+ // Update the root of the bigger tree to reflect its increased size. That
+ // only requires adding the two `size` values, since they are both
+ // negative, so adding them will (correctly) drive it more negative.
+ if size1 > size2 {
+ self.parent_or_size[root1 as usize] = root2 as i32;
+ self.parent_or_size[root2 as usize] += size1;
+ } else {
+ self.parent_or_size[root2 as usize] = root1 as i32;
+ self.parent_or_size[root1 as usize] += size2;
+ }
+ }
+}
+
+//=============================================================================
+// UnionFindEquivClasses
+
+// This is a compact representation for all the equivalence classes in a
+// `UnionFind`. It can be constructed in more-or-less linear time (meaning,
+// O(universe size)) and allows iteration over the elements of each
+// equivalence class in time linear in the size of the equivalence class (you
+// can't ask for better). It doesn't support queries of the form "are these
+// two elements in the same equivalence class" in linear time, but we don't
+// care about that. What we care about is being able to find and visit the
+// equivalence class of an element quickly.
+//
+// The fields are non-public. What is publicly available is the ability to
+// get an iterator (for the equivalence class elements), given a starting
+// element.
+
+/*priv*/
+const UFEC_NULL: u32 = 0xFFFF_FFFF;
+
+/*priv*/
+#[derive(Clone)]
+struct LLElem {
+ // This list element
+ elem: u32,
+ // Pointer to the rest of the list (index in `llelems`), or UFEC_NULL.
+ tail: u32,
+}
+
+pub struct UnionFindEquivClasses<T: ToFromU32> {
+ // Linked list start "pointers". Has .len() == universe size. Entries must
+ // not be UFEC_NULL since each element is at least a member of its own
+ // equivalence class.
+ /*priv*/
+ heads: Vec<u32>,
+
+ // Linked list elements. Has .len() == universe size.
+ /*priv*/
+ lists: Vec<LLElem>,
+
+ // Keep the typechecker happy
+ /*priv*/
+ anchor: PhantomData<T>,
+ // This struct doesn't have a `new` method since construction is done by a
+ // carefully designed algorithm, `UnionFind::get_equiv_classes`.
+}
+
+impl<T: ToFromU32> UnionFind<T> {
+ // This requires mutable `self` because it needs to do a bunch of `find`
+ // operations, and those modify `self` in order to perform path compression.
+ // We could avoid this by using a non-path-compressing `find` operation, but
+ // that could have the serious side effect of making the big-O complexity of
+ // `get_equiv_classes` worse. Hence we play safe and accept the mutability
+ // requirement.
+ pub fn get_equiv_classes(&mut self) -> UnionFindEquivClasses<T> {
+ let nElemsUSize = self.parent_or_size.len();
+ // The construction algorithm requires that all elements have a value
+ // strictly less than 2^31. The union-find machinery that builds the
+ // `parent_or_size` vector we read here relies on a slightly tighter
+ // bound, which we reiterate here out of general paranoia:
+ assert!(nElemsUSize < UF_MAX_SIZE as usize);
+ let nElems = nElemsUSize as u32;
+
+ // Avoid reallocation; we know how big these need to be.
+ let mut heads = Vec::<u32>::new();
+ heads.resize(nElems as usize, UFEC_NULL); // all invalid
+
+ let mut lists = Vec::<LLElem>::new();
+ lists.resize(
+ nElems as usize,
+ LLElem {
+ elem: 0,
+ tail: UFEC_NULL,
+ },
+ );
+
+ // As explanation, let there be N elements (`nElems`) which have been
+ // partitioned into M <= N equivalence classes by calls to `union`.
+ //
+ // When we are finished, `lists` will contain M independent linked lists,
+ // each of which represents one equivalence class, and which is terminated
+ // by UFEC_NULL. And `heads` is used to point to the starting point of
+ // each elem's equivalence class, as follows:
+ //
+ // * if heads[elem][bit 31] == 1, then heads[elem][bits 30:0] contain the
+ // index in lists[] of the first element in `elem`'s equivalence class.
+ //
+ // * if heads[elem][bit 31] == 0, then heads[elem][bits 30:0] tell us what
+ // `elem`'s equivalence class leader is. That is, heads[elem][bits
+ // 30:0] give the index in `heads` of the entry that contains the
+ // first element in `elem`'s equivalence class.
+ //
+ // With this arrangement, we can:
+ //
+ // * detect whether `elem` is an equivalence class leader, by inspecting
+ // heads[elem][bit 31]
+ //
+ // * find the start of `elem`'s equivalence class list, either by using
+ // heads[elem][bits 30:0] directly if heads[elem][bit 31] == 1, or
+ // using a single indirection if heads[elem][bit 31] == 0.
+ //
+ // For a universe of size N, this makes it possible to:
+ //
+ // * find the equivalence class list of any elem in O(1) time.
+ //
+ // * find and iterate through any single equivalence class in time O(1) +
+ // O(size of the equivalence class).
+ //
+ // * find all the equivalence class leaders in O(N) time.
+ //
+ // * find all the equivalence class leaders, and then iterate through each
+ // equivalence class exactly once, in time k1.O(N) + k2.O(N). The first
+ // term is the cost of finding all the leaders. The second term is the
+ // cost of visiting all elements of each equivalence class exactly once.
+ //
+ // The construction algorithm requires two forward passes over
+ // `parent_or_size`.
+ //
+ // In the first pass, we visit each element. If an element is a tree root,
+ // its `heads` entry is left at UFEC_NULL. If an element isn't a tree
+ // root, we use `find` to find its root element, and set
+ // `heads[elem][30:0]` to be the tree root, and heads[elem][31] to 0.
+ // Hence, after the first pass, `heads` maps each non-root element to its
+ // equivalence class leader.
+ //
+ // The second pass builds the lists. We again visit each element. If an
+ // element is a tree root, it is added as a list element, and its `heads`
+ // entry is updated to point at that list element. If an element isn't a
+ // tree root, we find its root in constant time by inspecting its `heads`
+ // entry. The element is added to the root element's list, and the
+ // root element's `heads` entry is accordingly updated. Hence, after the
+ // second pass, the `heads` entry for root elements points to a linked list
+ // that contains all elements in that tree, while the `heads` entry for
+ // non-root elements is unchanged from the first pass; that is, it still
+ // points to the `heads` entry of that element's root element.
+ //
+ // Note that the heads[] entry for any class leader (tree root) can never
+ // be UFEC_NULL, since all elements must at least be in an equivalence
+ // class of size 1. Hence there is no confusion possible resulting from
+ // using the heads bit 31 entries as a direct/indirect flag.
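+ //
+ // As a small worked example (values are purely illustrative): take a
+ // universe {0, 1, 2} in which `union(0, 2)` has made 0 the root of {0, 2}
+ // and 1 is on its own, so parent_or_size == [-2, -1, 0]. The two passes
+ // then produce
+ //
+ // heads = [0x8000_0002, 0x8000_0001, 0x0000_0000]
+ // lists = [{elem: 0, tail: UFEC_NULL}, {elem: 1, tail: UFEC_NULL},
+ // {elem: 2, tail: 0}]
+ //
+ // The class of 2 is found by indirecting through heads[2] to heads[0],
+ // whose bits 30:0 point at lists[2]; walking lists[2] -> lists[0] yields
+ // {2, 0}. The class of 1 starts directly at lists[1] and is just {1}.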
+
+ // First pass
+ for i in 0..nElems {
+ if self.parent_or_size[i as usize] >= 0 {
+ // i is non-root
+ let root_i: u32 = self.find(i);
+ assert!(root_i < 0x8000_0000u32);
+ heads[i as usize] = root_i; // .direct flag == 0
+ }
+ }
+
+ // Second pass
+ let mut list_bump = 0u32;
+ for i in 0..nElems {
+ if self.parent_or_size[i as usize] < 0 {
+ // i is root
+ lists[list_bump as usize] = LLElem {
+ elem: i,
+ tail: if heads[i as usize] == UFEC_NULL {
+ UFEC_NULL
+ } else {
+ heads[i as usize] & 0x7FFF_FFFF
+ },
+ };
+ assert!(list_bump < 0x8000_0000u32);
+ heads[i as usize] = list_bump | 0x8000_0000u32; // .direct flag == 1
+ list_bump += 1;
+ } else {
+ // i is non-root
+ let i_root = heads[i as usize];
+ lists[list_bump as usize] = LLElem {
+ elem: i,
+ tail: if heads[i_root as usize] == UFEC_NULL {
+ UFEC_NULL
+ } else {
+ heads[i_root as usize] & 0x7FFF_FFFF
+ },
+ };
+ assert!(list_bump < 0x8000_0000u32);
+ heads[i_root as usize] = list_bump | 0x8000_0000u32; // .direct flag == 1
+ list_bump += 1;
+ }
+ }
+ assert!(list_bump == nElems);
+
+ // It's a wrap!
+ assert!(heads.len() == nElemsUSize);
+ assert!(lists.len() == nElemsUSize);
+ //{
+ // for i in 0 .. heads.len() {
+ // println!("{}: heads {:x} lists.elem {} .tail {:x}", i,
+ // heads[i], lists[i].elem, lists[i].tail);
+ // }
+ //}
+ UnionFindEquivClasses {
+ heads,
+ lists,
+ anchor: PhantomData,
+ }
+ }
+}
+
+impl<T: ToFromU32> UnionFindEquivClasses<T> {
+ // Indicates whether `item1` and `item2` are in the same equivalence
+ // class. If either falls outside the "universe", returns `None`.
+ pub fn in_same_equivalence_class(&self, item1: T, item2: T) -> Option<bool> {
+ let mut item1num = ToFromU32::to_u32(item1) as usize;
+ let mut item2num = ToFromU32::to_u32(item2) as usize;
+ // If either item is outside our "universe", say we don't know.
+ if item1num >= self.heads.len() || item2num >= self.heads.len() {
+ return None;
+ }
+ // Ensure that `item1num` and `item2num` both point at class leaders.
+ if (self.heads[item1num] & 0x8000_0000) == 0 {
+ item1num = self.heads[item1num] as usize;
+ }
+ if (self.heads[item2num] & 0x8000_0000) == 0 {
+ item2num = self.heads[item2num] as usize;
+ }
+ debug_assert!((self.heads[item1num] & 0x8000_0000) == 0x8000_0000);
+ debug_assert!((self.heads[item2num] & 0x8000_0000) == 0x8000_0000);
+ Some(item1num == item2num)
+ }
+}
+
+//=============================================================================
+// UnionFindEquivClassElemsIter
+
+// We may want to find the equivalence class for some given element, and
+// iterate through its elements. This iterator provides that.
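+//
+// For example (a sketch only, assuming `eclasses` came from
+// `get_equiv_classes` and `v` is some element of the universe):
+//
+//let members: Vec<u32> = eclasses.equiv_class_elems_iter(v).collect();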
+
+pub struct UnionFindEquivClassElemsIter<'a, T: ToFromU32> {
+ // The equivalence classes
+ /*priv*/
+ ufec: &'a UnionFindEquivClasses<T>,
+ // Index into `ufec.lists`, or UFEC_NULL.
+ /*priv*/
+ next: u32,
+}
+
+impl<T: ToFromU32> UnionFindEquivClasses<T> {
+ pub fn equiv_class_elems_iter<'a>(&'a self, item: T) -> UnionFindEquivClassElemsIter<'a, T> {
+ let mut itemU32 = ToFromU32::to_u32(item);
+ assert!((itemU32 as usize) < self.heads.len());
+ if (self.heads[itemU32 as usize] & 0x8000_0000) == 0 {
+ // .direct flag is not set. This is not a class leader. We must
+ // indirect.
+ itemU32 = self.heads[itemU32 as usize];
+ }
+ // Now `itemU32` must point at a class leader.
+ assert!((self.heads[itemU32 as usize] & 0x8000_0000) == 0x8000_0000);
+ let next = self.heads[itemU32 as usize] & 0x7FFF_FFFF;
+ // Now `next` points at the first element in the list.
+ UnionFindEquivClassElemsIter { ufec: &self, next }
+ }
+}
+
+impl<'a, T: ToFromU32> Iterator for UnionFindEquivClassElemsIter<'a, T> {
+ type Item = T;
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.next == UFEC_NULL {
+ None
+ } else {
+ let res: T = ToFromU32::from_u32(self.ufec.lists[self.next as usize].elem);
+ self.next = self.ufec.lists[self.next as usize].tail;
+ Some(res)
+ }
+ }
+}
+
+// In order to visit all equivalence classes exactly once, we need something
+// else: a way to enumerate their leaders (some value arbitrarily drawn from
+// each one). This provides that.
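+//
+// The intended pattern (a sketch only, again assuming `eclasses` came from
+// `get_equiv_classes`) is a nested loop over leaders and then members:
+//
+//for leader in eclasses.equiv_class_leaders_iter() {
+// for elem in eclasses.equiv_class_elems_iter(leader) {
+// // every element of the universe is visited exactly once
+// }
+//}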
+
+pub struct UnionFindEquivClassLeadersIter<'a, T: ToFromU32> {
+ // The equivalence classes
+ /*priv*/
+ ufec: &'a UnionFindEquivClasses<T>,
+ // Index into `ufec.heads` of the next unvisited item.
+ /*priv*/
+ next: u32,
+}
+
+impl<T: ToFromU32> UnionFindEquivClasses<T> {
+ pub fn equiv_class_leaders_iter<'a>(&'a self) -> UnionFindEquivClassLeadersIter<'a, T> {
+ UnionFindEquivClassLeadersIter {
+ ufec: &self,
+ next: 0,
+ }
+ }
+}
+
+impl<'a, T: ToFromU32> Iterator for UnionFindEquivClassLeadersIter<'a, T> {
+ type Item = T;
+ fn next(&mut self) -> Option<Self::Item> {
+ // Scan forwards through `ufec.heads` to find the next unvisited one which
+ // is a leader (a tree root).
+ loop {
+ if self.next as usize >= self.ufec.heads.len() {
+ return None;
+ }
+ if (self.ufec.heads[self.next as usize] & 0x8000_0000) == 0x8000_0000 {
+ // This is a leader.
+ let res = ToFromU32::from_u32(self.next);
+ self.next += 1;
+ return Some(res);
+ }
+ // No luck; keep on searching.
+ self.next += 1;
+ }
+ /*NOTREACHED*/
+ }
+}
+
+//=============================================================================
+// Testing machinery for UnionFind
+
+#[cfg(test)]
+mod union_find_test_utils {
+ use super::UnionFindEquivClasses;
+ // Test that the eclass for `elem` is `expected` (modulo ordering).
+ pub fn test_eclass(eclasses: &UnionFindEquivClasses<u32>, elem: u32, expected: &Vec<u32>) {
+ let mut expected_sorted = expected.clone();
+ let mut actual = vec![];
+ for ecm in eclasses.equiv_class_elems_iter(elem) {
+ actual.push(ecm);
+ }
+ expected_sorted.sort();
+ actual.sort();
+ assert!(actual == expected_sorted);
+ }
+ // Test that the eclass leaders are exactly `expected`.
+ pub fn test_leaders(
+ univ_size: u32,
+ eclasses: &UnionFindEquivClasses<u32>,
+ expected: &Vec<u32>,
+ ) {
+ let mut actual = vec![];
+ for leader in eclasses.equiv_class_leaders_iter() {
+ actual.push(leader);
+ }
+ assert!(actual == *expected);
+ // Now use the leaders to enumerate each eclass exactly once, and collect
+ // up the elements. The resulting vector should be some permutation of
+ // [0 .. univ_size-1].
+ let mut univ_actual = vec![];
+ for leader in eclasses.equiv_class_leaders_iter() {
+ for elem in eclasses.equiv_class_elems_iter(leader) {
+ univ_actual.push(elem);
+ }
+ }
+ univ_actual.sort();
+ let mut univ_expected = vec![];
+ for i in 0..univ_size {
+ univ_expected.push(i);
+ }
+ assert!(univ_actual == univ_expected);
+ }
+ // Test that `in_same_equivalence_class` produces the expected results.
+ pub fn test_in_same_eclass(
+ eclasses: &UnionFindEquivClasses<u32>,
+ elem1: u32,
+ elem2: u32,
+ expected: Option<bool>,
+ ) {
+ assert!(eclasses.in_same_equivalence_class(elem1, elem2) == expected);
+ assert!(eclasses.in_same_equivalence_class(elem2, elem1) == expected);
+ }
+}
+
+#[test]
+fn test_union_find() {
+ const UNIV_SIZE: u32 = 8;
+ let mut uf = UnionFind::new(UNIV_SIZE as usize);
+ let mut uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![3]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![4]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![5]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![6]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 1, 2, 3, 4, 5, 6, 7]);
+
+ uf.union(2, 4);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![4, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![3]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![4, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![5]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![6]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 1, 2, 3, 5, 6, 7]);
+
+ uf.union(5, 3);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![4, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![5, 3]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![4, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![5, 3]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![6]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 1, 2, 5, 6, 7]);
+
+ uf.union(2, 5);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![6]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 1, 2, 6, 7]);
+ // At this point, also check the "in same equivalence class?" function.
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 0, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 1, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 2, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 3, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 4, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 5, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 6, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 1, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 2, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 3, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 4, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 5, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 6, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 1, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 2, 2, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 2, 3, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 2, 4, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 2, 5, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 2, 6, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 2, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 3, 3, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 3, 4, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 3, 5, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 3, 6, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 3, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 4, 4, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 4, 5, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 4, 6, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 4, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 5, 5, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 5, 6, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 5, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 6, 6, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 6, 7, Some(false));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 7, 7, Some(true));
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 0, 8, None);
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 8, 0, None);
+ union_find_test_utils::test_in_same_eclass(&uf_eclasses, 8, 8, None);
+
+ uf.union(7, 1);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![7, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![6]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7, 1]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 2, 6, 7]);
+
+ uf.union(6, 7);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![7, 6, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![5, 4, 3, 2]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![7, 6, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7, 6, 1]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 2, 6]);
+
+ uf.union(4, 1);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7, 6, 5, 4, 3, 2, 1]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0, 6]);
+
+ uf.union(0, 3);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0]);
+
+ // Pointless, because the classes are already maximal.
+ uf.union(1, 2);
+ uf_eclasses = uf.get_equiv_classes();
+ union_find_test_utils::test_eclass(&uf_eclasses, 0, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 1, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 2, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 3, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 4, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 5, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 6, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_eclass(&uf_eclasses, 7, &vec![7, 6, 5, 4, 3, 2, 1, 0]);
+ union_find_test_utils::test_leaders(UNIV_SIZE, &uf_eclasses, &vec![0]);
+}