Diffstat (limited to 'third_party/rust/cranelift-codegen/src/machinst/blockorder.rs')
-rw-r--r-- | third_party/rust/cranelift-codegen/src/machinst/blockorder.rs | 624
1 file changed, 624 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/machinst/blockorder.rs b/third_party/rust/cranelift-codegen/src/machinst/blockorder.rs new file mode 100644 index 0000000000..1052d83858 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/machinst/blockorder.rs @@ -0,0 +1,624 @@ +//! Computation of basic block order in emitted code. +//! +//! This module handles the translation from CLIF BBs to VCode BBs. +//! +//! The basic idea is that we compute a sequence of "lowered blocks" that +//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit +//! block on *every* edge). Conceptually, the lowering pipeline wants to insert +//! moves for phi-nodes on every block-to-block transfer; these blocks always +//! conceptually exist, but may be merged with an "original" CLIF block (and +//! hence not actually exist; this is equivalent to inserting the blocks only on +//! critical edges). +//! +//! In other words, starting from a CFG like this (where each "CLIF block" and +//! "(edge N->M)" is a separate basic block): +//! +//! ```plain +//! +//! CLIF block 0 +//! / \ +//! (edge 0->1) (edge 0->2) +//! | | +//! CLIF block 1 CLIF block 2 +//! \ / +//! (edge 1->3) (edge 2->3) +//! \ / +//! CLIF block 3 +//! ``` +//! +//! We can produce a CFG of lowered blocks like so: +//! +//! ```plain +//! +--------------+ +//! | CLIF block 0 | +//! +--------------+ +//! / \ +//! +--------------+ +--------------+ +//! | (edge 0->1) | |(edge 0->2) | +//! | CLIF block 1 | | CLIF block 2 | +//! +--------------+ +--------------+ +//! \ / +//! +-----------+ +-----------+ +//! |(edge 1->3)| |(edge 2->3)| +//! +-----------+ +-----------+ +//! \ / +//! +------------+ +//! |CLIF block 3| +//! +------------+ +//! ``` +//! +//! (note that the edges into CLIF blocks 1 and 2 could be merged with those +//! blocks' original bodies, but the out-edges could not because for simplicity +//! in the successor-function definition, we only ever merge an edge onto one +//! side of an original CLIF block.) +//! +//! Each `LoweredBlock` names just an original CLIF block, an original CLIF +//! block prepended or appended with an edge block (never both, though), or just +//! an edge block. +//! +//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph +//! (never actually materialized, just defined by a "successors" function), and +//! compute the reverse postorder. +//! +//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for +//! example, consider any information about whether edge blocks will actually +//! have content, because this computation happens as part of lowering *before* +//! regalloc, and regalloc may or may not insert moves/spills/reloads on any +//! particular edge. But it works relatively well and is conceptually simple. +//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like +//! branch editing that in practice elides empty blocks and simplifies some of +//! the other redundancies that this scheme produces. + +use crate::entity::SecondaryMap; +use crate::fx::{FxHashMap, FxHashSet}; +use crate::ir::{Block, Function, Inst, Opcode}; +use crate::machinst::lower::visit_block_succs; +use crate::machinst::*; + +use log::debug; +use smallvec::SmallVec; + +/// Mapping from CLIF BBs to VCode BBs. +#[derive(Debug)] +pub struct BlockLoweringOrder { + /// Lowered blocks, in BlockIndex order. Each block is some combination of + /// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after; + /// see [LoweredBlock] for details. 
+ lowered_order: Vec<LoweredBlock>, + /// Successors for all lowered blocks, in one serialized vector. Indexed by + /// the ranges in `lowered_succ_ranges`. + lowered_succs: Vec<(Inst, LoweredBlock)>, + /// BlockIndex values for successors for all lowered blocks, in the same + /// order as `lowered_succs`. + lowered_succ_indices: Vec<(Inst, BlockIndex)>, + /// Ranges in `lowered_succs` giving the successor lists for each lowered + /// block. Indexed by lowering-order index (`BlockIndex`). + lowered_succ_ranges: Vec<(usize, usize)>, + /// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that + /// some CLIF BBs may not be lowered; in particular, we skip unreachable + /// blocks. + orig_map: SecondaryMap<Block, Option<BlockIndex>>, +} + +/// The origin of a block in the lowered block-order: either an original CLIF +/// block, or an inserted edge-block, or a combination of the two if an edge is +/// non-critical. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum LoweredBlock { + /// Block in original CLIF, with no merged edge-blocks. + Orig { + /// Original CLIF block. + block: Block, + }, + /// Block in the original CLIF, plus edge-block to one succ (which is the + /// one successor of the original block). + OrigAndEdge { + /// The original CLIF block contained in this lowered block. + block: Block, + /// The edge (jump) instruction transitioning from this block + /// to the next, i.e., corresponding to the included edge-block. This + /// will be an instruction in `block`. + edge_inst: Inst, + /// The successor CLIF block. + succ: Block, + }, + /// Block in the original CLIF, preceded by edge-block from one pred (which + /// is the one pred of the original block). + EdgeAndOrig { + /// The previous CLIF block, i.e., the edge block's predecessor. + pred: Block, + /// The edge (jump) instruction corresponding to the included + /// edge-block. This will be an instruction in `pred`. + edge_inst: Inst, + /// The original CLIF block included in this lowered block. + block: Block, + }, + /// Split critical edge between two CLIF blocks. This lowered block does not + /// correspond to any original CLIF blocks; it only serves as an insertion + /// point for work to happen on the transition from `pred` to `succ`. + Edge { + /// The predecessor CLIF block. + pred: Block, + /// The edge (jump) instruction corresponding to this edge's transition. + /// This will be an instruction in `pred`. + edge_inst: Inst, + /// The successor CLIF block. + succ: Block, + }, +} + +impl LoweredBlock { + /// The associated original (CLIF) block included in this lowered block, if + /// any. + pub fn orig_block(self) -> Option<Block> { + match self { + LoweredBlock::Orig { block, .. } + | LoweredBlock::OrigAndEdge { block, .. } + | LoweredBlock::EdgeAndOrig { block, .. } => Some(block), + LoweredBlock::Edge { .. } => None, + } + } + + /// The associated in-edge, if any. + pub fn in_edge(self) -> Option<(Block, Inst, Block)> { + match self { + LoweredBlock::EdgeAndOrig { + pred, + edge_inst, + block, + } => Some((pred, edge_inst, block)), + _ => None, + } + } + + /// the associated out-edge, if any. Also includes edge-only blocks. + pub fn out_edge(self) -> Option<(Block, Inst, Block)> { + match self { + LoweredBlock::OrigAndEdge { + block, + edge_inst, + succ, + } => Some((block, edge_inst, succ)), + LoweredBlock::Edge { + pred, + edge_inst, + succ, + } => Some((pred, edge_inst, succ)), + _ => None, + } + } +} + +impl BlockLoweringOrder { + /// Compute and return a lowered block order for `f`. 
+ pub fn new(f: &Function) -> BlockLoweringOrder { + debug!("BlockLoweringOrder: function body {:?}", f); + + // Step 1: compute the in-edge and out-edge count of every block. + let mut block_in_count = SecondaryMap::with_default(0); + let mut block_out_count = SecondaryMap::with_default(0); + + // Cache the block successors to avoid re-examining branches below. + let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new(); + let mut block_succ_range = SecondaryMap::with_default((0, 0)); + let mut fallthrough_return_block = None; + for block in f.layout.blocks() { + let block_succ_start = block_succs.len(); + visit_block_succs(f, block, |inst, succ| { + block_out_count[block] += 1; + block_in_count[succ] += 1; + block_succs.push((inst, succ)); + }); + let block_succ_end = block_succs.len(); + block_succ_range[block] = (block_succ_start, block_succ_end); + + for inst in f.layout.block_likely_branches(block) { + if f.dfg[inst].opcode() == Opcode::Return { + // Implicit output edge for any return. + block_out_count[block] += 1; + } + if f.dfg[inst].opcode() == Opcode::FallthroughReturn { + // Fallthrough return block must come last. + debug_assert!(fallthrough_return_block == None); + fallthrough_return_block = Some(block); + } + } + } + // Implicit input edge for entry block. + if let Some(entry) = f.layout.entry_block() { + block_in_count[entry] += 1; + } + + // Here we define the implicit CLIF-plus-edges graph. There are + // conceptually two such graphs: the original, with every edge explicit, + // and the merged one, with blocks (represented by `LoweredBlock` + // values) that contain original CLIF blocks, edges, or both. This + // function returns a lowered block's successors as per the latter, with + // consideration to edge-block merging. + // + // Note that there is a property of the block-merging rules below + // that is very important to ensure we don't miss any lowered blocks: + // any block in the implicit CLIF-plus-edges graph will *only* be + // included in one block in the merged graph. + // + // This, combined with the property that every edge block is reachable + // only from one predecessor (and hence cannot be reached by a DFS + // backedge), means that it is sufficient in our DFS below to track + // visited-bits per original CLIF block only, not per edge. This greatly + // simplifies the data structures (no need to keep a sparse hash-set of + // (block, block) tuples). + let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| { + let start_idx = ret.len(); + match block { + LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => { + // At an orig block; successors are always edge blocks, + // possibly with orig blocks following. + let range = block_succ_range[block]; + for &(edge_inst, succ) in &block_succs[range.0..range.1] { + if block_in_count[succ] == 1 { + ret.push(( + edge_inst, + LoweredBlock::EdgeAndOrig { + pred: block, + edge_inst, + block: succ, + }, + )); + } else { + ret.push(( + edge_inst, + LoweredBlock::Edge { + pred: block, + edge_inst, + succ, + }, + )); + } + } + } + LoweredBlock::Edge { + succ, edge_inst, .. + } + | LoweredBlock::OrigAndEdge { + succ, edge_inst, .. + } => { + // At an edge block; successors are always orig blocks, + // possibly with edge blocks following. + if block_out_count[succ] == 1 { + let range = block_succ_range[succ]; + // check if the one succ is a real CFG edge (vs. + // implicit return succ). 
+ if range.1 - range.0 > 0 { + debug_assert!(range.1 - range.0 == 1); + let (succ_edge_inst, succ_succ) = block_succs[range.0]; + ret.push(( + edge_inst, + LoweredBlock::OrigAndEdge { + block: succ, + edge_inst: succ_edge_inst, + succ: succ_succ, + }, + )); + } else { + ret.push((edge_inst, LoweredBlock::Orig { block: succ })); + } + } else { + ret.push((edge_inst, LoweredBlock::Orig { block: succ })); + } + } + } + let end_idx = ret.len(); + (start_idx, end_idx) + }; + + // Build the explicit LoweredBlock-to-LoweredBlock successors list. + let mut lowered_succs = vec![]; + let mut lowered_succ_indices = vec![]; + + // Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an + // explicit stack so we don't overflow the real stack with a deep DFS. + #[derive(Debug)] + struct StackEntry { + this: LoweredBlock, + succs: (usize, usize), // range in lowered_succs + cur_succ: usize, // index in lowered_succs + } + + let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new(); + let mut visited = FxHashSet::default(); + let mut postorder = vec![]; + if let Some(entry) = f.layout.entry_block() { + // FIXME(cfallin): we might be able to use OrigAndEdge. Find a way + // to not special-case the entry block here. + let block = LoweredBlock::Orig { block: entry }; + visited.insert(block); + let range = compute_lowered_succs(&mut lowered_succs, block); + lowered_succ_indices.resize(lowered_succs.len(), 0); + stack.push(StackEntry { + this: block, + succs: range, + cur_succ: range.1, + }); + } + + let mut deferred_last = None; + while !stack.is_empty() { + let stack_entry = stack.last_mut().unwrap(); + let range = stack_entry.succs; + if stack_entry.cur_succ == range.0 { + let orig_block = stack_entry.this.orig_block(); + if orig_block.is_some() && orig_block == fallthrough_return_block { + deferred_last = Some((stack_entry.this, range)); + } else { + postorder.push((stack_entry.this, range)); + } + stack.pop(); + } else { + // Heuristic: chase the children in reverse. This puts the first + // successor block first in RPO, all other things being equal, + // which tends to prioritize loop backedges over out-edges, + // putting the edge-block closer to the loop body and minimizing + // live-ranges in linear instruction space. + let next = lowered_succs[stack_entry.cur_succ - 1].1; + stack_entry.cur_succ -= 1; + if visited.contains(&next) { + continue; + } + visited.insert(next); + let range = compute_lowered_succs(&mut lowered_succs, next); + lowered_succ_indices.resize(lowered_succs.len(), 0); + stack.push(StackEntry { + this: next, + succs: range, + cur_succ: range.1, + }); + } + } + + postorder.reverse(); + let mut rpo = postorder; + if let Some(d) = deferred_last { + rpo.push(d); + } + + // Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps. 
+ let mut lowered_order = vec![]; + let mut lowered_succ_ranges = vec![]; + let mut lb_to_bindex = FxHashMap::default(); + for (block, succ_range) in rpo.into_iter() { + lb_to_bindex.insert(block, lowered_order.len() as BlockIndex); + lowered_order.push(block); + lowered_succ_ranges.push(succ_range); + } + + let lowered_succ_indices = lowered_succs + .iter() + .map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap())) + .collect(); + + let mut orig_map = SecondaryMap::with_default(None); + for (i, lb) in lowered_order.iter().enumerate() { + let i = i as BlockIndex; + if let Some(b) = lb.orig_block() { + orig_map[b] = Some(i); + } + } + + let result = BlockLoweringOrder { + lowered_order, + lowered_succs, + lowered_succ_indices, + lowered_succ_ranges, + orig_map, + }; + debug!("BlockLoweringOrder: {:?}", result); + result + } + + /// Get the lowered order of blocks. + pub fn lowered_order(&self) -> &[LoweredBlock] { + &self.lowered_order[..] + } + + /// Get the successors for a lowered block, by index in `lowered_order()`'s + /// returned slice. Each successsor is paired with the edge-instruction + /// (branch) corresponding to this edge. + pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] { + let range = self.lowered_succ_ranges[block as usize]; + &self.lowered_succs[range.0..range.1] + } + + /// Get the successor indices for a lowered block. + pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] { + let range = self.lowered_succ_ranges[block as usize]; + &self.lowered_succ_indices[range.0..range.1] + } + + /// Get the lowered block index containing a CLIF block, if any. (May not be + /// present if the original CLIF block was unreachable.) + pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> { + self.orig_map[bb] + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::types::*; + use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature}; + use crate::isa::CallConv; + + fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function { + assert!(n_blocks > 0); + + let name = ExternalName::testcase("test0"); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(I32)); + let mut func = Function::with_name_signature(name, sig); + let blocks = (0..n_blocks) + .map(|i| { + let bb = func.dfg.make_block(); + assert!(bb.as_u32() == i as u32); + bb + }) + .collect::<Vec<_>>(); + + let arg0 = func.dfg.append_block_param(blocks[0], I32); + + let mut pos = FuncCursor::new(&mut func); + + let mut edge = 0; + for i in 0..n_blocks { + pos.insert_block(blocks[i]); + let mut succs = vec![]; + while edge < edges.len() && edges[edge].0 == i { + succs.push(edges[edge].1); + edge += 1; + } + if succs.len() == 0 { + pos.ins().return_(&[arg0]); + } else if succs.len() == 1 { + pos.ins().jump(blocks[succs[0]], &[]); + } else if succs.len() == 2 { + pos.ins().brnz(arg0, blocks[succs[0]], &[]); + pos.ins().jump(blocks[succs[1]], &[]); + } else { + panic!("Too many successors"); + } + } + + func + } + + #[test] + fn test_blockorder_diamond() { + let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + let order = BlockLoweringOrder::new(&func); + + assert_eq!(order.lowered_order.len(), 6); + + assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0); + assert!(order.lowered_order[0].in_edge().is_none()); + assert!(order.lowered_order[0].out_edge().is_none()); + + assert!(order.lowered_order[1].orig_block().unwrap().as_u32() 
== 1); + assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0); + assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1); + + assert!(order.lowered_order[2].orig_block().is_none()); + assert!(order.lowered_order[2].in_edge().is_none()); + assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1); + assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3); + + assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2); + assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0); + assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2); + assert!(order.lowered_order[3].out_edge().is_none()); + + assert!(order.lowered_order[4].orig_block().is_none()); + assert!(order.lowered_order[4].in_edge().is_none()); + assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2); + assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3); + + assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3); + assert!(order.lowered_order[5].in_edge().is_none()); + assert!(order.lowered_order[5].out_edge().is_none()); + } + + #[test] + fn test_blockorder_critedge() { + // 0 + // / \ + // 1 2 + // / \ \ + // 3 4 | + // |\ _|____| + // | \/ | + // | /\ | + // 5 6 + // + // (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split) + // + let func = build_test_func( + 7, + &[ + (0, 1), + (0, 2), + (1, 3), + (1, 4), + (2, 5), + (3, 5), + (3, 6), + (4, 6), + ], + ); + let order = BlockLoweringOrder::new(&func); + + assert_eq!(order.lowered_order.len(), 11); + println!("ordered = {:?}", order.lowered_order); + + // block 0 + assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0); + assert!(order.lowered_order[0].in_edge().is_none()); + assert!(order.lowered_order[0].out_edge().is_none()); + + // edge 0->1 + block 1 + assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1); + assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0); + assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1); + assert!(order.lowered_order[1].out_edge().is_none()); + + // edge 1->3 + block 3 + assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3); + assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1); + assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3); + assert!(order.lowered_order[2].out_edge().is_none()); + + // edge 3->5 + assert!(order.lowered_order[3].orig_block().is_none()); + assert!(order.lowered_order[3].in_edge().is_none()); + assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3); + assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5); + + // edge 3->6 + assert!(order.lowered_order[4].orig_block().is_none()); + assert!(order.lowered_order[4].in_edge().is_none()); + assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3); + assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6); + + // edge 1->4 + block 4 + assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4); + assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1); + assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4); + assert!(order.lowered_order[5].out_edge().is_none()); + + // edge 4->6 + assert!(order.lowered_order[6].orig_block().is_none()); + assert!(order.lowered_order[6].in_edge().is_none()); + assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4); + assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6); + + // block 6 + 
assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6); + assert!(order.lowered_order[7].in_edge().is_none()); + assert!(order.lowered_order[7].out_edge().is_none()); + + // edge 0->2 + block 2 + assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2); + assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0); + assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2); + assert!(order.lowered_order[8].out_edge().is_none()); + + // edge 2->5 + assert!(order.lowered_order[9].orig_block().is_none()); + assert!(order.lowered_order[9].in_edge().is_none()); + assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2); + assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5); + + // block 5 + assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5); + assert!(order.lowered_order[10].in_edge().is_none()); + assert!(order.lowered_order[10].out_edge().is_none()); + } +} |
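
For orientation, below is a minimal sketch of how lowering code might consume a `BlockLoweringOrder`, closely mirroring the `test_blockorder_diamond` unit test in the diff above. It assumes the crate-internal `machinst` items added by this patch (`BlockLoweringOrder`, `BlockIndex`) are importable as in this file; the helper name `dump_lowered_order` and the use of `println!` are purely illustrative and not part of the patch.

// Sketch only (not part of this patch): assumes the crate-internal machinst
// APIs added above (`BlockLoweringOrder`, `BlockIndex`); the helper name
// `dump_lowered_order` is hypothetical.
use crate::ir::Function;
use crate::machinst::blockorder::BlockLoweringOrder;
use crate::machinst::BlockIndex;

fn dump_lowered_order(f: &Function) {
    // Compute the lowered (VCode) block order; unreachable CLIF blocks are skipped.
    let order = BlockLoweringOrder::new(f);

    // Each lowered block is an original CLIF block, an inserted edge block,
    // or a merged combination of the two (see `LoweredBlock`).
    for (i, lb) in order.lowered_order().iter().enumerate() {
        println!("lowered block {}: {:?}", i, lb);
        // Successors are paired with the branch instruction forming the edge.
        for &(inst, succ) in order.succ_indices(i as BlockIndex) {
            println!("  succ via {:?} -> lowered block {}", inst, succ);
        }
    }
}

Called on the diamond CFG built by `build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)])` in the test module, this would print the six lowered blocks whose shape is asserted in `test_blockorder_diamond`.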