Diffstat (limited to 'third_party/rust/cranelift-codegen/src/binemit/relaxation.rs')
-rw-r--r-- | third_party/rust/cranelift-codegen/src/binemit/relaxation.rs | 397
1 file changed, 397 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs b/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs
new file mode 100644
index 0000000000..142e400985
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs
@@ -0,0 +1,397 @@
+//! Branch relaxation and offset computation.
+//!
+//! # Block header offsets
+//!
+//! Before we can generate binary machine code for branch instructions, we need to know the final
+//! offsets of all the block headers in the function. This information is encoded in the
+//! `func.offsets` table.
+//!
+//! # Branch relaxation
+//!
+//! Branch relaxation is the process of ensuring that all branches in the function have enough
+//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
+//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
+//!
+//! On RISC architectures, it can happen that conditional branches have a shorter range than
+//! unconditional branches:
+//!
+//! ```clif
+//!     brz v1, block17
+//! ```
+//!
+//! can be transformed into:
+//!
+//! ```clif
+//!     brnz v1, block23
+//!     jump block17
+//! block23:
+//! ```
+
+use crate::binemit::{CodeInfo, CodeOffset};
+use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList};
+use crate::isa::{EncInfo, TargetIsa};
+use crate::iterators::IteratorExtras;
+use crate::regalloc::RegDiversions;
+use crate::timing;
+use crate::CodegenResult;
+use core::convert::TryFrom;
+use log::debug;
+
+/// Relax branches and compute the final layout of block headers in `func`.
+///
+/// Fill in the `func.offsets` table so the function is ready for binary emission.
+pub fn relax_branches(
+    func: &mut Function,
+    _cfg: &mut ControlFlowGraph,
+    _domtree: &mut DominatorTree,
+    isa: &dyn TargetIsa,
+) -> CodegenResult<CodeInfo> {
+    let _tt = timing::relax_branches();
+
+    let encinfo = isa.encoding_info();
+
+    // Clear all offsets so we can recognize blocks that haven't been visited yet.
+    func.offsets.clear();
+    func.offsets.resize(func.dfg.num_blocks());
+
+    // Start by removing redundant jumps.
+    fold_redundant_jumps(func, _cfg, _domtree);
+
+    // Convert jumps to fallthrough instructions where possible.
+    fallthroughs(func);
+
+    let mut offset = 0;
+    let mut divert = RegDiversions::new();
+
+    // First, compute initial offsets for every block.
+    {
+        let mut cur = FuncCursor::new(func);
+        while let Some(block) = cur.next_block() {
+            divert.at_block(&cur.func.entry_diversions, block);
+            cur.func.offsets[block] = offset;
+            while let Some(inst) = cur.next_inst() {
+                divert.apply(&cur.func.dfg[inst]);
+                let enc = cur.func.encodings[inst];
+                offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
+            }
+        }
+    }
+
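+    // The initial pass above only measures the encodings currently assigned to each
+    // instruction; it does not check branch ranges yet. Any branch whose target turns
+    // out to be out of range is re-encoded (relaxed) by the fixpoint loop below, which
+    // re-measures offsets until no block offset changes during a full pass.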
+    // Then, run the relaxation algorithm until it converges.
+    let mut go_again = true;
+    while go_again {
+        go_again = false;
+        offset = 0;
+
+        // Visit all instructions in layout order.
+        let mut cur = FuncCursor::new(func);
+        while let Some(block) = cur.next_block() {
+            divert.at_block(&cur.func.entry_diversions, block);
+
+            // Record the offset for `block` and make sure we iterate until offsets are stable.
+            if cur.func.offsets[block] != offset {
+                cur.func.offsets[block] = offset;
+                go_again = true;
+            }
+
+            while let Some(inst) = cur.next_inst() {
+                divert.apply(&cur.func.dfg[inst]);
+
+                let enc = cur.func.encodings[inst];
+
+                // See if this branch has a range and a destination, and if the target is in
+                // range.
+                if let Some(range) = encinfo.branch_range(enc) {
+                    if let Some(dest) = cur.func.dfg[inst].branch_destination() {
+                        let dest_offset = cur.func.offsets[dest];
+                        if !range.contains(offset, dest_offset) {
+                            offset +=
+                                relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
+                            continue;
+                        }
+                    }
+                }
+
+                offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
+            }
+        }
+    }
+
+    let code_size = offset;
+    let jumptables = offset;
+
+    for (jt, jt_data) in func.jump_tables.iter() {
+        func.jt_offsets[jt] = offset;
+        // TODO: this should be computed based on the min size needed to hold the furthest branch.
+        offset += jt_data.len() as u32 * 4;
+    }
+
+    let jumptables_size = offset - jumptables;
+    let rodata = offset;
+
+    for constant in func.dfg.constants.entries_mut() {
+        constant.set_offset(offset);
+        offset +=
+            u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32")
+    }
+
+    let rodata_size = offset - rodata;
+
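+    // The finished image is laid out as three contiguous regions: machine code in
+    // [0, code_size), jump tables in the next jumptables_size bytes, and read-only
+    // constant data in the last rodata_size bytes; total_size covers all three.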
+    Ok(CodeInfo {
+        code_size,
+        jumptables_size,
+        rodata_size,
+        total_size: offset,
+    })
+}
+
+/// Folds an instruction if it is a redundant jump.
+/// Returns whether folding was performed (which invalidates the CFG).
+fn try_fold_redundant_jump(
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    block: Block,
+    first_inst: Inst,
+) -> bool {
+    let first_dest = match func.dfg[first_inst].branch_destination() {
+        Some(block) => block, // The instruction was a single-target branch.
+        None => {
+            return false; // The instruction was either multi-target or not a branch.
+        }
+    };
+
+    // For the moment, only attempt to fold a branch to a block that is parameterless.
+    // These blocks are mainly produced by critical edge splitting.
+    //
+    // TODO: Allow folding blocks that define SSA values and function as phi nodes.
+    if func.dfg.num_block_params(first_dest) != 0 {
+        return false;
+    }
+
+    // Look at the first instruction of the first branch's destination.
+    // If it is an unconditional branch, maybe the second jump can be bypassed.
+    let second_inst = func.layout.first_inst(first_dest).expect("Instructions");
+    if func.dfg[second_inst].opcode() != Opcode::Jump {
+        return false;
+    }
+
+    // Now we need to fix up first_inst's block parameters to match second_inst's,
+    // without changing the branch-specific arguments.
+    //
+    // The intermediary block is allowed to reference any SSA value that dominates it,
+    // but that SSA value may not necessarily also dominate the instruction that's
+    // being patched.
+
+    // Get the arguments and parameters passed by the first branch.
+    let num_fixed = func.dfg[first_inst]
+        .opcode()
+        .constraints()
+        .num_fixed_value_arguments();
+    let (first_args, first_params) = func.dfg[first_inst]
+        .arguments(&func.dfg.value_lists)
+        .split_at(num_fixed);
+
+    // Get the parameters passed by the second jump.
+    let num_fixed = func.dfg[second_inst]
+        .opcode()
+        .constraints()
+        .num_fixed_value_arguments();
+    let (_, second_params) = func.dfg[second_inst]
+        .arguments(&func.dfg.value_lists)
+        .split_at(num_fixed);
+    let mut second_params = second_params.to_vec(); // Clone for rewriting below.
+
+    // For each parameter passed by the second jump, if any of those parameters
+    // was a block parameter, rewrite it to refer to the value that the first jump
+    // passed in its parameters. Otherwise, make sure it dominates first_inst.
+    //
+    // For example: if we have `block0: jump block1(v1)` jumping to `block1(v2): jump block2(v2)`,
+    // we want to rewrite the original jump to `jump block2(v1)`.
+    let block_params: &[Value] = func.dfg.block_params(first_dest);
+    debug_assert!(block_params.len() == first_params.len());
+
+    for value in second_params.iter_mut() {
+        if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) {
+            // This value was the Nth parameter passed to the second_inst's block.
+            // Rewrite it as the Nth parameter passed by first_inst.
+            *value = first_params[n];
+        }
+    }
+
+    // Build a value list of first_args (unchanged) followed by second_params (rewritten).
+    let arguments_vec: alloc::vec::Vec<_> = first_args
+        .iter()
+        .chain(second_params.iter())
+        .copied()
+        .collect();
+    let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists);
+
+    func.dfg[first_inst].take_value_list(); // Drop the current list.
+    func.dfg[first_inst].put_value_list(value_list); // Put the new list.
+
+    // Bypass the second jump.
+    // This can disconnect the Block containing `second_inst`, to be cleaned up later.
+    let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
+    func.change_branch_destination(first_inst, second_dest);
+    cfg.recompute_block(func, block);
+
+    // The previously-intermediary Block may now be unreachable. Update CFG.
+    if cfg.pred_iter(first_dest).count() == 0 {
+        // Remove all instructions from that block.
+        while let Some(inst) = func.layout.first_inst(first_dest) {
+            func.layout.remove_inst(inst);
+        }
+
+        // Remove the block...
+        cfg.recompute_block(func, first_dest); // ...from predecessor lists.
+        func.layout.remove_block(first_dest); // ...from the layout.
+    }
+
+    true
+}
+
+/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
+/// This transformation may orphan some blocks.
+fn fold_redundant_jumps(
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+) {
+    let mut folded = false;
+
+    // Postorder iteration guarantees that a chain of jumps is visited from
+    // the end of the chain to the start of the chain.
+    for &block in domtree.cfg_postorder() {
+        // Only proceed if the first terminator instruction is a single-target branch.
+        let first_inst = func
+            .layout
+            .last_inst(block)
+            .expect("Block has no terminator");
+        folded |= try_fold_redundant_jump(func, cfg, block, first_inst);
+
+        // Also try the previous instruction.
+        if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
+            folded |= try_fold_redundant_jump(func, cfg, block, prev_inst);
+        }
+    }
+
+    // Folding jumps invalidates the dominator tree.
+    if folded {
+        domtree.compute(func, cfg);
+    }
+}
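+
+// For example, given the chain `block0: jump block1`, `block1: jump block2`,
+// `block2: jump block3`, the postorder walk above visits block2 before block1 and
+// block1 before block0, so each jump it inspects already targets a fully folded
+// destination and the whole chain collapses in a single pass.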
+
+/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
+/// existing `fallthrough` instructions are correct.
+fn fallthroughs(func: &mut Function) {
+    for (block, succ) in func.layout.blocks().adjacent_pairs() {
+        let term = func
+            .layout
+            .last_inst(block)
+            .expect("block has no terminator.");
+        if let InstructionData::Jump {
+            ref mut opcode,
+            destination,
+            ..
+        } = func.dfg[term]
+        {
+            match *opcode {
+                Opcode::Fallthrough => {
+                    // Somebody used a fall-through instruction before the branch relaxation pass.
+                    // Make sure it is correct, i.e. the destination is the layout successor.
+                    debug_assert_eq!(
+                        destination, succ,
+                        "Illegal fallthrough from {} to {}, but {}'s successor is {}",
+                        block, destination, block, succ
+                    )
+                }
+                Opcode::Jump => {
+                    // If this is a jump to the successor block, change it to a fall-through.
+                    if destination == succ {
+                        *opcode = Opcode::Fallthrough;
+                        func.encodings[term] = Default::default();
+                    }
+                }
+                _ => {}
+            }
+        }
+    }
+}
+
+/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`.
+///
+/// Return the size of the replacement instructions up to and including the location where `cur` is
+/// left.
+fn relax_branch(
+    cur: &mut FuncCursor,
+    divert: &RegDiversions,
+    offset: CodeOffset,
+    dest_offset: CodeOffset,
+    encinfo: &EncInfo,
+    isa: &dyn TargetIsa,
+) -> CodeOffset {
+    let inst = cur.current_inst().unwrap();
+    debug!(
+        "Relaxing [{}] {} for {:#x}-{:#x} range",
+        encinfo.display(cur.func.encodings[inst]),
+        cur.func.dfg.display_inst(inst, isa),
+        offset,
+        dest_offset
+    );
+
+    // Pick the smallest encoding that can handle the branch range.
+    let dfg = &cur.func.dfg;
+    let ctrl_type = dfg.ctrl_typevar(inst);
+    if let Some(enc) = isa
+        .legal_encodings(cur.func, &dfg[inst], ctrl_type)
+        .filter(|&enc| {
+            let range = encinfo.branch_range(enc).expect("Branch with no range");
+            if !range.contains(offset, dest_offset) {
+                debug!("  trying [{}]: out of range", encinfo.display(enc));
+                false
+            } else if encinfo.operand_constraints(enc)
+                != encinfo.operand_constraints(cur.func.encodings[inst])
+            {
+                // Conservatively give up if the encoding has different constraints
+                // than the original, so that we don't risk picking a new encoding
+                // which the existing operands don't satisfy. We can't check for
+                // validity directly because we don't have a RegDiversions active so
+                // we don't know which registers are actually in use.
+                debug!("  trying [{}]: constraints differ", encinfo.display(enc));
+                false
+            } else {
+                debug!("  trying [{}]: OK", encinfo.display(enc));
+                true
+            }
+        })
+        .min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func))
+    {
+        debug_assert!(enc != cur.func.encodings[inst]);
+        cur.func.encodings[inst] = enc;
+        return encinfo.byte_size(enc, inst, &divert, &cur.func);
+    }
+
+    // Note: On some RISC ISAs, conditional branches have shorter range than unconditional
+    // branches, so one way of extending the range of a conditional branch is to invert its
+    // condition and make it branch over an unconditional jump which has the larger range.
+    //
+    // Splitting the block is problematic this late because there may be register diversions in
+    // effect across the conditional branch, and they can't survive the control flow edge to a new
+    // block. We have two options for handling that:
+    //
+    // 1. Set a flag on the new block that indicates it wants to preserve the register diversions
+    //    of its layout predecessor, or
+    // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the
+    //    block.
+    //
+    // It seems that 1. would allow us to share code among RISC ISAs that need this.
+    //
+    // We can't allow register diversions to survive from the layout predecessor because the layout
+    // predecessor could contain kill points for some values that are live in this block, and
+    // diversions are not automatically cancelled when the live range of a value ends.
+
+    // This assumes solution 2. above:
+    panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
+}
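
Usage note: a minimal sketch of where this pass sits in a lowering pipeline, assuming a crate-internal caller. The `layout_for_emission` wrapper below is hypothetical; only the `relax_branches` call and the `CodeInfo` fields come from the patch above.

```rust
use crate::binemit::{relax_branches, CodeInfo};
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::CodegenResult;

// Hypothetical pipeline step: run after encodings are assigned, before emission.
fn layout_for_emission(
    func: &mut Function,
    cfg: &mut ControlFlowGraph,
    domtree: &mut DominatorTree,
    isa: &dyn TargetIsa,
) -> CodegenResult<CodeInfo> {
    // Fills func.offsets and func.jt_offsets, and assigns constant-pool offsets.
    let info = relax_branches(func, cfg, domtree, isa)?;
    // The three regions are contiguous by construction in relax_branches.
    debug_assert_eq!(
        info.total_size,
        info.code_size + info.jumptables_size + info.rodata_size
    );
    Ok(info)
}
```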