Diffstat (limited to 'third_party/rust/cranelift-codegen/src/binemit/relaxation.rs')
-rw-r--r--  third_party/rust/cranelift-codegen/src/binemit/relaxation.rs | 397
1 file changed, 397 insertions(+), 0 deletions(-)
diff --git a/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs b/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs
new file mode 100644
index 0000000000..142e400985
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs
@@ -0,0 +1,397 @@
+//! Branch relaxation and offset computation.
+//!
+//! # Block header offsets
+//!
+//! Before we can generate binary machine code for branch instructions, we need to know the final
+//! offsets of all the block headers in the function. This information is encoded in the
+//! `func.offsets` table.
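+//!
+//! For example, if the layout places `block1` immediately after `block0` and the encoded
+//! instructions of `block0` total 12 bytes, then `func.offsets[block0] == 0` and
+//! `func.offsets[block1] == 12`.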
+//!
+//! # Branch relaxation
+//!
+//! Branch relaxation is the process of ensuring that all branches in the function have enough
+//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
+//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
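+//! A `jne` encodes in 2 bytes with an 8-bit displacement (`75 cb`) but in 6 bytes with a
+//! 32-bit displacement (`0F 85 cd`), so the short form is preferred whenever the target is
+//! close enough.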
+//!
+//! On RISC architectures, it can happen that conditional branches have a shorter range than
+//! unconditional branches:
+//!
+//! ```clif
+//! brz v1, block17
+//! ```
+//!
+//! can be transformed into:
+//!
+//! ```clif
+//! brnz v1, block23
+//! jump block17
+//! block23:
+//! ```
+
+use crate::binemit::{CodeInfo, CodeOffset};
+use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList};
+use crate::isa::{EncInfo, TargetIsa};
+use crate::iterators::IteratorExtras;
+use crate::regalloc::RegDiversions;
+use crate::timing;
+use crate::CodegenResult;
+use core::convert::TryFrom;
+use log::debug;
+
+/// Relax branches and compute the final layout of block headers in `func`.
+///
+/// Fill in the `func.offsets` table so the function is ready for binary emission.
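+///
+/// Returns a `CodeInfo` describing the sizes of the relaxed code, its jump tables, and its
+/// read-only constant data.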
+pub fn relax_branches(
+ func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+ isa: &dyn TargetIsa,
+) -> CodegenResult<CodeInfo> {
+ let _tt = timing::relax_branches();
+
+ let encinfo = isa.encoding_info();
+
+ // Clear all offsets so we can recognize blocks that haven't been visited yet.
+ func.offsets.clear();
+ func.offsets.resize(func.dfg.num_blocks());
+
+ // Start by removing redundant jumps.
+    fold_redundant_jumps(func, cfg, domtree);
+
+ // Convert jumps to fallthrough instructions where possible.
+ fallthroughs(func);
+
+ let mut offset = 0;
+ let mut divert = RegDiversions::new();
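+    // `divert` tracks register diversions while scanning; an instruction's encoded size can
+    // depend on which registers its operands currently occupy.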
+
+ // First, compute initial offsets for every block.
+ {
+ let mut cur = FuncCursor::new(func);
+ while let Some(block) = cur.next_block() {
+ divert.at_block(&cur.func.entry_diversions, block);
+ cur.func.offsets[block] = offset;
+ while let Some(inst) = cur.next_inst() {
+ divert.apply(&cur.func.dfg[inst]);
+ let enc = cur.func.encodings[inst];
+ offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
+ }
+ }
+ }
+
+ // Then, run the relaxation algorithm until it converges.
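+    // Relaxing a branch changes its size, which shifts the offsets of every later block and
+    // can push other branches out of range, so we iterate until no block offset changes.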
+ let mut go_again = true;
+ while go_again {
+ go_again = false;
+ offset = 0;
+
+ // Visit all instructions in layout order.
+ let mut cur = FuncCursor::new(func);
+ while let Some(block) = cur.next_block() {
+ divert.at_block(&cur.func.entry_diversions, block);
+
+ // Record the offset for `block` and make sure we iterate until offsets are stable.
+ if cur.func.offsets[block] != offset {
+ cur.func.offsets[block] = offset;
+ go_again = true;
+ }
+
+ while let Some(inst) = cur.next_inst() {
+ divert.apply(&cur.func.dfg[inst]);
+
+ let enc = cur.func.encodings[inst];
+
+                // See if this branch has a range and a single destination, and whether the
+                // target is in range.
+ if let Some(range) = encinfo.branch_range(enc) {
+ if let Some(dest) = cur.func.dfg[inst].branch_destination() {
+ let dest_offset = cur.func.offsets[dest];
+ if !range.contains(offset, dest_offset) {
+ offset +=
+ relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
+ continue;
+ }
+ }
+ }
+
+ offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
+ }
+ }
+ }
+
+ let code_size = offset;
+ let jumptables = offset;
+
+ for (jt, jt_data) in func.jump_tables.iter() {
+ func.jt_offsets[jt] = offset;
+ // TODO: this should be computed based on the min size needed to hold the furthest branch.
+ offset += jt_data.len() as u32 * 4;
+ }
+
+ let jumptables_size = offset - jumptables;
+ let rodata = offset;
+
+ for constant in func.dfg.constants.entries_mut() {
+ constant.set_offset(offset);
+ offset +=
+ u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32")
+ }
+
+ let rodata_size = offset - rodata;
+
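+    // The emitted image is laid out as code, then jump tables, then read-only constant data,
+    // so `offset` is now the total size.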
+ Ok(CodeInfo {
+ code_size,
+ jumptables_size,
+ rodata_size,
+ total_size: offset,
+ })
+}
+
+/// Folds an instruction if it is a redundant jump.
+/// Returns whether folding was performed (which invalidates the CFG).
+fn try_fold_redundant_jump(
+ func: &mut Function,
+ cfg: &mut ControlFlowGraph,
+ block: Block,
+ first_inst: Inst,
+) -> bool {
+ let first_dest = match func.dfg[first_inst].branch_destination() {
+ Some(block) => block, // The instruction was a single-target branch.
+ None => {
+ return false; // The instruction was either multi-target or not a branch.
+ }
+ };
+
+ // For the moment, only attempt to fold a branch to a block that is parameterless.
+ // These blocks are mainly produced by critical edge splitting.
+ //
+ // TODO: Allow folding blocks that define SSA values and function as phi nodes.
+ if func.dfg.num_block_params(first_dest) != 0 {
+ return false;
+ }
+
+ // Look at the first instruction of the first branch's destination.
+ // If it is an unconditional branch, maybe the second jump can be bypassed.
+ let second_inst = func.layout.first_inst(first_dest).expect("Instructions");
+ if func.dfg[second_inst].opcode() != Opcode::Jump {
+ return false;
+ }
+
+ // Now we need to fix up first_inst's block parameters to match second_inst's,
+ // without changing the branch-specific arguments.
+ //
+ // The intermediary block is allowed to reference any SSA value that dominates it,
+ // but that SSA value may not necessarily also dominate the instruction that's
+ // being patched.
+
+ // Get the arguments and parameters passed by the first branch.
+ let num_fixed = func.dfg[first_inst]
+ .opcode()
+ .constraints()
+ .num_fixed_value_arguments();
+ let (first_args, first_params) = func.dfg[first_inst]
+ .arguments(&func.dfg.value_lists)
+ .split_at(num_fixed);
+
+ // Get the parameters passed by the second jump.
+ let num_fixed = func.dfg[second_inst]
+ .opcode()
+ .constraints()
+ .num_fixed_value_arguments();
+ let (_, second_params) = func.dfg[second_inst]
+ .arguments(&func.dfg.value_lists)
+ .split_at(num_fixed);
+ let mut second_params = second_params.to_vec(); // Clone for rewriting below.
+
+ // For each parameter passed by the second jump, if any of those parameters
+ // was a block parameter, rewrite it to refer to the value that the first jump
+ // passed in its parameters. Otherwise, make sure it dominates first_inst.
+ //
+ // For example: if we `block0: jump block1(v1)` to `block1(v2): jump block2(v2)`,
+ // we want to rewrite the original jump to `jump block2(v1)`.
+ let block_params: &[Value] = func.dfg.block_params(first_dest);
+ debug_assert!(block_params.len() == first_params.len());
+
+ for value in second_params.iter_mut() {
+ if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) {
+ // This value was the Nth parameter passed to the second_inst's block.
+ // Rewrite it as the Nth parameter passed by first_inst.
+ *value = first_params[n];
+ }
+ }
+
+ // Build a value list of first_args (unchanged) followed by second_params (rewritten).
+ let arguments_vec: alloc::vec::Vec<_> = first_args
+ .iter()
+ .chain(second_params.iter())
+ .copied()
+ .collect();
+ let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists);
+
+ func.dfg[first_inst].take_value_list(); // Drop the current list.
+ func.dfg[first_inst].put_value_list(value_list); // Put the new list.
+
+ // Bypass the second jump.
+ // This can disconnect the Block containing `second_inst`, to be cleaned up later.
+ let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
+ func.change_branch_destination(first_inst, second_dest);
+ cfg.recompute_block(func, block);
+
+ // The previously-intermediary Block may now be unreachable. Update CFG.
+ if cfg.pred_iter(first_dest).count() == 0 {
+ // Remove all instructions from that block.
+ while let Some(inst) = func.layout.first_inst(first_dest) {
+ func.layout.remove_inst(inst);
+ }
+
+ // Remove the block...
+ cfg.recompute_block(func, first_dest); // ...from predecessor lists.
+ func.layout.remove_block(first_dest); // ...from the layout.
+ }
+
+ true
+}
+
+/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
+/// This transformation may orphan some blocks.
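+///
+/// For example, `block0: jump block1` where `block1: jump block2` is rewritten so that
+/// `block0` jumps straight to `block2`, potentially leaving `block1` unreachable.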
+fn fold_redundant_jumps(
+ func: &mut Function,
+ cfg: &mut ControlFlowGraph,
+ domtree: &mut DominatorTree,
+) {
+ let mut folded = false;
+
+ // Postorder iteration guarantees that a chain of jumps is visited from
+ // the end of the chain to the start of the chain.
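+    // By the time a jump is visited, any jump it targets has already been folded, so an
+    // entire chain collapses in one pass.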
+ for &block in domtree.cfg_postorder() {
+ // Only proceed if the first terminator instruction is a single-target branch.
+ let first_inst = func
+ .layout
+ .last_inst(block)
+ .expect("Block has no terminator");
+ folded |= try_fold_redundant_jump(func, cfg, block, first_inst);
+
+ // Also try the previous instruction.
+ if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
+ folded |= try_fold_redundant_jump(func, cfg, block, prev_inst);
+ }
+ }
+
+ // Folding jumps invalidates the dominator tree.
+ if folded {
+ domtree.compute(func, cfg);
+ }
+}
+
+/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
+/// existing `fallthrough` instructions are correct.
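+///
+/// A `fallthrough` is a jump to the block's layout successor; since control reaches the
+/// successor anyway, its encoding is cleared so it occupies no bytes in the final code.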
+fn fallthroughs(func: &mut Function) {
+ for (block, succ) in func.layout.blocks().adjacent_pairs() {
+ let term = func
+ .layout
+ .last_inst(block)
+ .expect("block has no terminator.");
+ if let InstructionData::Jump {
+ ref mut opcode,
+ destination,
+ ..
+ } = func.dfg[term]
+ {
+ match *opcode {
+ Opcode::Fallthrough => {
+ // Somebody used a fall-through instruction before the branch relaxation pass.
+ // Make sure it is correct, i.e. the destination is the layout successor.
+ debug_assert_eq!(
+ destination, succ,
+ "Illegal fallthrough from {} to {}, but {}'s successor is {}",
+ block, destination, block, succ
+ )
+ }
+ Opcode::Jump => {
+ // If this is a jump to the successor block, change it to a fall-through.
+ if destination == succ {
+ *opcode = Opcode::Fallthrough;
+ func.encodings[term] = Default::default();
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+}
+
+/// Relax the branch instruction at `cur` so it can cover the distance from `offset` to
+/// `dest_offset`.
+///
+/// Return the size of the replacement instructions up to and including the location where `cur` is
+/// left.
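+///
+/// The replacement is the smallest legal encoding whose branch range covers the span; if no
+/// such encoding exists, this currently panics (see the note at the end of the function).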
+fn relax_branch(
+ cur: &mut FuncCursor,
+ divert: &RegDiversions,
+ offset: CodeOffset,
+ dest_offset: CodeOffset,
+ encinfo: &EncInfo,
+ isa: &dyn TargetIsa,
+) -> CodeOffset {
+ let inst = cur.current_inst().unwrap();
+ debug!(
+ "Relaxing [{}] {} for {:#x}-{:#x} range",
+ encinfo.display(cur.func.encodings[inst]),
+ cur.func.dfg.display_inst(inst, isa),
+ offset,
+ dest_offset
+ );
+
+ // Pick the smallest encoding that can handle the branch range.
+ let dfg = &cur.func.dfg;
+ let ctrl_type = dfg.ctrl_typevar(inst);
+ if let Some(enc) = isa
+ .legal_encodings(cur.func, &dfg[inst], ctrl_type)
+ .filter(|&enc| {
+ let range = encinfo.branch_range(enc).expect("Branch with no range");
+ if !range.contains(offset, dest_offset) {
+ debug!(" trying [{}]: out of range", encinfo.display(enc));
+ false
+ } else if encinfo.operand_constraints(enc)
+ != encinfo.operand_constraints(cur.func.encodings[inst])
+ {
+ // Conservatively give up if the encoding has different constraints
+ // than the original, so that we don't risk picking a new encoding
+ // which the existing operands don't satisfy. We can't check for
+ // validity directly because we don't have a RegDiversions active so
+ // we don't know which registers are actually in use.
+ debug!(" trying [{}]: constraints differ", encinfo.display(enc));
+ false
+ } else {
+ debug!(" trying [{}]: OK", encinfo.display(enc));
+ true
+ }
+ })
+ .min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func))
+ {
+ debug_assert!(enc != cur.func.encodings[inst]);
+ cur.func.encodings[inst] = enc;
+ return encinfo.byte_size(enc, inst, &divert, &cur.func);
+ }
+
+ // Note: On some RISC ISAs, conditional branches have shorter range than unconditional
+ // branches, so one way of extending the range of a conditional branch is to invert its
+ // condition and make it branch over an unconditional jump which has the larger range.
+ //
+ // Splitting the block is problematic this late because there may be register diversions in
+ // effect across the conditional branch, and they can't survive the control flow edge to a new
+ // block. We have two options for handling that:
+ //
+    // 1. Set a flag on the new block that indicates it wants to preserve the register diversions of
+ // its layout predecessor, or
+ // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block.
+ //
+ // It seems that 1. would allow us to share code among RISC ISAs that need this.
+ //
+ // We can't allow register diversions to survive from the layout predecessor because the layout
+ // predecessor could contain kill points for some values that are live in this block, and
+ // diversions are not automatically cancelled when the live range of a value ends.
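+    //
+    // The branch-over-jump pattern is the rewrite shown in the module documentation:
+    // `brz v1, block17` becomes `brnz v1, block23; jump block17; block23:`.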
+
+ // This assumes solution 2. above:
+ panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
+}