//! A pre-legalization rewriting pass.
//!
//! This module provides early-stage optimizations. The optimizations found
//! should be useful for already well-optimized code. More general purpose
//! early-stage optimizations can be found in the preopt crate.

use crate::cursor::{Cursor, FuncCursor};
use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64};
use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{
    condcodes::{CondCode, IntCC},
    instructions::Opcode,
    types::{I32, I64},
    Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
};
use crate::isa::TargetIsa;
use crate::timing;

/// Replaces the unique result of the instruction `inst` with an alias of the given value, and
/// replaces the instruction with a nop. Can be used only on instructions producing one unique
/// result; otherwise it will assert.
#[inline]
fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) {
    // Replace the result value by an alias.
    let results = dfg.detach_results(inst);
    debug_assert!(results.len(&dfg.value_lists) == 1);
    let result = results.get(0, &dfg.value_lists).unwrap();
    dfg.change_to_alias(result, value);

    // Replace instruction by a nop.
    dfg.replace(inst).nop();
}

//----------------------------------------------------------------------
//
// Pattern-match helpers and transformation for div and rem by constants.

// Simple math helpers

/// If `x` is a power of two, or the negation thereof, return the power along
/// with a boolean that indicates whether `x` is negative. Else return `None`.
#[inline]
fn i32_is_power_of_two(x: i32) -> Option<(bool, u32)> {
    // We have to special-case this because abs(x) isn't representable.
    if x == i32::MIN {
        return Some((true, 31));
    }
    let abs_x = i32::wrapping_abs(x) as u32;
    if abs_x.is_power_of_two() {
        return Some((x < 0, abs_x.trailing_zeros()));
    }
    None
}

/// Same comments as for i32_is_power_of_two apply.
#[inline]
fn i64_is_power_of_two(x: i64) -> Option<(bool, u32)> {
    // We have to special-case this because abs(x) isn't representable.
    if x == i64::MIN {
        return Some((true, 63));
    }
    let abs_x = i64::wrapping_abs(x) as u64;
    if abs_x.is_power_of_two() {
        return Some((x < 0, abs_x.trailing_zeros()));
    }
    None
}
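// The two helpers above are small enough to check by hand. A minimal
// illustrative test module (a sketch, not part of the original pass; every
// expected value follows directly from the definitions above):
#[cfg(test)]
mod power_of_two_tests {
    use super::{i32_is_power_of_two, i64_is_power_of_two};

    #[test]
    fn i32_cases() {
        // Positive and negative powers of two report their exponent.
        assert_eq!(i32_is_power_of_two(4), Some((false, 2)));
        assert_eq!(i32_is_power_of_two(-8), Some((true, 3)));
        // i32::MIN is -2^31; the special case handles it.
        assert_eq!(i32_is_power_of_two(i32::MIN), Some((true, 31)));
        // Non-powers-of-two yield None.
        assert_eq!(i32_is_power_of_two(6), None);
    }

    #[test]
    fn i64_cases() {
        assert_eq!(i64_is_power_of_two(1), Some((false, 0)));
        assert_eq!(i64_is_power_of_two(i64::MIN), Some((true, 63)));
        assert_eq!(i64_is_power_of_two(-5), None);
    }
}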
/// Representation of an instruction that can be replaced by a single division/remainder operation
/// between a left Value operand and a right immediate operand.
#[derive(Debug)]
enum DivRemByConstInfo {
    DivU32(Value, u32),
    DivU64(Value, u64),
    DivS32(Value, i32),
    DivS64(Value, i64),
    RemU32(Value, u32),
    RemU64(Value, u64),
    RemS32(Value, i32),
    RemS64(Value, i64),
}

/// Possibly create a DivRemByConstInfo from the given components, by figuring out which, if any,
/// of the 8 cases apply, and also taking care to sanity-check the immediate.
fn package_up_divrem_info(
    value: Value,
    value_type: Type,
    imm_i64: i64,
    is_signed: bool,
    is_rem: bool,
) -> Option<DivRemByConstInfo> {
    let imm_u64 = imm_i64 as u64;

    match (is_signed, value_type) {
        (false, I32) => {
            if imm_u64 < 0x1_0000_0000 {
                if is_rem {
                    Some(DivRemByConstInfo::RemU32(value, imm_u64 as u32))
                } else {
                    Some(DivRemByConstInfo::DivU32(value, imm_u64 as u32))
                }
            } else {
                None
            }
        }

        (false, I64) => {
            // Unsigned 64, no range constraint.
            if is_rem {
                Some(DivRemByConstInfo::RemU64(value, imm_u64))
            } else {
                Some(DivRemByConstInfo::DivU64(value, imm_u64))
            }
        }

        (true, I32) => {
            if imm_u64 <= 0x7fff_ffff || imm_u64 >= 0xffff_ffff_8000_0000 {
                if is_rem {
                    Some(DivRemByConstInfo::RemS32(value, imm_u64 as i32))
                } else {
                    Some(DivRemByConstInfo::DivS32(value, imm_u64 as i32))
                }
            } else {
                None
            }
        }

        (true, I64) => {
            // Signed 64, no range constraint.
            if is_rem {
                Some(DivRemByConstInfo::RemS64(value, imm_u64 as i64))
            } else {
                Some(DivRemByConstInfo::DivS64(value, imm_u64 as i64))
            }
        }

        _ => None,
    }
}

/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
/// signedness, operation size and div-vs-rem-ness in a handy bundle.
fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
    if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
        let (is_signed, is_rem) = match opcode {
            Opcode::UdivImm => (false, false),
            Opcode::UremImm => (false, true),
            Opcode::SdivImm => (true, false),
            Opcode::SremImm => (true, true),
            _ => return None,
        };
        return package_up_divrem_info(arg, dfg.value_type(arg), imm.into(), is_signed, is_rem);
    }
    None
}

/// Actually do the transformation given a bundle containing the relevant information.
/// `divrem_info` describes a div or rem by a constant, that `pos` currently points at, and `inst`
/// is the associated instruction. `inst` is replaced by a sequence of other operations that
/// calculate the same result. Note that there are various `divrem_info` cases where we cannot do
/// any transformation, in which case `inst` is left unchanged.
fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) {
    let is_rem = match *divrem_info {
        DivRemByConstInfo::DivU32(_, _)
        | DivRemByConstInfo::DivU64(_, _)
        | DivRemByConstInfo::DivS32(_, _)
        | DivRemByConstInfo::DivS64(_, _) => false,
        DivRemByConstInfo::RemU32(_, _)
        | DivRemByConstInfo::RemU64(_, _)
        | DivRemByConstInfo::RemS32(_, _)
        | DivRemByConstInfo::RemS64(_, _) => true,
    };

    match *divrem_info {
        // -------------------- U32 --------------------

        // U32 div, rem by zero: ignore
        DivRemByConstInfo::DivU32(_n1, 0) | DivRemByConstInfo::RemU32(_n1, 0) => {}

        // U32 div by 1: identity
        // U32 rem by 1: zero
        DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        // U32 div, rem by a power-of-2
        DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d)
            if d.is_power_of_two() =>
        {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 31);
            if is_rem {
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }
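        // A worked illustration of the non-power-of-2 path below, for d == 7.
        // The concrete constants are those of the usual Hacker's Delight style
        // magic-number computation (shown here for intuition only; `magic_u32`
        // is the authority):
        //
        //   magic_u32(7) == MU32 { mul_by: 0x2492_4925, do_add: true, shift_by: 3 }
        //
        // For n1 == 100:
        //   q1 = umulhi(100, 0x2492_4925)   == 14
        //   t1 = 100 - 14                   == 86
        //   t2 = 86 >> 1                    == 43
        //   t3 = 43 + 14                    == 57
        //   qf = 57 >> (3 - 1)              == 14 == 100 / 7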
        // U32 div, rem by a non-power-of-2
        DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => {
            debug_assert!(d >= 3);
            let MU32 {
                mul_by,
                do_add,
                shift_by,
            } = magic_u32(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I32, mul_by as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if do_add {
                debug_assert!(shift_by >= 1 && shift_by <= 32);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shift_by == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shift_by, 1);
                qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
            } else {
                debug_assert!(shift_by >= 0 && shift_by <= 31);
                // Whereas there are known cases here for shift_by == 0.
                if shift_by > 0 {
                    qf = pos.ins().ushr_imm(q1, shift_by as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead.
            if is_rem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
            }
        }

        // -------------------- U64 --------------------

        // U64 div, rem by zero: ignore
        DivRemByConstInfo::DivU64(_n1, 0) | DivRemByConstInfo::RemU64(_n1, 0) => {}

        // U64 div by 1: identity
        // U64 rem by 1: zero
        DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        // U64 div, rem by a power-of-2
        DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d)
            if d.is_power_of_two() =>
        {
            debug_assert!(d >= 2);
            // compute k where d == 2^k
            let k = d.trailing_zeros();
            debug_assert!(k >= 1 && k <= 63);
            if is_rem {
                let mask = (1u64 << k) - 1;
                pos.func.dfg.replace(inst).band_imm(n1, mask as i64);
            } else {
                pos.func.dfg.replace(inst).ushr_imm(n1, k as i64);
            }
        }

        // U64 div, rem by a non-power-of-2
        DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) => {
            debug_assert!(d >= 3);
            let MU64 {
                mul_by,
                do_add,
                shift_by,
            } = magic_u64(d);
            let qf; // final quotient
            let q0 = pos.ins().iconst(I64, mul_by as i64);
            let q1 = pos.ins().umulhi(n1, q0);
            if do_add {
                debug_assert!(shift_by >= 1 && shift_by <= 64);
                let t1 = pos.ins().isub(n1, q1);
                let t2 = pos.ins().ushr_imm(t1, 1);
                let t3 = pos.ins().iadd(t2, q1);
                // I never found any case where shift_by == 1 here.
                // So there's no attempt to fold out a zero shift.
                debug_assert_ne!(shift_by, 1);
                qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64);
            } else {
                debug_assert!(shift_by >= 0 && shift_by <= 63);
                // Whereas there are known cases here for shift_by == 0.
                if shift_by > 0 {
                    qf = pos.ins().ushr_imm(q1, shift_by as i64);
                } else {
                    qf = q1;
                }
            }
            // Now qf holds the final quotient. If necessary calculate the
            // remainder instead.
            if is_rem {
                let tt = pos.ins().imul_imm(qf, d as i64);
                pos.func.dfg.replace(inst).isub(n1, tt);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
            }
        }

        // -------------------- S32 --------------------

        // S32 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS32(_n1, -1)
        | DivRemByConstInfo::RemS32(_n1, -1)
        | DivRemByConstInfo::DivS32(_n1, 0)
        | DivRemByConstInfo::RemS32(_n1, 0) => {}

        // S32 div by 1: identity
        // S32 rem by 1: zero
        DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I32, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => {
            if let Some((is_negative, k)) = i32_is_power_of_two(d) {
                // k can be 31 only in the case that d is -2^31.
                debug_assert!(k >= 1 && k <= 31);
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if is_rem {
                    // S32 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64);
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S32 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    if is_negative {
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
                    }
                }
            } else {
                // S32 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS32 { mul_by, shift_by } = magic_s32(d);
                let q0 = pos.ins().iconst(I32, mul_by as i64);
                let q1 = pos.ins().smulhi(n1, q0);
                let q2 = if d > 0 && mul_by < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mul_by > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shift_by >= 0 && shift_by <= 31);
                let q3 = if shift_by == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shift_by as i64)
                };
                let t1 = pos.ins().ushr_imm(q3, 31);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead.
                if is_rem {
                    let tt = pos.ins().imul_imm(qf, d as i64);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
                }
            }
        }
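        // A worked illustration of the signed power-of-2 path above, for
        // d == 4 (k == 2) and n1 == -7 (intuition only; the code above is the
        // authority). The first three steps add d - 1 == 3 to n1 exactly when
        // n1 is negative, so the final arithmetic shift rounds toward zero
        // rather than toward negative infinity:
        //
        //   t1 = sshr(-7, 1)      == -4
        //   t2 = ushr(-4, 30)     ==  3   (the sign bit, replicated k times)
        //   t3 = -7 + 3           == -4
        //   div: sshr(-4, 2)      == -1 == -7 / 4 (truncating)
        //   rem: mask == -(1 << 2) == -4; t4 == t3 & mask == -4;
        //        -7 - t4 == -3 == -7 % 4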
        // -------------------- S64 --------------------

        // S64 div, rem by zero or -1: ignore
        DivRemByConstInfo::DivS64(_n1, -1)
        | DivRemByConstInfo::RemS64(_n1, -1)
        | DivRemByConstInfo::DivS64(_n1, 0)
        | DivRemByConstInfo::RemS64(_n1, 0) => {}

        // S64 div by 1: identity
        // S64 rem by 1: zero
        DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => {
            if is_rem {
                pos.func.dfg.replace(inst).iconst(I64, 0);
            } else {
                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
            }
        }

        DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => {
            if let Some((is_negative, k)) = i64_is_power_of_two(d) {
                // k can be 63 only in the case that d is -2^63.
                debug_assert!(k >= 1 && k <= 63);
                let t1 = if k - 1 == 0 {
                    n1
                } else {
                    pos.ins().sshr_imm(n1, (k - 1) as i64)
                };
                let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64);
                let t3 = pos.ins().iadd(n1, t2);
                if is_rem {
                    // S64 rem by a power-of-2
                    let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k));
                    // Curiously, we don't care here what the sign of d is.
                    pos.func.dfg.replace(inst).isub(n1, t4);
                } else {
                    // S64 div by a power-of-2
                    let t4 = pos.ins().sshr_imm(t3, k as i64);
                    if is_negative {
                        pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                    } else {
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
                    }
                }
            } else {
                // S64 div, rem by a non-power-of-2
                debug_assert!(d < -2 || d > 2);
                let MS64 { mul_by, shift_by } = magic_s64(d);
                let q0 = pos.ins().iconst(I64, mul_by);
                let q1 = pos.ins().smulhi(n1, q0);
                let q2 = if d > 0 && mul_by < 0 {
                    pos.ins().iadd(q1, n1)
                } else if d < 0 && mul_by > 0 {
                    pos.ins().isub(q1, n1)
                } else {
                    q1
                };
                debug_assert!(shift_by >= 0 && shift_by <= 63);
                let q3 = if shift_by == 0 {
                    q2
                } else {
                    pos.ins().sshr_imm(q2, shift_by as i64)
                };
                let t1 = pos.ins().ushr_imm(q3, 63);
                let qf = pos.ins().iadd(q3, t1);
                // Now qf holds the final quotient. If necessary calculate
                // the remainder instead.
                if is_rem {
                    let tt = pos.ins().imul_imm(qf, d);
                    pos.func.dfg.replace(inst).isub(n1, tt);
                } else {
                    replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
                }
            }
        }
    }
}

enum BranchOrderKind {
    BrzToBrnz(Value),
    BrnzToBrz(Value),
    InvertIcmpCond(IntCC, Value, Value),
}
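// A sketch of the reordering performed below, on hypothetical CLIF (block and
// value names are illustrative only). Assume `block1` immediately follows the
// current block in the layout:
//
//   before:                      after:
//     brz  v0, block1              brnz v0, block2
//     jump block2                  jump block1   ; now a fallthrough candidate
//
// For a `br_icmp` conditional, the condition code is inverted instead
// (`InvertIcmpCond`), with the same destination swap.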
/// Reorder branches to encourage fallthroughs.
///
/// When a block ends with a conditional branch followed by an unconditional
/// branch, this will reorder them if one of them is branching to the next Block
/// layout-wise. The unconditional jump can then become a fallthrough.
fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, inst: Inst) {
    let (term_inst, term_inst_args, term_dest, cond_inst, cond_inst_args, cond_dest, kind) =
        match pos.func.dfg[inst] {
            InstructionData::Jump {
                opcode: Opcode::Jump,
                destination,
                ref args,
            } => {
                let next_block = if let Some(next_block) = pos.func.layout.next_block(block) {
                    next_block
                } else {
                    return;
                };

                if destination == next_block {
                    return;
                }

                let prev_inst = if let Some(prev_inst) = pos.func.layout.prev_inst(inst) {
                    prev_inst
                } else {
                    return;
                };

                let prev_inst_data = &pos.func.dfg[prev_inst];

                if let Some(prev_dest) = prev_inst_data.branch_destination() {
                    if prev_dest != next_block {
                        return;
                    }
                } else {
                    return;
                }

                match prev_inst_data {
                    InstructionData::Branch {
                        opcode,
                        args: ref prev_args,
                        destination: cond_dest,
                    } => {
                        let cond_arg = {
                            let args = pos.func.dfg.inst_args(prev_inst);
                            args[0]
                        };

                        let kind = match opcode {
                            Opcode::Brz => BranchOrderKind::BrzToBrnz(cond_arg),
                            Opcode::Brnz => BranchOrderKind::BrnzToBrz(cond_arg),
                            _ => panic!("unexpected opcode"),
                        };

                        (
                            inst,
                            args.clone(),
                            destination,
                            prev_inst,
                            prev_args.clone(),
                            *cond_dest,
                            kind,
                        )
                    }
                    InstructionData::BranchIcmp {
                        opcode: Opcode::BrIcmp,
                        cond,
                        destination: cond_dest,
                        args: ref prev_args,
                    } => {
                        let (x_arg, y_arg) = {
                            let args = pos.func.dfg.inst_args(prev_inst);
                            (args[0], args[1])
                        };

                        (
                            inst,
                            args.clone(),
                            destination,
                            prev_inst,
                            prev_args.clone(),
                            *cond_dest,
                            BranchOrderKind::InvertIcmpCond(*cond, x_arg, y_arg),
                        )
                    }
                    _ => return,
                }
            }
            _ => return,
        };

    let cond_args = cond_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec();
    let term_args = term_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec();

    match kind {
        BranchOrderKind::BrnzToBrz(cond_arg) => {
            pos.func
                .dfg
                .replace(term_inst)
                .jump(cond_dest, &cond_args[1..]);
            pos.func
                .dfg
                .replace(cond_inst)
                .brz(cond_arg, term_dest, &term_args);
        }
        BranchOrderKind::BrzToBrnz(cond_arg) => {
            pos.func
                .dfg
                .replace(term_inst)
                .jump(cond_dest, &cond_args[1..]);
            pos.func
                .dfg
                .replace(cond_inst)
                .brnz(cond_arg, term_dest, &term_args);
        }
        BranchOrderKind::InvertIcmpCond(cond, x_arg, y_arg) => {
            pos.func
                .dfg
                .replace(term_inst)
                .jump(cond_dest, &cond_args[2..]);
            pos.func.dfg.replace(cond_inst).br_icmp(
                cond.inverse(),
                x_arg,
                y_arg,
                term_dest,
                &term_args,
            );
        }
    }

    cfg.recompute_block(pos.func, block);
}

#[cfg(feature = "enable-peepmatic")]
mod simplify {
    use super::*;
    use crate::peepmatic::ValueOrInst;

    pub type PeepholeOptimizer<'a, 'b> =
        peepmatic_runtime::optimizer::PeepholeOptimizer<'static, 'a, &'b dyn TargetIsa>;

    pub fn peephole_optimizer<'a, 'b>(isa: &'b dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
        crate::peepmatic::preopt(isa)
    }

    pub fn apply_all<'a, 'b>(
        optimizer: &mut PeepholeOptimizer<'a, 'b>,
        pos: &mut FuncCursor<'a>,
        inst: Inst,
        _native_word_width: u32,
    ) {
        // After we apply one optimization, that might make another
        // optimization applicable. Keep running the peephole optimizer
        // until either:
        //
        // * No optimization applied, and therefore it doesn't make sense to
        //   try again, because no optimization will apply again.
        //
        // * Or when we replaced an instruction with an alias to an existing
        //   value, because we already ran the peephole optimizer over the
        //   aliased value's instruction in an early part of the traversal
        //   over the function.
        while let Some(ValueOrInst::Inst(new_inst)) =
            optimizer.apply_one(pos, ValueOrInst::Inst(inst))
        {
            // We transplanted a new instruction into the current
            // instruction, so the "new" instruction is actually the same
            // one, just with different data.
            debug_assert_eq!(new_inst, inst);
        }
        debug_assert_eq!(pos.current_inst(), Some(inst));
    }
}
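// When the `enable-peepmatic` feature is disabled, the hand-written peephole
// optimizations below are used instead of the generated peepmatic ones; both
// modules expose the same `peephole_optimizer`/`apply_all` interface.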
#[cfg(not(feature = "enable-peepmatic"))]
mod simplify {
    use super::*;
    use crate::ir::{
        dfg::ValueDef,
        immediates,
        instructions::{Opcode, ValueList},
        types::{B8, I16, I32, I8},
    };
    use std::marker::PhantomData;

    pub struct PeepholeOptimizer<'a, 'b> {
        phantom: PhantomData<(&'a (), &'b ())>,
    }

    pub fn peephole_optimizer<'a, 'b>(_: &dyn TargetIsa) -> PeepholeOptimizer<'a, 'b> {
        PeepholeOptimizer {
            phantom: PhantomData,
        }
    }

    pub fn apply_all<'a, 'b>(
        _optimizer: &mut PeepholeOptimizer<'a, 'b>,
        pos: &mut FuncCursor<'a>,
        inst: Inst,
        native_word_width: u32,
    ) {
        simplify(pos, inst, native_word_width);
        branch_opt(pos, inst);
    }

    #[inline]
    fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
        if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
            if let InstructionData::UnaryImm {
                opcode: Opcode::Iconst,
                imm,
            } = dfg[candidate_inst]
            {
                return Some(imm);
            }
        }
        None
    }

    /// Try to transform [(x << N) >> N] into an (un)signed-extending move.
    /// Returns true if the final instruction has been converted to such a move.
    fn try_fold_extended_move(
        pos: &mut FuncCursor,
        inst: Inst,
        opcode: Opcode,
        arg: Value,
        imm: immediates::Imm64,
    ) -> bool {
        if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
            if let InstructionData::BinaryImm64 {
                opcode: Opcode::IshlImm,
                arg: prev_arg,
                imm: prev_imm,
            } = &pos.func.dfg[arg_inst]
            {
                if imm != *prev_imm {
                    return false;
                }

                let dest_ty = pos.func.dfg.ctrl_typevar(inst);
                if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
                    return false;
                }

                let imm_bits: i64 = imm.into();
                let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
                    8 => I8,
                    16 => I16,
                    32 => I32,
                    _ => return false,
                };
                let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();

                // This becomes a no-op, since ireduce_ty has a smaller lane width than
                // the argument type (also the destination type).
                let arg = *prev_arg;
                let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);

                if opcode == Opcode::UshrImm {
                    pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
                } else {
                    pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
                }
                return true;
            }
        }
        false
    }
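    // A sketch of the fold above on hypothetical CLIF (value names are
    // illustrative). With a 32-bit destination and N == 24, the surviving
    // lane width is 32 - 24 == 8, so the shift pair is an i8 extension:
    //
    //   before:                     after:
    //     v1 = ishl_imm v0, 24        v3 = ireduce.i8 v0
    //     v2 = sshr_imm v1, 24        v2 = sextend.i32 v3
    //
    // A `ushr_imm` in place of the `sshr_imm` becomes `uextend` instead.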
    /// Apply basic simplifications.
    ///
    /// This folds constants with arithmetic to form `_imm` instructions, and other minor
    /// simplifications.
    ///
    /// Doesn't apply some simplifications if the native word width (in bytes) is smaller than
    /// the width of the instruction's controlling type. That would result in an illegal
    /// instruction that would likely be expanded back into an instruction on smaller types with
    /// the same initial opcode, creating unnecessary churn.
    fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
        match pos.func.dfg[inst] {
            InstructionData::Binary { opcode, args } => {
                if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Sdiv => Opcode::SdivImm,
                        Opcode::Udiv => Opcode::UdivImm,
                        Opcode::Srem => Opcode::SremImm,
                        Opcode::Urem => Opcode::UremImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Rotl => Opcode::RotlImm,
                        Opcode::Rotr => Opcode::RotrImm,
                        Opcode::Ishl => Opcode::IshlImm,
                        Opcode::Ushr => Opcode::UshrImm,
                        Opcode::Sshr => Opcode::SshrImm,
                        Opcode::Isub => {
                            imm = imm.wrapping_neg();
                            Opcode::IaddImm
                        }
                        Opcode::Ifcmp => Opcode::IfcmpImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    if ty.bytes() <= native_word_width {
                        pos.func
                            .dfg
                            .replace(inst)
                            .BinaryImm64(new_opcode, ty, imm, args[0]);

                        // Repeat for the BinaryImm64 simplification below.
                        simplify(pos, inst, native_word_width);
                    }
                } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) {
                    let new_opcode = match opcode {
                        Opcode::Iadd => Opcode::IaddImm,
                        Opcode::Imul => Opcode::ImulImm,
                        Opcode::Band => Opcode::BandImm,
                        Opcode::Bor => Opcode::BorImm,
                        Opcode::Bxor => Opcode::BxorImm,
                        Opcode::Isub => Opcode::IrsubImm,
                        _ => return,
                    };
                    let ty = pos.func.dfg.ctrl_typevar(inst);
                    if ty.bytes() <= native_word_width {
                        pos.func
                            .dfg
                            .replace(inst)
                            .BinaryImm64(new_opcode, ty, imm, args[1]);
                    }
                }
            }

            InstructionData::Unary { opcode, arg } => {
                if let Opcode::AdjustSpDown = opcode {
                    if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) {
                        // Note this works for both positive and negative immediate values.
                        pos.func.dfg.replace(inst).adjust_sp_down_imm(imm);
                    }
                }
            }

            InstructionData::BinaryImm64 { opcode, arg, imm } => {
                let ty = pos.func.dfg.ctrl_typevar(inst);

                let mut arg = arg;
                let mut imm = imm;
                match opcode {
                    Opcode::IaddImm
                    | Opcode::ImulImm
                    | Opcode::BorImm
                    | Opcode::BandImm
                    | Opcode::BxorImm => {
                        // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x).
                        if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
                            if let InstructionData::BinaryImm64 {
                                opcode: prev_opcode,
                                arg: prev_arg,
                                imm: prev_imm,
                            } = &pos.func.dfg[arg_inst]
                            {
                                if opcode == *prev_opcode
                                    && ty == pos.func.dfg.ctrl_typevar(arg_inst)
                                {
                                    let lhs: i64 = imm.into();
                                    let rhs: i64 = (*prev_imm).into();
                                    let new_imm = match opcode {
                                        Opcode::BorImm => lhs | rhs,
                                        Opcode::BandImm => lhs & rhs,
                                        Opcode::BxorImm => lhs ^ rhs,
                                        Opcode::IaddImm => lhs.wrapping_add(rhs),
                                        Opcode::ImulImm => lhs.wrapping_mul(rhs),
                                        _ => panic!("can't happen"),
                                    };
                                    let new_imm = immediates::Imm64::from(new_imm);
                                    let new_arg = *prev_arg;
                                    pos.func
                                        .dfg
                                        .replace(inst)
                                        .BinaryImm64(opcode, ty, new_imm, new_arg);
                                    imm = new_imm;
                                    arg = new_arg;
                                }
                            }
                        }
                    }
                    Opcode::UshrImm | Opcode::SshrImm => {
                        if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width
                            && try_fold_extended_move(pos, inst, opcode, arg, imm)
                        {
                            return;
                        }
                    }
                    _ => {}
                };
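                // For intuition, the fold above turns a chain like this
                // (hypothetical CLIF, names illustrative):
                //
                //   v1 = iadd_imm v0, 10        v1 = iadd_imm v0, 10
                //   v2 = iadd_imm v1, 20   =>   v2 = iadd_imm v0, 30
                //
                // `v1` is left in place; if it has no other uses, later
                // cleanup can remove it. `imm` and `arg` now describe the
                // rewritten instruction, so the no-op checks below still
                // apply to it.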
                // Replace operations that are no-ops.
                match (opcode, imm.into()) {
                    (Opcode::IaddImm, 0)
                    | (Opcode::ImulImm, 1)
                    | (Opcode::SdivImm, 1)
                    | (Opcode::UdivImm, 1)
                    | (Opcode::BorImm, 0)
                    | (Opcode::BandImm, -1)
                    | (Opcode::BxorImm, 0)
                    | (Opcode::RotlImm, 0)
                    | (Opcode::RotrImm, 0)
                    | (Opcode::IshlImm, 0)
                    | (Opcode::UshrImm, 0)
                    | (Opcode::SshrImm, 0) => {
                        // Alias the result value with the original argument.
                        replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
                    }
                    (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
                        // Replace by zero.
                        pos.func.dfg.replace(inst).iconst(ty, 0);
                    }
                    (Opcode::BorImm, -1) => {
                        // Replace by minus one.
                        pos.func.dfg.replace(inst).iconst(ty, -1);
                    }
                    _ => {}
                }
            }

            InstructionData::IntCompare { opcode, cond, args } => {
                debug_assert_eq!(opcode, Opcode::Icmp);
                if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
                    if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width {
                        pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm);
                    }
                }
            }

            InstructionData::CondTrap { .. }
            | InstructionData::Branch { .. }
            | InstructionData::Ternary {
                opcode: Opcode::Select,
                ..
            } => {
                // Fold away a redundant `bint`.
                let condition_def = {
                    let args = pos.func.dfg.inst_args(inst);
                    pos.func.dfg.value_def(args[0])
                };
                if let ValueDef::Result(def_inst, _) = condition_def {
                    if let InstructionData::Unary {
                        opcode: Opcode::Bint,
                        arg: bool_val,
                    } = pos.func.dfg[def_inst]
                    {
                        let args = pos.func.dfg.inst_args_mut(inst);
                        args[0] = bool_val;
                    }
                }
            }

            InstructionData::Ternary {
                opcode: Opcode::Bitselect,
                args,
            } => {
                let old_cond_type = pos.func.dfg.value_type(args[0]);
                if !old_cond_type.is_vector() {
                    return;
                }

                // Replace bitselect with vselect if each lane of the controlling mask is
                // either all ones or all zeroes; on x86 bitselect is encoded using 3
                // instructions, while vselect can be encoded using a single BLEND instruction.
                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
                    let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
                        InstructionData::Unary {
                            opcode: Opcode::RawBitcast,
                            arg,
                        } => {
                            // If the controlling mask is a raw-bitcasted boolean vector then
                            // we know each lane is either all zeroes or all ones,
                            // so we can use the vselect instruction instead.
                            let arg_type = pos.func.dfg.value_type(arg);
                            if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
                                return;
                            }
                            (arg, arg_type)
                        }
                        InstructionData::UnaryConst {
                            opcode: Opcode::Vconst,
                            constant_handle,
                        } => {
                            // If each byte of the controlling mask is 0x00 or 0xFF then
                            // we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
                            // Bitselect operates at the bit level, so the lane types don't
                            // matter.
                            let const_data = pos.func.dfg.constants.get(constant_handle);
                            if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
                                return;
                            }
                            let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
                            (pos.ins().raw_bitcast(new_type, args[0]), new_type)
                        }
                        _ => return,
                    };

                    let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
                    let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
                    let old_arg_type = pos.func.dfg.value_type(args[1]);

                    if arg_type != old_arg_type {
                        // Operand types must match; we need to add bitcasts.
                        let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
                        let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
                        let ret = pos.ins().vselect(cond_val, arg1, arg2);
                        pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
                    } else {
                        pos.func
                            .dfg
                            .replace(inst)
                            .vselect(cond_val, args[1], args[2]);
                    }
                }
            }

            _ => {}
        }
    }

    struct BranchOptInfo {
        br_inst: Inst,
        cmp_arg: Value,
        args: ValueList,
        new_opcode: Opcode,
    }
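    // The fold implemented below, sketched on hypothetical CLIF (names
    // illustrative). `icmp_imm eq v0, 0` is non-zero exactly when v0 is zero,
    // so branching on its truth is branching on v0's zero-ness, inverted:
    //
    //   before:                         after:
    //     v1 = icmp_imm eq v0, 0          (v1 may become dead)
    //     brnz v1, block1                 brz v0, block1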
    /// Fold comparisons into branch operations when possible.
    ///
    /// This matches against operations which compare against zero, then use the
    /// result in a `brz` or `brnz` branch. It folds those two operations into a
    /// single `brz` or `brnz`.
    fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
        let mut info = if let InstructionData::Branch {
            opcode: br_opcode,
            args: ref br_args,
            ..
        } = pos.func.dfg[inst]
        {
            let first_arg = {
                let args = pos.func.dfg.inst_args(inst);
                args[0]
            };

            let icmp_inst =
                if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
                    icmp_inst
                } else {
                    return;
                };

            if let InstructionData::IntCompareImm {
                opcode: Opcode::IcmpImm,
                arg: cmp_arg,
                cond: cmp_cond,
                imm: cmp_imm,
            } = pos.func.dfg[icmp_inst]
            {
                let cmp_imm: i64 = cmp_imm.into();
                if cmp_imm != 0 {
                    return;
                }

                // icmp_imm returns non-zero when the comparison is true. So, if
                // we're branching on zero, we need to invert the condition.
                let cond = match br_opcode {
                    Opcode::Brz => cmp_cond.inverse(),
                    Opcode::Brnz => cmp_cond,
                    _ => return,
                };

                let new_opcode = match cond {
                    IntCC::Equal => Opcode::Brz,
                    IntCC::NotEqual => Opcode::Brnz,
                    _ => return,
                };

                BranchOptInfo {
                    br_inst: inst,
                    cmp_arg,
                    args: br_args.clone(),
                    new_opcode,
                }
            } else {
                return;
            }
        } else {
            return;
        };

        info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
        if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
            *opcode = info.new_opcode;
        } else {
            panic!();
        }
    }
}

/// The main pre-opt pass.
pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
    let _tt = timing::preopt();
    let mut pos = FuncCursor::new(func);
    let native_word_width = isa.pointer_bytes() as u32;
    let mut optimizer = simplify::peephole_optimizer(isa);

    while let Some(block) = pos.next_block() {
        while let Some(inst) = pos.next_inst() {
            simplify::apply_all(&mut optimizer, &mut pos, inst, native_word_width);

            // Try to transform divide-by-constant into simpler operations.
            if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) {
                do_divrem_transformation(&divrem_info, &mut pos, inst);
                continue;
            }

            branch_order(&mut pos, cfg, block, inst);
        }
    }
}
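// A minimal sketch of driving this pass directly, for orientation (assumes a
// populated `func: Function` and an `isa: &dyn TargetIsa`; this is normally
// invoked by the compilation context rather than called by hand):
//
//     let mut cfg = ControlFlowGraph::with_function(&func);
//     do_preopt(&mut func, &mut cfg, isa);
//
// `branch_order` calls `cfg.recompute_block` for each block it rewrites, so
// the control flow graph stays consistent with the rewritten function.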