Diffstat (limited to 'third_party/rust/cranelift-codegen/src/isa')
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/abi.rs  850
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs  728
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs  2359
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs  5143
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs  1025
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs  4057
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs  351
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs  201
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs  158
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/lower.rs  1196
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/lower_inst.rs  3409
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/aarch64/mod.rs  274
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/abi.rs  471
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/inst/args.rs  335
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit.rs  829
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit_tests.rs  1959
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/inst/mod.rs  1358
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/inst/regs.rs  128
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/inst/unwind.rs  14
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/lower.rs  240
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/lower_inst.rs  608
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/arm32/mod.rs  123
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/call_conv.rs  106
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/constraints.rs  207
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/enc_tables.rs  292
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/encoding.rs  163
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/mod.rs  447
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/registers.rs  360
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/riscv/abi.rs  149
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/riscv/binemit.rs  182
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/riscv/enc_tables.rs  18
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/riscv/mod.rs  295
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/riscv/registers.rs  50
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/riscv/settings.rs  56
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/stack.rs  95
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/test_utils.rs  86
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/unwind.rs  88
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/unwind/systemv.rs  313
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/unwind/winx64.rs  294
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/abi.rs  794
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs  1215
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs  2819
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs  3593
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs  2733
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs  289
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs  125
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs  204
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/lower.rs  3771
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/mod.rs  149
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/settings.rs  9
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/abi.rs  1093
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/binemit.rs  576
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/enc_tables.rs  1922
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/mod.rs  190
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/registers.rs  86
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/settings.rs  52
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/unwind.rs  535
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/unwind/systemv.rs  234
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x86/unwind/winx64.rs  268
59 files changed, 49674 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/abi.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/abi.rs
new file mode 100644
index 0000000000..dfb7db4dbf
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/abi.rs
@@ -0,0 +1,850 @@
+//! Implementation of a standard AArch64 ABI.
+
+use crate::ir;
+use crate::ir::types;
+use crate::ir::types::*;
+use crate::ir::MemFlags;
+use crate::isa;
+use crate::isa::aarch64::{inst::EmitState, inst::*};
+use crate::machinst::*;
+use crate::settings;
+use crate::{CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use regalloc::{RealReg, Reg, RegClass, Set, Writable};
+use smallvec::SmallVec;
+
+// We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because
+// these ABIs are very similar.
+
+/// Support for the AArch64 ABI from the callee side (within a function body).
+pub(crate) type AArch64ABICallee = ABICalleeImpl<AArch64MachineDeps>;
+
+/// Support for the AArch64 ABI from the caller side (at a callsite).
+pub(crate) type AArch64ABICaller = ABICallerImpl<AArch64MachineDeps>;
+
+// SpiderMonkey-specific ABI conventions.
+
+/// This is SpiderMonkey's `WasmTableCallSigReg`.
+static BALDRDASH_SIG_REG: u8 = 10;
+
+/// This is SpiderMonkey's `WasmTlsReg`.
+static BALDRDASH_TLS_REG: u8 = 23;
+
+/// Offset in stack-arg area to callee-TLS slot in Baldrdash-2020 calling convention.
+static BALDRDASH_CALLEE_TLS_OFFSET: i64 = 0;
+/// Offset in stack-arg area to caller-TLS slot in Baldrdash-2020 calling convention.
+static BALDRDASH_CALLER_TLS_OFFSET: i64 = 8;
+
+// These two lists represent the registers the JIT may *not* use at any point in generated code.
+//
+// So these are callee-preserved from the JIT's point of view, and every register not in this list
+// has to be caller-preserved by definition.
+//
+// Keep these lists in sync with the NonAllocatableMask set in Spidermonkey's
+// Architecture-arm64.cpp.
+
+// Indexed by physical register number.
+#[rustfmt::skip]
+static BALDRDASH_JIT_CALLEE_SAVED_GPR: &[bool] = &[
+ /* 0 = */ false, false, false, false, false, false, false, false,
+ /* 8 = */ false, false, false, false, false, false, false, false,
+ /* 16 = */ true /* x16 / ip1 */, true /* x17 / ip2 */, true /* x18 / TLS */, false,
+ /* 20 = */ false, false, false, false,
+ /* 24 = */ false, false, false, false,
+ // x28, the pseudo stack pointer, should also be in this list; however, the wasm stubs
+ // currently trash it.
+ /* 28 = */ false, false, true /* x30 = FP */, false /* x31 = SP */
+];
+
+#[rustfmt::skip]
+static BALDRDASH_JIT_CALLEE_SAVED_FPU: &[bool] = &[
+ /* 0 = */ false, false, false, false, false, false, false, false,
+ /* 8 = */ false, false, false, false, false, false, false, false,
+ /* 16 = */ false, false, false, false, false, false, false, false,
+ /* 24 = */ false, false, false, false, false, false, false, true /* v31 / d31 */
+];
+
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+
+/// Try to fill a Baldrdash register, returning it if it was found.
+fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
+ if call_conv.extends_baldrdash() {
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext => {
+ // This is SpiderMonkey's `WasmTlsReg`.
+ Some(ABIArg::Reg(
+ xreg(BALDRDASH_TLS_REG).to_real_reg(),
+ ir::types::I64,
+ param.extension,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::SignatureId => {
+ // This is SpiderMonkey's `WasmTableCallSigReg`.
+ Some(ABIArg::Reg(
+ xreg(BALDRDASH_SIG_REG).to_real_reg(),
+ ir::types::I64,
+ param.extension,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::CalleeTLS => {
+ // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
+ assert!(call_conv == isa::CallConv::Baldrdash2020);
+ Some(ABIArg::Stack(
+ BALDRDASH_CALLEE_TLS_OFFSET,
+ ir::types::I64,
+ ir::ArgumentExtension::None,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::CallerTLS => {
+ // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
+ assert!(call_conv == isa::CallConv::Baldrdash2020);
+ Some(ABIArg::Stack(
+ BALDRDASH_CALLER_TLS_OFFSET,
+ ir::types::I64,
+ ir::ArgumentExtension::None,
+ param.purpose,
+ ))
+ }
+ _ => None,
+ }
+ } else {
+ None
+ }
+}
+
+impl Into<AMode> for StackAMode {
+ fn into(self) -> AMode {
+ match self {
+ StackAMode::FPOffset(off, ty) => AMode::FPOffset(off, ty),
+ StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset(off, ty),
+ StackAMode::SPOffset(off, ty) => AMode::SPOffset(off, ty),
+ }
+ }
+}
+
+// Returns the sizes of the stack space needed to save the clobbered
+// integer (`int_reg`) and vector (`vec_reg`) registers, respectively.
+fn saved_reg_stack_size(
+ int_reg: &[Writable<RealReg>],
+ vec_reg: &[Writable<RealReg>],
+) -> (usize, usize) {
+ // Round up to multiple of 2, to keep 16-byte stack alignment.
+ let int_save_bytes = (int_reg.len() + (int_reg.len() & 1)) * 8;
+ let vec_save_bytes = vec_reg.len() * 16;
+ (int_save_bytes, vec_save_bytes)
+}
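
As a quick, self-contained check of the rounding above (hypothetical helper name, not part of the patch): an odd number of 8-byte integer saves is padded up to an even count so the 16-byte stack alignment is preserved.

fn int_save_bytes(n_int_regs: usize) -> usize {
    // Pad an odd register count to an even one: each register takes 8 bytes,
    // so an even count keeps the 16-byte alignment.
    (n_int_regs + (n_int_regs & 1)) * 8
}

fn main() {
    assert_eq!(int_save_bytes(3), 32); // 3 regs padded to 4 slots of 8 bytes
    assert_eq!(int_save_bytes(4), 32);
    assert_eq!(int_save_bytes(5), 48);
}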
+
+/// AArch64-specific ABI behavior. This struct just serves as an implementation
+/// point for the trait; it is never actually instantiated.
+pub(crate) struct AArch64MachineDeps;
+
+impl ABIMachineSpec for AArch64MachineDeps {
+ type I = Inst;
+
+ fn word_bits() -> u32 {
+ 64
+ }
+
+ /// Return required stack alignment in bytes.
+ fn stack_align(_call_conv: isa::CallConv) -> u32 {
+ 16
+ }
+
+ fn compute_arg_locs(
+ call_conv: isa::CallConv,
+ params: &[ir::AbiParam],
+ args_or_rets: ArgsOrRets,
+ add_ret_area_ptr: bool,
+ ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+ let is_baldrdash = call_conv.extends_baldrdash();
+ let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
+
+ // See the AArch64 ABI (https://c9x.me/compile/bib/abi-arm64.pdf), section 5.4.
+ let mut next_xreg = 0;
+ let mut next_vreg = 0;
+ let mut next_stack: u64 = 0;
+ let mut ret = vec![];
+
+ if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
+ // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
+ // caller TLS-register values, respectively.
+ next_stack = 16;
+ }
+
+ // Note on return values: in the regular (non-baldrdash) ABI, we may return values in up
+ // to 8 registers of each class (I64 and V128), independently of how many values are
+ // returned in the other class. That is, we can return values in up to 8 integer and 8
+ // vector registers at once.
+ // In Baldrdash, only a single register may be used for return values across all register
+ // classes: we cannot return values in both an integer and a vector register; at most one
+ // return value may be in a register.
+
+ let (max_per_class_reg_vals, mut remaining_reg_vals) = match (args_or_rets, is_baldrdash) {
+ (ArgsOrRets::Args, _) => (8, 16), // x0-x7 and v0-v7
+ (ArgsOrRets::Rets, false) => (8, 16), // x0-x7 and v0-v7
+ (ArgsOrRets::Rets, true) => (1, 1), // x0 or v0, but not both
+ };
+
+ for i in 0..params.len() {
+ // Process returns backward, according to the SpiderMonkey ABI (which we
+ // adopt internally if `is_baldrdash` is set).
+ let param = match (args_or_rets, is_baldrdash) {
+ (ArgsOrRets::Args, _) => &params[i],
+ (ArgsOrRets::Rets, false) => &params[i],
+ (ArgsOrRets::Rets, true) => &params[params.len() - 1 - i],
+ };
+
+ // Validate "purpose".
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext
+ | &ir::ArgumentPurpose::Normal
+ | &ir::ArgumentPurpose::StackLimit
+ | &ir::ArgumentPurpose::SignatureId
+ | &ir::ArgumentPurpose::CallerTLS
+ | &ir::ArgumentPurpose::CalleeTLS => {}
+ _ => panic!(
+ "Unsupported argument purpose {:?} in signature: {:?}",
+ param.purpose, params
+ ),
+ }
+
+ assert!(
+ legal_type_for_machine(param.value_type),
+ "Invalid type for AArch64: {:?}",
+ param.value_type
+ );
+ let rc = Inst::rc_for_type(param.value_type).unwrap();
+
+ let next_reg = match rc {
+ RegClass::I64 => &mut next_xreg,
+ RegClass::V128 => &mut next_vreg,
+ _ => panic!("Invalid register class: {:?}", rc),
+ };
+
+ if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
+ assert!(rc == RegClass::I64);
+ ret.push(param);
+ } else if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 {
+ let reg = match rc {
+ RegClass::I64 => xreg(*next_reg),
+ RegClass::V128 => vreg(*next_reg),
+ _ => unreachable!(),
+ };
+ ret.push(ABIArg::Reg(
+ reg.to_real_reg(),
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ *next_reg += 1;
+ remaining_reg_vals -= 1;
+ } else {
+ // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
+ // stack alignment happens separately after all args.)
+ let size = (ty_bits(param.value_type) / 8) as u64;
+ let size = std::cmp::max(size, 8);
+ // Align.
+ debug_assert!(size.is_power_of_two());
+ next_stack = (next_stack + size - 1) & !(size - 1);
+ ret.push(ABIArg::Stack(
+ next_stack as i64,
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ next_stack += size;
+ }
+ }
+
+ if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
+ ret.reverse();
+ }
+
+ let extra_arg = if add_ret_area_ptr {
+ debug_assert!(args_or_rets == ArgsOrRets::Args);
+ if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 {
+ ret.push(ABIArg::Reg(
+ xreg(next_xreg).to_real_reg(),
+ I64,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ } else {
+ ret.push(ABIArg::Stack(
+ next_stack as i64,
+ I64,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ next_stack += 8;
+ }
+ Some(ret.len() - 1)
+ } else {
+ None
+ };
+
+ next_stack = (next_stack + 15) & !15;
+
+ // To avoid overflow issues, limit the arg/return size to something
+ // reasonable -- here, 128 MB.
+ if next_stack > STACK_ARG_RET_SIZE_LIMIT {
+ return Err(CodegenError::ImplLimitExceeded);
+ }
+
+ Ok((ret, next_stack as i64, extra_arg))
+ }
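
As a rough illustration of the assignment loop above, here is a self-contained sketch (hypothetical `Loc` type and `assign` helper; it ignores the Baldrdash cases, the shared register-count cap, and extension/purpose tracking): integer values take x0-x7, vector values take v0-v7, and anything left over gets a stack slot of at least 8 bytes, aligned to its size.

#[derive(Debug, PartialEq)]
enum Loc { X(u8), V(u8), Stack(u64) }

fn assign(params: &[(bool /* is_vector */, u64 /* size in bytes */)]) -> Vec<Loc> {
    let (mut next_x, mut next_v, mut next_stack) = (0u8, 0u8, 0u64);
    params
        .iter()
        .map(|&(is_vec, size)| {
            if !is_vec && next_x < 8 {
                next_x += 1;
                Loc::X(next_x - 1)
            } else if is_vec && next_v < 8 {
                next_v += 1;
                Loc::V(next_v - 1)
            } else {
                // Stack args occupy at least 8 bytes and are aligned to their size.
                let size = size.max(8);
                next_stack = (next_stack + size - 1) & !(size - 1);
                let off = next_stack;
                next_stack += size;
                Loc::Stack(off)
            }
        })
        .collect()
}

fn main() {
    // Nine I64 params: the first eight land in x0..x7, the ninth at stack offset 0.
    let locs = assign(&[(false, 8); 9]);
    assert_eq!(locs[7], Loc::X(7));
    assert_eq!(locs[8], Loc::Stack(0));
}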
+
+ fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 {
+ if call_conv.extends_baldrdash() {
+ let num_words = flags.baldrdash_prologue_words() as i64;
+ debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
+ debug_assert_eq!(num_words % 2, 0, "stack must be 16-aligned");
+ num_words * 8
+ } else {
+ 16 // frame pointer + return address.
+ }
+ }
+
+ fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
+ Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
+ }
+
+ fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
+ Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted())
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+ Inst::gen_move(to_reg, from_reg, ty)
+ }
+
+ fn gen_extend(
+ to_reg: Writable<Reg>,
+ from_reg: Reg,
+ signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ ) -> Inst {
+ assert!(from_bits < to_bits);
+ Inst::Extend {
+ rd: to_reg,
+ rn: from_reg,
+ signed,
+ from_bits,
+ to_bits,
+ }
+ }
+
+ fn gen_ret() -> Inst {
+ Inst::Ret
+ }
+
+ fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
+ let imm = imm as u64;
+ let mut insts = SmallVec::new();
+ if let Some(imm12) = Imm12::maybe_from_u64(imm) {
+ insts.push(Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: into_reg,
+ rn: from_reg,
+ imm12,
+ });
+ } else {
+ let scratch2 = writable_tmp2_reg();
+ assert_ne!(scratch2.to_reg(), from_reg);
+ insts.extend(Inst::load_constant(scratch2, imm.into()));
+ insts.push(Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: into_reg,
+ rn: from_reg,
+ rm: scratch2.to_reg(),
+ extendop: ExtendOp::UXTX,
+ });
+ }
+ insts
+ }
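
The branch above hinges on whether the constant fits AArch64's 12-bit (optionally LSL #12) arithmetic immediate. A small sketch of that test, assuming `Imm12::maybe_from_u64` accepts exactly the values below (hypothetical helper, not the real `Imm12` API):

// Accept a plain 12-bit value, or a 12-bit value shifted left by 12.
fn fits_add_imm(imm: u64) -> bool {
    imm < 0x1000 || (imm & 0xfff == 0 && imm <= 0xfff_000)
}

fn main() {
    assert!(fits_add_imm(4095));     // fits directly
    assert!(fits_add_imm(0x5000));   // 5 << 12, uses the shifted form
    assert!(!fits_add_imm(0x12345)); // falls back to load_constant + AluRRRExtend
}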
+
+ fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
+ let mut insts = SmallVec::new();
+ insts.push(Inst::AluRRRExtend {
+ alu_op: ALUOp::SubS64,
+ rd: writable_zero_reg(),
+ rn: stack_reg(),
+ rm: limit_reg,
+ extendop: ExtendOp::UXTX,
+ });
+ insts.push(Inst::TrapIf {
+ trap_code: ir::TrapCode::StackOverflow,
+ // Here `Lo` == "less than" when interpreting the two
+ // operands as unsigned integers.
+ kind: CondBrKind::Cond(Cond::Lo),
+ });
+ insts
+ }
+
+ fn gen_epilogue_placeholder() -> Inst {
+ Inst::EpiloguePlaceholder
+ }
+
+ fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
+ let mem = mem.into();
+ Inst::LoadAddr { rd: into_reg, mem }
+ }
+
+ fn get_stacklimit_reg() -> Reg {
+ spilltmp_reg()
+ }
+
+ fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
+ let mem = AMode::RegOffset(base, offset as i64, ty);
+ Inst::gen_load(into_reg, mem, ty, MemFlags::trusted())
+ }
+
+ fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
+ let mem = AMode::RegOffset(base, offset as i64, ty);
+ Inst::gen_store(mem, from_reg, ty, MemFlags::trusted())
+ }
+
+ fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
+ if amount == 0 {
+ return SmallVec::new();
+ }
+
+ let (amount, is_sub) = if amount > 0 {
+ (amount as u64, false)
+ } else {
+ (-amount as u64, true)
+ };
+
+ let alu_op = if is_sub { ALUOp::Sub64 } else { ALUOp::Add64 };
+
+ let mut ret = SmallVec::new();
+ if let Some(imm12) = Imm12::maybe_from_u64(amount) {
+ let adj_inst = Inst::AluRRImm12 {
+ alu_op,
+ rd: writable_stack_reg(),
+ rn: stack_reg(),
+ imm12,
+ };
+ ret.push(adj_inst);
+ } else {
+ let tmp = writable_spilltmp_reg();
+ let const_inst = Inst::load_constant(tmp, amount);
+ let adj_inst = Inst::AluRRRExtend {
+ alu_op,
+ rd: writable_stack_reg(),
+ rn: stack_reg(),
+ rm: tmp.to_reg(),
+ extendop: ExtendOp::UXTX,
+ };
+ ret.extend(const_inst);
+ ret.push(adj_inst);
+ }
+ ret
+ }
+
+ fn gen_nominal_sp_adj(offset: i32) -> Inst {
+ Inst::VirtualSPOffsetAdj {
+ offset: offset as i64,
+ }
+ }
+
+ fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
+ let mut insts = SmallVec::new();
+ // stp fp (x29), lr (x30), [sp, #-16]!
+ insts.push(Inst::StoreP64 {
+ rt: fp_reg(),
+ rt2: link_reg(),
+ mem: PairAMode::PreIndexed(
+ writable_stack_reg(),
+ SImm7Scaled::maybe_from_i64(-16, types::I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ });
+ // mov fp (x29), sp. This uses the `ADD rd, rn, #0` form of `MOV` because
+ // the usual encoding (an alias of `ORR`) does not work with SP.
+ insts.push(Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: writable_fp_reg(),
+ rn: stack_reg(),
+ imm12: Imm12 {
+ bits: 0,
+ shift12: false,
+ },
+ });
+ insts
+ }
+
+ fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
+ let mut insts = SmallVec::new();
+
+ // MOV (alias of ORR) interprets x31 as XZR, so use an ADD here.
+ // MOV to SP is an alias of ADD.
+ insts.push(Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: writable_stack_reg(),
+ rn: fp_reg(),
+ imm12: Imm12 {
+ bits: 0,
+ shift12: false,
+ },
+ });
+ insts.push(Inst::LoadP64 {
+ rt: writable_fp_reg(),
+ rt2: writable_link_reg(),
+ mem: PairAMode::PostIndexed(
+ writable_stack_reg(),
+ SImm7Scaled::maybe_from_i64(16, types::I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ });
+
+ insts
+ }
+
+ // Returns stack bytes used as well as instructions. Does not adjust
+ // nominal SP offset; abi_impl generic code will do that.
+ fn gen_clobber_save(
+ call_conv: isa::CallConv,
+ _: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> (u64, SmallVec<[Inst; 16]>) {
+ let mut insts = SmallVec::new();
+ let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
+
+ let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
+ let total_save_bytes = (vec_save_bytes + int_save_bytes) as i32;
+ insts.extend(Self::gen_sp_reg_adjust(
+ -(total_save_bytes + fixed_frame_storage_size as i32),
+ ));
+
+ for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
+ let (r1, r2) = if reg_pair.len() == 2 {
+ // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg
+ (reg_pair[0].to_reg().to_reg(), reg_pair[1].to_reg().to_reg())
+ } else {
+ (reg_pair[0].to_reg().to_reg(), zero_reg())
+ };
+
+ debug_assert!(r1.get_class() == RegClass::I64);
+ debug_assert!(r2.get_class() == RegClass::I64);
+
+ // stp r1, r2, [sp, #(i * #16)]
+ insts.push(Inst::StoreP64 {
+ rt: r1,
+ rt2: r2,
+ mem: PairAMode::SignedOffset(
+ stack_reg(),
+ SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ });
+ }
+
+ let vec_offset = int_save_bytes;
+ for (i, reg) in clobbered_vec.iter().enumerate() {
+ insts.push(Inst::FpuStore128 {
+ rd: reg.to_reg().to_reg(),
+ mem: AMode::Unscaled(
+ stack_reg(),
+ SImm9::maybe_from_i64((vec_offset + (i * 16)) as i64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ });
+ }
+
+ (total_save_bytes as u64, insts)
+ }
+
+ fn gen_clobber_restore(
+ call_conv: isa::CallConv,
+ flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ _fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> SmallVec<[Inst; 16]> {
+ let mut insts = SmallVec::new();
+ let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
+
+ let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
+ for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
+ let (r1, r2) = if reg_pair.len() == 2 {
+ (
+ reg_pair[0].map(|r| r.to_reg()),
+ reg_pair[1].map(|r| r.to_reg()),
+ )
+ } else {
+ (reg_pair[0].map(|r| r.to_reg()), writable_zero_reg())
+ };
+
+ debug_assert!(r1.to_reg().get_class() == RegClass::I64);
+ debug_assert!(r2.to_reg().get_class() == RegClass::I64);
+
+ // ldp r1, r2, [sp, #(i * 16)]
+ insts.push(Inst::LoadP64 {
+ rt: r1,
+ rt2: r2,
+ mem: PairAMode::SignedOffset(
+ stack_reg(),
+ SImm7Scaled::maybe_from_i64((i * 16) as i64, types::I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ });
+ }
+
+ for (i, reg) in clobbered_vec.iter().enumerate() {
+ insts.push(Inst::FpuLoad128 {
+ rd: Writable::from_reg(reg.to_reg().to_reg()),
+ mem: AMode::Unscaled(
+ stack_reg(),
+ SImm9::maybe_from_i64(((i * 16) + int_save_bytes) as i64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ });
+ }
+
+ // For non-baldrdash calling conventions, the frame pointer
+ // will be moved into the stack pointer in the epilogue, so we
+ // can skip restoring the stack pointer value with this `add`.
+ if call_conv.extends_baldrdash() {
+ let total_save_bytes = (int_save_bytes + vec_save_bytes) as i32;
+ insts.extend(Self::gen_sp_reg_adjust(total_save_bytes));
+ }
+
+ // If this is Baldrdash-2020, restore the callee (i.e., our) TLS
+ // register. We may have allocated it for something else and clobbered
+ // it, but the ABI expects us to leave the TLS register unchanged.
+ if call_conv == isa::CallConv::Baldrdash2020 {
+ let off = BALDRDASH_CALLEE_TLS_OFFSET + Self::fp_to_arg_offset(call_conv, flags);
+ insts.push(Inst::gen_load(
+ writable_xreg(BALDRDASH_TLS_REG),
+ AMode::UnsignedOffset(fp_reg(), UImm12Scaled::maybe_from_i64(off, I64).unwrap()),
+ I64,
+ MemFlags::trusted(),
+ ));
+ }
+
+ insts
+ }
+
+ fn gen_call(
+ dest: &CallDest,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: ir::Opcode,
+ tmp: Writable<Reg>,
+ callee_conv: isa::CallConv,
+ caller_conv: isa::CallConv,
+ ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
+ let mut insts = SmallVec::new();
+ match &dest {
+ &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
+ InstIsSafepoint::Yes,
+ Inst::Call {
+ info: Box::new(CallInfo {
+ dest: name.clone(),
+ uses,
+ defs,
+ opcode,
+ caller_callconv: caller_conv,
+ callee_callconv: callee_conv,
+ }),
+ },
+ )),
+ &CallDest::ExtName(ref name, RelocDistance::Far) => {
+ insts.push((
+ InstIsSafepoint::No,
+ Inst::LoadExtName {
+ rd: tmp,
+ name: Box::new(name.clone()),
+ offset: 0,
+ },
+ ));
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rn: tmp.to_reg(),
+ uses,
+ defs,
+ opcode,
+ caller_callconv: caller_conv,
+ callee_callconv: callee_conv,
+ }),
+ },
+ ));
+ }
+ &CallDest::Reg(reg) => insts.push((
+ InstIsSafepoint::Yes,
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rn: *reg,
+ uses,
+ defs,
+ opcode,
+ caller_callconv: caller_conv,
+ callee_callconv: callee_conv,
+ }),
+ },
+ )),
+ }
+
+ insts
+ }
+
+ fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
+ // We allocate in terms of 8-byte slots.
+ match (rc, ty) {
+ (RegClass::I64, _) => 1,
+ (RegClass::V128, F32) | (RegClass::V128, F64) => 1,
+ (RegClass::V128, _) => 2,
+ _ => panic!("Unexpected register class!"),
+ }
+ }
+
+ /// Get the current virtual-SP offset from an instruction-emission state.
+ fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
+ s.virtual_sp_offset
+ }
+
+ /// Get the nominal-SP-to-FP offset from an instruction-emission state.
+ fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
+ s.nominal_sp_to_fp
+ }
+
+ fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
+ let mut caller_saved = Vec::new();
+ for i in 0..29 {
+ let x = writable_xreg(i);
+ if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) {
+ caller_saved.push(x);
+ }
+ }
+ for i in 0..32 {
+ let v = writable_vreg(i);
+ if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) {
+ caller_saved.push(v);
+ }
+ }
+ caller_saved
+ }
+}
+
+/// Is this type legal for this machine? E.g., references of the
+/// wrong width are invalid.
+fn legal_type_for_machine(ty: Type) -> bool {
+ match ty {
+ R32 => false,
+ _ => true,
+ }
+}
+
+/// Is the given register saved in the prologue if clobbered, i.e., is it a
+/// callee-save?
+fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
+ if call_conv.extends_baldrdash() {
+ match r.get_class() {
+ RegClass::I64 => {
+ let enc = r.get_hw_encoding();
+ return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc];
+ }
+ RegClass::V128 => {
+ let enc = r.get_hw_encoding();
+ return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc];
+ }
+ _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
+ };
+ }
+
+ match r.get_class() {
+ RegClass::I64 => {
+ // x19 - x28 inclusive are callee-saves.
+ r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28
+ }
+ RegClass::V128 => {
+ // v8 - v15 inclusive are callee-saves.
+ r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
+ }
+ _ => panic!("Unexpected RegClass"),
+ }
+}
+
+/// Return the set of all integer and vector registers that must be saved in the
+/// prologue and restored in the epilogue, given the set of all registers
+/// written by the function's body.
+fn get_regs_saved_in_prologue(
+ call_conv: isa::CallConv,
+ regs: &Set<Writable<RealReg>>,
+) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
+ let mut int_saves = vec![];
+ let mut vec_saves = vec![];
+ for &reg in regs.iter() {
+ if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
+ match reg.to_reg().get_class() {
+ RegClass::I64 => int_saves.push(reg),
+ RegClass::V128 => vec_saves.push(reg),
+ _ => panic!("Unexpected RegClass"),
+ }
+ }
+ }
+ // Sort registers for deterministic code output. We can do an unstable sort because the
+ // registers will be unique (there are no dups).
+ int_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
+ vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index());
+ (int_saves, vec_saves)
+}
+
+fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool {
+ if call_conv_of_callee.extends_baldrdash() {
+ match r.get_class() {
+ RegClass::I64 => {
+ let enc = r.get_hw_encoding();
+ if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] {
+ return true;
+ }
+ // Otherwise, fall through to the native ABI's caller-saved rules below.
+ }
+ RegClass::V128 => {
+ let enc = r.get_hw_encoding();
+ if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] {
+ return true;
+ }
+ // Otherwise, fall through to the native ABI's caller-saved rules below.
+ }
+ _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"),
+ };
+ }
+
+ match r.get_class() {
+ RegClass::I64 => {
+ // x0 - x17 inclusive are caller-saves.
+ r.get_hw_encoding() <= 17
+ }
+ RegClass::V128 => {
+ // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The
+ // upper 64 bits of v8 - v15 inclusive are also caller-saves.
+ // However, because we cannot currently represent partial registers
+ // to regalloc.rs, we indicate here that every vector register is
+ // caller-save. Because this function is used at *callsites*,
+ // approximating in this direction (save more than necessary) is
+ // conservative and thus safe.
+ //
+ // Note that we set the 'not included in clobber set' flag in the
+ // regalloc.rs API when a call instruction's callee has the same ABI
+ // as the caller (the current function body); this is safe (anything
+ // clobbered by callee can be clobbered by caller as well) and
+ // avoids unnecessary saves of v8-v15 in the prologue even though we
+ // include them as defs here.
+ true
+ }
+ _ => panic!("Unexpected RegClass"),
+ }
+}
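
To summarize the two predicates above for the default (non-Baldrdash) convention: x19-x28 are saved in the prologue if clobbered, x0-x17 are treated as clobbered at call sites, and every vector register is conservatively treated as call-clobbered. A self-contained restatement for quick reference (hypothetical helper names, not part of the patch):

fn default_callee_saved_gpr(n: u8) -> bool { (19..=28).contains(&n) }
fn default_call_clobbered_gpr(n: u8) -> bool { n <= 17 }

fn main() {
    assert!(default_callee_saved_gpr(19) && default_callee_saved_gpr(28));
    assert!(default_call_clobbered_gpr(0) && default_call_clobbered_gpr(17));
    // x18 (commonly reserved as a platform register) falls in neither set here,
    // as do x29 (FP) and x30 (LR), which the prologue/epilogue handle separately.
    assert!(!default_callee_saved_gpr(18) && !default_call_clobbered_gpr(18));
}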
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs
new file mode 100644
index 0000000000..7bd181c86b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs
@@ -0,0 +1,728 @@
+//! AArch64 ISA definitions: instruction arguments.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
+use crate::ir::Type;
+use crate::isa::aarch64::inst::*;
+use crate::machinst::{ty_bits, MachLabel};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable};
+
+use core::convert::Into;
+use std::string::String;
+
+//=============================================================================
+// Instruction sub-components: shift and extend descriptors
+
+/// A shift operator for a register or immediate.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ShiftOp {
+ LSL = 0b00,
+ LSR = 0b01,
+ ASR = 0b10,
+ ROR = 0b11,
+}
+
+impl ShiftOp {
+ /// Get the encoding of this shift op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+/// A shift operator amount.
+#[derive(Clone, Copy, Debug)]
+pub struct ShiftOpShiftImm(u8);
+
+impl ShiftOpShiftImm {
+ /// Maximum shift for shifted-register operands.
+ pub const MAX_SHIFT: u64 = 63;
+
+ /// Create a new shiftop shift amount, if possible.
+ pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> {
+ if shift <= Self::MAX_SHIFT {
+ Some(ShiftOpShiftImm(shift as u8))
+ } else {
+ None
+ }
+ }
+
+ /// Return the shift amount.
+ pub fn value(self) -> u8 {
+ self.0
+ }
+
+ /// Mask down to a given number of bits.
+ pub fn mask(self, bits: u8) -> ShiftOpShiftImm {
+ ShiftOpShiftImm(self.0 & (bits - 1))
+ }
+}
+
+/// A shift operator with an amount, guaranteed to be within range.
+#[derive(Clone, Debug)]
+pub struct ShiftOpAndAmt {
+ op: ShiftOp,
+ shift: ShiftOpShiftImm,
+}
+
+impl ShiftOpAndAmt {
+ pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
+ ShiftOpAndAmt { op, shift }
+ }
+
+ /// Get the shift op.
+ pub fn op(&self) -> ShiftOp {
+ self.op
+ }
+
+ /// Get the shift amount.
+ pub fn amt(&self) -> ShiftOpShiftImm {
+ self.shift
+ }
+}
+
+/// An extend operator for a register.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ExtendOp {
+ UXTB = 0b000,
+ UXTH = 0b001,
+ UXTW = 0b010,
+ UXTX = 0b011,
+ SXTB = 0b100,
+ SXTH = 0b101,
+ SXTW = 0b110,
+ SXTX = 0b111,
+}
+
+impl ExtendOp {
+ /// Encoding of this op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+//=============================================================================
+// Instruction sub-components (memory addresses): definitions
+
+/// A reference to some memory address.
+#[derive(Clone, Debug)]
+pub enum MemLabel {
+ /// An address in the code, a constant pool or jumptable, with relative
+ /// offset from this instruction. This form must be used at emission time;
+ /// see `memlabel_finalize()` for how other forms are lowered to this one.
+ PCRel(i32),
+}
+
+/// An addressing mode specified for a load/store operation.
+#[derive(Clone, Debug)]
+pub enum AMode {
+ //
+ // Real ARM64 addressing modes:
+ //
+ /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation.
+ PostIndexed(Writable<Reg>, SImm9),
+ /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation.
+ PreIndexed(Writable<Reg>, SImm9),
+
+ // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to
+ // what the ISA calls the "register offset" addressing mode. We split out
+ // several options here for more ergonomic codegen.
+ /// Register plus register offset.
+ RegReg(Reg, Reg),
+
+ /// Register plus register offset, scaled by type's size.
+ RegScaled(Reg, Reg, Type),
+
+ /// Register plus register offset, scaled by type's size, with index sign- or zero-extended
+ /// first.
+ RegScaledExtended(Reg, Reg, Type, ExtendOp),
+
+ /// Register plus register offset, with index sign- or zero-extended first.
+ RegExtended(Reg, Reg, ExtendOp),
+
+ /// Unscaled signed 9-bit immediate offset from reg.
+ Unscaled(Reg, SImm9),
+
+ /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg.
+ UnsignedOffset(Reg, UImm12Scaled),
+
+ //
+ // virtual addressing modes that are lowered at emission time:
+ //
+ /// Reference to a "label": e.g., a symbol.
+ Label(MemLabel),
+
+ /// Arbitrary offset from a register. Converted to generation of large
+ /// offsets with multiple instructions as necessary during code emission.
+ RegOffset(Reg, i64, Type),
+
+ /// Offset from the stack pointer.
+ SPOffset(i64, Type),
+
+ /// Offset from the frame pointer.
+ FPOffset(i64, Type),
+
+ /// Offset from the "nominal stack pointer", which is where the real SP is
+ /// just after stack and spill slots are allocated in the function prologue.
+ /// At emission time, this is converted to `SPOffset` with a fixup added to
+ /// the offset constant. The fixup is a running value that is tracked as
+ /// emission iterates through instructions in linear order, and can be
+ /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
+ ///
+ /// The standard ABI is in charge of handling this (by emitting the
+ /// adjustment meta-instructions). It maintains the invariant that "nominal
+ /// SP" is where the actual SP is after the function prologue and before
+ /// clobber pushes. See the diagram in the documentation for
+ /// [the ABI module](crate::isa::aarch64::abi) for more details.
+ NominalSPOffset(i64, Type),
+}
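
A numeric illustration of `NominalSPOffset` as described above (assumed frame activity; the real adjustment comes from `Inst::VirtualSPOffsetAdj` instructions emitted by the ABI code): if the function has pushed 16 bytes onto the stack since the prologue, the running adjustment is +16, and a nominal-SP reference at offset 8 lowers to a real-SP reference at offset 24.

// Sketch of the fixup applied at emission time (see mem_finalize in emit.rs).
fn lower_nominal_sp_offset(nominal_off: i64, virtual_sp_offset: i64) -> i64 {
    nominal_off + virtual_sp_offset
}

fn main() {
    assert_eq!(lower_nominal_sp_offset(8, 16), 24);
    assert_eq!(lower_nominal_sp_offset(8, 0), 8); // no adjustment right after the prologue
}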
+
+impl AMode {
+ /// Memory reference using an address in a register.
+ pub fn reg(reg: Reg) -> AMode {
+ // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur.
+ // This also does not use PostIndexed / PreIndexed as they update the register.
+ AMode::UnsignedOffset(reg, UImm12Scaled::zero(I64))
+ }
+
+ /// Memory reference using the sum of two registers as an address.
+ pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> AMode {
+ AMode::RegReg(reg1, reg2)
+ }
+
+ /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address.
+ pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> AMode {
+ AMode::RegScaled(reg1, reg2, ty)
+ }
+
+ /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or
+ /// zero-extended as per `op`.
+ pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> AMode {
+ AMode::RegScaledExtended(reg1, reg2, ty, op)
+ }
+
+ /// Memory reference to a label: a global function or value, or data in the constant pool.
+ pub fn label(label: MemLabel) -> AMode {
+ AMode::Label(label)
+ }
+}
+
+/// A memory argument to a load/store-pair.
+#[derive(Clone, Debug)]
+pub enum PairAMode {
+ SignedOffset(Reg, SImm7Scaled),
+ PreIndexed(Writable<Reg>, SImm7Scaled),
+ PostIndexed(Writable<Reg>, SImm7Scaled),
+}
+
+//=============================================================================
+// Instruction sub-components (conditions, branches and branch targets):
+// definitions
+
+/// Condition for conditional branches.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum Cond {
+ Eq = 0,
+ Ne = 1,
+ Hs = 2,
+ Lo = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ Hi = 8,
+ Ls = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15,
+}
+
+impl Cond {
+ /// Return the inverted condition.
+ pub fn invert(self) -> Cond {
+ match self {
+ Cond::Eq => Cond::Ne,
+ Cond::Ne => Cond::Eq,
+
+ Cond::Hs => Cond::Lo,
+ Cond::Lo => Cond::Hs,
+
+ Cond::Mi => Cond::Pl,
+ Cond::Pl => Cond::Mi,
+
+ Cond::Vs => Cond::Vc,
+ Cond::Vc => Cond::Vs,
+
+ Cond::Hi => Cond::Ls,
+ Cond::Ls => Cond::Hi,
+
+ Cond::Ge => Cond::Lt,
+ Cond::Lt => Cond::Ge,
+
+ Cond::Gt => Cond::Le,
+ Cond::Le => Cond::Gt,
+
+ Cond::Al => Cond::Nv,
+ Cond::Nv => Cond::Al,
+ }
+ }
+
+ /// Return the machine encoding of this condition.
+ pub fn bits(self) -> u32 {
+ self as u32
+ }
+}
+
+/// The kind of conditional branch: the common-case-optimized "reg-is-zero" /
+/// "reg-is-nonzero" variants, or the generic one that tests the machine
+/// condition codes.
+#[derive(Clone, Copy, Debug)]
+pub enum CondBrKind {
+ /// Condition: given register is zero.
+ Zero(Reg),
+ /// Condition: given register is nonzero.
+ NotZero(Reg),
+ /// Condition: the given condition-code test is true.
+ Cond(Cond),
+}
+
+impl CondBrKind {
+ /// Return the inverted branch condition.
+ pub fn invert(self) -> CondBrKind {
+ match self {
+ CondBrKind::Zero(reg) => CondBrKind::NotZero(reg),
+ CondBrKind::NotZero(reg) => CondBrKind::Zero(reg),
+ CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()),
+ }
+ }
+}
+
+/// A branch target. Either unresolved (basic-block index) or resolved (offset
+/// from end of current instruction).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum BranchTarget {
+ /// An unresolved reference to a Label, as passed into
+ /// `lower_branch_group()`.
+ Label(MachLabel),
+ /// A fixed PC offset.
+ ResolvedOffset(i32),
+}
+
+impl BranchTarget {
+ /// Return the target's label, if it is a label-based target.
+ pub fn as_label(self) -> Option<MachLabel> {
+ match self {
+ BranchTarget::Label(l) => Some(l),
+ _ => None,
+ }
+ }
+
+ /// Return the target's offset, if specified, or zero if label-based.
+ pub fn as_offset19_or_zero(self) -> u32 {
+ let off = match self {
+ BranchTarget::ResolvedOffset(off) => off >> 2,
+ _ => 0,
+ };
+ assert!(off <= 0x3ffff);
+ assert!(off >= -0x40000);
+ (off as u32) & 0x7ffff
+ }
+
+ /// Return the target's offset, if specified, or zero if label-based.
+ pub fn as_offset26_or_zero(self) -> u32 {
+ let off = match self {
+ BranchTarget::ResolvedOffset(off) => off >> 2,
+ _ => 0,
+ };
+ assert!(off <= 0x1ffffff);
+ assert!(off >= -0x2000000);
+ (off as u32) & 0x3ffffff
+ }
+}
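
A worked example of the 19-bit offset packing above: the resolved byte offset is divided by 4 (all AArch64 instructions are 4 bytes) and masked to 19 bits, giving a conditional-branch range of roughly +/-1 MiB. Self-contained sketch (hypothetical helper, not part of the patch):

fn offset19(byte_off: i32) -> u32 {
    let insns = byte_off >> 2;
    assert!(insns <= 0x3ffff && insns >= -0x40000);
    (insns as u32) & 0x7ffff
}

fn main() {
    assert_eq!(offset19(8), 2);        // two instructions forward
    assert_eq!(offset19(-4), 0x7ffff); // one instruction back, two's complement in 19 bits
}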
+
+impl PrettyPrint for ShiftOpAndAmt {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("{:?} {}", self.op(), self.amt().value())
+ }
+}
+
+impl PrettyPrint for ExtendOp {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("{:?}", self)
+ }
+}
+
+impl PrettyPrint for MemLabel {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &MemLabel::PCRel(off) => format!("pc+{}", off),
+ }
+ }
+}
+
+fn shift_for_type(ty: Type) -> usize {
+ match ty.bytes() {
+ 1 => 0,
+ 2 => 1,
+ 4 => 2,
+ 8 => 3,
+ 16 => 4,
+ _ => panic!("unknown type: {}", ty),
+ }
+}
+
+impl PrettyPrint for AMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &AMode::Unscaled(reg, simm9) => {
+ if simm9.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &AMode::UnsignedOffset(reg, uimm12) => {
+ if uimm12.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &AMode::RegReg(r1, r2) => {
+ format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),)
+ }
+ &AMode::RegScaled(r1, r2, ty) => {
+ let shift = shift_for_type(ty);
+ format!(
+ "[{}, {}, LSL #{}]",
+ r1.show_rru(mb_rru),
+ r2.show_rru(mb_rru),
+ shift,
+ )
+ }
+ &AMode::RegScaledExtended(r1, r2, ty, op) => {
+ let shift = shift_for_type(ty);
+ let size = match op {
+ ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ };
+ let op = op.show_rru(mb_rru);
+ format!(
+ "[{}, {}, {} #{}]",
+ r1.show_rru(mb_rru),
+ show_ireg_sized(r2, mb_rru, size),
+ op,
+ shift
+ )
+ }
+ &AMode::RegExtended(r1, r2, op) => {
+ let size = match op {
+ ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ };
+ let op = op.show_rru(mb_rru);
+ format!(
+ "[{}, {}, {}]",
+ r1.show_rru(mb_rru),
+ show_ireg_sized(r2, mb_rru, size),
+ op,
+ )
+ }
+ &AMode::Label(ref label) => label.show_rru(mb_rru),
+ &AMode::PreIndexed(r, simm9) => format!(
+ "[{}, {}]!",
+ r.to_reg().show_rru(mb_rru),
+ simm9.show_rru(mb_rru)
+ ),
+ &AMode::PostIndexed(r, simm9) => format!(
+ "[{}], {}",
+ r.to_reg().show_rru(mb_rru),
+ simm9.show_rru(mb_rru)
+ ),
+ // Eliminated by `mem_finalize()`.
+ &AMode::SPOffset(..)
+ | &AMode::FPOffset(..)
+ | &AMode::NominalSPOffset(..)
+ | &AMode::RegOffset(..) => {
+ panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
+ }
+ }
+ }
+}
+
+impl PrettyPrint for PairAMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ if simm7.value != 0 {
+ format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru))
+ } else {
+ format!("[{}]", reg.show_rru(mb_rru))
+ }
+ }
+ &PairAMode::PreIndexed(reg, simm7) => format!(
+ "[{}, {}]!",
+ reg.to_reg().show_rru(mb_rru),
+ simm7.show_rru(mb_rru)
+ ),
+ &PairAMode::PostIndexed(reg, simm7) => format!(
+ "[{}], {}",
+ reg.to_reg().show_rru(mb_rru),
+ simm7.show_rru(mb_rru)
+ ),
+ }
+ }
+}
+
+impl PrettyPrint for Cond {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let mut s = format!("{:?}", self);
+ s.make_ascii_lowercase();
+ s
+ }
+}
+
+impl PrettyPrint for BranchTarget {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &BranchTarget::Label(label) => format!("label{:?}", label.get()),
+ &BranchTarget::ResolvedOffset(off) => format!("{}", off),
+ }
+ }
+}
+
+/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and
+/// 64-bit variants of many instructions (and integer registers).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum OperandSize {
+ Size32,
+ Size64,
+}
+
+impl OperandSize {
+ /// 32-bit case?
+ pub fn is32(self) -> bool {
+ self == OperandSize::Size32
+ }
+ /// 64-bit case?
+ pub fn is64(self) -> bool {
+ self == OperandSize::Size64
+ }
+ /// Convert from an `is32` boolean flag to an `OperandSize`.
+ pub fn from_is32(is32: bool) -> OperandSize {
+ if is32 {
+ OperandSize::Size32
+ } else {
+ OperandSize::Size64
+ }
+ }
+ /// Convert from a needed width to the smallest size that fits.
+ pub fn from_bits<I: Into<usize>>(bits: I) -> OperandSize {
+ let bits: usize = bits.into();
+ assert!(bits <= 64);
+ if bits <= 32 {
+ OperandSize::Size32
+ } else {
+ OperandSize::Size64
+ }
+ }
+
+ /// Convert from an integer type into the smallest size that fits.
+ pub fn from_ty(ty: Type) -> OperandSize {
+ Self::from_bits(ty_bits(ty))
+ }
+
+ /// Convert to I32, I64, or I128.
+ pub fn to_ty(self) -> Type {
+ match self {
+ OperandSize::Size32 => I32,
+ OperandSize::Size64 => I64,
+ }
+ }
+
+ pub fn sf_bit(&self) -> u32 {
+ match self {
+ OperandSize::Size32 => 0,
+ OperandSize::Size64 => 1,
+ }
+ }
+}
+
+/// Type used to communicate the size of a scalar SIMD & FP operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ScalarSize {
+ Size8,
+ Size16,
+ Size32,
+ Size64,
+ Size128,
+}
+
+impl ScalarSize {
+ /// Convert from a needed width to the smallest size that fits.
+ pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
+ match bits.into().next_power_of_two() {
+ 8 => ScalarSize::Size8,
+ 16 => ScalarSize::Size16,
+ 32 => ScalarSize::Size32,
+ 64 => ScalarSize::Size64,
+ 128 => ScalarSize::Size128,
+ w => panic!("Unexpected type width: {}", w),
+ }
+ }
+
+ /// Convert to an integer operand size.
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ ScalarSize::Size32 => OperandSize::Size32,
+ ScalarSize::Size64 => OperandSize::Size64,
+ _ => panic!("Unexpected operand_size request for: {:?}", self),
+ }
+ }
+
+ /// Convert from a type into the smallest size that fits.
+ pub fn from_ty(ty: Type) -> ScalarSize {
+ Self::from_bits(ty_bits(ty))
+ }
+
+ /// Return the encoding bits that are used by some scalar FP instructions
+ /// for a particular operand size.
+ pub fn ftype(&self) -> u32 {
+ match self {
+ ScalarSize::Size16 => 0b11,
+ ScalarSize::Size32 => 0b00,
+ ScalarSize::Size64 => 0b01,
+ _ => panic!("Unexpected scalar FP operand size: {:?}", self),
+ }
+ }
+}
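
Note that `from_bits` rounds the requested width up to the next power of two before matching, so an in-between width such as 12 maps to `Size16`. A tiny runnable check of that rounding step:

fn main() {
    assert_eq!(12usize.next_power_of_two(), 16); // -> ScalarSize::Size16
    assert_eq!(32usize.next_power_of_two(), 32); // -> ScalarSize::Size32
    assert_eq!(33usize.next_power_of_two(), 64); // -> ScalarSize::Size64
}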
+
+/// Type used to communicate the size of a vector operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum VectorSize {
+ Size8x8,
+ Size8x16,
+ Size16x4,
+ Size16x8,
+ Size32x2,
+ Size32x4,
+ Size64x2,
+}
+
+impl VectorSize {
+ /// Get the vector operand size with the given scalar size as lane size.
+ pub fn from_lane_size(size: ScalarSize, is_128bit: bool) -> VectorSize {
+ match (size, is_128bit) {
+ (ScalarSize::Size8, false) => VectorSize::Size8x8,
+ (ScalarSize::Size8, true) => VectorSize::Size8x16,
+ (ScalarSize::Size16, false) => VectorSize::Size16x4,
+ (ScalarSize::Size16, true) => VectorSize::Size16x8,
+ (ScalarSize::Size32, false) => VectorSize::Size32x2,
+ (ScalarSize::Size32, true) => VectorSize::Size32x4,
+ (ScalarSize::Size64, true) => VectorSize::Size64x2,
+ _ => panic!("Unexpected scalar FP operand size: {:?}", size),
+ }
+ }
+
+ /// Convert from a type into a vector operand size.
+ pub fn from_ty(ty: Type) -> VectorSize {
+ match ty {
+ B8X16 => VectorSize::Size8x16,
+ B16X8 => VectorSize::Size16x8,
+ B32X4 => VectorSize::Size32x4,
+ B64X2 => VectorSize::Size64x2,
+ F32X2 => VectorSize::Size32x2,
+ F32X4 => VectorSize::Size32x4,
+ F64X2 => VectorSize::Size64x2,
+ I8X8 => VectorSize::Size8x8,
+ I8X16 => VectorSize::Size8x16,
+ I16X4 => VectorSize::Size16x4,
+ I16X8 => VectorSize::Size16x8,
+ I32X2 => VectorSize::Size32x2,
+ I32X4 => VectorSize::Size32x4,
+ I64X2 => VectorSize::Size64x2,
+ _ => unimplemented!("Unsupported type: {}", ty),
+ }
+ }
+
+ /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ VectorSize::Size64x2 => OperandSize::Size64,
+ _ => OperandSize::Size32,
+ }
+ }
+
+ /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
+ pub fn lane_size(&self) -> ScalarSize {
+ match self {
+ VectorSize::Size8x8 => ScalarSize::Size8,
+ VectorSize::Size8x16 => ScalarSize::Size8,
+ VectorSize::Size16x4 => ScalarSize::Size16,
+ VectorSize::Size16x8 => ScalarSize::Size16,
+ VectorSize::Size32x2 => ScalarSize::Size32,
+ VectorSize::Size32x4 => ScalarSize::Size32,
+ VectorSize::Size64x2 => ScalarSize::Size64,
+ }
+ }
+
+ pub fn is_128bits(&self) -> bool {
+ match self {
+ VectorSize::Size8x8 => false,
+ VectorSize::Size8x16 => true,
+ VectorSize::Size16x4 => false,
+ VectorSize::Size16x8 => true,
+ VectorSize::Size32x2 => false,
+ VectorSize::Size32x4 => true,
+ VectorSize::Size64x2 => true,
+ }
+ }
+
+ /// Produces a `VectorSize` with lanes twice as wide. Note that if the resulting
+ /// size would exceed 128 bits, then the number of lanes is also halved, so as to
+ /// ensure that the result size is at most 128 bits.
+ pub fn widen(&self) -> VectorSize {
+ match self {
+ VectorSize::Size8x8 => VectorSize::Size16x8,
+ VectorSize::Size8x16 => VectorSize::Size16x8,
+ VectorSize::Size16x4 => VectorSize::Size32x4,
+ VectorSize::Size16x8 => VectorSize::Size32x4,
+ VectorSize::Size32x2 => VectorSize::Size64x2,
+ VectorSize::Size32x4 => VectorSize::Size64x2,
+ VectorSize::Size64x2 => unreachable!(),
+ }
+ }
+
+ /// Produces a `VectorSize` that has the same lane width, but half as many lanes.
+ pub fn halve(&self) -> VectorSize {
+ match self {
+ VectorSize::Size8x16 => VectorSize::Size8x8,
+ VectorSize::Size16x8 => VectorSize::Size16x4,
+ VectorSize::Size32x4 => VectorSize::Size32x2,
+ _ => *self,
+ }
+ }
+
+ /// Return the encoding bits that are used by some SIMD instructions
+ /// for a particular operand size.
+ pub fn enc_size(&self) -> (u32, u32) {
+ let q = self.is_128bits() as u32;
+ let size = match self.lane_size() {
+ ScalarSize::Size8 => 0b00,
+ ScalarSize::Size16 => 0b01,
+ ScalarSize::Size32 => 0b10,
+ ScalarSize::Size64 => 0b11,
+ _ => unreachable!(),
+ };
+
+ (q, size)
+ }
+}
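
A quick check of the (q, size) pair produced by `enc_size` above, assuming the same mapping it uses (lane size 8/16/32/64 maps to 0b00/0b01/0b10/0b11, and q = 1 for 128-bit vectors). Hypothetical standalone helper, not the real method:

fn enc_size(is_128: bool, lane_bits: u32) -> (u32, u32) {
    let q = is_128 as u32;
    let size = match lane_bits {
        8 => 0b00,
        16 => 0b01,
        32 => 0b10,
        64 => 0b11,
        _ => unreachable!(),
    };
    (q, size)
}

fn main() {
    assert_eq!(enc_size(true, 32), (1, 0b10));  // VectorSize::Size32x4
    assert_eq!(enc_size(false, 16), (0, 0b01)); // VectorSize::Size16x4
}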
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs
new file mode 100644
index 0000000000..5d0270dade
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs
@@ -0,0 +1,2359 @@
+//! AArch64 ISA: binary code emission.
+
+use crate::binemit::{CodeOffset, Reloc, StackMap};
+use crate::ir::constant::ConstantData;
+use crate::ir::types::*;
+use crate::ir::{MemFlags, TrapCode};
+use crate::isa::aarch64::inst::*;
+use crate::machinst::ty_bits;
+
+use regalloc::{Reg, RegClass, Writable};
+
+use core::convert::TryFrom;
+use log::debug;
+
+/// Memory label/reference finalization: convert a MemLabel to a PC-relative
+/// offset, possibly emitting relocation(s) as necessary.
+pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 {
+ match label {
+ &MemLabel::PCRel(rel) => rel,
+ }
+}
+
+/// Memory addressing mode finalization: convert "special" modes (e.g.,
+/// generic arbitrary stack offset) into real addressing modes, possibly by
+/// emitting some helper instructions that come immediately before the use
+/// of this amode.
+pub fn mem_finalize(
+ insn_off: CodeOffset,
+ mem: &AMode,
+ state: &EmitState,
+) -> (SmallVec<[Inst; 4]>, AMode) {
+ match mem {
+ &AMode::RegOffset(_, off, ty)
+ | &AMode::SPOffset(off, ty)
+ | &AMode::FPOffset(off, ty)
+ | &AMode::NominalSPOffset(off, ty) => {
+ let basereg = match mem {
+ &AMode::RegOffset(reg, _, _) => reg,
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(),
+ &AMode::FPOffset(..) => fp_reg(),
+ _ => unreachable!(),
+ };
+ let adj = match mem {
+ &AMode::NominalSPOffset(..) => {
+ debug!(
+ "mem_finalize: nominal SP offset {} + adj {} -> {}",
+ off,
+ state.virtual_sp_offset,
+ off + state.virtual_sp_offset
+ );
+ state.virtual_sp_offset
+ }
+ _ => 0,
+ };
+ let off = off + adj;
+
+ if let Some(simm9) = SImm9::maybe_from_i64(off) {
+ let mem = AMode::Unscaled(basereg, simm9);
+ (smallvec![], mem)
+ } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) {
+ let mem = AMode::UnsignedOffset(basereg, uimm12s);
+ (smallvec![], mem)
+ } else {
+ let tmp = writable_spilltmp_reg();
+ let mut const_insts = Inst::load_constant(tmp, off as u64);
+ // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form
+ // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP
+ // is a valid base (for SPOffset) which we must handle here.
+ // Also, SP needs to be the first arg, not second.
+ let add_inst = Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: basereg,
+ rm: tmp.to_reg(),
+ extendop: ExtendOp::UXTX,
+ };
+ const_insts.push(add_inst);
+ (const_insts, AMode::reg(tmp.to_reg()))
+ }
+ }
+
+ &AMode::Label(ref label) => {
+ let off = memlabel_finalize(insn_off, label);
+ (smallvec![], AMode::Label(MemLabel::PCRel(off)))
+ }
+
+ _ => (smallvec![], mem.clone()),
+ }
+}
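
The decision in the first arm above boils down to a range check on the final offset: a signed 9-bit byte offset uses the unscaled form, an unsigned 12-bit offset scaled by the access size uses the scaled form, and anything else materializes the offset in the spill temporary and adds it to the base register. A self-contained sketch of that classification (assumed immediate ranges; the real checks live in `SImm9` and `UImm12Scaled`):

fn classify(off: i64, access_bytes: i64) -> &'static str {
    if (-256..=255).contains(&off) {
        "AMode::Unscaled (ldur/stur, SImm9)"
    } else if off >= 0 && off % access_bytes == 0 && off / access_bytes <= 4095 {
        "AMode::UnsignedOffset (ldr/str, UImm12Scaled)"
    } else {
        "load offset into spilltmp, AluRRRExtend add, then AMode::reg"
    }
}

fn main() {
    assert_eq!(classify(-8, 8), "AMode::Unscaled (ldur/stur, SImm9)");
    assert_eq!(classify(32760, 8), "AMode::UnsignedOffset (ldr/str, UImm12Scaled)");
    assert_eq!(classify(1 << 20, 8), "load offset into spilltmp, AluRRRExtend add, then AMode::reg");
}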
+
+/// Helper: get a ConstantData from a u64.
+pub fn u64_constant(bits: u64) -> ConstantData {
+ let data = bits.to_le_bytes();
+ ConstantData::from(&data[..])
+}
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+fn machreg_to_gpr(m: Reg) -> u32 {
+ assert_eq!(m.get_class(), RegClass::I64);
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_vec(m: Reg) -> u32 {
+ assert_eq!(m.get_class(), RegClass::V128);
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_gpr_or_vec(m: Reg) -> u32 {
+ u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ (bits_31_21 << 21)
+ | (bits_15_10 << 10)
+ | machreg_to_gpr(rd.to_reg())
+ | (machreg_to_gpr(rn) << 5)
+ | (machreg_to_gpr(rm) << 16)
+}
+
+fn enc_arith_rr_imm12(
+ bits_31_24: u32,
+ immshift: u32,
+ imm12: u32,
+ rn: Reg,
+ rd: Writable<Reg>,
+) -> u32 {
+ (bits_31_24 << 24)
+ | (immshift << 22)
+ | (imm12 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (top11 << 21)
+ | (machreg_to_gpr(rm) << 16)
+ | (bit15 << 15)
+ | (machreg_to_gpr(ra) << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 {
+ assert!(off_26_0 < (1 << 26));
+ (op_31_26 << 26) | off_26_0
+}
+
+fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 {
+ assert!(off_18_0 < (1 << 19));
+ (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg)
+}
+
+fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 {
+ assert!(off_18_0 < (1 << 19));
+ assert!(cond < (1 << 4));
+ (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond
+}
+
+fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 {
+ match kind {
+ CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg),
+ CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg),
+ CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()),
+ }
+}
+
+const MOVE_WIDE_FIXED: u32 = 0x12800000;
+
+#[repr(u32)]
+enum MoveWideOpcode {
+ MOVN = 0b00,
+ MOVZ = 0b10,
+ MOVK = 0b11,
+}
+
+fn enc_move_wide(
+ op: MoveWideOpcode,
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+) -> u32 {
+ assert!(imm.shift <= 0b11);
+ MOVE_WIDE_FIXED
+ | size.sf_bit() << 31
+ | (op as u32) << 29
+ | u32::from(imm.shift) << 21
+ | u32::from(imm.bits) << 5
+ | machreg_to_gpr(rd.to_reg())
+}
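
As a concrete encoding check for the helper above (assumed operands, computed with the same bit layout): `MOVZ x0, #1` at the 64-bit operand size should come out as 0xD2800020.

fn main() {
    const MOVE_WIDE_FIXED: u32 = 0x12800000;
    let (sf, opc) = (1u32, 0b10u32);             // 64-bit, MOVZ
    let (shift, imm16, rd) = (0u32, 1u32, 0u32); // #1, no shift, destination x0
    let insn = MOVE_WIDE_FIXED | sf << 31 | opc << 29 | shift << 21 | imm16 << 5 | rd;
    assert_eq!(insn, 0xD280_0020); // movz x0, #1
}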
+
+fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (simm7.bits() << 15)
+ | (machreg_to_gpr(rt2) << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt)
+}
+
+fn enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (simm9.bits() << 12)
+ | (op_11_10 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 {
+ (op_31_22 << 22)
+ | (0b1 << 24)
+ | (uimm12.bits() << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_reg(
+ op_31_22: u32,
+ rn: Reg,
+ rm: Reg,
+ s_bit: bool,
+ extendop: Option<ExtendOp>,
+ rd: Reg,
+) -> u32 {
+ let s_bit = if s_bit { 1 } else { 0 };
+ let extend_bits = match extendop {
+ Some(ExtendOp::UXTW) => 0b010,
+ Some(ExtendOp::SXTW) => 0b110,
+ Some(ExtendOp::SXTX) => 0b111,
+ None => 0b011, // LSL
+ _ => panic!("bad extend mode for ld/st AMode"),
+ };
+ (op_31_22 << 22)
+ | (1 << 21)
+ | (machreg_to_gpr(rm) << 16)
+ | (extend_bits << 13)
+ | (s_bit << 12)
+ | (0b10 << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
+ (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
+}
+
+fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
+ debug_assert_eq!(q & 0b1, q);
+ debug_assert_eq!(size & 0b11, size);
+ 0b0_0_0011010_10_00000_110_0_00_00000_00000
+ | q << 30
+ | size << 10
+ | machreg_to_gpr(rn) << 5
+ | machreg_to_vec(rt.to_reg())
+}
+
+fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (top11 << 21)
+ | (machreg_to_vec(rm) << 16)
+ | (bit15_10 << 10)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
+ (0b01011010110 << 21)
+ | size << 31
+ | opcode2 << 16
+ | opcode1 << 10
+ | machreg_to_gpr(rn) << 5
+ | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_br(rn: Reg) -> u32 {
+ 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)
+}
+
+fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 {
+ let off = u32::try_from(off).unwrap();
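+ // ADR encodes a 21-bit PC-relative offset split into `immlo` (bits 30:29)
+ // and `immhi` (bits 23:5).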
+ let immlo = off & 3;
+ let immhi = (off >> 2) & ((1 << 19) - 1);
+ (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 {
+ 0b100_11010100_00000_0000_00_00000_00000
+ | (machreg_to_gpr(rm) << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg())
+ | (cond.bits() << 12)
+}
+
+fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 {
+ 0b000_11110_00_1_00000_0000_11_00000_00000
+ | (size.ftype() << 22)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+ | (cond.bits() << 12)
+}
+
+fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
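+ // CSET is an alias of CSINC rd, xzr, xzr, <inverted condition>, hence the
+ // inverted condition bits below.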
+ 0b100_11010100_11111_0000_01_11111_00000
+ | machreg_to_gpr(rd.to_reg())
+ | (cond.invert().bits() << 12)
+}
+
+fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
+ 0b0_1_1_11010010_00000_0000_10_00000_0_0000
+ | size.sf_bit() << 31
+ | imm.bits() << 16
+ | cond.bits() << 12
+ | machreg_to_gpr(rn) << 5
+ | nzcv.bits()
+}
+
+fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
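+ // MOV (vector) is an alias of ORR Vd.<T>, Vn.<T>, Vn.<T>, so `rn` is encoded
+ // in both the Rn and Rm fields.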
+ 0b00001110_101_00000_00011_1_00000_00000
+ | ((is_16b as u32) << 30)
+ | machreg_to_vec(rd.to_reg())
+ | (machreg_to_vec(rn) << 16)
+ | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fpurr(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ (top22 << 10)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 {
+ (top17 << 15)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(ra) << 10)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 {
+ 0b000_11110_00_1_00000_00_1000_00000_00000
+ | (size.ftype() << 22)
+ | (machreg_to_vec(rm) << 16)
+ | (machreg_to_vec(rn) << 5)
+}
+
+fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg())
+}
+
+fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ debug_assert_eq!(qu & 0b11, qu);
+ debug_assert_eq!(size & 0b11, size);
+ debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
+ let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
+ bits | qu << 29
+ | size << 22
+ | bits_12_16 << 12
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+ debug_assert_eq!(q & 0b1, q);
+ debug_assert_eq!(u & 0b1, u);
+ debug_assert_eq!(size & 0b11, size);
+ debug_assert_eq!(opcode & 0b11111, opcode);
+ 0b0_0_0_01110_00_11000_0_0000_10_00000_00000
+ | q << 30
+ | u << 29
+ | size << 22
+ | opcode << 12
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
+ debug_assert_eq!(len & 0b11, len);
+ 0b0_1_001110_000_00000_0_00_0_00_00000_00000
+ | (machreg_to_vec(rm) << 16)
+ | len << 13
+ | (is_extension as u32) << 12
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+fn enc_dmb_ish() -> u32 {
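+ // `dmb ish`: data memory barrier, inner-shareable domain.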
+ 0xD5033BBF
+}
+
+fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 {
+ let sz = match ty {
+ I64 => 0b11,
+ I32 => 0b10,
+ I16 => 0b01,
+ I8 => 0b00,
+ _ => unreachable!(),
+ };
+ 0b00001000_01011111_01111100_00000000
+ | (sz << 30)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt.to_reg())
+}
+
+fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
+ let sz = match ty {
+ I64 => 0b11,
+ I32 => 0b10,
+ I16 => 0b01,
+ I8 => 0b00,
+ _ => unreachable!(),
+ };
+ 0b00001000_00000000_01111100_00000000
+ | (sz << 30)
+ | (machreg_to_gpr(rs.to_reg()) << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rt)
+}
+
+fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
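+ // The 8-bit immediate is split into `abc` (top 3 bits, at bits 18:16) and
+ // `defgh` (low 5 bits, at bits 9:5), per the AdvSIMD modified-immediate format.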
+ let abc = (imm >> 5) as u32;
+ let defgh = (imm & 0b11111) as u32;
+
+ debug_assert_eq!(cmode & 0b1111, cmode);
+ debug_assert_eq!(q_op & 0b11, q_op);
+
+ 0b0_0_0_0111100000_000_0000_01_00000_00000
+ | (q_op << 29)
+ | (abc << 16)
+ | (cmode << 12)
+ | (defgh << 5)
+ | machreg_to_vec(rd.to_reg())
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+ /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+ /// program point.
+ pub(crate) virtual_sp_offset: i64,
+ /// Offset of FP from nominal-SP.
+ pub(crate) nominal_sp_to_fp: i64,
+ /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+ stack_map: Option<StackMap>,
+ /// Current source-code location corresponding to instruction to be emitted.
+ cur_srcloc: SourceLoc,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+ fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+ EmitState {
+ virtual_sp_offset: 0,
+ nominal_sp_to_fp: abi.frame_size() as i64,
+ stack_map: None,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ fn pre_safepoint(&mut self, stack_map: StackMap) {
+ self.stack_map = Some(stack_map);
+ }
+
+ fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+}
+
+impl EmitState {
+ fn take_stack_map(&mut self) -> Option<StackMap> {
+ self.stack_map.take()
+ }
+
+ fn clear_post_insn(&mut self) {
+ self.stack_map = None;
+ }
+
+ fn cur_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+}
+
+/// Constant state used during function compilation.
+pub struct EmitInfo(settings::Flags);
+
+impl EmitInfo {
+ pub(crate) fn new(flags: settings::Flags) -> Self {
+ Self(flags)
+ }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+ fn flags(&self) -> &settings::Flags {
+ &self.0
+ }
+}
+
+impl MachInstEmit for Inst {
+ type State = EmitState;
+ type Info = EmitInfo;
+ type UnwindInfo = super::unwind::AArch64UnwindInfo;
+
+ fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+ // N.B.: we *must* not exceed the "worst-case size" used to compute
+ // where to insert islands, except when islands are explicitly triggered
+ // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+ // to allow disabling the check for `JTSequence`, which is always
+ // emitted following an `EmitIsland`.
+ let mut start_off = sink.cur_offset();
+
+ match self {
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let top11 = match alu_op {
+ ALUOp::Add32 => 0b00001011_000,
+ ALUOp::Add64 => 0b10001011_000,
+ ALUOp::Sub32 => 0b01001011_000,
+ ALUOp::Sub64 => 0b11001011_000,
+ ALUOp::Orr32 => 0b00101010_000,
+ ALUOp::Orr64 => 0b10101010_000,
+ ALUOp::And32 => 0b00001010_000,
+ ALUOp::And64 => 0b10001010_000,
+ ALUOp::Eor32 => 0b01001010_000,
+ ALUOp::Eor64 => 0b11001010_000,
+ ALUOp::OrrNot32 => 0b00101010_001,
+ ALUOp::OrrNot64 => 0b10101010_001,
+ ALUOp::AndNot32 => 0b00001010_001,
+ ALUOp::AndNot64 => 0b10001010_001,
+ ALUOp::EorNot32 => 0b01001010_001,
+ ALUOp::EorNot64 => 0b11001010_001,
+ ALUOp::AddS32 => 0b00101011_000,
+ ALUOp::AddS64 => 0b10101011_000,
+ ALUOp::SubS32 => 0b01101011_000,
+ ALUOp::SubS64 => 0b11101011_000,
+ ALUOp::SDiv64 => 0b10011010_110,
+ ALUOp::UDiv64 => 0b10011010_110,
+ ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
+ ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110,
+ ALUOp::SMulH => 0b10011011_010,
+ ALUOp::UMulH => 0b10011011_110,
+ };
+ let bit15_10 = match alu_op {
+ ALUOp::SDiv64 => 0b000011,
+ ALUOp::UDiv64 => 0b000010,
+ ALUOp::RotR32 | ALUOp::RotR64 => 0b001011,
+ ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
+ ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
+ ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
+ ALUOp::SMulH | ALUOp::UMulH => 0b011111,
+ _ => 0b000000,
+ };
+ debug_assert_ne!(writable_stack_reg(), rd);
+ // In these operand positions, register 31 encodes the zero register rather
+ // than SP, so seeing the stack pointer here likely indicates a bug.
+ debug_assert_ne!(stack_reg(), rn);
+ debug_assert_ne!(stack_reg(), rm);
+ sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd,
+ rm,
+ rn,
+ ra,
+ } => {
+ let (top11, bit15) = match alu_op {
+ ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
+ ALUOp3::MSub32 => (0b0_00_11011_000, 1),
+ ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
+ ALUOp3::MSub64 => (0b1_00_11011_000, 1),
+ };
+ sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ ref imm12,
+ } => {
+ let top8 = match alu_op {
+ ALUOp::Add32 => 0b000_10001,
+ ALUOp::Add64 => 0b100_10001,
+ ALUOp::Sub32 => 0b010_10001,
+ ALUOp::Sub64 => 0b110_10001,
+ ALUOp::AddS32 => 0b001_10001,
+ ALUOp::AddS64 => 0b101_10001,
+ ALUOp::SubS32 => 0b011_10001,
+ ALUOp::SubS64 => 0b111_10001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ sink.put4(enc_arith_rr_imm12(
+ top8,
+ imm12.shift_bits(),
+ imm12.imm_bits(),
+ rn,
+ rd,
+ ));
+ }
+ &Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ ref imml,
+ } => {
+ let (top9, inv) = match alu_op {
+ ALUOp::Orr32 => (0b001_100100, false),
+ ALUOp::Orr64 => (0b101_100100, false),
+ ALUOp::And32 => (0b000_100100, false),
+ ALUOp::And64 => (0b100_100100, false),
+ ALUOp::Eor32 => (0b010_100100, false),
+ ALUOp::Eor64 => (0b110_100100, false),
+ ALUOp::OrrNot32 => (0b001_100100, true),
+ ALUOp::OrrNot64 => (0b101_100100, true),
+ ALUOp::AndNot32 => (0b000_100100, true),
+ ALUOp::AndNot64 => (0b100_100100, true),
+ ALUOp::EorNot32 => (0b010_100100, true),
+ ALUOp::EorNot64 => (0b110_100100, true),
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let imml = if inv { imml.invert() } else { imml.clone() };
+ sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd));
+ }
+
+ &Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ ref immshift,
+ } => {
+ let amt = immshift.value();
+ let (top10, immr, imms) = match alu_op {
+ ALUOp::RotR32 => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)),
+ ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)),
+ ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111),
+ ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111),
+ ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111),
+ ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111),
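+ // LSL is an alias of UBFM with immr = (size - amt) % size and imms = size - 1 - amt.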
+ ALUOp::Lsl32 => (
+ 0b0101001100,
+ u32::from((32 - amt) % 32),
+ u32::from(31 - amt),
+ ),
+ ALUOp::Lsl64 => (
+ 0b1101001101,
+ u32::from((64 - amt) % 64),
+ u32::from(63 - amt),
+ ),
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ sink.put4(
+ (top10 << 22)
+ | (immr << 16)
+ | (imms << 10)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shiftop,
+ } => {
+ let top11: u32 = match alu_op {
+ ALUOp::Add32 => 0b000_01011000,
+ ALUOp::Add64 => 0b100_01011000,
+ ALUOp::AddS32 => 0b001_01011000,
+ ALUOp::AddS64 => 0b101_01011000,
+ ALUOp::Sub32 => 0b010_01011000,
+ ALUOp::Sub64 => 0b110_01011000,
+ ALUOp::SubS32 => 0b011_01011000,
+ ALUOp::SubS64 => 0b111_01011000,
+ ALUOp::Orr32 => 0b001_01010000,
+ ALUOp::Orr64 => 0b101_01010000,
+ ALUOp::And32 => 0b000_01010000,
+ ALUOp::And64 => 0b100_01010000,
+ ALUOp::Eor32 => 0b010_01010000,
+ ALUOp::Eor64 => 0b110_01010000,
+ ALUOp::OrrNot32 => 0b001_01010001,
+ ALUOp::OrrNot64 => 0b101_01010001,
+ ALUOp::EorNot32 => 0b010_01010001,
+ ALUOp::EorNot64 => 0b110_01010001,
+ ALUOp::AndNot32 => 0b000_01010001,
+ ALUOp::AndNot64 => 0b100_01010001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let top11 = top11 | (u32::from(shiftop.op().bits()) << 1);
+ let bits_15_10 = u32::from(shiftop.amt().value());
+ sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
+ }
+
+ &Inst::AluRRRExtend {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ extendop,
+ } => {
+ let top11: u32 = match alu_op {
+ ALUOp::Add32 => 0b00001011001,
+ ALUOp::Add64 => 0b10001011001,
+ ALUOp::Sub32 => 0b01001011001,
+ ALUOp::Sub64 => 0b11001011001,
+ ALUOp::AddS32 => 0b00101011001,
+ ALUOp::AddS64 => 0b10101011001,
+ ALUOp::SubS32 => 0b01101011001,
+ ALUOp::SubS64 => 0b11101011001,
+ _ => unimplemented!("{:?}", alu_op),
+ };
+ let bits_15_10 = u32::from(extendop.bits()) << 3;
+ sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
+ }
+
+ &Inst::BitRR { op, rd, rn, .. } => {
+ let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
+ let (op1, op2) = match op {
+ BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
+ BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
+ BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
+ };
+ sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
+ }
+
+ &Inst::ULoad8 { rd, ref mem, flags }
+ | &Inst::SLoad8 { rd, ref mem, flags }
+ | &Inst::ULoad16 { rd, ref mem, flags }
+ | &Inst::SLoad16 { rd, ref mem, flags }
+ | &Inst::ULoad32 { rd, ref mem, flags }
+ | &Inst::SLoad32 { rd, ref mem, flags }
+ | &Inst::ULoad64 {
+ rd, ref mem, flags, ..
+ }
+ | &Inst::FpuLoad32 { rd, ref mem, flags }
+ | &Inst::FpuLoad64 { rd, ref mem, flags }
+ | &Inst::FpuLoad128 { rd, ref mem, flags } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ // ldst encoding helpers take Reg, not Writable<Reg>.
+ let rd = rd.to_reg();
+
+ // This is the base opcode (top 10 bits) for the "unscaled
+ // immediate" form (Unscaled). Other addressing modes OR in
+ // further bits (e.g. bit 24 for the scaled unsigned-immediate form).
+ let (op, bits) = match self {
+ &Inst::ULoad8 { .. } => (0b0011100001, 8),
+ &Inst::SLoad8 { .. } => (0b0011100010, 8),
+ &Inst::ULoad16 { .. } => (0b0111100001, 16),
+ &Inst::SLoad16 { .. } => (0b0111100010, 16),
+ &Inst::ULoad32 { .. } => (0b1011100001, 32),
+ &Inst::SLoad32 { .. } => (0b1011100010, 32),
+ &Inst::ULoad64 { .. } => (0b1111100001, 64),
+ &Inst::FpuLoad32 { .. } => (0b1011110001, 32),
+ &Inst::FpuLoad64 { .. } => (0b1111110001, 64),
+ &Inst::FpuLoad128 { .. } => (0b0011110011, 128),
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ match &mem {
+ &AMode::Unscaled(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
+ }
+ &AMode::UnsignedOffset(reg, uimm12scaled) => {
+ if uimm12scaled.value() != 0 {
+ assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+ }
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
+ }
+ &AMode::RegReg(r1, r2) => {
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
+ ));
+ }
+ &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => {
+ assert_eq!(bits, ty_bits(ty));
+ let extendop = match &mem {
+ &AMode::RegScaled(..) => None,
+ &AMode::RegScaledExtended(_, _, _, op) => Some(op),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ true, extendop, rd,
+ ));
+ }
+ &AMode::RegExtended(r1, r2, extendop) => {
+ sink.put4(enc_ldst_reg(
+ op,
+ r1,
+ r2,
+ /* scaled = */ false,
+ Some(extendop),
+ rd,
+ ));
+ }
+ &AMode::Label(ref label) => {
+ let offset = match label {
+ // cast i32 to u32 (two's-complement)
+ &MemLabel::PCRel(off) => off as u32,
+ } / 4;
+ assert!(offset < (1 << 19));
+ match self {
+ &Inst::ULoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b00011000, offset, rd));
+ }
+ &Inst::SLoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b10011000, offset, rd));
+ }
+ &Inst::FpuLoad32 { .. } => {
+ sink.put4(enc_ldst_imm19(0b00011100, offset, rd));
+ }
+ &Inst::ULoad64 { .. } => {
+ sink.put4(enc_ldst_imm19(0b01011000, offset, rd));
+ }
+ &Inst::FpuLoad64 { .. } => {
+ sink.put4(enc_ldst_imm19(0b01011100, offset, rd));
+ }
+ &Inst::FpuLoad128 { .. } => {
+ sink.put4(enc_ldst_imm19(0b10011100, offset, rd));
+ }
+ _ => panic!("Unspported size for LDR from constant pool!"),
+ }
+ }
+ &AMode::PreIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
+ }
+ &AMode::PostIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
+ }
+ // Eliminated by `mem_finalize()` above.
+ &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
+ panic!("Should not see stack-offset here!")
+ }
+ &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
+ }
+ }
+
+ &Inst::Store8 { rd, ref mem, flags }
+ | &Inst::Store16 { rd, ref mem, flags }
+ | &Inst::Store32 { rd, ref mem, flags }
+ | &Inst::Store64 { rd, ref mem, flags }
+ | &Inst::FpuStore32 { rd, ref mem, flags }
+ | &Inst::FpuStore64 { rd, ref mem, flags }
+ | &Inst::FpuStore128 { rd, ref mem, flags } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ let (op, bits) = match self {
+ &Inst::Store8 { .. } => (0b0011100000, 8),
+ &Inst::Store16 { .. } => (0b0111100000, 16),
+ &Inst::Store32 { .. } => (0b1011100000, 32),
+ &Inst::Store64 { .. } => (0b1111100000, 64),
+ &Inst::FpuStore32 { .. } => (0b1011110000, 32),
+ &Inst::FpuStore64 { .. } => (0b1111110000, 64),
+ &Inst::FpuStore128 { .. } => (0b0011110010, 128),
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ match &mem {
+ &AMode::Unscaled(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd));
+ }
+ &AMode::UnsignedOffset(reg, uimm12scaled) => {
+ if uimm12scaled.value() != 0 {
+ assert_eq!(bits, ty_bits(uimm12scaled.scale_ty()));
+ }
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd));
+ }
+ &AMode::RegReg(r1, r2) => {
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd,
+ ));
+ }
+ &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => {
+ let extendop = match &mem {
+ &AMode::RegScaled(..) => None,
+ &AMode::RegScaledExtended(_, _, _, op) => Some(op),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_ldst_reg(
+ op, r1, r2, /* scaled = */ true, extendop, rd,
+ ));
+ }
+ &AMode::RegExtended(r1, r2, extendop) => {
+ sink.put4(enc_ldst_reg(
+ op,
+ r1,
+ r2,
+ /* scaled = */ false,
+ Some(extendop),
+ rd,
+ ));
+ }
+ &AMode::Label(..) => {
+ panic!("Store to a MemLabel not implemented!");
+ }
+ &AMode::PreIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd));
+ }
+ &AMode::PostIndexed(reg, simm9) => {
+ sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd));
+ }
+ // Eliminated by `mem_finalize()` above.
+ &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => {
+ panic!("Should not see stack-offset here!")
+ }
+ &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"),
+ }
+ }
+
+ &Inst::StoreP64 {
+ rt,
+ rt2,
+ ref mem,
+ flags,
+ } => {
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ match mem {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2));
+ }
+ &PairAMode::PreIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2));
+ }
+ &PairAMode::PostIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2));
+ }
+ }
+ }
+ &Inst::LoadP64 {
+ rt,
+ rt2,
+ ref mem,
+ flags,
+ } => {
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && !flags.notrap() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ let rt = rt.to_reg();
+ let rt2 = rt2.to_reg();
+ match mem {
+ &PairAMode::SignedOffset(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2));
+ }
+ &PairAMode::PreIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2));
+ }
+ &PairAMode::PostIndexed(reg, simm7) => {
+ assert_eq!(simm7.scale_ty, I64);
+ sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2));
+ }
+ }
+ }
+ &Inst::Mov64 { rd, rm } => {
+ assert!(rd.to_reg().get_class() == rm.get_class());
+ assert!(rm.get_class() == RegClass::I64);
+
+ // MOV to SP is interpreted as MOV to XZR instead. And our codegen
+ // should never MOV to XZR.
+ assert!(rd.to_reg() != stack_reg());
+
+ if rm == stack_reg() {
+ // We can't use ORR here, so use an `add rd, sp, #0` instead.
+ let imm12 = Imm12::maybe_from_u64(0).unwrap();
+ sink.put4(enc_arith_rr_imm12(
+ 0b100_10001,
+ imm12.shift_bits(),
+ imm12.imm_bits(),
+ rm,
+ rd,
+ ));
+ } else {
+ // Encoded as ORR rd, rm, zero.
+ sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm));
+ }
+ }
+ &Inst::Mov32 { rd, rm } => {
+ // MOV to SP is interpreted as MOV to XZR instead. And our codegen
+ // should never MOV to XZR.
+ assert!(machreg_to_gpr(rd.to_reg()) != 31);
+ // Encoded as ORR rd, rm, zero.
+ sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm));
+ }
+ &Inst::MovZ { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size))
+ }
+ &Inst::MovN { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size))
+ }
+ &Inst::MovK { rd, imm, size } => {
+ sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size))
+ }
+ &Inst::CSel { rd, rn, rm, cond } => {
+ sink.put4(enc_csel(rd, rn, rm, cond));
+ }
+ &Inst::CSet { rd, cond } => {
+ sink.put4(enc_cset(rd, cond));
+ }
+ &Inst::CCmpImm {
+ size,
+ rn,
+ imm,
+ nzcv,
+ cond,
+ } => {
+ sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond));
+ }
+ &Inst::AtomicRMW { ty, op } => {
+ /* Emit this:
+ dmb ish
+ again:
+ ldxr{,b,h} x/w27, [x25]
+ op x28, x27, x26 // op is add,sub,and,orr,eor
+ stxr{,b,h} w24, x/w28, [x25]
+ cbnz x24, again
+ dmb ish
+
+ Operand conventions:
+ IN: x25 (addr), x26 (2nd arg for op)
+ OUT: x27 (old value), x24 (trashed), x28 (trashed)
+
+ It is unfortunate that, per the ARM documentation, x28 cannot be used for
+ both the store-data and success-flag operands of stxr. This causes the
+ instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24
+ instead for the success-flag.
+
+ In the case where the operation is 'xchg', the second insn is instead
+ mov x28, x26
+ so that we simply write in the destination, the "2nd arg for op".
+ */
+ let xzr = zero_reg();
+ let x24 = xreg(24);
+ let x25 = xreg(25);
+ let x26 = xreg(26);
+ let x27 = xreg(27);
+ let x28 = xreg(28);
+ let x24wr = writable_xreg(24);
+ let x27wr = writable_xreg(27);
+ let x28wr = writable_xreg(28);
+ let again_label = sink.get_label();
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ // again:
+ sink.bind_label(again_label);
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
+
+ if op == inst_common::AtomicRmwOp::Xchg {
+ // mov x28, x26
+ sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26))
+ } else {
+ // add/sub/and/orr/eor x28, x27, x26
+ let bits_31_21 = match op {
+ inst_common::AtomicRmwOp::Add => 0b100_01011_00_0,
+ inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0,
+ inst_common::AtomicRmwOp::And => 0b100_01010_00_0,
+ inst_common::AtomicRmwOp::Or => 0b101_01010_00_0,
+ inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0,
+ inst_common::AtomicRmwOp::Xchg => unreachable!(),
+ };
+ sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26));
+ }
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
+
+ // cbnz w24, again
+ // Note, we're actually testing x24, and relying on the default zero-high-half
+ // rule in the assignment that `stxr` does.
+ let br_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(again_label),
+ CondBrKind::NotZero(x24),
+ ));
+ sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19);
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::AtomicCAS { ty } => {
+ /* Emit this:
+ dmb ish
+ again:
+ ldxr{,b,h} x/w27, [x25]
+ and x24, x26, MASK (= 2^size_bits - 1)
+ cmp x27, x24
+ b.ne out
+ stxr{,b,h} w24, x/w28, [x25]
+ cbnz x24, again
+ out:
+ dmb ish
+
+ Operand conventions:
+ IN: x25 (addr), x26 (expected value), x28 (replacement value)
+ OUT: x27 (old value), x24 (trashed)
+ */
+ let xzr = zero_reg();
+ let x24 = xreg(24);
+ let x25 = xreg(25);
+ let x26 = xreg(26);
+ let x27 = xreg(27);
+ let x28 = xreg(28);
+ let xzrwr = writable_zero_reg();
+ let x24wr = writable_xreg(24);
+ let x27wr = writable_xreg(27);
+ let again_label = sink.get_label();
+ let out_label = sink.get_label();
+
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ // again:
+ sink.bind_label(again_label);
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25]
+
+ if ty == I64 {
+ // mov x24, x26
+ sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26))
+ } else {
+ // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF
+ let (mask, s) = match ty {
+ I8 => (0xFF, 7),
+ I16 => (0xFFFF, 15),
+ I32 => (0xFFFFFFFF, 31),
+ _ => unreachable!(),
+ };
+ sink.put4(enc_arith_rr_imml(
+ 0b100_100100,
+ ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(),
+ x26,
+ x24wr,
+ ))
+ }
+
+ // cmp x27, x24 (== subs xzr, x27, x24)
+ sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24));
+
+ // b.ne out
+ let br_out_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(out_label),
+ CondBrKind::Cond(Cond::Ne),
+ ));
+ sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19);
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25]
+
+ // cbnz w24, again.
+ // Note, we're actually testing x24, and relying on the default zero-high-half
+ // rule in the assignment that `stxr` does.
+ let br_again_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(again_label),
+ CondBrKind::NotZero(x24),
+ ));
+ sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19);
+
+ // out:
+ sink.bind_label(out_label);
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::AtomicLoad { ty, r_data, r_addr } => {
+ let op = match ty {
+ I8 => 0b0011100001,
+ I16 => 0b0111100001,
+ I32 => 0b1011100001,
+ I64 => 0b1111100001,
+ _ => unreachable!(),
+ };
+ sink.put4(enc_dmb_ish()); // dmb ish
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
+ sink.put4(enc_ldst_uimm12(
+ op,
+ uimm12scaled_zero,
+ r_addr,
+ r_data.to_reg(),
+ ));
+ }
+ &Inst::AtomicStore { ty, r_data, r_addr } => {
+ let op = match ty {
+ I8 => 0b0011100000,
+ I16 => 0b0111100000,
+ I32 => 0b1011100000,
+ I64 => 0b1111100000,
+ _ => unreachable!(),
+ };
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/);
+ sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data));
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::Fence {} => {
+ sink.put4(enc_dmb_ish()); // dmb ish
+ }
+ &Inst::FpuMove64 { rd, rn } => {
+ sink.put4(enc_vecmov(/* 16b = */ false, rd, rn));
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+ }
+ &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
+ let (imm5, shift, mask) = match size.lane_size() {
+ ScalarSize::Size32 => (0b00100, 3, 0b011),
+ ScalarSize::Size64 => (0b01000, 4, 0b001),
+ _ => unimplemented!(),
+ };
+ debug_assert_eq!(idx & mask, idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b010_11110000_00000_000001_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::FpuRR { fpu_op, rd, rn } => {
+ let top22 = match fpu_op {
+ FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000,
+ FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000,
+ FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000,
+ FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000,
+ FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000,
+ FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000,
+ FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000,
+ FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000,
+ };
+ sink.put4(enc_fpurr(top22, rd, rn));
+ }
+ &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
+ let top22 = match fpu_op {
+ FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
+ FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
+ FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
+ FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
+ FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
+ FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
+ FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
+ FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
+ FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
+ FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
+ FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
+ FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
+ FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
+ FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
+ FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
+ FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
+ };
+ sink.put4(enc_fpurrr(top22, rd, rn, rm));
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
+ FPUOpRI::UShr32(imm) => {
+ debug_assert_eq!(32, imm.lane_size_in_bits);
+ sink.put4(
+ 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::UShr64(imm) => {
+ debug_assert_eq!(64, imm.lane_size_in_bits);
+ sink.put4(
+ 0b01_1_111110_0000000_00_0_0_0_1_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::Sli64(imm) => {
+ debug_assert_eq!(64, imm.lane_size_in_bits);
+ sink.put4(
+ 0b01_1_111110_0000000_010101_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ FPUOpRI::Sli32(imm) => {
+ debug_assert_eq!(32, imm.lane_size_in_bits);
+ sink.put4(
+ 0b0_0_1_011110_0000000_010101_00000_00000
+ | imm.enc() << 16
+ | machreg_to_vec(rn) << 5
+ | machreg_to_vec(rd.to_reg()),
+ )
+ }
+ },
+ &Inst::FpuRRRR {
+ fpu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let top17 = match fpu_op {
+ FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0,
+ FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0,
+ };
+ sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
+ }
+ &Inst::VecMisc { op, rd, rn, size } => {
+ let (q, enc_size) = size.enc_size();
+ let (u, bits_12_16, size) = match op {
+ VecMisc2::Not => (0b1, 0b00101, 0b00),
+ VecMisc2::Neg => (0b1, 0b01011, enc_size),
+ VecMisc2::Abs => (0b0, 0b01011, enc_size),
+ VecMisc2::Fabs => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b01111, enc_size)
+ }
+ VecMisc2::Fneg => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b01111, enc_size)
+ }
+ VecMisc2::Fsqrt => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11111, enc_size)
+ }
+ VecMisc2::Rev64 => {
+ debug_assert_ne!(VectorSize::Size64x2, size);
+ (0b0, 0b00000, enc_size)
+ }
+ VecMisc2::Shll => {
+ debug_assert_ne!(VectorSize::Size64x2, size);
+ debug_assert!(!size.is_128bits());
+ (0b1, 0b10011, enc_size)
+ }
+ VecMisc2::Fcvtzs => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11011, enc_size)
+ }
+ VecMisc2::Fcvtzu => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11011, enc_size)
+ }
+ VecMisc2::Scvtf => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11101, enc_size & 0b1)
+ }
+ VecMisc2::Ucvtf => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b1, 0b11101, enc_size & 0b1)
+ }
+ VecMisc2::Frintn => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11000, enc_size & 0b01)
+ }
+ VecMisc2::Frintz => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11001, enc_size | 0b10)
+ }
+ VecMisc2::Frintm => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11001, enc_size & 0b01)
+ }
+ VecMisc2::Frintp => {
+ debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+ (0b0, 0b11000, enc_size | 0b10)
+ }
+ };
+ sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
+ }
+ &Inst::VecLanes { op, rd, rn, size } => {
+ let (q, size) = match size {
+ VectorSize::Size8x16 => (0b1, 0b00),
+ VectorSize::Size16x8 => (0b1, 0b01),
+ VectorSize::Size32x4 => (0b1, 0b10),
+ _ => unreachable!(),
+ };
+ let (u, opcode) = match op {
+ VecLanesOp::Uminv => (0b1, 0b11010),
+ VecLanesOp::Addv => (0b0, 0b11011),
+ };
+ sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
+ }
+ &Inst::VecShiftImm {
+ op,
+ rd,
+ rn,
+ size,
+ imm,
+ } => {
+ let (is_shr, template) = match op {
+ VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
+ VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
+ VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
+ };
+ let imm = imm as u32;
+ // Deal with the somewhat strange encoding scheme for, and limits on,
+ // the shift amount.
+ let immh_immb = match (size, is_shr) {
+ (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
+ 0b_1000_000_u32 | (64 - imm)
+ }
+ (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
+ 0b_0100_000_u32 | (32 - imm)
+ }
+ (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
+ 0b_0010_000_u32 | (16 - imm)
+ }
+ (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
+ 0b_0001_000_u32 | (8 - imm)
+ }
+ (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+ (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+ (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+ (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+ _ => panic!(
+ "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
+ op, size, imm
+ ),
+ };
+ let rn_enc = machreg_to_vec(rn);
+ let rd_enc = machreg_to_vec(rd.to_reg());
+ sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc);
+ }
+ &Inst::VecExtract { rd, rn, rm, imm4 } => {
+ if imm4 < 16 {
+ let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32;
+ let rm_enc = machreg_to_vec(rm);
+ let rn_enc = machreg_to_vec(rn);
+ let rd_enc = machreg_to_vec(rd.to_reg());
+ sink.put4(
+ template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc,
+ );
+ } else {
+ panic!(
+ "aarch64: Inst::VecExtract: emit: invalid extract index {}",
+ imm4
+ );
+ }
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
+ sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
+ }
+ &Inst::FpuCmp32 { rn, rm } => {
+ sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
+ }
+ &Inst::FpuCmp64 { rn, rm } => {
+ sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
+ }
+ &Inst::FpuToInt { op, rd, rn } => {
+ let top16 = match op {
+ // FCVTZS (32/32-bit)
+ FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000,
+ // FCVTZU (32/32-bit)
+ FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001,
+ // FCVTZS (32/64-bit)
+ FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000,
+ // FCVTZU (32/64-bit)
+ FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001,
+ // FCVTZS (64/32-bit)
+ FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000,
+ // FCVTZU (64/32-bit)
+ FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001,
+ // FCVTZS (64/64-bit)
+ FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000,
+ // FCVTZU (64/64-bit)
+ FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001,
+ };
+ sink.put4(enc_fputoint(top16, rd, rn));
+ }
+ &Inst::IntToFpu { op, rd, rn } => {
+ let top16 = match op {
+ // SCVTF (32/32-bit)
+ IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010,
+ // UCVTF (32/32-bit)
+ IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011,
+ // SCVTF (64/32-bit)
+ IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010,
+ // UCVTF (64/32-bit)
+ IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011,
+ // SCVTF (32/64-bit)
+ IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010,
+ // UCVTF (32/64-bit)
+ IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011,
+ // SCVTF (64/64-bit)
+ IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010,
+ // UCVTF (64/64-bit)
+ IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011,
+ };
+ sink.put4(enc_inttofpu(top16, rd, rn));
+ }
+ &Inst::LoadFpuConst64 { rd, const_data } => {
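+ // Layout: an LDR (literal) that loads from PC+8, a 4-byte branch that skips
+ // the 8-byte constant (branch offset 12 = 4-byte branch + 8 bytes of data),
+ // then the constant itself.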
+ let inst = Inst::FpuLoad64 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(12),
+ };
+ inst.emit(sink, emit_info, state);
+ sink.put8(const_data);
+ }
+ &Inst::LoadFpuConst128 { rd, const_data } => {
+ let inst = Inst::FpuLoad128 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(20),
+ };
+ inst.emit(sink, emit_info, state);
+
+ for i in const_data.to_le_bytes().iter() {
+ sink.put1(*i);
+ }
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, cond } => {
+ sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
+ }
+ &Inst::FpuCSel64 { rd, rn, rm, cond } => {
+ sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64));
+ }
+ &Inst::FpuRound { op, rd, rn } => {
+ let top22 = match op {
+ FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000,
+ FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000,
+ FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000,
+ FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000,
+ FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000,
+ FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000,
+ FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000,
+ FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000,
+ };
+ sink.put4(enc_fround(top22, rd, rn));
+ }
+ &Inst::MovToFpu { rd, rn, size } => {
+ let template = match size {
+ ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
+ ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
+ _ => unreachable!(),
+ };
+ sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
+ }
+ &Inst::MovToVec { rd, rn, idx, size } => {
+ let (imm5, shift) = match size.lane_size() {
+ ScalarSize::Size8 => (0b00001, 1),
+ ScalarSize::Size16 => (0b00010, 2),
+ ScalarSize::Size32 => (0b00100, 3),
+ ScalarSize::Size64 => (0b01000, 4),
+ _ => unreachable!(),
+ };
+ debug_assert_eq!(idx & (0b11111 >> shift), idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b010_01110000_00000_0_0011_1_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::MovFromVec { rd, rn, idx, size } => {
+ let (q, imm5, shift, mask) = match size {
+ VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
+ VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
+ VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
+ VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
+ _ => unreachable!(),
+ };
+ debug_assert_eq!(idx & mask, idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b000_01110000_00000_0_0111_1_00000_00000
+ | (q << 30)
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+ &Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ } => {
+ let (imm5, shift, half) = match size {
+ VectorSize::Size8x8 => (0b00001, 1, true),
+ VectorSize::Size8x16 => (0b00001, 1, false),
+ VectorSize::Size16x4 => (0b00010, 2, true),
+ VectorSize::Size16x8 => (0b00010, 2, false),
+ VectorSize::Size32x2 => {
+ debug_assert_ne!(scalar_size, OperandSize::Size32);
+ (0b00100, 3, true)
+ }
+ VectorSize::Size32x4 => {
+ debug_assert_ne!(scalar_size, OperandSize::Size32);
+ (0b00100, 3, false)
+ }
+ _ => panic!("Unexpected vector operand size"),
+ };
+ debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx);
+ let imm5 = imm5 | ((idx as u32) << shift);
+ sink.put4(
+ 0b000_01110000_00000_0_0101_1_00000_00000
+ | (scalar_size.is64() as u32) << 30
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_gpr(rd.to_reg()),
+ );
+ }
+ &Inst::VecDup { rd, rn, size } => {
+ let imm5 = match size {
+ VectorSize::Size8x16 => 0b00001,
+ VectorSize::Size16x8 => 0b00010,
+ VectorSize::Size32x4 => 0b00100,
+ VectorSize::Size64x2 => 0b01000,
+ _ => unimplemented!(),
+ };
+ sink.put4(
+ 0b010_01110000_00000_000011_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_gpr(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecDupFromFpu { rd, rn, size } => {
+ let imm5 = match size {
+ VectorSize::Size32x4 => 0b00100,
+ VectorSize::Size64x2 => 0b01000,
+ _ => unimplemented!(),
+ };
+ sink.put4(
+ 0b010_01110000_00000_000001_00000_00000
+ | (imm5 << 16)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecDupImm {
+ rd,
+ imm,
+ invert,
+ size,
+ } => {
+ let (imm, shift, shift_ones) = imm.value();
+ let (op, cmode) = match size.lane_size() {
+ ScalarSize::Size8 => {
+ assert!(!invert);
+ assert_eq!(shift, 0);
+
+ (0, 0b1110)
+ }
+ ScalarSize::Size16 => {
+ let s = shift & 8;
+
+ assert!(!shift_ones);
+ assert_eq!(s, shift);
+
+ (invert as u32, 0b1000 | (s >> 2))
+ }
+ ScalarSize::Size32 => {
+ if shift_ones {
+ assert!(shift == 8 || shift == 16);
+
+ (invert as u32, 0b1100 | (shift >> 4))
+ } else {
+ let s = shift & 24;
+
+ assert_eq!(s, shift);
+
+ (invert as u32, 0b0000 | (s >> 2))
+ }
+ }
+ ScalarSize::Size64 => {
+ assert!(!invert);
+ assert_eq!(shift, 0);
+
+ (1, 0b1110)
+ }
+ _ => unreachable!(),
+ };
+ let q_op = op | ((size.is_128bits() as u32) << 1);
+
+ sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
+ }
+ &Inst::VecExtend {
+ t,
+ rd,
+ rn,
+ high_half,
+ } => {
+ let (u, immh) = match t {
+ VecExtendOp::Sxtl8 => (0b0, 0b001),
+ VecExtendOp::Sxtl16 => (0b0, 0b010),
+ VecExtendOp::Sxtl32 => (0b0, 0b100),
+ VecExtendOp::Uxtl8 => (0b1, 0b001),
+ VecExtendOp::Uxtl16 => (0b1, 0b010),
+ VecExtendOp::Uxtl32 => (0b1, 0b100),
+ };
+ sink.put4(
+ 0b000_011110_0000_000_101001_00000_00000
+ | ((high_half as u32) << 30)
+ | (u << 29)
+ | (immh << 19)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecMiscNarrow {
+ op,
+ rd,
+ rn,
+ size,
+ high_half,
+ } => {
+ let size = match size.lane_size() {
+ ScalarSize::Size8 => 0b00,
+ ScalarSize::Size16 => 0b01,
+ ScalarSize::Size32 => 0b10,
+ _ => panic!("Unexpected vector operand lane size!"),
+ };
+ let (u, bits_12_16) = match op {
+ VecMiscNarrowOp::Xtn => (0b0, 0b10010),
+ VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
+ VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
+ };
+ sink.put4(enc_vec_rr_misc(
+ ((high_half as u32) << 1) | u,
+ size,
+ bits_12_16,
+ rd,
+ rn,
+ ));
+ }
+ &Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx,
+ src_idx,
+ size,
+ } => {
+ let (imm5, shift) = match size.lane_size() {
+ ScalarSize::Size8 => (0b00001, 1),
+ ScalarSize::Size16 => (0b00010, 2),
+ ScalarSize::Size32 => (0b00100, 3),
+ ScalarSize::Size64 => (0b01000, 4),
+ _ => unreachable!(),
+ };
+ let mask = 0b11111 >> shift;
+ debug_assert_eq!(dest_idx & mask, dest_idx);
+ debug_assert_eq!(src_idx & mask, src_idx);
+ let imm4 = (src_idx as u32) << (shift - 1);
+ let imm5 = imm5 | ((dest_idx as u32) << shift);
+ sink.put4(
+ 0b011_01110000_00000_0_0000_1_00000_00000
+ | (imm5 << 16)
+ | (imm4 << 11)
+ | (machreg_to_vec(rn) << 5)
+ | machreg_to_vec(rd.to_reg()),
+ );
+ }
+ &Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size,
+ } => {
+ let (q, enc_size) = size.enc_size();
+ let is_float = match alu_op {
+ VecALUOp::Fcmeq
+ | VecALUOp::Fcmgt
+ | VecALUOp::Fcmge
+ | VecALUOp::Fadd
+ | VecALUOp::Fsub
+ | VecALUOp::Fdiv
+ | VecALUOp::Fmax
+ | VecALUOp::Fmin
+ | VecALUOp::Fmul => true,
+ _ => false,
+ };
+ let enc_float_size = match (is_float, size) {
+ (true, VectorSize::Size32x2) => 0b0,
+ (true, VectorSize::Size32x4) => 0b0,
+ (true, VectorSize::Size64x2) => 0b1,
+ (true, _) => unimplemented!(),
+ _ => 0,
+ };
+
+ let (top11, bit15_10) = match alu_op {
+ VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
+ VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
+ VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
+ VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
+ VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
+ VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
+ VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
+ VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
+ VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
+ VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
+ VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
+ VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
+ // The following logical instructions operate on bytes, so are not encoded differently
+ // for the different vector types.
+ VecALUOp::And => (0b000_01110_00_1, 0b000111),
+ VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
+ VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
+ VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
+ VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
+ VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
+ VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
+ VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
+ VecALUOp::Mul => {
+ debug_assert_ne!(size, VectorSize::Size64x2);
+ (0b000_01110_00_1 | enc_size << 1, 0b100111)
+ }
+ VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
+ VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
+ VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
+ VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
+ VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
+ VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
+ VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
+ VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
+ VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
+ VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
+ VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
+ VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
+ VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
+ VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
+ VecALUOp::Umlal => {
+ debug_assert!(!size.is_128bits());
+ (0b001_01110_00_1 | enc_size << 1, 0b100000)
+ }
+ VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
+ VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
+ VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
+ };
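+ // `top11` occupies bits 31:21, so `q << 9` below places the Q (128-bit)
+ // bit at instruction bit 30.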
+ let top11 = match alu_op {
+ VecALUOp::Smull | VecALUOp::Smull2 => top11,
+ _ if is_float => top11 | (q << 9) | enc_float_size << 1,
+ _ => top11 | (q << 9),
+ };
+ sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
+ }
+ &Inst::VecLoadReplicate { rd, rn, size } => {
+ let (q, size) = size.enc_size();
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ // Register the offset at which the actual load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ sink.put4(enc_ldst_vec(q, size, rn, rd));
+ }
+ &Inst::VecCSel { rd, rn, rm, cond } => {
+ /* Emit this:
+ b.cond else
+ mov rd, rm
+ b out
+ else:
+ mov rd, rn
+ out:
+
+ Note, we could do better in the cases where rd == rn or rd == rm.
+ */
+ let else_label = sink.get_label();
+ let out_label = sink.get_label();
+
+ // b.cond else
+ let br_else_offset = sink.cur_offset();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(else_label),
+ CondBrKind::Cond(cond),
+ ));
+ sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19);
+
+ // mov rd, rm
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rm));
+
+ // b out
+ let b_out_offset = sink.cur_offset();
+ sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26);
+ sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label);
+ sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */));
+
+ // else:
+ sink.bind_label(else_label);
+
+ // mov rd, rn
+ sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
+
+ // out:
+ sink.bind_label(out_label);
+ }
+ &Inst::MovToNZCV { rn } => {
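+ // MSR NZCV, Xn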
+ sink.put4(0xd51b4200 | machreg_to_gpr(rn));
+ }
+ &Inst::MovFromNZCV { rd } => {
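+ // MRS Xn, NZCV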
+ sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg()));
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits >= 8 => {
+ let top22 = match (signed, from_bits, to_bits) {
+ (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32)
+ (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32)
+ (true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32)
+ (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32)
+ // The 64-bit unsigned variants are the same as the 32-bit ones,
+ // because writes to Wn zero out the top 32 bits of Xn
+ (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64)
+ (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64)
+ (true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64)
+ (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64)
+ // 32-to-64: the unsigned case is a 'mov' (special-cased below).
+ (false, 32, 64) => 0, // MOV
+ (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64)
+ _ => panic!(
+ "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}",
+ signed, from_bits, to_bits
+ ),
+ };
+ if top22 != 0 {
+ sink.put4(enc_extend(top22, rd, rn));
+ } else {
+ Inst::mov32(rd, rn).emit(sink, emit_info, state);
+ }
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && signed => {
+ assert!(to_bits <= 64);
+ // Reduce sign-extend-from-1-bit to:
+ // - and rd, rn, #1
+ // - sub rd, zr, rd
+
+ // We don't have ImmLogic yet, so we just hardcode this. FIXME.
+ sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+ let sub_inst = Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd,
+ rn: zero_reg(),
+ rm: rd.to_reg(),
+ };
+ sub_inst.emit(sink, emit_info, state);
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && !signed => {
+ assert!(to_bits <= 64);
+ // Reduce zero-extend-from-1-bit to:
+ // - and rd, rn, #1
+
+ // We don't have ImmLogic yet, so we just hardcode this. FIXME.
+ sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported extend variant");
+ }
+ &Inst::Jump { ref dest } => {
+ let off = sink.cur_offset();
+ // If the jump target is a label, record the use so that a fixup can occur later.
+ if let Some(l) = dest.as_label() {
+ sink.use_label_at_offset(off, l, LabelUse::Branch26);
+ sink.add_uncond_branch(off, off + 4, l);
+ }
+ // Emit the jump itself.
+ sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero()));
+ }
+ &Inst::Ret => {
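+ // RET, returning to the address in x30 (the link register).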
+ sink.put4(0xd65f03c0);
+ }
+ &Inst::EpiloguePlaceholder => {
+ // Noop; this is just a placeholder for epilogues.
+ }
+ &Inst::Call { ref info } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ let loc = state.cur_srcloc();
+ sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0);
+ sink.put4(enc_jump26(0b100101, 0));
+ if info.opcode.is_call() {
+ sink.add_call_site(loc, info.opcode);
+ }
+ }
+ &Inst::CallInd { ref info } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5));
+ let loc = state.cur_srcloc();
+ if info.opcode.is_call() {
+ sink.add_call_site(loc, info.opcode);
+ }
+ }
+ &Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ } => {
+ // Conditional part first.
+ let cond_off = sink.cur_offset();
+ if let Some(l) = taken.as_label() {
+ sink.use_label_at_offset(cond_off, l, LabelUse::Branch19);
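+ // Record the inverted form so the MachBuffer can flip this conditional
+ // branch during branch simplification if needed.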
+ let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes();
+ sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
+ }
+ sink.put4(enc_conditional_br(taken, kind));
+
+ // Unconditional part next.
+ let uncond_off = sink.cur_offset();
+ if let Some(l) = not_taken.as_label() {
+ sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26);
+ sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
+ }
+ sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero()));
+ }
+ &Inst::TrapIf { kind, trap_code } => {
+ // condbr KIND, LABEL
+ let off = sink.cur_offset();
+ let label = sink.get_label();
+ sink.put4(enc_conditional_br(
+ BranchTarget::Label(label),
+ kind.invert(),
+ ));
+ sink.use_label_at_offset(off, label, LabelUse::Branch19);
+ // udf
+ let trap = Inst::Udf { trap_code };
+ trap.emit(sink, emit_info, state);
+ // LABEL:
+ sink.bind_label(label);
+ }
+ &Inst::IndirectBr { rn, .. } => {
+ sink.put4(enc_br(rn));
+ }
+ &Inst::Nop0 => {}
+ &Inst::Nop4 => {
+ sink.put4(0xd503201f);
+ }
+ &Inst::Brk => {
+ sink.put4(0xd4200000);
+ }
+ &Inst::Udf { trap_code } => {
+ let srcloc = state.cur_srcloc();
+ sink.add_trap(srcloc, trap_code);
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
+ }
+ sink.put4(0xd4a00000);
+ }
+ &Inst::Adr { rd, off } => {
+ assert!(off > -(1 << 20));
+ assert!(off < (1 << 20));
+ sink.put4(enc_adr(off, rd));
+ }
+ &Inst::Word4 { data } => {
+ sink.put4(data);
+ }
+ &Inst::Word8 { data } => {
+ sink.put8(data);
+ }
+ &Inst::JTSequence {
+ ridx,
+ rtmp1,
+ rtmp2,
+ ref info,
+ ..
+ } => {
+ // This sequence is *one* instruction in the vcode, and is expanded only here at
+ // emission time, because we cannot allow the regalloc to insert spills/reloads in
+ // the middle; we depend on hardcoded PC-rel addressing below.
+
+ // Branch to default when condition code from prior comparison indicates.
+ let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs));
+ // No need to inform the sink's branch folding logic about this branch, because it
+ // will not be merged with any other branch, flipped, or elided (it is not preceded
+ // or succeeded by any other branch). Just emit it with the label use.
+ let default_br_offset = sink.cur_offset();
+ if let BranchTarget::Label(l) = info.default_target {
+ sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19);
+ }
+ sink.put4(br);
+
+ // Save index in a tmp (the live range of ridx only goes to start of this
+ // sequence; rtmp1 or rtmp2 may overwrite it).
+ let inst = Inst::gen_move(rtmp2, ridx, I64);
+ inst.emit(sink, emit_info, state);
+ // Load address of jump table
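+ // (The offset of 16 covers this adr plus the following ldrsw, add and br,
+ // 4 bytes each, so rtmp1 points at the first jump-table entry emitted below.)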
+ let inst = Inst::Adr { rd: rtmp1, off: 16 };
+ inst.emit(sink, emit_info, state);
+ // Load value out of jump table
+ let inst = Inst::SLoad32 {
+ rd: rtmp2,
+ mem: AMode::reg_plus_reg_scaled_extended(
+ rtmp1.to_reg(),
+ rtmp2.to_reg(),
+ I32,
+ ExtendOp::UXTW,
+ ),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ // Add base of jump table to jump-table-sourced block offset
+ let inst = Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: rtmp1,
+ rn: rtmp1.to_reg(),
+ rm: rtmp2.to_reg(),
+ };
+ inst.emit(sink, emit_info, state);
+ // Branch to computed address. (`targets` here is only used for successor queries
+ // and is not needed for emission.)
+ let inst = Inst::IndirectBr {
+ rn: rtmp1.to_reg(),
+ targets: vec![],
+ };
+ inst.emit(sink, emit_info, state);
+ // Emit jump table (table of 32-bit offsets).
+ let jt_off = sink.cur_offset();
+ for &target in info.targets.iter() {
+ let word_off = sink.cur_offset();
+ // off_into_table is an addend here embedded in the label to be later patched
+ // at the end of codegen. The offset is initially relative to this jump table
+ // entry; with the extra addend, it'll be relative to the jump table's start,
+ // after patching.
+ let off_into_table = word_off - jt_off;
+ sink.use_label_at_offset(
+ word_off,
+ target.as_label().unwrap(),
+ LabelUse::PCRel32,
+ );
+ sink.put4(off_into_table);
+ }
+
+ // Lowering produces an EmitIsland before using a JTSequence, so we can safely
+ // disable the worst-case-size check in this case.
+ start_off = sink.cur_offset();
+ }
+ &Inst::LoadExtName {
+ rd,
+ ref name,
+ offset,
+ } => {
+ let inst = Inst::ULoad64 {
+ rd,
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ };
+ inst.emit(sink, emit_info, state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(12),
+ };
+ inst.emit(sink, emit_info, state);
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, Reloc::Abs8, name, offset);
+ if emit_info.flags().emit_all_ones_funcaddrs() {
+ sink.put8(u64::max_value());
+ } else {
+ sink.put8(0);
+ }
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state);
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+
+ let (reg, index_reg, offset) = match mem {
+ AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
+ AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
+ AMode::UnsignedOffset(r, uimm12scaled) => {
+ (r, None, uimm12scaled.value() as i32)
+ }
+ _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
+ };
+ let abs_offset = if offset < 0 {
+ -offset as u64
+ } else {
+ offset as u64
+ };
+ let alu_op = if offset < 0 {
+ ALUOp::Sub64
+ } else {
+ ALUOp::Add64
+ };
+
+ if let Some((idx, extendop)) = index_reg {
+ let add = Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd,
+ rn: reg,
+ rm: idx,
+ extendop,
+ };
+
+ add.emit(sink, emit_info, state);
+ } else if offset == 0 {
+ if reg != rd.to_reg() {
+ let mov = Inst::mov(rd, reg);
+
+ mov.emit(sink, emit_info, state);
+ }
+ } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
+ let add = Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: reg,
+ imm12,
+ };
+ add.emit(sink, emit_info, state);
+ } else {
+ // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction
+ // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note
+ // that no other instructions will be inserted here (we're emitting directly),
+ // and a live range of `tmp2` should not span this instruction, so this use
+ // should otherwise be correct.
+ debug_assert!(rd.to_reg() != tmp2_reg());
+ debug_assert!(reg != tmp2_reg());
+ let tmp = writable_tmp2_reg();
+ for insn in Inst::load_constant(tmp, abs_offset).into_iter() {
+ insn.emit(sink, emit_info, state);
+ }
+ let add = Inst::AluRRR {
+ alu_op,
+ rd,
+ rn: reg,
+ rm: tmp.to_reg(),
+ };
+ add.emit(sink, emit_info, state);
+ }
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ debug!(
+ "virtual sp offset adjusted by {} -> {}",
+ offset,
+ state.virtual_sp_offset + offset,
+ );
+ state.virtual_sp_offset += offset;
+ }
+ &Inst::EmitIsland { needed_space } => {
+ if sink.island_needed(needed_space + 4) {
+ let jump_around_label = sink.get_label();
+ let jmp = Inst::Jump {
+ dest: BranchTarget::Label(jump_around_label),
+ };
+ jmp.emit(sink, emit_info, state);
+ sink.emit_island();
+ sink.bind_label(jump_around_label);
+ }
+ }
+ }
+
+ let end_off = sink.cur_offset();
+ debug_assert!((end_off - start_off) <= Inst::worst_case_size());
+
+ state.clear_post_insn();
+ }
+
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ self.print_with_state(mb_rru, state)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs
new file mode 100644
index 0000000000..eb31963b5d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -0,0 +1,5143 @@
+use crate::ir::types::*;
+use crate::isa::aarch64::inst::*;
+use crate::isa::test_utils;
+use crate::isa::CallConv;
+use crate::settings;
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+
+#[test]
+fn test_aarch64_binemit() {
+ let mut insns = Vec::<(Inst, &str, &str)>::new();
+
+ // N.B.: the architecture is little-endian, so when transcribing the 32-bit
+ // hex instructions from e.g. objdump disassembly, one must reverse the byte
+ // order to obtain the strings seen below. (E.g., a `ret` is normally written
+ // as the u32 `D65F03C0`, but we write it here as C0035FD6.)
+
+ // Useful helper script to produce the encodings from the text:
+ //
+ // #!/bin/sh
+ // tmp=`mktemp /tmp/XXXXXXXX.o`
+ // aarch64-linux-gnu-as /dev/stdin -o $tmp
+ // aarch64-linux-gnu-objdump -d $tmp
+ // rm -f $tmp
+ //
+ // Then:
+ //
+ // $ echo "mov x1, x2" | aarch64inst.sh
+ insns.push((Inst::Ret, "C0035FD6", "ret"));
+ insns.push((Inst::Nop0, "", "nop-zero-len"));
+ insns.push((Inst::Nop4, "1F2003D5", "nop"));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100030B",
+ "add w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400068B",
+ "add x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100034B",
+ "sub w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006CB",
+ "sub x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100032A",
+ "orr w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006AA",
+ "orr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100030A",
+ "and w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400068A",
+ "and x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_zero_reg(),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "5F00036B",
+ // TODO: Display as cmp
+ "subs wzr, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100036B",
+ "subs w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006EB",
+ "subs x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AddS32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "4100032B",
+ "adds w1, w2, w3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AddS64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006AB",
+ "adds x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::AddS64,
+ rd: writable_zero_reg(),
+ rn: xreg(5),
+ imm12: Imm12::maybe_from_u64(1).unwrap(),
+ },
+ "BF0400B1",
+ // TODO: Display as cmn.
+ "adds xzr, x5, #1",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SDiv64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40CC69A",
+ "sdiv x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::UDiv64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A408C69A",
+ "udiv x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400064A",
+ "eor w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40006CA",
+ "eor x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AndNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400260A",
+ "bic w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::AndNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400268A",
+ "bic x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::OrrNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400262A",
+ "orn w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::OrrNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40026AA",
+ "orn x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::EorNot32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A400264A",
+ "eon w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::EorNot64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A40026CA",
+ "eon x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::RotR32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A42CC61A",
+ "ror w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::RotR64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A42CC69A",
+ "ror x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A424C61A",
+ "lsr w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A424C69A",
+ "lsr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A428C61A",
+ "asr w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A428C69A",
+ "asr x4, x5, x6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A420C61A",
+ "lsl w4, w5, w6",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ rm: xreg(6),
+ },
+ "A420C69A",
+ "lsl x4, x5, x6",
+ ));
+
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0411",
+ "add w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: true,
+ },
+ },
+ "078D4411",
+ "add w7, w8, #1191936",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0491",
+ "add x7, x8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0451",
+ "sub w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D04D1",
+ "sub x7, x8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D0471",
+ "subs w7, w8, #291",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ imm12: Imm12 {
+ bits: 0x123,
+ shift12: false,
+ },
+ },
+ "078D04F1",
+ "subs x7, x8, #291",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(7),
+ rn: xreg(8),
+ rm: xreg(9),
+ extendop: ExtendOp::SXTB,
+ },
+ "0781290B",
+ "add w7, w8, w9, SXTB",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(15),
+ rn: xreg(16),
+ rm: xreg(17),
+ extendop: ExtendOp::UXTB,
+ },
+ "0F02318B",
+ "add x15, x16, x17, UXTB",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ extendop: ExtendOp::SXTH,
+ },
+ "41A0234B",
+ "sub w1, w2, w3, SXTH",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ rm: xreg(22),
+ extendop: ExtendOp::UXTW,
+ },
+ "B44236CB",
+ "sub x20, x21, x22, UXTW",
+ ));
+
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(20).unwrap(),
+ ),
+ },
+ "6A510C0B",
+ "add w10, w11, w12, LSL 20",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::ASR,
+ ShiftOpShiftImm::maybe_from_shift(42).unwrap(),
+ ),
+ },
+ "6AA98C8B",
+ "add x10, x11, x12, ASR 42",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C4B",
+ "sub w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CCB",
+ "sub x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C2A",
+ "orr w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CAA",
+ "orr x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C0A",
+ "and w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C8A",
+ "and x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C4A",
+ "eor w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CCA",
+ "eor x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::OrrNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C2A",
+ "orn w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::OrrNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2CAA",
+ "orn x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AndNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C0A",
+ "bic w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AndNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C8A",
+ "bic x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::EorNot32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2C4A",
+ "eon w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::EorNot64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D2CCA",
+ "eon x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AddS32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C2B",
+ "adds w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::AddS64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CAB",
+ "adds x10, x11, x12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::SubS32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0C6B",
+ "subs w10, w11, w12, LSL 23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::SubS64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ rm: xreg(12),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ ),
+ },
+ "6A5D0CEB",
+ "subs x10, x11, x12, LSL 23",
+ ));
+
+ insns.push((
+ Inst::AluRRRExtend {
+ alu_op: ALUOp::SubS64,
+ rd: writable_zero_reg(),
+ rn: stack_reg(),
+ rm: xreg(12),
+ extendop: ExtendOp::UXTX,
+ },
+ "FF632CEB",
+ "subs xzr, sp, x12, UXTX",
+ ));
+
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4110031B",
+ "madd w1, w2, w3, w4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd64,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4110039B",
+ "madd x1, x2, x3, x4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MSub32,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4190031B",
+ "msub w1, w2, w3, w4",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp3::MSub64,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ ra: xreg(4),
+ },
+ "4190039B",
+ "msub x1, x2, x3, x4",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::SMulH,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "417C439B",
+ "smulh x1, x2, x3",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::UMulH,
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ rm: xreg(3),
+ },
+ "417CC39B",
+ "umulh x1, x2, x3",
+ ));
+
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::RotR32,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ immshift: ImmShift::maybe_from_u64(19).unwrap(),
+ },
+ "B44E9513",
+ "ror w20, w21, #19",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::RotR64,
+ rd: writable_xreg(20),
+ rn: xreg(21),
+ immshift: ImmShift::maybe_from_u64(42).unwrap(),
+ },
+ "B4AAD593",
+ "ror x20, x21, #42",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(13).unwrap(),
+ },
+ "6A7D0D53",
+ "lsr w10, w11, #13",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(57).unwrap(),
+ },
+ "6AFD79D3",
+ "lsr x10, x11, #57",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Asr32,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ immshift: ImmShift::maybe_from_u64(7).unwrap(),
+ },
+ "A47C0713",
+ "asr w4, w5, #7",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Asr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ immshift: ImmShift::maybe_from_u64(35).unwrap(),
+ },
+ "A4FC6393",
+ "asr x4, x5, #35",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(8),
+ rn: xreg(9),
+ immshift: ImmShift::maybe_from_u64(24).unwrap(),
+ },
+ "281D0853",
+ "lsl w8, w9, #24",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(8),
+ rn: xreg(9),
+ immshift: ImmShift::maybe_from_u64(63).unwrap(),
+ },
+ "280141D3",
+ "lsl x8, x9, #63",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(0).unwrap(),
+ },
+ "6A7D0053",
+ "lsl w10, w11, #0",
+ ));
+ insns.push((
+ Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: writable_xreg(10),
+ rn: xreg(11),
+ immshift: ImmShift::maybe_from_u64(0).unwrap(),
+ },
+ "6AFD40D3",
+ "lsl x10, x11, #0",
+ ));
+
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::And32,
+ rd: writable_xreg(21),
+ rn: xreg(27),
+ imml: ImmLogic::maybe_from_u64(0x80003fff, I32).unwrap(),
+ },
+ "753B0112",
+ "and w21, w27, #2147500031",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: writable_xreg(7),
+ rn: xreg(6),
+ imml: ImmLogic::maybe_from_u64(0x3fff80003fff800, I64).unwrap(),
+ },
+ "C7381592",
+ "and x7, x6, #288221580125796352",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr32,
+ rd: writable_xreg(1),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x100000, I32).unwrap(),
+ },
+ "A1000C32",
+ "orr w1, w5, #1048576",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr64,
+ rd: writable_xreg(4),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(),
+ },
+ "A4C401B2",
+ "orr x4, x5, #9331882296111890817",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Eor32,
+ rd: writable_xreg(1),
+ rn: xreg(5),
+ imml: ImmLogic::maybe_from_u64(0x00007fff, I32).unwrap(),
+ },
+ "A1380052",
+ "eor w1, w5, #32767",
+ ));
+ insns.push((
+ Inst::AluRRImmLogic {
+ alu_op: ALUOp::Eor64,
+ rd: writable_xreg(10),
+ rn: xreg(8),
+ imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(),
+ },
+ "0AC501D2",
+ "eor x10, x8, #9331882296111890817",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::RBit32,
+ rd: writable_xreg(1),
+ rn: xreg(10),
+ },
+ "4101C05A",
+ "rbit w1, w10",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::RBit64,
+ rd: writable_xreg(1),
+ rn: xreg(10),
+ },
+ "4101C0DA",
+ "rbit x1, x10",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Clz32,
+ rd: writable_xreg(15),
+ rn: xreg(3),
+ },
+ "6F10C05A",
+ "clz w15, w3",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Clz64,
+ rd: writable_xreg(15),
+ rn: xreg(3),
+ },
+ "6F10C0DA",
+ "clz x15, x3",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Cls32,
+ rd: writable_xreg(21),
+ rn: xreg(16),
+ },
+ "1516C05A",
+ "cls w21, w16",
+ ));
+
+ insns.push((
+ Inst::BitRR {
+ op: BitOp::Cls64,
+ rd: writable_xreg(21),
+ rn: xreg(16),
+ },
+ "1516C0DA",
+ "cls x21, x16",
+ ));
+
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41004038",
+ "ldurb w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)),
+ flags: MemFlags::trusted(),
+ },
+ "41004039",
+ "ldrb w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(5)),
+ flags: MemFlags::trusted(),
+ },
+ "41686538",
+ "ldrb w1, [x2, x5]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41008038",
+ "ldursb x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC8039",
+ "ldrsb x1, [x2, #63]",
+ ));
+ insns.push((
+ Inst::SLoad8 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(5)),
+ flags: MemFlags::trusted(),
+ },
+ "4168A538",
+ "ldrsb x1, [x2, x5]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41504078",
+ "ldurh w1, [x2, #5]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41104079",
+ "ldrh w1, [x2, #8]",
+ ));
+ insns.push((
+ Inst::ULoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I16),
+ flags: MemFlags::trusted(),
+ },
+ "41786378",
+ "ldrh w1, [x2, x3, LSL #1]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41008078",
+ "ldursh x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(28),
+ mem: AMode::UnsignedOffset(xreg(20), UImm12Scaled::maybe_from_i64(24, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "9C328079",
+ "ldrsh x28, [x20, #24]",
+ ));
+ insns.push((
+ Inst::SLoad16 {
+ rd: writable_xreg(28),
+ mem: AMode::RegScaled(xreg(20), xreg(20), I16),
+ flags: MemFlags::trusted(),
+ },
+ "9C7AB478",
+ "ldrsh x28, [x20, x20, LSL #1]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410040B8",
+ "ldur w1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(12),
+ mem: AMode::UnsignedOffset(xreg(0), UImm12Scaled::maybe_from_i64(204, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "0CCC40B9",
+ "ldr w12, [x0, #204]",
+ ));
+ insns.push((
+ Inst::ULoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(12), I32),
+ flags: MemFlags::trusted(),
+ },
+ "41786CB8",
+ "ldr w1, [x2, x12, LSL #2]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410080B8",
+ "ldursw x1, [x2]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(12),
+ mem: AMode::UnsignedOffset(xreg(1), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "2CFCBFB9",
+ "ldrsw x12, [x1, #16380]",
+ ));
+ insns.push((
+ Inst::SLoad32 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(5), xreg(1), I32),
+ flags: MemFlags::trusted(),
+ },
+ "A178A1B8",
+ "ldrsw x1, [x5, x1, LSL #2]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410040F8",
+ "ldur x1, [x2]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410050F8",
+ "ldur x1, [x2, #-256]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41F04FF8",
+ "ldur x1, [x2, #255]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC7FF9",
+ "ldr x1, [x2, #32760]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(3)),
+ flags: MemFlags::trusted(),
+ },
+ "416863F8",
+ "ldr x1, [x2, x3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I64),
+ flags: MemFlags::trusted(),
+ },
+ "417863F8",
+ "ldr x1, [x2, x3, LSL #3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW),
+ flags: MemFlags::trusted(),
+ },
+ "41D863F8",
+ "ldr x1, [x2, w3, SXTW #3]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW),
+ flags: MemFlags::trusted(),
+ },
+ "41C863F8",
+ "ldr x1, [x2, w3, SXTW]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::Label(MemLabel::PCRel(64)),
+ flags: MemFlags::trusted(),
+ },
+ "01020058",
+ "ldr x1, pc+64",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410C41F8",
+ "ldr x1, [x2, #16]!",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410441F8",
+ "ldr x1, [x2], #16",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(32768, I8),
+ flags: MemFlags::trusted(),
+ },
+ "100090D2B063308B010240F9",
+ "movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(-32768, I8),
+ flags: MemFlags::trusted(),
+ },
+ "F0FF8F92B063308B010240F9",
+ "movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(1048576, I8), // 2^20
+ flags: MemFlags::trusted(),
+ },
+ "1002A0D2B063308B010240F9",
+ "movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1
+ flags: MemFlags::trusted(),
+ },
+ "300080521002A072B063308B010240F9",
+ "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 8, I64),
+ flags: MemFlags::trusted(),
+ },
+ "E18040F8",
+ "ldur x1, [x7, #8]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 1024, I64),
+ flags: MemFlags::trusted(),
+ },
+ "E10042F9",
+ "ldr x1, [x7, #1024]",
+ ));
+
+ insns.push((
+ Inst::ULoad64 {
+ rd: writable_xreg(1),
+ mem: AMode::RegOffset(xreg(7), 1048576, I64),
+ flags: MemFlags::trusted(),
+ },
+ "1002A0D2F060308B010240F9",
+ "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]",
+ ));
+
+ insns.push((
+ Inst::Store8 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41000038",
+ "sturb w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store8 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(4095, I8).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3F39",
+ "strb w1, [x2, #4095]",
+ ));
+ insns.push((
+ Inst::Store16 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "41000078",
+ "sturh w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store16 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8190, I16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3F79",
+ "strh w1, [x2, #8190]",
+ ));
+ insns.push((
+ Inst::Store32 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410000B8",
+ "stur w1, [x2]",
+ ));
+ insns.push((
+ Inst::Store32 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3FB9",
+ "str w1, [x2, #16380]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::Unscaled(xreg(2), SImm9::zero()),
+ flags: MemFlags::trusted(),
+ },
+ "410000F8",
+ "stur x1, [x2]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "41FC3FF9",
+ "str x1, [x2, #32760]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegReg(xreg(2), xreg(3)),
+ flags: MemFlags::trusted(),
+ },
+ "416823F8",
+ "str x1, [x2, x3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegScaled(xreg(2), xreg(3), I64),
+ flags: MemFlags::trusted(),
+ },
+ "417823F8",
+ "str x1, [x2, x3, LSL #3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW),
+ flags: MemFlags::trusted(),
+ },
+ "415823F8",
+ "str x1, [x2, w3, UXTW #3]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW),
+ flags: MemFlags::trusted(),
+ },
+ "414823F8",
+ "str x1, [x2, w3, UXTW]",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410C01F8",
+ "str x1, [x2, #16]!",
+ ));
+ insns.push((
+ Inst::Store64 {
+ rd: xreg(1),
+ mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "410401F8",
+ "str x1, [x2], #16",
+ ));
+
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)),
+ flags: MemFlags::trusted(),
+ },
+ "482500A9",
+ "stp x8, x9, [x10]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48A51FA9",
+ "stp x8, x9, [x10, #504]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48253CA9",
+ "stp x8, x9, [x10, #-64]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(21),
+ rt2: xreg(28),
+ mem: PairAMode::SignedOffset(xreg(1), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "357020A9",
+ "stp x21, x28, [x1, #-512]",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(8),
+ rt2: xreg(9),
+ mem: PairAMode::PreIndexed(
+ writable_xreg(10),
+ SImm7Scaled::maybe_from_i64(-64, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "4825BCA9",
+ "stp x8, x9, [x10, #-64]!",
+ ));
+ insns.push((
+ Inst::StoreP64 {
+ rt: xreg(15),
+ rt2: xreg(16),
+ mem: PairAMode::PostIndexed(
+ writable_xreg(20),
+ SImm7Scaled::maybe_from_i64(504, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "8FC29FA8",
+ "stp x15, x16, [x20], #504",
+ ));
+
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)),
+ flags: MemFlags::trusted(),
+ },
+ "482540A9",
+ "ldp x8, x9, [x10]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48A55FA9",
+ "ldp x8, x9, [x10, #504]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "48257CA9",
+ "ldp x8, x9, [x10, #-64]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()),
+ flags: MemFlags::trusted(),
+ },
+ "482560A9",
+ "ldp x8, x9, [x10, #-512]",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(9),
+ mem: PairAMode::PreIndexed(
+ writable_xreg(10),
+ SImm7Scaled::maybe_from_i64(-64, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "4825FCA9",
+ "ldp x8, x9, [x10, #-64]!",
+ ));
+ insns.push((
+ Inst::LoadP64 {
+ rt: writable_xreg(8),
+ rt2: writable_xreg(25),
+ mem: PairAMode::PostIndexed(
+ writable_xreg(12),
+ SImm7Scaled::maybe_from_i64(504, I64).unwrap(),
+ ),
+ flags: MemFlags::trusted(),
+ },
+ "88E5DFA8",
+ "ldp x8, x25, [x12], #504",
+ ));
+
+ insns.push((
+ Inst::Mov64 {
+ rd: writable_xreg(8),
+ rm: xreg(9),
+ },
+ "E80309AA",
+ "mov x8, x9",
+ ));
+ insns.push((
+ Inst::Mov32 {
+ rd: writable_xreg(8),
+ rm: xreg(9),
+ },
+ "E803092A",
+ "mov w8, w9",
+ ));
+
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FF9FD2",
+ "movz x8, #65535",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBFD2",
+ "movz x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDFD2",
+ "movz x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFFD2",
+ "movz x8, #65535, LSL #48",
+ ));
+ insns.push((
+ Inst::MovZ {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size32,
+ },
+ "E8FFBF52",
+ "movz w8, #65535, LSL #16",
+ ));
+
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FF9F92",
+ "movn x8, #65535",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBF92",
+ "movn x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDF92",
+ "movn x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFF92",
+ "movn x8, #65535, LSL #48",
+ ));
+ insns.push((
+ Inst::MovN {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size32,
+ },
+ "E8FF9F12",
+ "movn w8, #65535",
+ ));
+
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(12),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "0C0080F2",
+ "movk x12, #0",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(19),
+ imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "1300A0F2",
+ "movk x19, #0, LSL #16",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(3),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E3FF9FF2",
+ "movk x3, #65535",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFBFF2",
+ "movk x8, #65535, LSL #16",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFDFF2",
+ "movk x8, #65535, LSL #32",
+ ));
+ insns.push((
+ Inst::MovK {
+ rd: writable_xreg(8),
+ imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(),
+ size: OperandSize::Size64,
+ },
+ "E8FFFFF2",
+ "movk x8, #65535, LSL #48",
+ ));
+
+ insns.push((
+ Inst::CSel {
+ rd: writable_xreg(10),
+ rn: xreg(12),
+ rm: xreg(14),
+ cond: Cond::Hs,
+ },
+ "8A218E9A",
+ "csel x10, x12, x14, hs",
+ ));
+ insns.push((
+ Inst::CSet {
+ rd: writable_xreg(15),
+ cond: Cond::Ge,
+ },
+ "EFB79F9A",
+ "cset x15, ge",
+ ));
+ insns.push((
+ Inst::CCmpImm {
+ size: OperandSize::Size64,
+ rn: xreg(22),
+ imm: UImm5::maybe_from_u8(5).unwrap(),
+ nzcv: NZCV::new(false, false, true, true),
+ cond: Cond::Eq,
+ },
+ "C30A45FA",
+ "ccmp x22, #5, #nzCV, eq",
+ ));
+ insns.push((
+ Inst::CCmpImm {
+ size: OperandSize::Size32,
+ rn: xreg(3),
+ imm: UImm5::maybe_from_u8(30).unwrap(),
+ nzcv: NZCV::new(true, true, true, true),
+ cond: Cond::Gt,
+ },
+ "6FC85E7A",
+ "ccmp w3, #30, #NZCV, gt",
+ ));
+ insns.push((
+ Inst::MovToFpu {
+ rd: writable_vreg(31),
+ rn: xreg(0),
+ size: ScalarSize::Size64,
+ },
+ "1F00679E",
+ "fmov d31, x0",
+ ));
+ insns.push((
+ Inst::MovToFpu {
+ rd: writable_vreg(1),
+ rn: xreg(28),
+ size: ScalarSize::Size32,
+ },
+ "8103271E",
+ "fmov s1, w28",
+ ));
+ insns.push((
+ Inst::MovToVec {
+ rd: writable_vreg(0),
+ rn: xreg(0),
+ idx: 7,
+ size: VectorSize::Size8x8,
+ },
+ "001C0F4E",
+ "mov v0.b[7], w0",
+ ));
+ insns.push((
+ Inst::MovToVec {
+ rd: writable_vreg(20),
+ rn: xreg(21),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "B41E084E",
+ "mov v20.d[0], x21",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(3),
+ rn: vreg(27),
+ idx: 14,
+ size: VectorSize::Size8x16,
+ },
+ "633F1D0E",
+ "umov w3, v27.b[14]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(24),
+ rn: vreg(5),
+ idx: 3,
+ size: VectorSize::Size16x8,
+ },
+ "B83C0E0E",
+ "umov w24, v5.h[3]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(12),
+ rn: vreg(17),
+ idx: 1,
+ size: VectorSize::Size32x4,
+ },
+ "2C3E0C0E",
+ "mov w12, v17.s[1]",
+ ));
+ insns.push((
+ Inst::MovFromVec {
+ rd: writable_xreg(21),
+ rn: vreg(20),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "953E084E",
+ "mov x21, v20.d[0]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(0),
+ rn: vreg(0),
+ idx: 15,
+ size: VectorSize::Size8x16,
+ scalar_size: OperandSize::Size32,
+ },
+ "002C1F0E",
+ "smov w0, v0.b[15]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(12),
+ rn: vreg(13),
+ idx: 7,
+ size: VectorSize::Size8x8,
+ scalar_size: OperandSize::Size64,
+ },
+ "AC2D0F4E",
+ "smov x12, v13.b[7]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(23),
+ rn: vreg(31),
+ idx: 7,
+ size: VectorSize::Size16x8,
+ scalar_size: OperandSize::Size32,
+ },
+ "F72F1E0E",
+ "smov w23, v31.h[7]",
+ ));
+ insns.push((
+ Inst::MovFromVecSigned {
+ rd: writable_xreg(24),
+ rn: vreg(5),
+ idx: 1,
+ size: VectorSize::Size32x2,
+ scalar_size: OperandSize::Size64,
+ },
+ "B82C0C4E",
+ "smov x24, v5.s[1]",
+ ));
+ insns.push((
+ Inst::MovToNZCV { rn: xreg(13) },
+ "0D421BD5",
+ "msr nzcv, x13",
+ ));
+ insns.push((
+ Inst::MovFromNZCV {
+ rd: writable_xreg(27),
+ },
+ "1B423BD5",
+ "mrs x27, nzcv",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(25),
+ rn: xreg(7),
+ size: VectorSize::Size8x16,
+ },
+ "F90C014E",
+ "dup v25.16b, w7",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(2),
+ rn: xreg(23),
+ size: VectorSize::Size16x8,
+ },
+ "E20E024E",
+ "dup v2.8h, w23",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(0),
+ rn: xreg(28),
+ size: VectorSize::Size32x4,
+ },
+ "800F044E",
+ "dup v0.4s, w28",
+ ));
+ insns.push((
+ Inst::VecDup {
+ rd: writable_vreg(31),
+ rn: xreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "BF0C084E",
+ "dup v31.2d, x5",
+ ));
+ insns.push((
+ Inst::VecDupFromFpu {
+ rd: writable_vreg(14),
+ rn: vreg(19),
+ size: VectorSize::Size32x4,
+ },
+ "6E06044E",
+ "dup v14.4s, v19.s[0]",
+ ));
+ insns.push((
+ Inst::VecDupFromFpu {
+ rd: writable_vreg(18),
+ rn: vreg(10),
+ size: VectorSize::Size64x2,
+ },
+ "5205084E",
+ "dup v18.2d, v10.d[0]",
+ ));
+ insns.push((
+ Inst::VecDupImm {
+ rd: writable_vreg(31),
+ imm: ASIMDMovModImm::maybe_from_u64(255, ScalarSize::Size8).unwrap(),
+ invert: false,
+ size: VectorSize::Size8x16,
+ },
+ "FFE7074F",
+ "movi v31.16b, #255",
+ ));
+ insns.push((
+ Inst::VecDupImm {
+ rd: writable_vreg(0),
+ imm: ASIMDMovModImm::zero(),
+ invert: true,
+ size: VectorSize::Size16x4,
+ },
+ "0084002F",
+ "mvni v0.4h, #0",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl8,
+ rd: writable_vreg(4),
+ rn: vreg(27),
+ high_half: false,
+ },
+ "64A7080F",
+ "sxtl v4.8h, v27.8b",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl16,
+ rd: writable_vreg(17),
+ rn: vreg(19),
+ high_half: true,
+ },
+ "71A6104F",
+ "sxtl2 v17.4s, v19.8h",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Sxtl32,
+ rd: writable_vreg(30),
+ rn: vreg(6),
+ high_half: false,
+ },
+ "DEA4200F",
+ "sxtl v30.2d, v6.2s",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl8,
+ rd: writable_vreg(3),
+ rn: vreg(29),
+ high_half: true,
+ },
+ "A3A7086F",
+ "uxtl2 v3.8h, v29.16b",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl16,
+ rd: writable_vreg(15),
+ rn: vreg(12),
+ high_half: false,
+ },
+ "8FA5102F",
+ "uxtl v15.4s, v12.4h",
+ ));
+ insns.push((
+ Inst::VecExtend {
+ t: VecExtendOp::Uxtl32,
+ rd: writable_vreg(28),
+ rn: vreg(2),
+ high_half: true,
+ },
+ "5CA4206F",
+ "uxtl2 v28.2d, v2.4s",
+ ));
+
+ insns.push((
+ Inst::VecMovElement {
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ dest_idx: 7,
+ src_idx: 7,
+ size: VectorSize::Size16x8,
+ },
+ "E0771E6E",
+ "mov v0.h[7], v31.h[7]",
+ ));
+
+ insns.push((
+ Inst::VecMovElement {
+ rd: writable_vreg(31),
+ rn: vreg(16),
+ dest_idx: 1,
+ src_idx: 0,
+ size: VectorSize::Size32x2,
+ },
+ "1F060C6E",
+ "mov v31.s[1], v16.s[0]",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Xtn,
+ rd: writable_vreg(22),
+ rn: vreg(8),
+ size: VectorSize::Size32x2,
+ high_half: false,
+ },
+ "1629A10E",
+ "xtn v22.2s, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Sqxtn,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ size: VectorSize::Size16x8,
+ high_half: true,
+ },
+ "1F48614E",
+ "sqxtn2 v31.8h, v0.4s",
+ ));
+
+ insns.push((
+ Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Sqxtun,
+ rd: writable_vreg(16),
+ rn: vreg(23),
+ size: VectorSize::Size8x16,
+ high_half: false,
+ },
+ "F02A212E",
+ "sqxtun v16.8b, v23.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "410C284E",
+ "sqadd v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "810D7C4E",
+ "sqadd v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C0CA64E",
+ "sqadd v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqadd,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F40CED4E",
+ "sqadd v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "412C284E",
+ "sqsub v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "812D7C4E",
+ "sqsub v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C2CA64E",
+ "sqsub v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sqsub,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F42CED4E",
+ "sqsub v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "410C286E",
+ "uqadd v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "810D7C6E",
+ "uqadd v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C0CA66E",
+ "uqadd v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqadd,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F40CED6E",
+ "uqadd v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "412C286E",
+ "uqsub v1.16b, v2.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "812D7C6E",
+ "uqsub v1.8h, v12.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(12),
+ rn: vreg(2),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "4C2CA66E",
+ "uqsub v12.4s, v2.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Uqsub,
+ rd: writable_vreg(20),
+ rn: vreg(7),
+ rm: vreg(13),
+ size: VectorSize::Size64x2,
+ },
+ "F42CED6E",
+ "uqsub v20.2d, v7.2d, v13.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size8x16,
+ },
+ "E38E386E",
+ "cmeq v3.16b, v23.16b, v24.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size8x16,
+ },
+ "E336384E",
+ "cmgt v3.16b, v23.16b, v24.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size8x16,
+ },
+ "373D2C4E",
+ "cmge v23.16b, v9.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2534216E",
+ "cmhi v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size8x16,
+ },
+ "483C2F6E",
+ "cmhs v8.16b, v2.16b, v15.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size16x8,
+ },
+ "E38E786E",
+ "cmeq v3.8h, v23.8h, v24.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size16x8,
+ },
+ "E336784E",
+ "cmgt v3.8h, v23.8h, v24.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "373D6C4E",
+ "cmge v23.8h, v9.8h, v12.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size16x8,
+ },
+ "2534616E",
+ "cmhi v5.8h, v1.8h, v1.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size16x8,
+ },
+ "483C6F6E",
+ "cmhs v8.8h, v2.8h, v15.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmeq,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size32x4,
+ },
+ "E38EB86E",
+ "cmeq v3.4s, v23.4s, v24.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmgt,
+ rd: writable_vreg(3),
+ rn: vreg(23),
+ rm: vreg(24),
+ size: VectorSize::Size32x4,
+ },
+ "E336B84E",
+ "cmgt v3.4s, v23.4s, v24.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmge,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ rm: vreg(12),
+ size: VectorSize::Size32x4,
+ },
+ "373DAC4E",
+ "cmge v23.4s, v9.4s, v12.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhi,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size32x4,
+ },
+ "2534A16E",
+ "cmhi v5.4s, v1.4s, v1.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Cmhs,
+ rd: writable_vreg(8),
+ rn: vreg(2),
+ rm: vreg(15),
+ size: VectorSize::Size32x4,
+ },
+ "483CAF6E",
+ "cmhs v8.4s, v2.4s, v15.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmeq,
+ rd: writable_vreg(28),
+ rn: vreg(12),
+ rm: vreg(4),
+ size: VectorSize::Size32x2,
+ },
+ "9CE5240E",
+ "fcmeq v28.2s, v12.2s, v4.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmgt,
+ rd: writable_vreg(3),
+ rn: vreg(16),
+ rm: vreg(31),
+ size: VectorSize::Size64x2,
+ },
+ "03E6FF6E",
+ "fcmgt v3.2d, v16.2d, v31.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fcmge,
+ rd: writable_vreg(18),
+ rn: vreg(23),
+ rm: vreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "F2E6606E",
+ "fcmge v18.2d, v23.2d, v0.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::And,
+ rd: writable_vreg(20),
+ rn: vreg(19),
+ rm: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "741E324E",
+ "and v20.16b, v19.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Bic,
+ rd: writable_vreg(8),
+ rn: vreg(11),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "681D614E",
+ "bic v8.16b, v11.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Orr,
+ rd: writable_vreg(15),
+ rn: vreg(2),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "4F1CAC4E",
+ "orr v15.16b, v2.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Eor,
+ rd: writable_vreg(18),
+ rn: vreg(3),
+ rm: vreg(22),
+ size: VectorSize::Size8x16,
+ },
+ "721C366E",
+ "eor v18.16b, v3.16b, v22.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Bsl,
+ rd: writable_vreg(8),
+ rn: vreg(9),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "281D616E",
+ "bsl v8.16b, v9.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "88A5216E",
+ "umaxp v8.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(1),
+ rn: vreg(6),
+ rm: vreg(1),
+ size: VectorSize::Size16x8,
+ },
+ "C1A4616E",
+ "umaxp v1.8h, v6.8h, v1.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: writable_vreg(1),
+ rn: vreg(20),
+ rm: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "81A6B06E",
+ "umaxp v1.4s, v20.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2584214E",
+ "add v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(7),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "A785624E",
+ "add v7.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(18),
+ rn: vreg(9),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "3285A64E",
+ "add v18.4s, v9.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Add,
+ rd: writable_vreg(1),
+ rn: vreg(3),
+ rm: vreg(2),
+ size: VectorSize::Size64x2,
+ },
+ "6184E24E",
+ "add v1.2d, v3.2d, v2.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(5),
+ rn: vreg(1),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "2584216E",
+ "sub v5.16b, v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(7),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "A785626E",
+ "sub v7.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(18),
+ rn: vreg(9),
+ rm: vreg(6),
+ size: VectorSize::Size32x4,
+ },
+ "3285A66E",
+ "sub v18.4s, v9.4s, v6.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sub,
+ rd: writable_vreg(18),
+ rn: vreg(0),
+ rm: vreg(8),
+ size: VectorSize::Size64x2,
+ },
+ "1284E86E",
+ "sub v18.2d, v0.2d, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(25),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "399D284E",
+ "mul v25.16b, v9.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(30),
+ rn: vreg(30),
+ rm: vreg(12),
+ size: VectorSize::Size16x8,
+ },
+ "DE9F6C4E",
+ "mul v30.8h, v30.8h, v12.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "529EB24E",
+ "mul v18.4s, v18.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size8x16,
+ },
+ "5246326E",
+ "ushl v18.16b, v18.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size16x8,
+ },
+ "5246726E",
+ "ushl v18.8h, v18.8h, v18.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(18),
+ rn: vreg(1),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "3244B56E",
+ "ushl v18.4s, v1.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Ushl,
+ rd: writable_vreg(5),
+ rn: vreg(7),
+ rm: vreg(19),
+ size: VectorSize::Size64x2,
+ },
+ "E544F36E",
+ "ushl v5.2d, v7.2d, v19.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(18),
+ rn: vreg(18),
+ rm: vreg(18),
+ size: VectorSize::Size8x16,
+ },
+ "5246324E",
+ "sshl v18.16b, v18.16b, v18.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(30),
+ rn: vreg(1),
+ rm: vreg(29),
+ size: VectorSize::Size16x8,
+ },
+ "3E447D4E",
+ "sshl v30.8h, v1.8h, v29.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C846B54E",
+ "sshl v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Sshl,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(2),
+ size: VectorSize::Size64x2,
+ },
+ "C846E24E",
+ "sshl v8.2d, v22.2d, v2.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "816D236E",
+ "umin v1.16b, v12.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(30),
+ rn: vreg(20),
+ rm: vreg(10),
+ size: VectorSize::Size16x8,
+ },
+ "9E6E6A6E",
+ "umin v30.8h, v20.8h, v10.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umin,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C86EB56E",
+ "umin v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(1),
+ rn: vreg(12),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "816D234E",
+ "smin v1.16b, v12.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(30),
+ rn: vreg(20),
+ rm: vreg(10),
+ size: VectorSize::Size16x8,
+ },
+ "9E6E6A4E",
+ "smin v30.8h, v20.8h, v10.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smin,
+ rd: writable_vreg(8),
+ rn: vreg(22),
+ rm: vreg(21),
+ size: VectorSize::Size32x4,
+ },
+ "C86EB54E",
+ "smin v8.4s, v22.4s, v21.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(6),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x8,
+ },
+ "2665282E",
+ "umax v6.8b, v9.8b, v8.8b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(11),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "AB65626E",
+ "umax v11.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umax,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8865AE6E",
+ "umax v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(6),
+ rn: vreg(9),
+ rm: vreg(8),
+ size: VectorSize::Size8x16,
+ },
+ "2665284E",
+ "smax v6.16b, v9.16b, v8.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(11),
+ rn: vreg(13),
+ rm: vreg(2),
+ size: VectorSize::Size16x8,
+ },
+ "AB65624E",
+ "smax v11.8h, v13.8h, v2.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smax,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8865AE4E",
+ "smax v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(8),
+ rn: vreg(1),
+ rm: vreg(3),
+ size: VectorSize::Size8x16,
+ },
+ "2814236E",
+ "urhadd v8.16b, v1.16b, v3.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A215666E",
+ "urhadd v2.8h, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "8815AE6E",
+ "urhadd v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fadd,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ rm: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "1FD4304E",
+ "fadd v31.4s, v0.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fsub,
+ rd: writable_vreg(8),
+ rn: vreg(7),
+ rm: vreg(15),
+ size: VectorSize::Size64x2,
+ },
+ "E8D4EF4E",
+ "fsub v8.2d, v7.2d, v15.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fdiv,
+ rd: writable_vreg(1),
+ rn: vreg(3),
+ rm: vreg(4),
+ size: VectorSize::Size32x4,
+ },
+ "61FC246E",
+ "fdiv v1.4s, v3.4s, v4.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmax,
+ rd: writable_vreg(31),
+ rn: vreg(16),
+ rm: vreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "1FF6604E",
+ "fmax v31.2d, v16.2d, v0.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmin,
+ rd: writable_vreg(5),
+ rn: vreg(19),
+ rm: vreg(26),
+ size: VectorSize::Size32x4,
+ },
+ "65F6BA4E",
+ "fmin v5.4s, v19.4s, v26.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Fmul,
+ rd: writable_vreg(2),
+ rn: vreg(0),
+ rm: vreg(5),
+ size: VectorSize::Size64x2,
+ },
+ "02DC656E",
+ "fmul v2.2d, v0.2d, v5.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90BD214E",
+ "addp v16.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88BDAE4E",
+ "addp v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Umlal,
+ rd: writable_vreg(9),
+ rn: vreg(20),
+ rm: vreg(17),
+ size: VectorSize::Size32x2,
+ },
+ "8982B12E",
+ "umlal v9.2d, v20.2s, v17.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "9039014E",
+ "zip1 v16.16b, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A239464E",
+ "zip1 v2.8h, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88398E4E",
+ "zip1 v8.4s, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: writable_vreg(9),
+ rn: vreg(20),
+ rm: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "893AD14E",
+ "zip1 v9.2d, v20.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90C1210E",
+ "smull v16.8h, v12.8b, v1.8b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A2C1660E",
+ "smull v2.4s, v13.4h, v6.4h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88C1AE0E",
+ "smull v8.2d, v12.2s, v14.2s",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(16),
+ rn: vreg(12),
+ rm: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "90C1214E",
+ "smull2 v16.8h, v12.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(2),
+ rn: vreg(13),
+ rm: vreg(6),
+ size: VectorSize::Size16x8,
+ },
+ "A2C1664E",
+ "smull2 v2.4s, v13.8h, v6.8h",
+ ));
+
+ insns.push((
+ Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ rm: vreg(14),
+ size: VectorSize::Size32x4,
+ },
+ "88C1AE4E",
+ "smull2 v8.2d, v12.4s, v14.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd: writable_vreg(20),
+ rn: vreg(17),
+ size: VectorSize::Size8x8,
+ },
+ "345A202E",
+ "mvn v20.8b, v17.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd: writable_vreg(2),
+ rn: vreg(1),
+ size: VectorSize::Size32x4,
+ },
+ "2258206E",
+ "mvn v2.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(3),
+ rn: vreg(7),
+ size: VectorSize::Size8x8,
+ },
+ "E3B8202E",
+ "neg v3.8b, v7.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(8),
+ rn: vreg(12),
+ size: VectorSize::Size8x16,
+ },
+ "88B9206E",
+ "neg v8.16b, v12.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ size: VectorSize::Size16x8,
+ },
+ "E0BB606E",
+ "neg v0.8h, v31.8h",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(2),
+ rn: vreg(3),
+ size: VectorSize::Size32x4,
+ },
+ "62B8A06E",
+ "neg v2.4s, v3.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd: writable_vreg(10),
+ rn: vreg(8),
+ size: VectorSize::Size64x2,
+ },
+ "0AB9E06E",
+ "neg v10.2d, v8.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(3),
+ rn: vreg(1),
+ size: VectorSize::Size8x8,
+ },
+ "23B8200E",
+ "abs v3.8b, v1.8b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(1),
+ rn: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "21B8204E",
+ "abs v1.16b, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(29),
+ rn: vreg(28),
+ size: VectorSize::Size16x8,
+ },
+ "9DBB604E",
+ "abs v29.8h, v28.8h",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(7),
+ rn: vreg(8),
+ size: VectorSize::Size32x4,
+ },
+ "07B9A04E",
+ "abs v7.4s, v8.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size64x2,
+ },
+ "41B9E04E",
+ "abs v1.2d, v10.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fabs,
+ rd: writable_vreg(15),
+ rn: vreg(16),
+ size: VectorSize::Size32x4,
+ },
+ "0FFAA04E",
+ "fabs v15.4s, v16.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fneg,
+ rd: writable_vreg(31),
+ rn: vreg(0),
+ size: VectorSize::Size32x4,
+ },
+ "1FF8A06E",
+ "fneg v31.4s, v0.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fsqrt,
+ rd: writable_vreg(7),
+ rn: vreg(18),
+ size: VectorSize::Size64x2,
+ },
+ "47FAE16E",
+ "fsqrt v7.2d, v18.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Rev64,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size32x4,
+ },
+ "4109A04E",
+ "rev64 v1.4s, v10.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(12),
+ rn: vreg(5),
+ size: VectorSize::Size8x8,
+ },
+ "AC38212E",
+ "shll v12.8h, v5.8b, #8",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(9),
+ rn: vreg(1),
+ size: VectorSize::Size16x4,
+ },
+ "2938612E",
+ "shll v9.4s, v1.4h, #16",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd: writable_vreg(1),
+ rn: vreg(10),
+ size: VectorSize::Size32x2,
+ },
+ "4139A12E",
+ "shll v1.2d, v10.2s, #32",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fcvtzs,
+ rd: writable_vreg(4),
+ rn: vreg(22),
+ size: VectorSize::Size32x4,
+ },
+ "C4BAA14E",
+ "fcvtzs v4.4s, v22.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Fcvtzu,
+ rd: writable_vreg(29),
+ rn: vreg(15),
+ size: VectorSize::Size64x2,
+ },
+ "FDB9E16E",
+ "fcvtzu v29.2d, v15.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Scvtf,
+ rd: writable_vreg(20),
+ rn: vreg(8),
+ size: VectorSize::Size32x4,
+ },
+ "14D9214E",
+ "scvtf v20.4s, v8.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Ucvtf,
+ rd: writable_vreg(10),
+ rn: vreg(19),
+ size: VectorSize::Size64x2,
+ },
+ "6ADA616E",
+ "ucvtf v10.2d, v19.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintn,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B8A214E",
+ "frintn v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintn,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C8A614E",
+ "frintn v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintz,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B9AA14E",
+ "frintz v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintz,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C9AE14E",
+ "frintz v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintm,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B9A214E",
+ "frintm v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintm,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C9A614E",
+ "frintm v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintp,
+ rd: writable_vreg(11),
+ rn: vreg(18),
+ size: VectorSize::Size32x4,
+ },
+ "4B8AA14E",
+ "frintp v11.4s, v18.4s",
+ ));
+
+ insns.push((
+ Inst::VecMisc {
+ op: VecMisc2::Frintp,
+ rd: writable_vreg(12),
+ rn: vreg(17),
+ size: VectorSize::Size64x2,
+ },
+ "2C8AE14E",
+ "frintp v12.2d, v17.2d",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(2),
+ rn: vreg(1),
+ size: VectorSize::Size8x16,
+ },
+ "22A8316E",
+ "uminv b2, v1.16b",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(3),
+ rn: vreg(11),
+ size: VectorSize::Size16x8,
+ },
+ "63A9716E",
+ "uminv h3, v11.8h",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: writable_vreg(18),
+ rn: vreg(4),
+ size: VectorSize::Size32x4,
+ },
+ "92A8B16E",
+ "uminv s18, v4.4s",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(2),
+ rn: vreg(29),
+ size: VectorSize::Size8x16,
+ },
+ "A2BB314E",
+ "addv b2, v29.16b",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(3),
+ rn: vreg(21),
+ size: VectorSize::Size16x8,
+ },
+ "A3BA714E",
+ "addv h3, v21.8h",
+ ));
+
+ insns.push((
+ Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: writable_vreg(18),
+ rn: vreg(5),
+ size: VectorSize::Size32x4,
+ },
+ "B2B8B14E",
+ "addv s18, v5.4s",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(27),
+ rn: vreg(5),
+ imm: 7,
+ size: VectorSize::Size8x16,
+ },
+ "BB540F4F",
+ "shl v27.16b, v5.16b, #7",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ imm: 0,
+ size: VectorSize::Size8x16,
+ },
+ "C157084F",
+ "shl v1.16b, v30.16b, #0",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: writable_vreg(26),
+ rn: vreg(6),
+ imm: 16,
+ size: VectorSize::Size16x8,
+ },
+ "DA04104F",
+ "sshr v26.8h, v6.8h, #16",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: writable_vreg(3),
+ rn: vreg(19),
+ imm: 1,
+ size: VectorSize::Size16x8,
+ },
+ "63061F4F",
+ "sshr v3.8h, v19.8h, #1",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Ushr,
+ rd: writable_vreg(25),
+ rn: vreg(6),
+ imm: 32,
+ size: VectorSize::Size32x4,
+ },
+ "D904206F",
+ "ushr v25.4s, v6.4s, #32",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Ushr,
+ rd: writable_vreg(5),
+ rn: vreg(21),
+ imm: 1,
+ size: VectorSize::Size32x4,
+ },
+ "A5063F6F",
+ "ushr v5.4s, v21.4s, #1",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(22),
+ rn: vreg(13),
+ imm: 63,
+ size: VectorSize::Size64x2,
+ },
+ "B6557F4F",
+ "shl v22.2d, v13.2d, #63",
+ ));
+
+ insns.push((
+ Inst::VecShiftImm {
+ op: VecShiftImmOp::Shl,
+ rd: writable_vreg(23),
+ rn: vreg(9),
+ imm: 0,
+ size: VectorSize::Size64x2,
+ },
+ "3755404F",
+ "shl v23.2d, v9.2d, #0",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 0,
+ },
+ "C103116E",
+ "ext v1.16b, v30.16b, v17.16b, #0",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 8,
+ },
+ "C143116E",
+ "ext v1.16b, v30.16b, v17.16b, #8",
+ ));
+
+ insns.push((
+ Inst::VecExtract {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ rm: vreg(17),
+ imm4: 15,
+ },
+ "C17B116E",
+ "ext v1.16b, v30.16b, v17.16b, #15",
+ ));
+
+ insns.push((
+ Inst::VecTbl {
+ rd: writable_vreg(0),
+ rn: vreg(31),
+ rm: vreg(16),
+ is_extension: false,
+ },
+ "E003104E",
+ "tbl v0.16b, { v31.16b }, v16.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl {
+ rd: writable_vreg(4),
+ rn: vreg(12),
+ rm: vreg(23),
+ is_extension: true,
+ },
+ "8411174E",
+ "tbx v4.16b, { v12.16b }, v23.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl2 {
+ rd: writable_vreg(16),
+ rn: vreg(31),
+ rn2: vreg(0),
+ rm: vreg(26),
+ is_extension: false,
+ },
+ "F0231A4E",
+ "tbl v16.16b, { v31.16b, v0.16b }, v26.16b",
+ ));
+
+ insns.push((
+ Inst::VecTbl2 {
+ rd: writable_vreg(3),
+ rn: vreg(11),
+ rn2: vreg(12),
+ rm: vreg(19),
+ is_extension: true,
+ },
+ "6331134E",
+ "tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
+ ));
+
+ insns.push((
+ Inst::VecLoadReplicate {
+ rd: writable_vreg(31),
+ rn: xreg(0),
+ size: VectorSize::Size64x2,
+ },
+ "1FCC404D",
+ "ld1r { v31.2d }, [x0]",
+ ));
+
+ insns.push((
+ Inst::VecLoadReplicate {
+ rd: writable_vreg(0),
+ rn: xreg(25),
+ size: VectorSize::Size8x8,
+ },
+ "20C3400D",
+ "ld1r { v0.8b }, [x25]",
+ ));
+
+ insns.push((
+ Inst::VecCSel {
+ rd: writable_vreg(5),
+ rn: vreg(10),
+ rm: vreg(19),
+ cond: Cond::Gt,
+ },
+ "6C000054651EB34E02000014451DAA4E",
+ "vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
+ ));
+
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 8,
+ to_bits: 32,
+ },
+ "411C0053",
+ "uxtb w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 8,
+ to_bits: 32,
+ },
+ "411C0013",
+ "sxtb w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 16,
+ to_bits: 32,
+ },
+ "413C0053",
+ "uxth w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 16,
+ to_bits: 32,
+ },
+ "413C0013",
+ "sxth w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 8,
+ to_bits: 64,
+ },
+ "411C0053",
+ "uxtb x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 8,
+ to_bits: 64,
+ },
+ "411C4093",
+ "sxtb x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 16,
+ to_bits: 64,
+ },
+ "413C0053",
+ "uxth x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 16,
+ to_bits: 64,
+ },
+ "413C4093",
+ "sxth x1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: false,
+ from_bits: 32,
+ to_bits: 64,
+ },
+ "E103022A",
+ "mov w1, w2",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_xreg(1),
+ rn: xreg(2),
+ signed: true,
+ from_bits: 32,
+ to_bits: 64,
+ },
+ "417C4093",
+ "sxtw x1, w2",
+ ));
+
+ insns.push((
+ Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(64),
+ },
+ "10000014",
+ "b 64",
+ ));
+
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::NotZero(xreg(8)),
+ },
+ "480000B40000A0D4",
+ "cbz x8, 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Zero(xreg(8)),
+ },
+ "480000B50000A0D4",
+ "cbnz x8, 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ne),
+ },
+ "400000540000A0D4",
+ "b.eq 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Eq),
+ },
+ "410000540000A0D4",
+ "b.ne 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Lo),
+ },
+ "420000540000A0D4",
+ "b.hs 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Hs),
+ },
+ "430000540000A0D4",
+ "b.lo 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Pl),
+ },
+ "440000540000A0D4",
+ "b.mi 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Mi),
+ },
+ "450000540000A0D4",
+ "b.pl 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Vc),
+ },
+ "460000540000A0D4",
+ "b.vs 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Vs),
+ },
+ "470000540000A0D4",
+ "b.vc 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ls),
+ },
+ "480000540000A0D4",
+ "b.hi 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Hi),
+ },
+ "490000540000A0D4",
+ "b.ls 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Lt),
+ },
+ "4A0000540000A0D4",
+ "b.ge 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Ge),
+ },
+ "4B0000540000A0D4",
+ "b.lt 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Le),
+ },
+ "4C0000540000A0D4",
+ "b.gt 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Gt),
+ },
+ "4D0000540000A0D4",
+ "b.le 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Nv),
+ },
+ "4E0000540000A0D4",
+ "b.al 8 ; udf",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ trap_code: TrapCode::Interrupt,
+ kind: CondBrKind::Cond(Cond::Al),
+ },
+ "4F0000540000A0D4",
+ "b.nv 8 ; udf",
+ ));
+
+ insns.push((
+ Inst::CondBr {
+ taken: BranchTarget::ResolvedOffset(64),
+ not_taken: BranchTarget::ResolvedOffset(128),
+ kind: CondBrKind::Cond(Cond::Le),
+ },
+ "0D02005420000014",
+ "b.le 64 ; b 128",
+ ));
+
+ insns.push((
+ Inst::Call {
+ info: Box::new(CallInfo {
+ dest: ExternalName::testcase("test0"),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ opcode: Opcode::Call,
+ caller_callconv: CallConv::SystemV,
+ callee_callconv: CallConv::SystemV,
+ }),
+ },
+ "00000094",
+ "bl 0",
+ ));
+
+ insns.push((
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rn: xreg(10),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ opcode: Opcode::CallIndirect,
+ caller_callconv: CallConv::SystemV,
+ callee_callconv: CallConv::SystemV,
+ }),
+ },
+ "40013FD6",
+ "blr x10",
+ ));
+
+ insns.push((
+ Inst::IndirectBr {
+ rn: xreg(3),
+ targets: vec![],
+ },
+ "60001FD6",
+ "br x3",
+ ));
+
+ insns.push((Inst::Brk, "000020D4", "brk #0"));
+
+ insns.push((
+ Inst::Adr {
+ rd: writable_xreg(15),
+ off: (1 << 20) - 4,
+ },
+ "EFFF7F10",
+ "adr x15, pc+1048572",
+ ));
+
+ insns.push((
+ Inst::FpuMove64 {
+ rd: writable_vreg(8),
+ rn: vreg(4),
+ },
+ "881CA40E",
+ "mov v8.8b, v4.8b",
+ ));
+
+ insns.push((
+ Inst::FpuMove128 {
+ rd: writable_vreg(17),
+ rn: vreg(26),
+ },
+ "511FBA4E",
+ "mov v17.16b, v26.16b",
+ ));
+
+ insns.push((
+ Inst::FpuMoveFromVec {
+ rd: writable_vreg(1),
+ rn: vreg(30),
+ idx: 2,
+ size: VectorSize::Size32x4,
+ },
+ "C107145E",
+ "mov s1, v30.s[2]",
+ ));
+
+ insns.push((
+ Inst::FpuMoveFromVec {
+ rd: writable_vreg(23),
+ rn: vreg(11),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ },
+ "7705085E",
+ "mov d23, v11.d[0]",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Abs32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3201E",
+ "fabs s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Abs64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3601E",
+ "fabs d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Neg32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43211E",
+ "fneg s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Neg64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43611E",
+ "fneg d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Sqrt32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3211E",
+ "fsqrt s15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Sqrt64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3611E",
+ "fsqrt d15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Cvt32To64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CFC3221E",
+ "fcvt d15, s30",
+ ));
+
+ insns.push((
+ Inst::FpuRR {
+ fpu_op: FPUOp1::Cvt64To32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ },
+ "CF43621E",
+ "fcvt s15, d30",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Add32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF2B3F1E",
+ "fadd s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Add64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF2B7F1E",
+ "fadd d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sub32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF3B3F1E",
+ "fsub s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sub64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF3B7F1E",
+ "fsub d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Mul32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF0B3F1E",
+ "fmul s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Mul64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF0B7F1E",
+ "fmul d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Div32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF1B3F1E",
+ "fdiv s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Div64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF1B7F1E",
+ "fdiv d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Max32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF4B3F1E",
+ "fmax s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Max64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF4B7F1E",
+ "fmax d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Min32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF5B3F1E",
+ "fmin s15, s30, s31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Min64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ },
+ "CF5B7F1E",
+ "fmin d15, d30, d31",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Uqadd64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D50EF77E",
+ "uqadd d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sqadd64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D50EF75E",
+ "sqadd d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Uqsub64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D52EF77E",
+ "uqsub d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRR {
+ fpu_op: FPUOp2::Sqsub64,
+ rd: writable_vreg(21),
+ rn: vreg(22),
+ rm: vreg(23),
+ },
+ "D52EF75E",
+ "sqsub d21, d22, d23",
+ ));
+
+ insns.push((
+ Inst::FpuRRRR {
+ fpu_op: FPUOp3::MAdd32,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ ra: vreg(1),
+ },
+ "CF071F1F",
+ "fmadd s15, s30, s31, s1",
+ ));
+
+ insns.push((
+ Inst::FpuRRRR {
+ fpu_op: FPUOp3::MAdd64,
+ rd: writable_vreg(15),
+ rn: vreg(30),
+ rm: vreg(31),
+ ra: vreg(1),
+ },
+ "CF075F1F",
+ "fmadd d15, d30, d31, d1",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
+ rd: writable_vreg(2),
+ rn: vreg(5),
+ },
+ "A204202F",
+ "ushr v2.2s, v5.2s, #32",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
+ rd: writable_vreg(2),
+ rn: vreg(5),
+ },
+ "A204417F",
+ "ushr d2, d5, #63",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
+ rd: writable_vreg(4),
+ rn: vreg(10),
+ },
+ "44553F2F",
+ "sli v4.2s, v10.2s, #31",
+ ));
+
+ insns.push((
+ Inst::FpuRRI {
+ fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
+ rd: writable_vreg(4),
+ rn: vreg(10),
+ },
+ "44557F7F",
+ "sli d4, d10, #63",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToU32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100391E",
+ "fcvtzu w1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToU64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100399E",
+ "fcvtzu x1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToI32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100381E",
+ "fcvtzs w1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F32ToI64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100389E",
+ "fcvtzs x1, s4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToU32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100791E",
+ "fcvtzu w1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToU64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100799E",
+ "fcvtzu x1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToI32,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100781E",
+ "fcvtzs w1, d4",
+ ));
+
+ insns.push((
+ Inst::FpuToInt {
+ op: FpuToIntOp::F64ToI64,
+ rd: writable_xreg(1),
+ rn: vreg(4),
+ },
+ "8100789E",
+ "fcvtzs x1, d4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U32ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100231E",
+ "ucvtf s1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I32ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100221E",
+ "scvtf s1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U32ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100631E",
+ "ucvtf d1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I32ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100621E",
+ "scvtf d1, w4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U64ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100239E",
+ "ucvtf s1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I64ToF32,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100229E",
+ "scvtf s1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::U64ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100639E",
+ "ucvtf d1, x4",
+ ));
+
+ insns.push((
+ Inst::IntToFpu {
+ op: IntToFpuOp::I64ToF64,
+ rd: writable_vreg(1),
+ rn: xreg(4),
+ },
+ "8100629E",
+ "scvtf d1, x4",
+ ));
+
+ insns.push((
+ Inst::FpuCmp32 {
+ rn: vreg(23),
+ rm: vreg(24),
+ },
+ "E022381E",
+ "fcmp s23, s24",
+ ));
+
+ insns.push((
+ Inst::FpuCmp64 {
+ rn: vreg(23),
+ rm: vreg(24),
+ },
+ "E022781E",
+ "fcmp d23, d24",
+ ));
+
+ insns.push((
+ Inst::FpuLoad32 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F32),
+ flags: MemFlags::trusted(),
+ },
+ "107969BC",
+ "ldr s16, [x8, x9, LSL #2]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad64 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F64),
+ flags: MemFlags::trusted(),
+ },
+ "107969FC",
+ "ldr d16, [x8, x9, LSL #3]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad128 {
+ rd: writable_vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), I128),
+ flags: MemFlags::trusted(),
+ },
+ "1079E93C",
+ "ldr q16, [x8, x9, LSL #4]",
+ ));
+
+ insns.push((
+ Inst::FpuLoad32 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000001C",
+ "ldr s16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuLoad64 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000005C",
+ "ldr d16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuLoad128 {
+ rd: writable_vreg(16),
+ mem: AMode::Label(MemLabel::PCRel(8)),
+ flags: MemFlags::trusted(),
+ },
+ "5000009C",
+ "ldr q16, pc+8",
+ ));
+
+ insns.push((
+ Inst::FpuStore32 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F32),
+ flags: MemFlags::trusted(),
+ },
+ "107929BC",
+ "str s16, [x8, x9, LSL #2]",
+ ));
+
+ insns.push((
+ Inst::FpuStore64 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), F64),
+ flags: MemFlags::trusted(),
+ },
+ "107929FC",
+ "str d16, [x8, x9, LSL #3]",
+ ));
+
+ insns.push((
+ Inst::FpuStore128 {
+ rd: vreg(16),
+ mem: AMode::RegScaled(xreg(8), xreg(9), I128),
+ flags: MemFlags::trusted(),
+ },
+ "1079A93C",
+ "str q16, [x8, x9, LSL #4]",
+ ));
+
+ insns.push((
+ Inst::LoadFpuConst64 {
+ rd: writable_vreg(16),
+ const_data: 1.0_f64.to_bits(),
+ },
+ "5000005C03000014000000000000F03F",
+ "ldr d16, pc+8 ; b 12 ; data.f64 1",
+ ));
+
+ insns.push((
+ Inst::LoadFpuConst128 {
+ rd: writable_vreg(5),
+ const_data: 0x0f0e0d0c0b0a09080706050403020100,
+ },
+ "4500009C05000014000102030405060708090A0B0C0D0E0F",
+ "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100",
+ ));
+
+ insns.push((
+ Inst::FpuCSel32 {
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(3),
+ cond: Cond::Hi,
+ },
+ "418C231E",
+ "fcsel s1, s2, s3, hi",
+ ));
+
+ insns.push((
+ Inst::FpuCSel64 {
+ rd: writable_vreg(1),
+ rn: vreg(2),
+ rm: vreg(3),
+ cond: Cond::Eq,
+ },
+ "410C631E",
+ "fcsel d1, d2, d3, eq",
+ ));
+
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Minus32,
+ },
+ "1743251E",
+ "frintm s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Minus64,
+ },
+ "1743651E",
+ "frintm d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Plus32,
+ },
+ "17C3241E",
+ "frintp s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Plus64,
+ },
+ "17C3641E",
+ "frintp d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Zero32,
+ },
+ "17C3251E",
+ "frintz s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Zero64,
+ },
+ "17C3651E",
+ "frintz d23, d24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Nearest32,
+ },
+ "1743241E",
+ "frintn s23, s24",
+ ));
+ insns.push((
+ Inst::FpuRound {
+ rd: writable_vreg(23),
+ rn: vreg(24),
+ op: FpuRoundMode::Nearest64,
+ },
+ "1743641E",
+ "frintn d23, d24",
+ ));
+
+ insns.push((
+ Inst::AtomicRMW {
+ ty: I16,
+ op: inst_common::AtomicRmwOp::Xor,
+ },
+ "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5",
+ "atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
+ ));
+
+ insns.push((
+ Inst::AtomicRMW {
+ ty: I32,
+ op: inst_common::AtomicRmwOp::Xchg,
+ },
+ "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5",
+ "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }",
+ ));
+
+ insns.push((
+ Inst::AtomicCAS {
+ ty: I8,
+ },
+ "BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5",
+ "atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
+ ));
+
+ insns.push((
+ Inst::AtomicCAS {
+ ty: I64,
+ },
+ "BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5",
+ "atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }"
+ ));
+
+ insns.push((
+ Inst::AtomicLoad {
+ ty: I8,
+ r_data: writable_xreg(7),
+ r_addr: xreg(28),
+ },
+ "BF3B03D587034039",
+ "atomically { x7 = zero_extend_8_bits_at[x28] }",
+ ));
+
+ insns.push((
+ Inst::AtomicLoad {
+ ty: I64,
+ r_data: writable_xreg(28),
+ r_addr: xreg(7),
+ },
+ "BF3B03D5FC0040F9",
+ "atomically { x28 = zero_extend_64_bits_at[x7] }",
+ ));
+
+ insns.push((
+ Inst::AtomicStore {
+ ty: I16,
+ r_data: xreg(17),
+ r_addr: xreg(8),
+ },
+ "11010079BF3B03D5",
+ "atomically { 16_bits_at[x8] = x17 }",
+ ));
+
+ insns.push((
+ Inst::AtomicStore {
+ ty: I32,
+ r_data: xreg(18),
+ r_addr: xreg(7),
+ },
+ "F20000B9BF3B03D5",
+ "atomically { 32_bits_at[x7] = x18 }",
+ ));
+
+ insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish"));
+
+ let flags = settings::Flags::new(settings::builder());
+ let rru = create_reg_universe(&flags);
+ let emit_info = EmitInfo::new(flags);
+ for (insn, expected_encoding, expected_printing) in insns {
+ println!(
+ "AArch64: {:?}, {}, {}",
+ insn, expected_encoding, expected_printing
+ );
+
+ // Check the printed text is as expected.
+ let actual_printing = insn.show_rru(Some(&rru));
+ assert_eq!(expected_printing, actual_printing);
+
+ let mut sink = test_utils::TestCodeSink::new();
+ let mut buffer = MachBuffer::new();
+ insn.emit(&mut buffer, &emit_info, &mut Default::default());
+ let buffer = buffer.finish();
+ buffer.emit(&mut sink);
+ let actual_encoding = &sink.stringify();
+ assert_eq!(expected_encoding, actual_encoding);
+ }
+}
+
+#[test]
+fn test_cond_invert() {
+ for cond in vec![
+ Cond::Eq,
+ Cond::Ne,
+ Cond::Hs,
+ Cond::Lo,
+ Cond::Mi,
+ Cond::Pl,
+ Cond::Vs,
+ Cond::Vc,
+ Cond::Hi,
+ Cond::Ls,
+ Cond::Ge,
+ Cond::Lt,
+ Cond::Gt,
+ Cond::Le,
+ Cond::Al,
+ Cond::Nv,
+ ]
+ .into_iter()
+ {
+ assert_eq!(cond.invert().invert(), cond);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs
new file mode 100644
index 0000000000..b6da0402bc
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs
@@ -0,0 +1,1025 @@
+//! AArch64 ISA definitions: immediate constants.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#[allow(dead_code)]
+use crate::ir::types::*;
+use crate::ir::Type;
+use crate::isa::aarch64::inst::{OperandSize, ScalarSize};
+
+use regalloc::{PrettyPrint, RealRegUniverse};
+
+use core::convert::TryFrom;
+use std::string::String;
+
+/// An immediate that represents the NZCV flags.
+#[derive(Clone, Copy, Debug)]
+pub struct NZCV {
+ /// The negative condition flag.
+ n: bool,
+ /// The zero condition flag.
+ z: bool,
+ /// The carry condition flag.
+ c: bool,
+ /// The overflow condition flag.
+ v: bool,
+}
+
+impl NZCV {
+ pub fn new(n: bool, z: bool, c: bool, v: bool) -> NZCV {
+ NZCV { n, z, c, v }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ (u32::from(self.n) << 3)
+ | (u32::from(self.z) << 2)
+ | (u32::from(self.c) << 1)
+ | u32::from(self.v)
+ }
+}
+
+/// An unsigned 5-bit immediate.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm5 {
+ /// The value.
+ value: u8,
+}
+
+impl UImm5 {
+ pub fn maybe_from_u8(value: u8) -> Option<UImm5> {
+ if value < 32 {
+ Some(UImm5 { value })
+ } else {
+ None
+ }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ u32::from(self.value)
+ }
+}
+
+/// A signed, scaled 7-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct SImm7Scaled {
+ /// The value.
+ pub value: i16,
+ /// The value is a multiple of the size of this type.
+ pub scale_ty: Type,
+}
+
+impl SImm7Scaled {
+ /// Create a SImm7Scaled from a raw offset and the known scale type, if
+ /// possible.
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> {
+ assert!(scale_ty == I64 || scale_ty == I32);
+ let scale = scale_ty.bytes();
+ assert!(scale.is_power_of_two());
+ let scale = i64::from(scale);
+ let upper_limit = 63 * scale;
+ let lower_limit = -(64 * scale);
+ if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 {
+ Some(SImm7Scaled {
+ value: i16::try_from(value).unwrap(),
+ scale_ty,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero(scale_ty: Type) -> SImm7Scaled {
+ SImm7Scaled { value: 0, scale_ty }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ let ty_bytes: i16 = self.scale_ty.bytes() as i16;
+ let scaled: i16 = self.value / ty_bytes;
+ assert!(scaled <= 63 && scaled >= -64);
+ let scaled: i8 = scaled as i8;
+ let encoded: u32 = scaled as u32;
+ encoded & 0x7f
+ }
+}
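+
+// Illustrative sketch (a hypothetical test, not taken from upstream): with an
+// I64 scale the offset must be a multiple of 8 in the range -512..=504, and
+// `bits()` returns the offset divided by the scale.
+#[test]
+fn simm7_scaled_example() {
+ assert_eq!(SImm7Scaled::maybe_from_i64(24, I64).unwrap().bits(), 3);
+ // 512 exceeds the maximum reachable offset of 63 * 8 = 504.
+ assert!(SImm7Scaled::maybe_from_i64(512, I64).is_none());
+}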
+
+#[derive(Clone, Copy, Debug)]
+pub struct FPULeftShiftImm {
+ pub amount: u8,
+ pub lane_size_in_bits: u8,
+}
+
+impl FPULeftShiftImm {
+ pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
+ debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
+ if amount < lane_size_in_bits {
+ Some(Self {
+ amount,
+ lane_size_in_bits,
+ })
+ } else {
+ None
+ }
+ }
+
+ pub fn enc(&self) -> u32 {
+ debug_assert!(self.lane_size_in_bits.is_power_of_two());
+ debug_assert!(self.lane_size_in_bits > self.amount);
+ // The encoding of the immediate follows the table below,
+ // where xs encode the shift amount.
+ //
+ // | lane_size_in_bits | encoding |
+ // +------------------------------+
+ // | 8 | 0001xxx |
+ // | 16 | 001xxxx |
+ // | 32 | 01xxxxx |
+ // | 64 | 1xxxxxx |
+ //
+ // The highest one bit is represented by `lane_size_in_bits`. Since
+ // `lane_size_in_bits` is a power of 2 and `amount` is less
+ // than `lane_size_in_bits`, they can be ORed
+ // together to produce the encoded value.
+ u32::from(self.lane_size_in_bits | self.amount)
+ }
+}
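+
+// Illustrative sketch (a hypothetical test, not taken from upstream): the
+// OR-based encoding above maps a 32-bit lane shifted left by 5 to
+// 0b0100000 | 0b0000101 = 0b0100101.
+#[test]
+fn fpu_left_shift_imm_enc_example() {
+ assert_eq!(FPULeftShiftImm::maybe_from_u8(5, 32).unwrap().enc(), 0b0100101);
+ // A shift amount equal to the lane size is not representable.
+ assert!(FPULeftShiftImm::maybe_from_u8(32, 32).is_none());
+}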
+
+#[derive(Clone, Copy, Debug)]
+pub struct FPURightShiftImm {
+ pub amount: u8,
+ pub lane_size_in_bits: u8,
+}
+
+impl FPURightShiftImm {
+ pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
+ debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
+ if amount > 0 && amount <= lane_size_in_bits {
+ Some(Self {
+ amount,
+ lane_size_in_bits,
+ })
+ } else {
+ None
+ }
+ }
+
+ pub fn enc(&self) -> u32 {
+ debug_assert_ne!(0, self.amount);
+ // The encoding of the immediate follows the table below,
+ // where xs encode the negated shift amount.
+ //
+ // | lane_size_in_bits | encoding |
+ // +------------------------------+
+ // | 8 | 0001xxx |
+ // | 16 | 001xxxx |
+ // | 32 | 01xxxxx |
+ // | 64 | 1xxxxxx |
+ //
+ // The shift amount is negated such that a shift amount
+ // of 1 (in 64-bit) is encoded as 0b111111 and a shift
+ // amount of 64 is encoded as 0b000000,
+ // in the bottom 6 bits.
+ u32::from((self.lane_size_in_bits * 2) - self.amount)
+ }
+}
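+
+// Illustrative sketch (a hypothetical test, not taken from upstream): the
+// right-shift amount is encoded as `2 * lane_size - amount`, so for 64-bit
+// lanes a shift of 1 becomes 0b1111111 and a shift of 64 becomes 0b1000000.
+#[test]
+fn fpu_right_shift_imm_enc_example() {
+ assert_eq!(FPURightShiftImm::maybe_from_u8(1, 64).unwrap().enc(), 0b1111111);
+ assert_eq!(FPURightShiftImm::maybe_from_u8(64, 64).unwrap().enc(), 0b1000000);
+ // A zero shift is rejected.
+ assert!(FPURightShiftImm::maybe_from_u8(0, 64).is_none());
+}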
+
+/// A signed 9-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct SImm9 {
+ /// The value.
+ pub value: i16,
+}
+
+impl SImm9 {
+ /// Create a signed 9-bit offset from a full-range value, if possible.
+ pub fn maybe_from_i64(value: i64) -> Option<SImm9> {
+ if value >= -256 && value <= 255 {
+ Some(SImm9 {
+ value: value as i16,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> SImm9 {
+ SImm9 { value: 0 }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ (self.value as u32) & 0x1ff
+ }
+
+ /// Signed value of immediate.
+ pub fn value(&self) -> i32 {
+ self.value as i32
+ }
+}
+
+/// An unsigned, scaled 12-bit offset.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm12Scaled {
+ /// The value.
+ pub value: u16,
+ /// The value is a multiple of the size of this type.
+ pub scale_ty: Type,
+}
+
+impl UImm12Scaled {
+ /// Create a UImm12Scaled from a raw offset and the known scale type, if
+ /// possible.
+ pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> {
+ // Ensure the type is at least one byte.
+ let scale_ty = if scale_ty == B1 { B8 } else { scale_ty };
+
+ let scale = scale_ty.bytes();
+ assert!(scale.is_power_of_two());
+ let scale = scale as i64;
+ let limit = 4095 * scale;
+ if value >= 0 && value <= limit && (value & (scale - 1)) == 0 {
+ Some(UImm12Scaled {
+ value: value as u16,
+ scale_ty,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero(scale_ty: Type) -> UImm12Scaled {
+ UImm12Scaled { value: 0, scale_ty }
+ }
+
+ /// Encoded bits.
+ pub fn bits(&self) -> u32 {
+ (self.value as u32 / self.scale_ty.bytes()) & 0xfff
+ }
+
+ /// Value after scaling.
+ pub fn value(&self) -> u32 {
+ self.value as u32
+ }
+
+ /// The value type which is the scaling base.
+ pub fn scale_ty(&self) -> Type {
+ self.scale_ty
+ }
+}
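+
+// Illustrative sketch (a hypothetical test, not taken from upstream): an I64
+// scale accepts multiples of 8 up to 4095 * 8 = 32760, and `bits()` returns
+// the offset divided by the scale.
+#[test]
+fn uimm12_scaled_example() {
+ assert_eq!(UImm12Scaled::maybe_from_i64(32, I64).unwrap().bits(), 4);
+ assert!(UImm12Scaled::maybe_from_i64(32768, I64).is_none());
+}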
+
+/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted
+/// left by 0 or 12 places.
+#[derive(Clone, Debug)]
+pub struct Imm12 {
+ /// The immediate bits.
+ pub bits: u16,
+ /// Whether the immediate bits are shifted left by 12 or not.
+ pub shift12: bool,
+}
+
+impl Imm12 {
+ /// Compute an Imm12 from raw bits, if possible.
+ pub fn maybe_from_u64(val: u64) -> Option<Imm12> {
+ if val == 0 {
+ Some(Imm12 {
+ bits: 0,
+ shift12: false,
+ })
+ } else if val < 0xfff {
+ Some(Imm12 {
+ bits: val as u16,
+ shift12: false,
+ })
+ } else if val < 0xfff_000 && (val & 0xfff == 0) {
+ Some(Imm12 {
+ bits: (val >> 12) as u16,
+ shift12: true,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> Self {
+ Imm12 {
+ bits: 0,
+ shift12: false,
+ }
+ }
+
+ /// Bits for 2-bit "shift" field in e.g. AddI.
+ pub fn shift_bits(&self) -> u32 {
+ if self.shift12 {
+ 0b01
+ } else {
+ 0b00
+ }
+ }
+
+ /// Bits for 12-bit "imm" field in e.g. AddI.
+ pub fn imm_bits(&self) -> u32 {
+ self.bits as u32
+ }
+}
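+
+// Illustrative sketch (a hypothetical test, not taken from upstream): small
+// values are encoded unshifted, while multiples of 4096 up to 0xfff000 use
+// the shifted-by-12 form.
+#[test]
+fn imm12_example() {
+ let unshifted = Imm12::maybe_from_u64(0x123).unwrap();
+ assert_eq!((unshifted.imm_bits(), unshifted.shift_bits()), (0x123, 0b00));
+ let shifted = Imm12::maybe_from_u64(0x45_000).unwrap();
+ assert_eq!((shifted.imm_bits(), shifted.shift_bits()), (0x45, 0b01));
+ // Values needing bits both above and below bit 12 are not representable.
+ assert!(Imm12::maybe_from_u64(0x45_001).is_none());
+}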
+
+/// An immediate for logical instructions.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ImmLogic {
+ /// The actual value.
+ value: u64,
+ /// `N` flag.
+ pub n: bool,
+ /// `R` field: rotate amount.
+ pub r: u8,
+ /// `S` field: element size and element bits.
+ pub s: u8,
+ /// Was this constructed for a 32-bit or 64-bit instruction?
+ pub size: OperandSize,
+}
+
+impl ImmLogic {
+ /// Compute an ImmLogic from raw bits, if possible.
+ pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> {
+ // Note: This function is a port of VIXL's Assembler::IsImmLogical.
+
+ if ty != I64 && ty != I32 {
+ return None;
+ }
+ let operand_size = OperandSize::from_ty(ty);
+
+ let original_value = value;
+
+ let value = if ty == I32 {
+ // To handle 32-bit logical immediates, the very easiest thing is to repeat
+ // the input value twice to make a 64-bit word. The correct encoding of that
+ // as a logical immediate will also be the correct encoding of the 32-bit
+ // value.
+
+ // Avoid making the assumption that the most-significant 32 bits are zero by
+ // shifting the value left and duplicating it.
+ let value = value << 32;
+ value | value >> 32
+ } else {
+ value
+ };
+
+ // Logical immediates are encoded using parameters n, imm_s and imm_r using
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1 bits
+ // are set. The pattern is rotated right by R, and repeated across a 32 or
+ // 64-bit value, depending on destination register width.
+ //
+ // Put another way: the basic format of a logical immediate is a single
+ // contiguous stretch of 1 bits, repeated across the whole word at intervals
+ // given by a power of 2. To identify them quickly, we first locate the
+ // lowest stretch of 1 bits, then the next 1 bit above that; that combination
+ // is different for every logical immediate, so it gives us all the
+ // information we need to identify the only logical immediate that our input
+ // could be, and then we simply check if that's the value we actually have.
+ //
+ // (The rotation parameter does give the possibility of the stretch of 1 bits
+ // going 'round the end' of the word. To deal with that, we observe that in
+ // any situation where that happens the bitwise NOT of the value is also a
+ // valid logical immediate. So we simply invert the input whenever its low bit
+ // is set, and then we know that the rotated case can't arise.)
+ let (value, inverted) = if value & 1 == 1 {
+ (!value, true)
+ } else {
+ (value, false)
+ };
+
+ if value == 0 {
+ return None;
+ }
+
+ // The basic analysis idea: imagine our input word looks like this.
+ //
+ // 0011111000111110001111100011111000111110001111100011111000111110
+ // c b a
+ // |<--d-->|
+ //
+ // We find the lowest set bit (as an actual power-of-2 value, not its index)
+ // and call it a. Then we add a to our original number, which wipes out the
+ // bottommost stretch of set bits and replaces it with a 1 carried into the
+ // next zero bit. Then we look for the new lowest set bit, which is in
+ // position b, and subtract it, so now our number is just like the original
+ // but with the lowest stretch of set bits completely gone. Now we find the
+ // lowest set bit again, which is position c in the diagram above. Then we'll
+ // measure the distance d between bit positions a and c (using CLZ), and that
+ // tells us that the only valid logical immediate that could possibly be equal
+ // to this number is the one in which a stretch of bits running from a to just
+ // below b is replicated every d bits.
+ fn lowest_set_bit(value: u64) -> u64 {
+ let bit = value.trailing_zeros();
+ 1u64.checked_shl(bit).unwrap_or(0)
+ }
+ let a = lowest_set_bit(value);
+ assert_ne!(0, a);
+ let value_plus_a = value.wrapping_add(a);
+ let b = lowest_set_bit(value_plus_a);
+ let value_plus_a_minus_b = value_plus_a - b;
+ let c = lowest_set_bit(value_plus_a_minus_b);
+
+ let (d, clz_a, out_n, mask) = if c != 0 {
+ // The general case, in which there is more than one stretch of set bits.
+ // Compute the repeat distance d, and set up a bitmask covering the basic
+ // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
+ // of these cases the N bit of the output will be zero.
+ let clz_a = a.leading_zeros();
+ let clz_c = c.leading_zeros();
+ let d = clz_a - clz_c;
+ let mask = (1 << d) - 1;
+ (d, clz_a, 0, mask)
+ } else {
+ (64, a.leading_zeros(), 1, u64::max_value())
+ };
+
+ // If the repeat period d is not a power of two, it can't be encoded.
+ if !d.is_power_of_two() {
+ return None;
+ }
+
+ if ((b.wrapping_sub(a)) & !mask) != 0 {
+ // If the bit stretch (b - a) does not fit within the mask derived from the
+ // repeat period, then fail.
+ return None;
+ }
+
+ // The only possible option is b - a repeated every d bits. Now we're going to
+ // actually construct the valid logical immediate derived from that
+ // specification, and see if it equals our original input.
+ //
+ // To repeat a value every d bits, we multiply it by a number of the form
+ // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
+ // be derived using a table lookup on CLZ(d).
+ const MULTIPLIERS: [u64; 6] = [
+ 0x0000000000000001,
+ 0x0000000100000001,
+ 0x0001000100010001,
+ 0x0101010101010101,
+ 0x1111111111111111,
+ 0x5555555555555555,
+ ];
+ let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize];
+ let candidate = b.wrapping_sub(a) * multiplier;
+
+ if value != candidate {
+ // The candidate pattern doesn't match our input value, so fail.
+ return None;
+ }
+
+ // We have a match! This is a valid logical immediate, so now we have to
+ // construct the bits and pieces of the instruction encoding that generates
+ // it.
+
+ // Count the set bits in our basic stretch. The special case of clz(0) == -1
+ // makes the answer come out right for stretches that reach the very top of
+ // the word (e.g. numbers like 0xffffc00000000000).
+ let clz_b = if b == 0 {
+ u32::max_value() // -1
+ } else {
+ b.leading_zeros()
+ };
+ let s = clz_a.wrapping_sub(clz_b);
+
+ // Decide how many bits to rotate right by, to put the low bit of that basic
+ // stretch in position a.
+ let (s, r) = if inverted {
+ // If we inverted the input right at the start of this function, here's
+ // where we compensate: the number of set bits becomes the number of clear
+ // bits, and the rotation count is based on position b rather than position
+ // a (since b is the location of the 'lowest' 1 bit after inversion).
+ // Need wrapping for when clz_b is max_value() (for when b == 0).
+ (d - s, clz_b.wrapping_add(1) & (d - 1))
+ } else {
+ (s, (clz_a + 1) & (d - 1))
+ };
+
+ // Now we're done, except for having to encode the S output in such a way that
+ // it gives both the number of set bits and the length of the repeated
+ // segment. The s field is encoded like this:
+ //
+ // imms size S
+ // ssssss 64 UInt(ssssss)
+ // 0sssss 32 UInt(sssss)
+ // 10ssss 16 UInt(ssss)
+ // 110sss 8 UInt(sss)
+ // 1110ss 4 UInt(ss)
+ // 11110s 2 UInt(s)
+ //
+ // So we 'or' (2 * -d) with our computed s to form imms.
+ let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f;
+ debug_assert!(u8::try_from(r).is_ok());
+ debug_assert!(u8::try_from(s).is_ok());
+ Some(ImmLogic {
+ value: original_value,
+ n: out_n != 0,
+ r: r as u8,
+ s: s as u8,
+ size: operand_size,
+ })
+ }
+
+ /// Returns bits ready for encoding: (N:1, R:6, S:6)
+ pub fn enc_bits(&self) -> u32 {
+ ((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32)
+ }
+
+ /// Returns the value that this immediate represents.
+ pub fn value(&self) -> u64 {
+ self.value
+ }
+
+ /// Return an immediate for the bitwise-inverted value.
+ pub fn invert(&self) -> ImmLogic {
+ // For every ImmLogical immediate, the inverse can also be encoded.
+ Self::maybe_from_u64(!self.value, self.size.to_ty()).unwrap()
+ }
+
+ /// This provides a safe(ish) way to avoid the costs of `maybe_from_u64` when we want to
+ /// encode a constant that we know at compiler-build time. It constructs an `ImmLogic` from
+ /// the fields `n`, `r`, `s` and `size`, but in a debug build, checks that `value_to_check`
+ /// corresponds to those four fields. The intention is that, in a non-debug build, this
+ /// reduces to something small enough that it will be a candidate for inlining.
+ pub fn from_n_r_s(value_to_check: u64, n: bool, r: u8, s: u8, size: OperandSize) -> Self {
+ // Construct it from the components we got given.
+ let imml = Self {
+ value: value_to_check,
+ n,
+ r,
+ s,
+ size,
+ };
+
+ // In debug mode, check that `n`/`r`/`s` are correct, given `value` and `size`.
+ debug_assert!(match ImmLogic::maybe_from_u64(
+ value_to_check,
+ if size == OperandSize::Size64 {
+ I64
+ } else {
+ I32
+ }
+ ) {
+ None => false, // fail: `value` is unrepresentable
+ Some(imml_check) => imml_check == imml,
+ });
+
+ imml
+ }
+}
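+
+// Illustrative sketch (a hypothetical test, not taken from upstream): the
+// alternating pattern 0x5555...5555 repeats a single set bit every 2 bits, so
+// it encodes with N = 0, R = 0 and the size-2 `imms` prefix 11110, giving
+// S = 0b111100.
+#[test]
+fn imm_logic_example() {
+ let imml = ImmLogic::maybe_from_u64(0x5555_5555_5555_5555, I64).unwrap();
+ assert!(!imml.n);
+ assert_eq!(imml.r, 0);
+ assert_eq!(imml.s, 0b111100);
+ // A value whose runs of ones do not repeat regularly is not encodable.
+ assert!(ImmLogic::maybe_from_u64(0x1234_5678, I64).is_none());
+}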
+
+/// An immediate for shift instructions.
+#[derive(Clone, Debug)]
+pub struct ImmShift {
+ /// 6-bit shift amount.
+ pub imm: u8,
+}
+
+impl ImmShift {
+ /// Create an ImmShift from raw bits, if possible.
+ pub fn maybe_from_u64(val: u64) -> Option<ImmShift> {
+ if val < 64 {
+ Some(ImmShift { imm: val as u8 })
+ } else {
+ None
+ }
+ }
+
+ /// Get the immediate value.
+ pub fn value(&self) -> u8 {
+ self.imm
+ }
+}
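+
+// For illustration (a sketch that follows directly from `maybe_from_u64` above):
+// `ImmShift::maybe_from_u64(63)` yields `Some(ImmShift { imm: 63 })`, while
+// `ImmShift::maybe_from_u64(64)` yields `None`, since shift amounts must fit in 6 bits.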
+
+/// A 16-bit immediate for MOVZ/MOVN/MOVK instructions, with a {0,16,32,48}-bit shift.
+#[derive(Clone, Copy, Debug)]
+pub struct MoveWideConst {
+ /// The value.
+ pub bits: u16,
+ /// Result is `bits` shifted 16*shift bits to the left.
+ pub shift: u8,
+}
+
+impl MoveWideConst {
+ /// Construct a MoveWideConst from an arbitrary 64-bit constant if possible.
+ pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> {
+ let mask0 = 0x0000_0000_0000_ffffu64;
+ let mask1 = 0x0000_0000_ffff_0000u64;
+ let mask2 = 0x0000_ffff_0000_0000u64;
+ let mask3 = 0xffff_0000_0000_0000u64;
+
+ if value == (value & mask0) {
+ return Some(MoveWideConst {
+ bits: (value & mask0) as u16,
+ shift: 0,
+ });
+ }
+ if value == (value & mask1) {
+ return Some(MoveWideConst {
+ bits: ((value >> 16) & mask0) as u16,
+ shift: 1,
+ });
+ }
+ if value == (value & mask2) {
+ return Some(MoveWideConst {
+ bits: ((value >> 32) & mask0) as u16,
+ shift: 2,
+ });
+ }
+ if value == (value & mask3) {
+ return Some(MoveWideConst {
+ bits: ((value >> 48) & mask0) as u16,
+ shift: 3,
+ });
+ }
+ None
+ }
+
+    /// Create a `MoveWideConst` from a 16-bit immediate and a left-shift amount given in bits
+    /// (a multiple of 16, at most 48), if representable.
+    pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> {
+ let shift_enc = shift / 16;
+ if shift_enc > 3 {
+ None
+ } else {
+ Some(MoveWideConst {
+ bits: imm,
+ shift: shift_enc,
+ })
+ }
+ }
+
+ /// Returns the value that this constant represents.
+ pub fn value(&self) -> u64 {
+ (self.bits as u64) << (16 * self.shift)
+ }
+}
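+
+// As a usage sketch (the values follow from `maybe_from_u64` above): the constant
+// 0x12_0000_0000 occupies only the third halfword, so it is representable as
+// `MoveWideConst { bits: 0x12, shift: 2 }`, i.e. a MOVZ with `LSL #32`, whereas a
+// constant such as 0x1_0001 straddles two halfwords and yields `None`.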
+
+/// Advanced SIMD modified immediate as used by MOVI/MVNI.
+#[derive(Clone, Copy, Debug)]
+pub struct ASIMDMovModImm {
+ imm: u8,
+ shift: u8,
+ shift_ones: bool,
+}
+
+impl ASIMDMovModImm {
+    /// Construct an `ASIMDMovModImm` from an arbitrary 64-bit constant, if possible.
+    /// Currently only 8-bit elements are handled; the low 8 bits of `value` are used.
+    pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> {
+ match size {
+ ScalarSize::Size8 => Some(ASIMDMovModImm {
+ imm: value as u8,
+ shift: 0,
+ shift_ones: false,
+ }),
+ _ => None,
+ }
+ }
+
+ /// Create a zero immediate of this format.
+ pub fn zero() -> Self {
+ ASIMDMovModImm {
+ imm: 0,
+ shift: 0,
+ shift_ones: false,
+ }
+ }
+
+    /// Returns the raw immediate, the shift amount and whether the shift is an MSL
+    /// (shift-ones) shift.
+    pub fn value(&self) -> (u8, u32, bool) {
+ (self.imm, self.shift as u32, self.shift_ones)
+ }
+}
+
+impl PrettyPrint for NZCV {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
+ format!(
+ "#{}{}{}{}",
+ fmt('n', self.n),
+ fmt('z', self.z),
+ fmt('c', self.c),
+ fmt('v', self.v)
+ )
+ }
+}
+
+impl PrettyPrint for UImm5 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for Imm12 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let shift = if self.shift12 { 12 } else { 0 };
+ let value = u32::from(self.bits) << shift;
+ format!("#{}", value)
+ }
+}
+
+impl PrettyPrint for SImm7Scaled {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for FPULeftShiftImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.amount)
+ }
+}
+
+impl PrettyPrint for FPURightShiftImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.amount)
+ }
+}
+
+impl PrettyPrint for SImm9 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for UImm12Scaled {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for ImmLogic {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value())
+ }
+}
+
+impl PrettyPrint for ImmShift {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.imm)
+ }
+}
+
+impl PrettyPrint for MoveWideConst {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ if self.shift == 0 {
+ format!("#{}", self.bits)
+ } else {
+ format!("#{}, LSL #{}", self.bits, self.shift * 16)
+ }
+ }
+}
+
+impl PrettyPrint for ASIMDMovModImm {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ if self.shift == 0 {
+ format!("#{}", self.imm)
+ } else {
+ let shift_type = if self.shift_ones { "MSL" } else { "LSL" };
+ format!("#{}, {} #{}", self.imm, shift_type, self.shift)
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn imm_logical_test() {
+ assert_eq!(None, ImmLogic::maybe_from_u64(0, I64));
+ assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 1,
+ n: true,
+ r: 0,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(1, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 2,
+ n: true,
+ r: 63,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(2, I64)
+ );
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(5, I64));
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(11, I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 248,
+ n: true,
+ r: 61,
+ s: 4,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(248, I64)
+ );
+
+ assert_eq!(None, ImmLogic::maybe_from_u64(249, I64));
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 1920,
+ n: true,
+ r: 57,
+ s: 3,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(1920, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x7ffe,
+ n: true,
+ r: 63,
+ s: 13,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x7ffe, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x30000,
+ n: true,
+ r: 48,
+ s: 1,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x30000, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x100000,
+ n: true,
+ r: 44,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x100000, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: u64::max_value() - 1,
+ n: true,
+ r: 63,
+ s: 62,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(u64::max_value() - 1, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0xaaaaaaaaaaaaaaaa,
+ n: false,
+ r: 1,
+ s: 60,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x8181818181818181,
+ n: false,
+ r: 1,
+ s: 49,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x8181818181818181, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0xffc3ffc3ffc3ffc3,
+ n: false,
+ r: 10,
+ s: 43,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x100000001,
+ n: false,
+ r: 0,
+ s: 0,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x100000001, I64)
+ );
+
+ assert_eq!(
+ Some(ImmLogic {
+ value: 0x1111111111111111,
+ n: false,
+ r: 0,
+ s: 56,
+ size: OperandSize::Size64,
+ }),
+ ImmLogic::maybe_from_u64(0x1111111111111111, I64)
+ );
+
+ for n in 0..2 {
+ let types = if n == 0 { vec![I64, I32] } else { vec![I64] };
+ for s in 0..64 {
+ for r in 0..64 {
+ let imm = get_logical_imm(n, s, r);
+ for &ty in &types {
+ match ImmLogic::maybe_from_u64(imm, ty) {
+ Some(ImmLogic { value, .. }) => {
+ assert_eq!(imm, value);
+ ImmLogic::maybe_from_u64(!value, ty).unwrap();
+ }
+ None => assert_eq!(0, imm),
+ };
+ }
+ }
+ }
+ }
+ }
+
+ // Repeat a value that has `width` bits, across a 64-bit value.
+ fn repeat(value: u64, width: u64) -> u64 {
+ let mut result = value & ((1 << width) - 1);
+ let mut i = width;
+ while i < 64 {
+ result |= result << i;
+ i *= 2;
+ }
+ result
+ }
+
+ // Get the logical immediate, from the encoding N/R/S bits.
+ fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 {
+ // An integer is constructed from the n, imm_s and imm_r bits according to
+ // the following table:
+ //
+ // N imms immr size S R
+ // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
+ // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
+ // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
+ // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
+ // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
+ // 0 11110s xxxxxr 2 UInt(s) UInt(r)
+ // (s bits must not be all set)
+ //
+ // A pattern is constructed of size bits, where the least significant S+1
+ // bits are set. The pattern is rotated right by R, and repeated across a
+ // 64-bit value.
+
+ if n == 1 {
+ if s == 0x3f {
+ return 0;
+ }
+ let bits = (1u64 << (s + 1)) - 1;
+ bits.rotate_right(r)
+ } else {
+ if (s >> 1) == 0x1f {
+ return 0;
+ }
+ let mut width = 0x20;
+ while width >= 0x2 {
+ if (s & width) == 0 {
+ let mask = width - 1;
+ if (s & mask) == mask {
+ return 0;
+ }
+ let bits = (1u64 << ((s & mask) + 1)) - 1;
+ return repeat(bits.rotate_right(r & mask), width.into());
+ }
+ width >>= 1;
+ }
+ unreachable!();
+ }
+ }
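+
+    // A brief extra check of the small immediate helpers above (a sketch; the expected
+    // values follow directly from the constructors defined in this file).
+    #[test]
+    fn move_wide_and_shift_imm_test() {
+        let c = MoveWideConst::maybe_from_u64(0xffff_0000).unwrap();
+        assert_eq!(c.bits, 0xffff);
+        assert_eq!(c.shift, 1);
+        assert_eq!(c.value(), 0xffff_0000);
+        // A constant spanning two halfwords cannot be encoded by a single MOVZ.
+        assert!(MoveWideConst::maybe_from_u64(0x1_0001).is_none());
+
+        // Shift amounts must fit in 6 bits.
+        assert_eq!(ImmShift::maybe_from_u64(63).unwrap().value(), 63);
+        assert!(ImmShift::maybe_from_u64(64).is_none());
+    }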
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs
new file mode 100644
index 0000000000..278302018e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs
@@ -0,0 +1,4057 @@
+//! This module defines aarch64-specific machine instruction types.
+
+// Some variants are not constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::binemit::CodeOffset;
+use crate::ir::types::{
+ B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
+ I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
+};
+use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type};
+use crate::isa::CallConv;
+use crate::machinst::*;
+use crate::{settings, CodegenError, CodegenResult};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
+use regalloc::{RegUsageCollector, RegUsageMapper};
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::convert::TryFrom;
+use smallvec::{smallvec, SmallVec};
+use std::string::{String, ToString};
+
+pub mod regs;
+pub use self::regs::*;
+pub mod imms;
+pub use self::imms::*;
+pub mod args;
+pub use self::args::*;
+pub mod emit;
+pub use self::emit::*;
+pub mod unwind;
+
+#[cfg(test)]
+mod emit_tests;
+
+//=============================================================================
+// Instructions (top level): definition
+
+/// An ALU operation. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp {
+ Add32,
+ Add64,
+ Sub32,
+ Sub64,
+ Orr32,
+ Orr64,
+ OrrNot32,
+ OrrNot64,
+ And32,
+ And64,
+ AndNot32,
+ AndNot64,
+ /// XOR (AArch64 calls this "EOR")
+ Eor32,
+ /// XOR (AArch64 calls this "EOR")
+ Eor64,
+ /// XNOR (AArch64 calls this "EOR-NOT")
+ EorNot32,
+ /// XNOR (AArch64 calls this "EOR-NOT")
+ EorNot64,
+ /// Add, setting flags
+ AddS32,
+ /// Add, setting flags
+ AddS64,
+ /// Sub, setting flags
+ SubS32,
+ /// Sub, setting flags
+ SubS64,
+ /// Signed multiply, high-word result
+ SMulH,
+ /// Unsigned multiply, high-word result
+ UMulH,
+ SDiv64,
+ UDiv64,
+ RotR32,
+ RotR64,
+ Lsr32,
+ Lsr64,
+ Asr32,
+ Asr64,
+ Lsl32,
+ Lsl64,
+}
+
+/// An ALU operation with three arguments.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp3 {
+ /// Multiply-add
+ MAdd32,
+ /// Multiply-add
+ MAdd64,
+ /// Multiply-sub
+ MSub32,
+ /// Multiply-sub
+ MSub64,
+}
+
+/// A floating-point unit (FPU) operation with one arg.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp1 {
+ Abs32,
+ Abs64,
+ Neg32,
+ Neg64,
+ Sqrt32,
+ Sqrt64,
+ Cvt32To64,
+ Cvt64To32,
+}
+
+/// A floating-point unit (FPU) operation with two args.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp2 {
+ Add32,
+ Add64,
+ Sub32,
+ Sub64,
+ Mul32,
+ Mul64,
+ Div32,
+ Div64,
+ Max32,
+ Max64,
+ Min32,
+ Min64,
+ /// Signed saturating add
+ Sqadd64,
+ /// Unsigned saturating add
+ Uqadd64,
+ /// Signed saturating subtract
+ Sqsub64,
+ /// Unsigned saturating subtract
+ Uqsub64,
+}
+
+/// A floating-point unit (FPU) operation with two args, a register and an immediate.
+#[derive(Copy, Clone, Debug)]
+pub enum FPUOpRI {
+    /// Unsigned right shift. Rd = Rn >> #imm
+    UShr32(FPURightShiftImm),
+    /// Unsigned right shift. Rd = Rn >> #imm
+    UShr64(FPURightShiftImm),
+ /// Shift left and insert. Rd |= Rn << #imm
+ Sli32(FPULeftShiftImm),
+ /// Shift left and insert. Rd |= Rn << #imm
+ Sli64(FPULeftShiftImm),
+}
+
+/// A floating-point unit (FPU) operation with three args.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp3 {
+ MAdd32,
+ MAdd64,
+}
+
+/// A conversion from an FP to an integer value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuToIntOp {
+ F32ToU32,
+ F32ToI32,
+ F32ToU64,
+ F32ToI64,
+ F64ToU32,
+ F64ToI32,
+ F64ToU64,
+ F64ToI64,
+}
+
+/// A conversion from an integer to an FP value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum IntToFpuOp {
+ U32ToF32,
+ I32ToF32,
+ U32ToF64,
+ I32ToF64,
+ U64ToF32,
+ I64ToF32,
+ U64ToF64,
+ I64ToF64,
+}
+
+/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to
+/// nearest, and for 32- or 64-bit FP values.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuRoundMode {
+ Minus32,
+ Minus64,
+ Plus32,
+ Plus64,
+ Zero32,
+ Zero64,
+ Nearest32,
+ Nearest64,
+}
+
+/// Type of vector element extensions.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecExtendOp {
+ /// Signed extension of 8-bit elements
+ Sxtl8,
+ /// Signed extension of 16-bit elements
+ Sxtl16,
+ /// Signed extension of 32-bit elements
+ Sxtl32,
+ /// Unsigned extension of 8-bit elements
+ Uxtl8,
+ /// Unsigned extension of 16-bit elements
+ Uxtl16,
+ /// Unsigned extension of 32-bit elements
+ Uxtl32,
+}
+
+/// A vector ALU operation.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecALUOp {
+ /// Signed saturating add
+ Sqadd,
+ /// Unsigned saturating add
+ Uqadd,
+ /// Signed saturating subtract
+ Sqsub,
+ /// Unsigned saturating subtract
+ Uqsub,
+ /// Compare bitwise equal
+ Cmeq,
+ /// Compare signed greater than or equal
+ Cmge,
+ /// Compare signed greater than
+ Cmgt,
+    /// Compare unsigned higher or same
+    Cmhs,
+    /// Compare unsigned higher
+    Cmhi,
+ /// Floating-point compare equal
+ Fcmeq,
+ /// Floating-point compare greater than
+ Fcmgt,
+ /// Floating-point compare greater than or equal
+ Fcmge,
+ /// Bitwise and
+ And,
+ /// Bitwise bit clear
+ Bic,
+ /// Bitwise inclusive or
+ Orr,
+ /// Bitwise exclusive or
+ Eor,
+ /// Bitwise select
+ Bsl,
+ /// Unsigned maximum pairwise
+ Umaxp,
+ /// Add
+ Add,
+ /// Subtract
+ Sub,
+ /// Multiply
+ Mul,
+ /// Signed shift left
+ Sshl,
+ /// Unsigned shift left
+ Ushl,
+ /// Unsigned minimum
+ Umin,
+ /// Signed minimum
+ Smin,
+ /// Unsigned maximum
+ Umax,
+ /// Signed maximum
+ Smax,
+ /// Unsigned rounding halving add
+ Urhadd,
+ /// Floating-point add
+ Fadd,
+ /// Floating-point subtract
+ Fsub,
+ /// Floating-point divide
+ Fdiv,
+ /// Floating-point maximum
+ Fmax,
+ /// Floating-point minimum
+ Fmin,
+ /// Floating-point multiply
+ Fmul,
+ /// Add pairwise
+ Addp,
+ /// Unsigned multiply add long
+ Umlal,
+    /// Zip vectors (primary) [meaning, lower halves]
+ Zip1,
+ /// Signed multiply long (low halves)
+ Smull,
+ /// Signed multiply long (high halves)
+ Smull2,
+}
+
+/// A Vector miscellaneous operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMisc2 {
+ /// Bitwise NOT
+ Not,
+ /// Negate
+ Neg,
+ /// Absolute value
+ Abs,
+ /// Floating-point absolute value
+ Fabs,
+ /// Floating-point negate
+ Fneg,
+ /// Floating-point square root
+ Fsqrt,
+ /// Reverse elements in 64-bit doublewords
+ Rev64,
+ /// Shift left long (by element size)
+ Shll,
+ /// Floating-point convert to signed integer, rounding toward zero
+ Fcvtzs,
+ /// Floating-point convert to unsigned integer, rounding toward zero
+ Fcvtzu,
+ /// Signed integer convert to floating-point
+ Scvtf,
+ /// Unsigned integer convert to floating-point
+ Ucvtf,
+ /// Floating point round to integral, rounding towards nearest
+ Frintn,
+ /// Floating point round to integral, rounding towards zero
+ Frintz,
+ /// Floating point round to integral, rounding towards minus infinity
+ Frintm,
+ /// Floating point round to integral, rounding towards plus infinity
+ Frintp,
+}
+
+/// A Vector narrowing operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMiscNarrowOp {
+ /// Extract Narrow
+ Xtn,
+ /// Signed saturating extract narrow
+ Sqxtn,
+ /// Signed saturating extract unsigned narrow
+ Sqxtun,
+}
+
+/// An operation across the lanes of vectors.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecLanesOp {
+ /// Integer addition across a vector
+ Addv,
+ /// Unsigned minimum across a vector
+ Uminv,
+}
+
+/// A shift-by-immediate operation on each lane of a vector.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecShiftImmOp {
+    /// Shift left
+    Shl,
+    /// Unsigned shift right
+    Ushr,
+    /// Signed shift right
+    Sshr,
+}
+
+/// An operation on the bits of a register. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum BitOp {
+ /// Bit reverse
+ RBit32,
+ /// Bit reverse
+ RBit64,
+ Clz32,
+ Clz64,
+ Cls32,
+ Cls64,
+}
+
+impl BitOp {
+ /// What is the opcode's native width?
+ pub fn operand_size(&self) -> OperandSize {
+ match self {
+ BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
+ _ => OperandSize::Size64,
+ }
+ }
+
+ /// Get the assembly mnemonic for this opcode.
+ pub fn op_str(&self) -> &'static str {
+ match self {
+ BitOp::RBit32 | BitOp::RBit64 => "rbit",
+ BitOp::Clz32 | BitOp::Clz64 => "clz",
+ BitOp::Cls32 | BitOp::Cls64 => "cls",
+ }
+ }
+}
+
+impl From<(Opcode, Type)> for BitOp {
+ /// Get the BitOp from the IR opcode.
+ fn from(op_ty: (Opcode, Type)) -> BitOp {
+ match op_ty {
+ (Opcode::Bitrev, I32) => BitOp::RBit32,
+ (Opcode::Bitrev, I64) => BitOp::RBit64,
+ (Opcode::Clz, I32) => BitOp::Clz32,
+ (Opcode::Clz, I64) => BitOp::Clz64,
+ (Opcode::Cls, I32) => BitOp::Cls32,
+ (Opcode::Cls, I64) => BitOp::Cls64,
+ _ => unreachable!("Called with non-bit op!: {:?}", op_ty),
+ }
+ }
+}
+
+/// Additional information for (direct) Call instructions, left out of line to lower the size of
+/// the Inst enum.
+#[derive(Clone, Debug)]
+pub struct CallInfo {
+ pub dest: ExternalName,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+ pub caller_callconv: CallConv,
+ pub callee_callconv: CallConv,
+}
+
+/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct CallIndInfo {
+ pub rn: Reg,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+ pub caller_callconv: CallConv,
+ pub callee_callconv: CallConv,
+}
+
+/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct JTSequenceInfo {
+ pub targets: Vec<BranchTarget>,
+ pub default_target: BranchTarget,
+ pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator.
+}
+
+/// Instruction formats.
+#[derive(Clone, Debug)]
+pub enum Inst {
+ /// A no-op of zero size.
+ Nop0,
+
+ /// A no-op that is one instruction large.
+ Nop4,
+
+ /// An ALU operation with two register sources and a register destination.
+ AluRRR {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+ /// An ALU operation with three register sources and a register destination.
+ AluRRRR {
+ alu_op: ALUOp3,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ ra: Reg,
+ },
+ /// An ALU operation with a register source and an immediate-12 source, and a register
+ /// destination.
+ AluRRImm12 {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imm12: Imm12,
+ },
+ /// An ALU operation with a register source and an immediate-logic source, and a register destination.
+ AluRRImmLogic {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imml: ImmLogic,
+ },
+ /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination.
+ AluRRImmShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ immshift: ImmShift,
+ },
+ /// An ALU operation with two register sources, one of which can be shifted, and a register
+ /// destination.
+ AluRRRShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ shiftop: ShiftOpAndAmt,
+ },
+ /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and
+ /// shifted, and a register destination.
+ AluRRRExtend {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ extendop: ExtendOp,
+ },
+
+ /// A bit op instruction with a single register source.
+ BitRR {
+ op: BitOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// An unsigned (zero-extending) 8-bit load.
+ ULoad8 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 8-bit load.
+ SLoad8 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// An unsigned (zero-extending) 16-bit load.
+ ULoad16 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 16-bit load.
+ SLoad16 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// An unsigned (zero-extending) 32-bit load.
+ ULoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A signed (sign-extending) 32-bit load.
+ SLoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 64-bit load.
+ ULoad64 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ /// An 8-bit store.
+ Store8 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 16-bit store.
+ Store16 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 32-bit store.
+ Store32 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// A 64-bit store.
+ Store64 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ /// A store of a pair of registers.
+ StoreP64 {
+ rt: Reg,
+ rt2: Reg,
+ mem: PairAMode,
+ flags: MemFlags,
+ },
+ /// A load of a pair of registers.
+ LoadP64 {
+ rt: Writable<Reg>,
+ rt2: Writable<Reg>,
+ mem: PairAMode,
+ flags: MemFlags,
+ },
+
+ /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we
+ /// keep them separate at the `Inst` level for better pretty-printing
+ /// and faster `is_move()` logic.
+ Mov64 {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is
+ /// effectively an alias for an unsigned 32-to-64-bit extension.
+ Mov32 {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A MOVZ with a 16-bit immediate.
+ MovZ {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A MOVN with a 16-bit immediate.
+ MovN {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A MOVK with a 16-bit immediate.
+ MovK {
+ rd: Writable<Reg>,
+ imm: MoveWideConst,
+ size: OperandSize,
+ },
+
+ /// A sign- or zero-extend operation.
+ Extend {
+ rd: Writable<Reg>,
+ rn: Reg,
+ signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ },
+
+ /// A conditional-select operation.
+ CSel {
+ rd: Writable<Reg>,
+ cond: Cond,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// A conditional-set operation.
+ CSet {
+ rd: Writable<Reg>,
+ cond: Cond,
+ },
+
+ /// A conditional comparison with an immediate.
+ CCmpImm {
+ size: OperandSize,
+ rn: Reg,
+ imm: UImm5,
+ nzcv: NZCV,
+ cond: Cond,
+ },
+
+ /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall
+ /// effect of atomically modifying a memory location in a particular way. Because we have
+    /// no way to describe earlyclobber registers to the register allocator, this instruction has
+ /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies
+ /// in the surrounding code to the extent it can. The sequence is both preceded and
+ /// followed by a fence which is at least as comprehensive as that of the `Fence`
+ /// instruction below. This instruction is sequentially consistent. The operand
+ /// conventions are:
+ ///
+ /// x25 (rd) address
+ /// x26 (rd) second operand for `op`
+ /// x27 (wr) old value
+ /// x24 (wr) scratch reg; value afterwards has no meaning
+ /// x28 (wr) scratch reg; value afterwards has no meaning
+ AtomicRMW {
+ ty: Type, // I8, I16, I32 or I64
+ op: inst_common::AtomicRmwOp,
+ },
+
+ /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked
+    /// store-conditional loop. (Although we could possibly implement it more directly using the
+    /// CAS instructions available from ARMv8.1 onwards.) The sequence is
+ /// both preceded and followed by a fence which is at least as comprehensive as that of the
+ /// `Fence` instruction below. This instruction is sequentially consistent. Note that the
+ /// operand conventions, although very similar to AtomicRMW, are different:
+ ///
+ /// x25 (rd) address
+ /// x26 (rd) expected value
+ /// x28 (rd) replacement value
+ /// x27 (wr) old value
+ /// x24 (wr) scratch reg; value afterwards has no meaning
+ AtomicCAS {
+ ty: Type, // I8, I16, I32 or I64
+ },
+
+ /// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it
+ /// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as
+ /// that of the `Fence` instruction below. This instruction is sequentially consistent.
+ AtomicLoad {
+ ty: Type, // I8, I16, I32 or I64
+ r_data: Writable<Reg>,
+ r_addr: Reg,
+ },
+
+ /// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence
+ /// instruction following the store. The fence is at least as comprehensive as that of the
+ /// `Fence` instruction below. This instruction is sequentially consistent.
+ AtomicStore {
+ ty: Type, // I8, I16, I32 or I64
+ r_data: Reg,
+ r_addr: Reg,
+ },
+
+ /// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
+ /// nor stores may move forwards or backwards across the fence. Currently emitted as "dmb
+ /// ish". This instruction is sequentially consistent.
+ Fence,
+
+ /// FPU move. Note that this is distinct from a vector-register
+ /// move; moving just 64 bits seems to be significantly faster.
+ FpuMove64 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Vector register move.
+ FpuMove128 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Move to scalar from a vector element.
+ FpuMoveFromVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// 1-op FPU instruction.
+ FpuRR {
+ fpu_op: FPUOp1,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// 2-op FPU instruction.
+ FpuRRR {
+ fpu_op: FPUOp2,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ FpuRRI {
+ fpu_op: FPUOpRI,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// 3-op FPU instruction.
+ FpuRRRR {
+ fpu_op: FPUOp3,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ ra: Reg,
+ },
+
+ /// FPU comparison, single-precision (32 bit).
+ FpuCmp32 {
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// FPU comparison, double-precision (64 bit).
+ FpuCmp64 {
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// Floating-point load, single-precision (32 bit).
+ FpuLoad32 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point store, single-precision (32 bit).
+ FpuStore32 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point load, double-precision (64 bit).
+ FpuLoad64 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point store, double-precision (64 bit).
+ FpuStore64 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point/vector load, 128 bit.
+ FpuLoad128 {
+ rd: Writable<Reg>,
+ mem: AMode,
+ flags: MemFlags,
+ },
+ /// Floating-point/vector store, 128 bit.
+ FpuStore128 {
+ rd: Reg,
+ mem: AMode,
+ flags: MemFlags,
+ },
+
+ LoadFpuConst64 {
+ rd: Writable<Reg>,
+ const_data: u64,
+ },
+
+ LoadFpuConst128 {
+ rd: Writable<Reg>,
+ const_data: u128,
+ },
+
+ /// Conversion: FP -> integer.
+ FpuToInt {
+ op: FpuToIntOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Conversion: integer -> FP.
+ IntToFpu {
+ op: IntToFpuOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// FP conditional select, 32 bit.
+ FpuCSel32 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+ /// FP conditional select, 64 bit.
+ FpuCSel64 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+
+ /// Round to integer.
+ FpuRound {
+ op: FpuRoundMode,
+ rd: Writable<Reg>,
+ rn: Reg,
+ },
+
+ /// Move from a GPR to a vector register. The scalar value is parked in the lowest lane
+ /// of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit
+ /// transactions are supported.
+ MovToFpu {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: ScalarSize,
+ },
+
+ /// Move to a vector element from a GPR.
+ MovToVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// Unsigned move from a vector element to a GPR.
+ MovFromVec {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ },
+
+ /// Signed move from a vector element to a GPR.
+ MovFromVecSigned {
+ rd: Writable<Reg>,
+ rn: Reg,
+ idx: u8,
+ size: VectorSize,
+ scalar_size: OperandSize,
+ },
+
+ /// Duplicate general-purpose register to vector.
+ VecDup {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Duplicate scalar to vector.
+ VecDupFromFpu {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Duplicate immediate to vector.
+ VecDupImm {
+ rd: Writable<Reg>,
+ imm: ASIMDMovModImm,
+ invert: bool,
+ size: VectorSize,
+ },
+
+ /// Vector extend.
+ VecExtend {
+ t: VecExtendOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ high_half: bool,
+ },
+
+ /// Move vector element to another vector element.
+ VecMovElement {
+ rd: Writable<Reg>,
+ rn: Reg,
+ dest_idx: u8,
+ src_idx: u8,
+ size: VectorSize,
+ },
+
+ /// Vector narrowing operation.
+ VecMiscNarrow {
+ op: VecMiscNarrowOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ high_half: bool,
+ },
+
+ /// A vector ALU op.
+ VecRRR {
+ alu_op: VecALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector two register miscellaneous instruction.
+ VecMisc {
+ op: VecMisc2,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector instruction across lanes.
+ VecLanes {
+ op: VecLanesOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate),
+ /// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts,
+ /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero
+ /// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm`
+ /// values from 0 to lane-size-in-bits - 1 inclusive.
+ VecShiftImm {
+ op: VecShiftImmOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ imm: u8,
+ },
+
+ /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes
+ /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`.
+ VecExtract {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ imm4: u8,
+ },
+
+ /// Table vector lookup - single register table. The table consists of 8-bit elements and is
+ /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
+ /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
+ /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
+ /// to 0.
+ VecTbl {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ is_extension: bool,
+ },
+
+ /// Table vector lookup - two register table. The table consists of 8-bit elements and is
+ /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
+ /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
+ /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
+ /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
+ /// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
+ VecTbl2 {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rn2: Reg,
+ rm: Reg,
+ is_extension: bool,
+ },
+
+ /// Load an element and replicate to all lanes of a vector.
+ VecLoadReplicate {
+ rd: Writable<Reg>,
+ rn: Reg,
+ size: VectorSize,
+ },
+
+ /// Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
+ /// control-flow diamond.
+ VecCSel {
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ cond: Cond,
+ },
+
+ /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
+ MovToNZCV {
+ rn: Reg,
+ },
+
+ /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn).
+ MovFromNZCV {
+ rd: Writable<Reg>,
+ },
+
+ /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation
+ /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the
+ /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit
+ /// target.
+ Call {
+ info: Box<CallInfo>,
+ },
+ /// A machine indirect-call instruction.
+ CallInd {
+ info: Box<CallIndInfo>,
+ },
+
+ // ---- branches (exactly one must appear at end of BB) ----
+ /// A machine return instruction.
+ Ret,
+
+ /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+ /// inserted there.
+ EpiloguePlaceholder,
+
+ /// An unconditional branch.
+ Jump {
+ dest: BranchTarget,
+ },
+
+ /// A conditional branch. Contains two targets; at emission time, both are emitted, but
+ /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the
+ /// choice of taken/not_taken (inverting the branch polarity as needed) based on the
+ /// fallthrough at the time of lowering.
+ CondBr {
+ taken: BranchTarget,
+ not_taken: BranchTarget,
+ kind: CondBrKind,
+ },
+
+ /// A conditional trap: execute a `udf` if the condition is true. This is
+ /// one VCode instruction because it uses embedded control flow; it is
+ /// logically a single-in, single-out region, but needs to appear as one
+ /// unit to the register allocator.
+ ///
+ /// The `CondBrKind` gives the conditional-branch condition that will
+ /// *execute* the embedded `Inst`. (In the emitted code, we use the inverse
+ /// of this condition in a branch that skips the trap instruction.)
+ TrapIf {
+ kind: CondBrKind,
+ trap_code: TrapCode,
+ },
+
+ /// An indirect branch through a register, augmented with set of all
+ /// possible successors.
+ IndirectBr {
+ rn: Reg,
+ targets: Vec<MachLabel>,
+ },
+
+ /// A "break" instruction, used for e.g. traps and debug breakpoints.
+ Brk,
+
+ /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
+ /// runtime.
+ Udf {
+ trap_code: TrapCode,
+ },
+
+ /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR`
+ /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is
+ /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may
+ /// need full `MemLabel` support.
+ Adr {
+ rd: Writable<Reg>,
+ /// Offset in range -2^20 .. 2^20.
+ off: i32,
+ },
+
+ /// Raw 32-bit word, used for inline constants and jump-table entries.
+ Word4 {
+ data: u32,
+ },
+
+ /// Raw 64-bit word, used for inline constants.
+ Word8 {
+ data: u64,
+ },
+
+ /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale).
+ JTSequence {
+ info: Box<JTSequenceInfo>,
+ ridx: Reg,
+ rtmp1: Writable<Reg>,
+ rtmp2: Writable<Reg>,
+ },
+
+ /// Load an inline symbol reference.
+ LoadExtName {
+ rd: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i64,
+ },
+
+ /// Load address referenced by `mem` into `rd`.
+ LoadAddr {
+ rd: Writable<Reg>,
+ mem: AMode,
+ },
+
+ /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
+ /// controls how AMode::NominalSPOffset args are lowered.
+ VirtualSPOffsetAdj {
+ offset: i64,
+ },
+
+ /// Meta-insn, no-op in generated code: emit constant/branch veneer island
+ /// at this point (with a guard jump around it) if less than the needed
+ /// space is available before the next branch deadline. See the `MachBuffer`
+ /// implementation in `machinst/buffer.rs` for the overall algorithm. In
+ /// brief, we retain a set of "pending/unresolved label references" from
+ /// branches as we scan forward through instructions to emit machine code;
+ /// if we notice we're about to go out of range on an unresolved reference,
+ /// we stop, emit a bunch of "veneers" (branches in a form that has a longer
+ /// range, e.g. a 26-bit-offset unconditional jump), and point the original
+ /// label references to those. This is an "island" because it comes in the
+ /// middle of the code.
+ ///
+ /// This meta-instruction is a necessary part of the logic that determines
+ /// where to place islands. Ordinarily, we want to place them between basic
+ /// blocks, so we compute the worst-case size of each block, and emit the
+ /// island before starting a block if we would exceed a deadline before the
+ /// end of the block. However, some sequences (such as an inline jumptable)
+ /// are variable-length and not accounted for by this logic; so these
+ /// lowered sequences include an `EmitIsland` to trigger island generation
+ /// where necessary.
+ EmitIsland {
+ /// The needed space before the next deadline.
+ needed_space: CodeOffset,
+ },
+}
+
+fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize {
+ let mut count = 0;
+ for _ in 0..num_half_words {
+ if value & 0xffff == 0 {
+ count += 1;
+ }
+ value >>= 16;
+ }
+
+ count
+}
+
+#[test]
+fn inst_size_test() {
+    // This test guards against unintentionally growing the size of the Inst enum.
+ assert_eq!(32, std::mem::size_of::<Inst>());
+}
+
+impl Inst {
+ /// Create a move instruction.
+ pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ assert!(to_reg.to_reg().get_class() == from_reg.get_class());
+ if from_reg.get_class() == RegClass::I64 {
+ Inst::Mov64 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ } else if from_reg.get_class() == RegClass::V128 {
+ Inst::FpuMove128 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ } else {
+ Inst::FpuMove64 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ }
+ }
+
+ /// Create a 32-bit move instruction.
+ pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ Inst::Mov32 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ }
+
+    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
+    /// a logical immediate, or a MOVZ/MOVN followed by MOVK instructions).
+ pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
+ if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
+ // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
+ smallvec![Inst::MovZ {
+ rd,
+ imm,
+ size: OperandSize::Size64
+ }]
+ } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
+ // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
+ smallvec![Inst::MovN {
+ rd,
+ imm,
+ size: OperandSize::Size64
+ }]
+ } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
+            // Weird logical-instruction immediate in ORR using the zero register
+ smallvec![Inst::AluRRImmLogic {
+ alu_op: ALUOp::Orr64,
+ rd,
+ rn: zero_reg(),
+ imml,
+ }]
+ } else {
+ let mut insts = smallvec![];
+
+ // If the top 32 bits are zero, use 32-bit `mov` operations.
+ let (num_half_words, size, negated) = if value >> 32 == 0 {
+ (2, OperandSize::Size32, (!value << 32) >> 32)
+ } else {
+ (4, OperandSize::Size64, !value)
+ };
+ // If the number of 0xffff half words is greater than the number of 0x0000 half words
+ // it is more efficient to use `movn` for the first instruction.
+ let first_is_inverted = count_zero_half_words(negated, num_half_words)
+ > count_zero_half_words(value, num_half_words);
+ // Either 0xffff or 0x0000 half words can be skipped, depending on the first
+ // instruction used.
+ let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
+ let mut first_mov_emitted = false;
+
+ for i in 0..num_half_words {
+ let imm16 = (value >> (16 * i)) & 0xffff;
+ if imm16 != ignored_halfword {
+ if !first_mov_emitted {
+ first_mov_emitted = true;
+ if first_is_inverted {
+ let imm =
+ MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
+ .unwrap();
+ insts.push(Inst::MovN { rd, imm, size });
+ } else {
+ let imm =
+ MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+ insts.push(Inst::MovZ { rd, imm, size });
+ }
+ } else {
+ let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+ insts.push(Inst::MovK { rd, imm, size });
+ }
+ }
+ }
+
+ assert!(first_mov_emitted);
+
+ insts
+ }
+ }
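+
+    // As a worked sketch of the general path above: 0xffff_ffff_0000_1234 is not encodable as
+    // a single MOVZ/MOVN or as a logical immediate, and since two of its halfwords are 0xffff
+    // the inverted form wins, so `load_constant` emits a `MovN` with immediate 0xedcb followed
+    // by a `MovK` that writes 0 into the second halfword.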
+
+ /// Create instructions that load a 32-bit floating-point constant.
+ pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ value: u32,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if value == 0 {
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm: ASIMDMovModImm::zero(),
+ invert: false,
+ size: VectorSize::Size8x8
+ }]
+ } else {
+ // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
+ // bits.
+ let tmp = alloc_tmp(RegClass::I64, I32);
+ let mut insts = Inst::load_constant(tmp, value as u64);
+
+ insts.push(Inst::MovToFpu {
+ rd,
+ rn: tmp.to_reg(),
+ size: ScalarSize::Size64,
+ });
+
+ insts
+ }
+ }
+
+ /// Create instructions that load a 64-bit floating-point constant.
+ pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ const_data: u64,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if let Ok(const_data) = u32::try_from(const_data) {
+ Inst::load_fp_constant32(rd, const_data, alloc_tmp)
+ // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
+ // bits. Also, treat it as half of a 128-bit vector and consider replicated
+ // patterns. Scalar MOVI might also be an option.
+ } else if const_data & (u32::MAX as u64) == 0 {
+ let tmp = alloc_tmp(RegClass::I64, I64);
+ let mut insts = Inst::load_constant(tmp, const_data);
+
+ insts.push(Inst::MovToFpu {
+ rd,
+ rn: tmp.to_reg(),
+ size: ScalarSize::Size64,
+ });
+
+ insts
+ } else {
+ smallvec![Inst::LoadFpuConst64 { rd, const_data }]
+ }
+ }
+
+ /// Create instructions that load a 128-bit vector constant.
+ pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ const_data: u128,
+ alloc_tmp: F,
+ ) -> SmallVec<[Inst; 5]> {
+ if let Ok(const_data) = u64::try_from(const_data) {
+ SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..])
+ } else if let Some((pattern, size)) =
+ Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64)
+ {
+ Inst::load_replicated_vector_pattern(
+ rd,
+ pattern,
+ VectorSize::from_lane_size(size, true),
+ alloc_tmp,
+ )
+ } else {
+ smallvec![Inst::LoadFpuConst128 { rd, const_data }]
+ }
+ }
+
+ /// Determine whether a 128-bit constant represents a vector consisting of elements with
+ /// the same value.
+ pub fn get_replicated_vector_pattern(
+ value: u128,
+ size: ScalarSize,
+ ) -> Option<(u64, ScalarSize)> {
+ let (mask, shift, next_size) = match size {
+ ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128),
+ ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8),
+ ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16),
+ ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32),
+ _ => return None,
+ };
+ let mut r = None;
+ let v = value & mask;
+
+ if (value >> shift) & mask == v {
+ r = Inst::get_replicated_vector_pattern(v, next_size);
+
+ if r.is_none() {
+ r = Some((v as u64, size));
+ }
+ }
+
+ r
+ }
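+
+    // For example (an illustrative trace of the recursion above): a 128-bit constant made of
+    // sixteen 0x42 bytes, starting at `ScalarSize::Size64`, narrows all the way down and
+    // returns `Some((0x42, ScalarSize::Size8))`, the smallest element size that replicates it.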
+
+ /// Create instructions that load a 128-bit vector constant consisting of elements with
+ /// the same value.
+ pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ rd: Writable<Reg>,
+ pattern: u64,
+ size: VectorSize,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Inst; 5]> {
+ let lane_size = size.lane_size();
+
+ if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) {
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm,
+ invert: false,
+ size
+ }]
+ } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) {
+ debug_assert_ne!(lane_size, ScalarSize::Size8);
+ debug_assert_ne!(lane_size, ScalarSize::Size64);
+
+ smallvec![Inst::VecDupImm {
+ rd,
+ imm,
+ invert: true,
+ size
+ }]
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, I64);
+ let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]);
+
+ insts.push(Inst::VecDup {
+ rd,
+ rn: tmp.to_reg(),
+ size,
+ });
+
+ insts
+ }
+ }
+
+ /// Generic constructor for a load (zero-extending where appropriate).
+ pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
+ match ty {
+ B1 | B8 | I8 => Inst::ULoad8 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B16 | I16 => Inst::ULoad16 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B32 | I32 | R32 => Inst::ULoad32 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ B64 | I64 | R64 => Inst::ULoad64 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ F32 => Inst::FpuLoad32 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ F64 => Inst::FpuLoad64 {
+ rd: into_reg,
+ mem,
+ flags,
+ },
+ _ => {
+ if ty.is_vector() {
+ let bits = ty_bits(ty);
+ let rd = into_reg;
+
+ if bits == 128 {
+ Inst::FpuLoad128 { rd, mem, flags }
+ } else {
+ assert_eq!(bits, 64);
+ Inst::FpuLoad64 { rd, mem, flags }
+ }
+ } else {
+ unimplemented!("gen_load({})", ty);
+ }
+ }
+ }
+ }
+
+ /// Generic constructor for a store.
+ pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
+ match ty {
+ B1 | B8 | I8 => Inst::Store8 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B16 | I16 => Inst::Store16 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B32 | I32 | R32 => Inst::Store32 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ B64 | I64 | R64 => Inst::Store64 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ F32 => Inst::FpuStore32 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ F64 => Inst::FpuStore64 {
+ rd: from_reg,
+ mem,
+ flags,
+ },
+ _ => {
+ if ty.is_vector() {
+ let bits = ty_bits(ty);
+ let rd = from_reg;
+
+ if bits == 128 {
+ Inst::FpuStore128 { rd, mem, flags }
+ } else {
+ assert_eq!(bits, 64);
+ Inst::FpuStore64 { rd, mem, flags }
+ }
+ } else {
+ unimplemented!("gen_store({})", ty);
+ }
+ }
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: get_regs
+
+fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) {
+ match memarg {
+ &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => {
+ collector.add_use(reg);
+ }
+ &AMode::RegReg(r1, r2, ..)
+ | &AMode::RegScaled(r1, r2, ..)
+ | &AMode::RegScaledExtended(r1, r2, ..)
+ | &AMode::RegExtended(r1, r2, ..) => {
+ collector.add_use(r1);
+ collector.add_use(r2);
+ }
+ &AMode::Label(..) => {}
+ &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => {
+ collector.add_mod(reg);
+ }
+ &AMode::FPOffset(..) => {
+ collector.add_use(fp_reg());
+ }
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {
+ collector.add_use(stack_reg());
+ }
+ &AMode::RegOffset(r, ..) => {
+ collector.add_use(r);
+ }
+ }
+}
+
+fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) {
+ match pairmemarg {
+ &PairAMode::SignedOffset(reg, ..) => {
+ collector.add_use(reg);
+ }
+ &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => {
+ collector.add_mod(reg);
+ }
+ }
+}
+
+fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+ match inst {
+ &Inst::AluRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRR { rd, rn, rm, ra, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ collector.add_use(ra);
+ }
+ &Inst::AluRRImm12 { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImmLogic { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImmShift { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRRShift { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRExtend { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::BitRR { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::ULoad8 { rd, ref mem, .. }
+ | &Inst::SLoad8 { rd, ref mem, .. }
+ | &Inst::ULoad16 { rd, ref mem, .. }
+ | &Inst::SLoad16 { rd, ref mem, .. }
+ | &Inst::ULoad32 { rd, ref mem, .. }
+ | &Inst::SLoad32 { rd, ref mem, .. }
+ | &Inst::ULoad64 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::Store8 { rd, ref mem, .. }
+ | &Inst::Store16 { rd, ref mem, .. }
+ | &Inst::Store32 { rd, ref mem, .. }
+ | &Inst::Store64 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::StoreP64 {
+ rt, rt2, ref mem, ..
+ } => {
+ collector.add_use(rt);
+ collector.add_use(rt2);
+ pairmemarg_regs(mem, collector);
+ }
+ &Inst::LoadP64 {
+ rt, rt2, ref mem, ..
+ } => {
+ collector.add_def(rt);
+ collector.add_def(rt2);
+ pairmemarg_regs(mem, collector);
+ }
+ &Inst::Mov64 { rd, rm } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::Mov32 { rd, rm } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::MovK { rd, .. } => {
+ collector.add_mod(rd);
+ }
+ &Inst::CSel { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::CSet { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::CCmpImm { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::AtomicRMW { .. } => {
+ collector.add_use(xreg(25));
+ collector.add_use(xreg(26));
+ collector.add_def(writable_xreg(24));
+ collector.add_def(writable_xreg(27));
+ collector.add_def(writable_xreg(28));
+ }
+ &Inst::AtomicCAS { .. } => {
+ collector.add_use(xreg(25));
+ collector.add_use(xreg(26));
+ collector.add_use(xreg(28));
+ collector.add_def(writable_xreg(24));
+ collector.add_def(writable_xreg(27));
+ }
+ &Inst::AtomicLoad { r_data, r_addr, .. } => {
+ collector.add_use(r_addr);
+ collector.add_def(r_data);
+ }
+ &Inst::AtomicStore { r_data, r_addr, .. } => {
+ collector.add_use(r_addr);
+ collector.add_use(r_data);
+ }
+ &Inst::Fence {} => {}
+ &Inst::FpuMove64 { rd, rn } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuMoveFromVec { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuRR { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn, .. } => {
+ match fpu_op {
+ FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd),
+ FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd),
+ }
+ collector.add_use(rn);
+ }
+ &Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ collector.add_use(ra);
+ }
+ &Inst::VecMisc { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+
+ &Inst::VecLanes { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecShiftImm { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecExtract { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+
+ if is_extension {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ collector.add_use(rn);
+ collector.add_use(rn2);
+ collector.add_use(rm);
+
+ if is_extension {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecLoadReplicate { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecCSel { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuLoad32 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuLoad64 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuLoad128 { rd, ref mem, .. } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore32 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore64 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::FpuStore128 { rd, ref mem, .. } => {
+ collector.add_use(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::FpuToInt { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::IntToFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::FpuRound { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovToFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovToVec { rd, rn, .. } => {
+ collector.add_mod(rd);
+ collector.add_use(rn);
+ }
+ &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDup { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDupFromFpu { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecDupImm { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::VecExtend { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecMovElement { rd, rn, .. } => {
+ collector.add_mod(rd);
+ collector.add_use(rn);
+ }
+ &Inst::VecMiscNarrow {
+ rd, rn, high_half, ..
+ } => {
+ collector.add_use(rn);
+
+ if high_half {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ }
+ &Inst::VecRRR {
+ alu_op, rd, rn, rm, ..
+ } => {
+ if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
+ collector.add_mod(rd);
+ } else {
+ collector.add_def(rd);
+ }
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::MovToNZCV { rn } => {
+ collector.add_use(rn);
+ }
+ &Inst::MovFromNZCV { rd } => {
+ collector.add_def(rd);
+ }
+ &Inst::Extend { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {}
+ &Inst::Call { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ }
+ &Inst::CallInd { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ collector.add_use(info.rn);
+ }
+ &Inst::CondBr { ref kind, .. } => match kind {
+ CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
+ collector.add_use(*rt);
+ }
+ CondBrKind::Cond(_) => {}
+ },
+ &Inst::IndirectBr { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::Nop0 | Inst::Nop4 => {}
+ &Inst::Brk => {}
+ &Inst::Udf { .. } => {}
+ &Inst::TrapIf { ref kind, .. } => match kind {
+ CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
+ collector.add_use(*rt);
+ }
+ CondBrKind::Cond(_) => {}
+ },
+ &Inst::Adr { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::Word4 { .. } | &Inst::Word8 { .. } => {}
+ &Inst::JTSequence {
+ ridx, rtmp1, rtmp2, ..
+ } => {
+ collector.add_use(ridx);
+ collector.add_def(rtmp1);
+ collector.add_def(rtmp2);
+ }
+ &Inst::LoadExtName { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ collector.add_def(rd);
+ memarg_regs(mem, collector);
+ }
+ &Inst::VirtualSPOffsetAdj { .. } => {}
+ &Inst::EmitIsland { .. } => {}
+ }
+}
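+
+// Illustrative note (an editorial sketch, not upstream logic): the collector
+// distinguishes defs (fully written), uses (read), and mods (read-modify-write).
+// For example, in the match above `MovToVec` registers `rd` with `add_mod`
+// because inserting one lane preserves the destination vector's other lanes,
+// whereas the store forms pass `rd` to `add_use` since their "destination"
+// operand is only read.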
+
+//=============================================================================
+// Instructions: map_regs
+
+fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
+ fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
+ if r.is_virtual() {
+ let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
+ *r = new;
+ }
+ }
+
+ fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) {
+ // N.B.: we take only the pre-map here, but this is OK because the
+ // only addressing modes that update registers (pre/post-increment on
+ // AArch64) both read and write registers, so they are "mods" rather
+ // than "defs", so must be the same in both the pre- and post-map.
+ match mem {
+ &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg),
+ &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg),
+ &mut AMode::RegReg(ref mut r1, ref mut r2)
+ | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..)
+ | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..)
+ | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => {
+ map_use(m, r1);
+ map_use(m, r2);
+ }
+ &mut AMode::Label(..) => {}
+ &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r),
+ &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r),
+ &mut AMode::FPOffset(..)
+ | &mut AMode::SPOffset(..)
+ | &mut AMode::NominalSPOffset(..) => {}
+ &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r),
+ };
+ }
+
+ fn map_pairmem<RUM: RegUsageMapper>(m: &RUM, mem: &mut PairAMode) {
+ match mem {
+ &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg),
+ &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg),
+ &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg),
+ }
+ }
+
+ fn map_br<RUM: RegUsageMapper>(m: &RUM, br: &mut CondBrKind) {
+ match br {
+ &mut CondBrKind::Zero(ref mut reg) => map_use(m, reg),
+ &mut CondBrKind::NotZero(ref mut reg) => map_use(m, reg),
+ &mut CondBrKind::Cond(..) => {}
+ };
+ }
+
+ match inst {
+ &mut Inst::AluRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ref mut ra,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ map_use(mapper, ra);
+ }
+ &mut Inst::AluRRImm12 {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImmLogic {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImmShift {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRRShift {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRExtend {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::BitRR {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::ULoad8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::ULoad16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::ULoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::SLoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+
+ &mut Inst::ULoad64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store8 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store16 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Store64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+
+ &mut Inst::StoreP64 {
+ ref mut rt,
+ ref mut rt2,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rt);
+ map_use(mapper, rt2);
+ map_pairmem(mapper, mem);
+ }
+ &mut Inst::LoadP64 {
+ ref mut rt,
+ ref mut rt2,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rt);
+ map_def(mapper, rt2);
+ map_pairmem(mapper, mem);
+ }
+ &mut Inst::Mov64 {
+ ref mut rd,
+ ref mut rm,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::Mov32 {
+ ref mut rd,
+ ref mut rm,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovZ { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::MovN { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::MovK { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::CSel {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::CSet { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::CCmpImm { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::AtomicRMW { .. } => {
+ // There are no vregs to map in this insn.
+ }
+ &mut Inst::AtomicCAS { .. } => {
+ // There are no vregs to map in this insn.
+ }
+ &mut Inst::AtomicLoad {
+ ref mut r_data,
+ ref mut r_addr,
+ ..
+ } => {
+ map_def(mapper, r_data);
+ map_use(mapper, r_addr);
+ }
+ &mut Inst::AtomicStore {
+ ref mut r_data,
+ ref mut r_addr,
+ ..
+ } => {
+ map_use(mapper, r_data);
+ map_use(mapper, r_addr);
+ }
+ &mut Inst::Fence {} => {}
+ &mut Inst::FpuMove64 {
+ ref mut rd,
+ ref mut rn,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuMove128 {
+ ref mut rd,
+ ref mut rn,
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuMoveFromVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRR {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuRRI {
+ fpu_op,
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ match fpu_op {
+ FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => map_def(mapper, rd),
+ FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => map_mod(mapper, rd),
+ }
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuRRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ref mut ra,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ map_use(mapper, ra);
+ }
+ &mut Inst::VecMisc {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecLanes {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecShiftImm {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecExtract {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::VecTbl {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ is_extension,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+
+ if is_extension {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecTbl2 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rn2,
+ ref mut rm,
+ is_extension,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rn2);
+ map_use(mapper, rm);
+
+ if is_extension {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecLoadReplicate {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecCSel {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCmp32 {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCmp64 {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuLoad32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuLoad64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuLoad128 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore32 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore64 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::FpuStore128 {
+ ref mut rd,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::LoadFpuConst64 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::LoadFpuConst128 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::FpuToInt {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::IntToFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::FpuCSel32 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuCSel64 {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::FpuRound {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovToFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovToVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_mod(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovFromVec {
+ ref mut rd,
+ ref mut rn,
+ ..
+ }
+ | &mut Inst::MovFromVecSigned {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDup {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDupFromFpu {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecDupImm { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::VecExtend {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecMovElement {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_mod(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::VecMiscNarrow {
+ ref mut rd,
+ ref mut rn,
+ high_half,
+ ..
+ } => {
+ map_use(mapper, rn);
+
+ if high_half {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ }
+ &mut Inst::VecRRR {
+ alu_op,
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
+ map_mod(mapper, rd);
+ } else {
+ map_def(mapper, rd);
+ }
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovToNZCV { ref mut rn } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::MovFromNZCV { ref mut rd } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Extend {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::Jump { .. } => {}
+ &mut Inst::Call { ref mut info } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ }
+ &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {}
+ &mut Inst::CallInd { ref mut info, .. } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ map_use(mapper, &mut info.rn);
+ }
+ &mut Inst::CondBr { ref mut kind, .. } => {
+ map_br(mapper, kind);
+ }
+ &mut Inst::IndirectBr { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {}
+ &mut Inst::TrapIf { ref mut kind, .. } => {
+ map_br(mapper, kind);
+ }
+ &mut Inst::Adr { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {}
+ &mut Inst::JTSequence {
+ ref mut ridx,
+ ref mut rtmp1,
+ ref mut rtmp2,
+ ..
+ } => {
+ map_use(mapper, ridx);
+ map_def(mapper, rtmp1);
+ map_def(mapper, rtmp2);
+ }
+ &mut Inst::LoadExtName { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::LoadAddr {
+ ref mut rd,
+ ref mut mem,
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::VirtualSPOffsetAdj { .. } => {}
+ &mut Inst::EmitIsland { .. } => {}
+ }
+}
+
+//=============================================================================
+// Instructions: misc functions and external interface
+
+impl MachInst for Inst {
+ type LabelUse = LabelUse;
+
+ fn get_regs(&self, collector: &mut RegUsageCollector) {
+ aarch64_get_regs(self, collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ aarch64_map_regs(self, mapper);
+ }
+
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
+ match self {
+ &Inst::Mov64 { rd, rm } => Some((rd, rm)),
+ &Inst::FpuMove64 { rd, rn } => Some((rd, rn)),
+ &Inst::FpuMove128 { rd, rn } => Some((rd, rn)),
+ _ => None,
+ }
+ }
+
+ fn is_epilogue_placeholder(&self) -> bool {
+ if let Inst::EpiloguePlaceholder = self {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_included_in_clobbers(&self) -> bool {
+ // We exclude call instructions from the clobber-set when they are calls
+ // from caller to callee with the same ABI. Such calls cannot possibly
+ // force any new registers to be saved in the prologue, because anything
+ // that the callee clobbers, the caller is also allowed to clobber. This
+ // both saves work and enables us to more precisely follow the
+ // half-caller-save, half-callee-save SysV ABI for some vector
+ // registers.
+ //
+ // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for
+ // more information on this ABI-implementation hack.
+ match self {
+ &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv,
+ &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv,
+ _ => true,
+ }
+ }
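+
+ // Illustrative example (not from upstream): a call whose caller and callee
+ // both use, say, `CallConv::SystemV` makes the comparison above false and is
+ // left out of the clobber set, while a cross-ABI call keeps the conservative
+ // default and reports its clobbers.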
+
+ fn is_term<'a>(&'a self) -> MachTerminator<'a> {
+ match self {
+ &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
+ &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
+ &Inst::CondBr {
+ taken, not_taken, ..
+ } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
+ &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
+ &Inst::JTSequence { ref info, .. } => {
+ MachTerminator::Indirect(&info.targets_for_term[..])
+ }
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+ assert!(ty.bits() <= 128);
+ Inst::mov(to_reg, from_reg)
+ }
+
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ if ty == F64 {
+ Inst::load_fp_constant64(to_reg, value, alloc_tmp)
+ } else if ty == F32 {
+ Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
+ } else {
+ // Must be an integer type.
+ debug_assert!(
+ ty == B1
+ || ty == I8
+ || ty == B8
+ || ty == I16
+ || ty == B16
+ || ty == I32
+ || ty == B32
+ || ty == I64
+ || ty == B64
+ || ty == R32
+ || ty == R64
+ );
+ Inst::load_constant(to_reg, value)
+ }
+ }
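+
+ // Usage sketch (illustrative; `rd` and `alloc_tmp` come from the caller's
+ // lowering context): `Inst::gen_constant(rd, 0x3ff0_0000_0000_0000, F64,
+ // alloc_tmp)` hits the F64 arm above and forwards the raw bit pattern of 1.0
+ // to `load_fp_constant64`.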
+
+ fn gen_zero_len_nop() -> Inst {
+ Inst::Nop0
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ // We can't give a NOP (or any insn) < 4 bytes.
+ assert!(preferred_size >= 4);
+ Inst::Nop4
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
+ match ty {
+ I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
+ F32 | F64 => Ok(RegClass::V128),
+ IFLAGS | FFLAGS => Ok(RegClass::I64),
+ B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
+ Ok(RegClass::V128)
+ }
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(target: MachLabel) -> Inst {
+ Inst::Jump {
+ dest: BranchTarget::Label(target),
+ }
+ }
+
+ fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ create_reg_universe(flags)
+ }
+
+ fn worst_case_size() -> CodeOffset {
+ // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
+ // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
+ // 64-bit f64 constants.
+ //
+ // Note that inline jump-tables handle island/pool insertion separately, so we do not need
+ // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not
+ // feasible for other reasons).
+ 44
+ }
+
+ fn ref_type_regclass(_: &settings::Flags) -> RegClass {
+ RegClass::I64
+ }
+}
+
+//=============================================================================
+// Pretty-printing of instructions.
+
+fn mem_finalize_for_show(
+ mem: &AMode,
+ mb_rru: Option<&RealRegUniverse>,
+ state: &EmitState,
+) -> (String, AMode) {
+ let (mem_insts, mem) = mem_finalize(0, mem, state);
+ let mut mem_str = mem_insts
+ .into_iter()
+ .map(|inst| inst.show_rru(mb_rru))
+ .collect::<Vec<_>>()
+ .join(" ; ");
+ if !mem_str.is_empty() {
+ mem_str += " ; ";
+ }
+
+ (mem_str, mem)
+}
+
+impl PrettyPrint for Inst {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.pretty_print(mb_rru, &mut EmitState::default())
+ }
+}
+
+impl Inst {
+ fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) {
+ match alu_op {
+ ALUOp::Add32 => ("add", OperandSize::Size32),
+ ALUOp::Add64 => ("add", OperandSize::Size64),
+ ALUOp::Sub32 => ("sub", OperandSize::Size32),
+ ALUOp::Sub64 => ("sub", OperandSize::Size64),
+ ALUOp::Orr32 => ("orr", OperandSize::Size32),
+ ALUOp::Orr64 => ("orr", OperandSize::Size64),
+ ALUOp::And32 => ("and", OperandSize::Size32),
+ ALUOp::And64 => ("and", OperandSize::Size64),
+ ALUOp::Eor32 => ("eor", OperandSize::Size32),
+ ALUOp::Eor64 => ("eor", OperandSize::Size64),
+ ALUOp::AddS32 => ("adds", OperandSize::Size32),
+ ALUOp::AddS64 => ("adds", OperandSize::Size64),
+ ALUOp::SubS32 => ("subs", OperandSize::Size32),
+ ALUOp::SubS64 => ("subs", OperandSize::Size64),
+ ALUOp::SMulH => ("smulh", OperandSize::Size64),
+ ALUOp::UMulH => ("umulh", OperandSize::Size64),
+ ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
+ ALUOp::UDiv64 => ("udiv", OperandSize::Size64),
+ ALUOp::AndNot32 => ("bic", OperandSize::Size32),
+ ALUOp::AndNot64 => ("bic", OperandSize::Size64),
+ ALUOp::OrrNot32 => ("orn", OperandSize::Size32),
+ ALUOp::OrrNot64 => ("orn", OperandSize::Size64),
+ ALUOp::EorNot32 => ("eon", OperandSize::Size32),
+ ALUOp::EorNot64 => ("eon", OperandSize::Size64),
+ ALUOp::RotR32 => ("ror", OperandSize::Size32),
+ ALUOp::RotR64 => ("ror", OperandSize::Size64),
+ ALUOp::Lsr32 => ("lsr", OperandSize::Size32),
+ ALUOp::Lsr64 => ("lsr", OperandSize::Size64),
+ ALUOp::Asr32 => ("asr", OperandSize::Size32),
+ ALUOp::Asr64 => ("asr", OperandSize::Size64),
+ ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
+ ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
+ }
+ }
+
+ match self {
+ &Inst::Nop0 => "nop-zero-len".to_string(),
+ &Inst::Nop4 => "nop".to_string(),
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let (op, size) = match alu_op {
+ ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
+ ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
+ ALUOp3::MSub32 => ("msub", OperandSize::Size32),
+ ALUOp3::MSub64 => ("msub", OperandSize::Size64),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let ra = show_ireg_sized(ra, mb_rru, size);
+
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ ref imm12,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+
+ if imm12.bits == 0 && alu_op == ALUOp::Add64 {
+ // special-case MOV (used for moving into SP).
+ format!("mov {}, {}", rd, rn)
+ } else {
+ let imm12 = imm12.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm12)
+ }
+ }
+ &Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ ref imml,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let imml = imml.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imml)
+ }
+ &Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ ref immshift,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let immshift = immshift.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, immshift)
+ }
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shiftop,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let shiftop = shiftop.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop)
+ }
+ &Inst::AluRRRExtend {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref extendop,
+ } => {
+ let (op, size) = op_name_size(alu_op);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let rm = show_ireg_sized(rm, mb_rru, size);
+ let extendop = extendop.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
+ }
+ &Inst::BitRR { op, rd, rn } => {
+ let size = op.operand_size();
+ let op = op.op_str();
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::ULoad8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::SLoad32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::ULoad64 {
+ rd,
+ ref mem,
+ ..
+ } => {
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+
+ let is_unscaled = match &mem {
+ &AMode::Unscaled(..) => true,
+ _ => false,
+ };
+ let (op, size) = match (self, is_unscaled) {
+ (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32),
+ (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32),
+ (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64),
+ (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64),
+ (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32),
+ (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32),
+ (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64),
+ (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64),
+ (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32),
+ (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32),
+ (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64),
+ (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64),
+ (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64),
+ (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64),
+ _ => unreachable!(),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rd, mem)
+ }
+ &Inst::Store8 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store16 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store32 {
+ rd,
+ ref mem,
+ ..
+ }
+ | &Inst::Store64 {
+ rd,
+ ref mem,
+ ..
+ } => {
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+
+ let is_unscaled = match &mem {
+ &AMode::Unscaled(..) => true,
+ _ => false,
+ };
+ let (op, size) = match (self, is_unscaled) {
+ (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32),
+ (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32),
+ (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32),
+ (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32),
+ (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32),
+ (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32),
+ (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64),
+ (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64),
+ _ => unreachable!(),
+ };
+ let rd = show_ireg_sized(rd, mb_rru, size);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rd, mem)
+ }
+ &Inst::StoreP64 { rt, rt2, ref mem, .. } => {
+ let rt = rt.show_rru(mb_rru);
+ let rt2 = rt2.show_rru(mb_rru);
+ let mem = mem.show_rru(mb_rru);
+ format!("stp {}, {}, {}", rt, rt2, mem)
+ }
+ &Inst::LoadP64 { rt, rt2, ref mem, .. } => {
+ let rt = rt.to_reg().show_rru(mb_rru);
+ let rt2 = rt2.to_reg().show_rru(mb_rru);
+ let mem = mem.show_rru(mb_rru);
+ format!("ldp {}, {}, {}", rt, rt2, mem)
+ }
+ &Inst::Mov64 { rd, rm } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::Mov32 { rd, rm } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::MovZ { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movz {}, {}", rd, imm)
+ }
+ &Inst::MovN { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movn {}, {}", rd, imm)
+ }
+ &Inst::MovK { rd, ref imm, size } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ format!("movk {}, {}", rd, imm)
+ }
+ &Inst::CSel { rd, rn, rm, cond } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("csel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::CSet { rd, cond } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("cset {}, {}", rd, cond)
+ }
+ &Inst::CCmpImm {
+ size,
+ rn,
+ imm,
+ nzcv,
+ cond,
+ } => {
+ let rn = show_ireg_sized(rn, mb_rru, size);
+ let imm = imm.show_rru(mb_rru);
+ let nzcv = nzcv.show_rru(mb_rru);
+ let cond = cond.show_rru(mb_rru);
+ format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond)
+ }
+ &Inst::AtomicRMW { ty, op, .. } => {
+ format!(
+ "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}",
+ ty.bits(), op)
+ }
+ &Inst::AtomicCAS { ty, .. } => {
+ format!(
+ "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}",
+ ty.bits())
+ }
+ &Inst::AtomicLoad { ty, r_data, r_addr, .. } => {
+ format!(
+ "atomically {{ {} = zero_extend_{}_bits_at[{}] }}",
+ r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru))
+ }
+ &Inst::AtomicStore { ty, r_data, r_addr, .. } => {
+ format!(
+ "atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru))
+ }
+ &Inst::Fence {} => {
+ format!("dmb ish")
+ }
+ &Inst::FpuMove64 { rd, rn } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ format!("mov {}.8b, {}.8b", rd, rn)
+ }
+ &Inst::FpuMove128 { rd, rn } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ format!("mov {}.16b, {}.16b", rd, rn)
+ }
+ &Inst::FpuMoveFromVec { rd, rn, idx, size } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::FpuRR { fpu_op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match fpu_op {
+ FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32),
+ FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64),
+ FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64),
+ FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::FpuRRR { fpu_op, rd, rn, rm } => {
+ let (op, size) = match fpu_op {
+ FPUOp2::Add32 => ("fadd", ScalarSize::Size32),
+ FPUOp2::Add64 => ("fadd", ScalarSize::Size64),
+ FPUOp2::Sub32 => ("fsub", ScalarSize::Size32),
+ FPUOp2::Sub64 => ("fsub", ScalarSize::Size64),
+ FPUOp2::Mul32 => ("fmul", ScalarSize::Size32),
+ FPUOp2::Mul64 => ("fmul", ScalarSize::Size64),
+ FPUOp2::Div32 => ("fdiv", ScalarSize::Size32),
+ FPUOp2::Div64 => ("fdiv", ScalarSize::Size64),
+ FPUOp2::Max32 => ("fmax", ScalarSize::Size32),
+ FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
+ FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
+ FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
+ FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
+ FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
+ FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
+ FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ let rm = show_vreg_scalar(rm, mb_rru, size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::FpuRRI { fpu_op, rd, rn } => {
+ let (op, imm, vector) = match fpu_op {
+ FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true),
+ FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false),
+ FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true),
+ FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false),
+ };
+
+ let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
+ |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
+ } else {
+ |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
+ };
+ let rd = show_vreg_fn(rd.to_reg(), mb_rru);
+ let rn = show_vreg_fn(rn, mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm)
+ }
+ &Inst::FpuRRRR {
+ fpu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ } => {
+ let (op, size) = match fpu_op {
+ FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32),
+ FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ let rm = show_vreg_scalar(rm, mb_rru, size);
+ let ra = show_vreg_scalar(ra, mb_rru, size);
+ format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
+ }
+ &Inst::FpuCmp32 { rn, rm } => {
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
+ format!("fcmp {}, {}", rn, rm)
+ }
+ &Inst::FpuCmp64 { rn, rm } => {
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
+ format!("fcmp {}, {}", rn, rm)
+ }
+ &Inst::FpuLoad32 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuLoad64 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuLoad128 { rd, ref mem, .. } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ let rd = "q".to_string() + &rd[1..];
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}ldr {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore32 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore64 { rd, ref mem, .. } => {
+ let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::FpuStore128 { rd, ref mem, .. } => {
+ let rd = rd.show_rru(mb_rru);
+ let rd = "q".to_string() + &rd[1..];
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}str {}, {}", mem_str, rd, mem)
+ }
+ &Inst::LoadFpuConst64 { rd, const_data } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data))
+ }
+ &Inst::LoadFpuConst128 { rd, const_data } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128);
+ format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data)
+ }
+ &Inst::FpuToInt { op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match op {
+ FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32),
+ FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32),
+ FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64),
+ FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64),
+ FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32),
+ FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32),
+ FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64),
+ FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_vreg_scalar(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::IntToFpu { op, rd, rn } => {
+ let (op, sizesrc, sizedest) = match op {
+ IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32),
+ IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32),
+ IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32),
+ IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32),
+ IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64),
+ IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64),
+ IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64),
+ IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest);
+ let rn = show_ireg_sized(rn, mb_rru, sizesrc);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::FpuCSel32 { rd, rn, rm, cond } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32);
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32);
+ let cond = cond.show_rru(mb_rru);
+ format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::FpuCSel64 { rd, rn, rm, cond } => {
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
+ let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64);
+ let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64);
+ let cond = cond.show_rru(mb_rru);
+ format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond)
+ }
+ &Inst::FpuRound { op, rd, rn } => {
+ let (inst, size) = match op {
+ FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32),
+ FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64),
+ FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32),
+ FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64),
+ FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32),
+ FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64),
+ FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32),
+ FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64),
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_scalar(rn, mb_rru, size);
+ format!("{} {}, {}", inst, rd, rn)
+ }
+ &Inst::MovToFpu { rd, rn, size } => {
+ let operand_size = size.operand_size();
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, operand_size);
+ format!("fmov {}, {}", rd, rn)
+ }
+ &Inst::MovToVec { rd, rn, idx, size } => {
+ let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
+ let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::MovFromVec { rd, rn, idx, size } => {
+ let op = match size {
+ VectorSize::Size8x16 => "umov",
+ VectorSize::Size16x8 => "umov",
+ VectorSize::Size32x4 => "mov",
+ VectorSize::Size64x2 => "mov",
+ _ => unimplemented!(),
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ } => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size);
+ let rn = show_vreg_element(rn, mb_rru, idx, size);
+ format!("smov {}, {}", rd, rn)
+ }
+ &Inst::VecDup { rd, rn, size } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
+ format!("dup {}, {}", rd, rn)
+ }
+ &Inst::VecDupFromFpu { rd, rn, size } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_element(rn, mb_rru, 0, size);
+ format!("dup {}, {}", rd, rn)
+ }
+ &Inst::VecDupImm { rd, imm, invert, size } => {
+ let imm = imm.show_rru(mb_rru);
+ let op = if invert {
+ "mvni"
+ } else {
+ "movi"
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+
+ format!("{} {}, {}", op, rd, imm)
+ }
+ &Inst::VecExtend { t, rd, rn, high_half } => {
+ let (op, dest, src) = match (t, high_half) {
+ (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
+ (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
+ (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
+ (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
+ (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
+ (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
+ (VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
+ (VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
+ (VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
+ (VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
+ (VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
+ (VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
+ let rn = show_vreg_vector(rn, mb_rru, src);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx,
+ src_idx,
+ size,
+ } => {
+ let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size);
+ let rn = show_vreg_element(rn, mb_rru, src_idx, size);
+ format!("mov {}, {}", rd, rn)
+ }
+ &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
+ let dest_size = if high_half {
+ assert!(size.is_128bits());
+ size
+ } else {
+ size.halve()
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_vreg_vector(rn, mb_rru, size.widen());
+ let op = match (op, high_half) {
+ (VecMiscNarrowOp::Xtn, false) => "xtn",
+ (VecMiscNarrowOp::Xtn, true) => "xtn2",
+ (VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
+ (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
+ (VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
+ (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
+ };
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size,
+ } => {
+ let (op, size) = match alu_op {
+ VecALUOp::Sqadd => ("sqadd", size),
+ VecALUOp::Uqadd => ("uqadd", size),
+ VecALUOp::Sqsub => ("sqsub", size),
+ VecALUOp::Uqsub => ("uqsub", size),
+ VecALUOp::Cmeq => ("cmeq", size),
+ VecALUOp::Cmge => ("cmge", size),
+ VecALUOp::Cmgt => ("cmgt", size),
+ VecALUOp::Cmhs => ("cmhs", size),
+ VecALUOp::Cmhi => ("cmhi", size),
+ VecALUOp::Fcmeq => ("fcmeq", size),
+ VecALUOp::Fcmgt => ("fcmgt", size),
+ VecALUOp::Fcmge => ("fcmge", size),
+ VecALUOp::And => ("and", VectorSize::Size8x16),
+ VecALUOp::Bic => ("bic", VectorSize::Size8x16),
+ VecALUOp::Orr => ("orr", VectorSize::Size8x16),
+ VecALUOp::Eor => ("eor", VectorSize::Size8x16),
+ VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
+ VecALUOp::Umaxp => ("umaxp", size),
+ VecALUOp::Add => ("add", size),
+ VecALUOp::Sub => ("sub", size),
+ VecALUOp::Mul => ("mul", size),
+ VecALUOp::Sshl => ("sshl", size),
+ VecALUOp::Ushl => ("ushl", size),
+ VecALUOp::Umin => ("umin", size),
+ VecALUOp::Smin => ("smin", size),
+ VecALUOp::Umax => ("umax", size),
+ VecALUOp::Smax => ("smax", size),
+ VecALUOp::Urhadd => ("urhadd", size),
+ VecALUOp::Fadd => ("fadd", size),
+ VecALUOp::Fsub => ("fsub", size),
+ VecALUOp::Fdiv => ("fdiv", size),
+ VecALUOp::Fmax => ("fmax", size),
+ VecALUOp::Fmin => ("fmin", size),
+ VecALUOp::Fmul => ("fmul", size),
+ VecALUOp::Addp => ("addp", size),
+ VecALUOp::Umlal => ("umlal", size),
+ VecALUOp::Zip1 => ("zip1", size),
+ VecALUOp::Smull => ("smull", size),
+ VecALUOp::Smull2 => ("smull2", size),
+ };
+ let rd_size = match alu_op {
+ VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
+ _ => size
+ };
+ let rn_size = match alu_op {
+ VecALUOp::Smull => size.halve(),
+ _ => size
+ };
+ let rm_size = rn_size;
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+ let rn = show_vreg_vector(rn, mb_rru, rn_size);
+ let rm = show_vreg_vector(rm, mb_rru, rm_size);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::VecMisc { op, rd, rn, size } => {
+ let is_shll = op == VecMisc2::Shll;
+ let suffix = match (is_shll, size) {
+ (true, VectorSize::Size8x8) => ", #8",
+ (true, VectorSize::Size16x4) => ", #16",
+ (true, VectorSize::Size32x2) => ", #32",
+ _ => "",
+ };
+
+ let (op, size) = match op {
+ VecMisc2::Not => (
+ "mvn",
+ if size.is_128bits() {
+ VectorSize::Size8x16
+ } else {
+ VectorSize::Size8x8
+ },
+ ),
+ VecMisc2::Neg => ("neg", size),
+ VecMisc2::Abs => ("abs", size),
+ VecMisc2::Fabs => ("fabs", size),
+ VecMisc2::Fneg => ("fneg", size),
+ VecMisc2::Fsqrt => ("fsqrt", size),
+ VecMisc2::Rev64 => ("rev64", size),
+ VecMisc2::Shll => ("shll", size),
+ VecMisc2::Fcvtzs => ("fcvtzs", size),
+ VecMisc2::Fcvtzu => ("fcvtzu", size),
+ VecMisc2::Scvtf => ("scvtf", size),
+ VecMisc2::Ucvtf => ("ucvtf", size),
+ VecMisc2::Frintn => ("frintn", size),
+ VecMisc2::Frintz => ("frintz", size),
+ VecMisc2::Frintm => ("frintm", size),
+ VecMisc2::Frintp => ("frintp", size),
+ };
+
+ let rd_size = if is_shll { size.widen() } else { size };
+
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}{}", op, rd, rn, suffix)
+ }
+ &Inst::VecLanes { op, rd, rn, size } => {
+ let op = match op {
+ VecLanesOp::Uminv => "uminv",
+ VecLanesOp::Addv => "addv",
+ };
+ let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::VecShiftImm { op, rd, rn, size, imm } => {
+ let op = match op {
+ VecShiftImmOp::Shl => "shl",
+ VecShiftImmOp::Ushr => "ushr",
+ VecShiftImmOp::Sshr => "sshr",
+ };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = show_vreg_vector(rn, mb_rru, size);
+ format!("{} {}, {}, #{}", op, rd, rn, imm)
+ }
+ &Inst::VecExtract { rd, rn, rm, imm4 } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4)
+ }
+ &Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension,
+ } => {
+ let op = if is_extension { "tbx" } else { "tbl" };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
+ }
+ &Inst::VecTbl2 {
+ rd,
+ rn,
+ rn2,
+ rm,
+ is_extension,
+ } => {
+ let op = if is_extension { "tbx" } else { "tbl" };
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
+ }
+ &Inst::VecLoadReplicate { rd, rn, size, .. } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+ let rn = rn.show_rru(mb_rru);
+
+ format!("ld1r {{ {} }}, [{}]", rd, rn)
+ }
+ &Inst::VecCSel { rd, rn, rm, cond } => {
+ let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
+ let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
+ let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
+ let cond = cond.show_rru(mb_rru);
+ format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond)
+ }
+ &Inst::MovToNZCV { rn } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("msr nzcv, {}", rn)
+ }
+ &Inst::MovFromNZCV { rd } => {
+ let rd = rd.to_reg().show_rru(mb_rru);
+ format!("mrs {}, nzcv", rd)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits >= 8 => {
+ // Is the destination a 32-bit register? Corresponds to whether
+ // extend-to width is <= 32 bits, *unless* we have an unsigned
+ // 32-to-64-bit extension, which is implemented with a "mov" to a
+ // 32-bit (W-reg) dest, because this zeroes the top 32 bits.
+ let dest_size = if !signed && from_bits == 32 && to_bits == 64 {
+ OperandSize::Size32
+ } else {
+ OperandSize::from_bits(to_bits)
+ };
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
+ let op = match (signed, from_bits, to_bits) {
+ (false, 8, 32) => "uxtb",
+ (true, 8, 32) => "sxtb",
+ (false, 16, 32) => "uxth",
+ (true, 16, 32) => "sxth",
+ (false, 8, 64) => "uxtb",
+ (true, 8, 64) => "sxtb",
+ (false, 16, 64) => "uxth",
+ (true, 16, 64) => "sxth",
+ (false, 32, 64) => "mov", // special case (see above).
+ (true, 32, 64) => "sxtw",
+ _ => panic!("Unsupported Extend case: {:?}", self),
+ };
+ format!("{} {}, {}", op, rd, rn)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ } if from_bits == 1 && signed => {
+ let dest_size = OperandSize::from_bits(to_bits);
+ let zr = if dest_size.is32() { "wzr" } else { "xzr" };
+ let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+ format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd)
+ }
+ &Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ ..
+ } if from_bits == 1 && !signed => {
+ let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+ let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+ format!("and {}, {}, #1", rd, rn)
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported Extend case");
+ }
+ &Inst::Call { .. } => format!("bl 0"),
+ &Inst::CallInd { ref info, .. } => {
+ let rn = info.rn.show_rru(mb_rru);
+ format!("blr {}", rn)
+ }
+ &Inst::Ret => "ret".to_string(),
+ &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
+ &Inst::Jump { ref dest } => {
+ let dest = dest.show_rru(mb_rru);
+ format!("b {}", dest)
+ }
+ &Inst::CondBr {
+ ref taken,
+ ref not_taken,
+ ref kind,
+ } => {
+ let taken = taken.show_rru(mb_rru);
+ let not_taken = not_taken.show_rru(mb_rru);
+ match kind {
+ &CondBrKind::Zero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbz {}, {} ; b {}", reg, taken, not_taken)
+ }
+ &CondBrKind::NotZero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbnz {}, {} ; b {}", reg, taken, not_taken)
+ }
+ &CondBrKind::Cond(c) => {
+ let c = c.show_rru(mb_rru);
+ format!("b.{} {} ; b {}", c, taken, not_taken)
+ }
+ }
+ }
+ &Inst::IndirectBr { rn, .. } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("br {}", rn)
+ }
+ &Inst::Brk => "brk #0".to_string(),
+ &Inst::Udf { .. } => "udf".to_string(),
+ &Inst::TrapIf { ref kind, .. } => match kind {
+ &CondBrKind::Zero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbnz {}, 8 ; udf", reg)
+ }
+ &CondBrKind::NotZero(reg) => {
+ let reg = reg.show_rru(mb_rru);
+ format!("cbz {}, 8 ; udf", reg)
+ }
+ &CondBrKind::Cond(c) => {
+ let c = c.invert().show_rru(mb_rru);
+ format!("b.{} 8 ; udf", c)
+ }
+ },
+ &Inst::Adr { rd, off } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("adr {}, pc+{}", rd, off)
+ }
+ &Inst::Word4 { data } => format!("data.i32 {}", data),
+ &Inst::Word8 { data } => format!("data.i64 {}", data),
+ &Inst::JTSequence {
+ ref info,
+ ridx,
+ rtmp1,
+ rtmp2,
+ ..
+ } => {
+ let ridx = ridx.show_rru(mb_rru);
+ let rtmp1 = rtmp1.show_rru(mb_rru);
+ let rtmp2 = rtmp2.show_rru(mb_rru);
+ let default_target = info.default_target.show_rru(mb_rru);
+ format!(
+ concat!(
+ "b.hs {} ; ",
+ "adr {}, pc+16 ; ",
+ "ldrsw {}, [{}, {}, LSL 2] ; ",
+ "add {}, {}, {} ; ",
+ "br {} ; ",
+ "jt_entries {:?}"
+ ),
+ default_target,
+ rtmp1,
+ rtmp2,
+ rtmp1,
+ ridx,
+ rtmp1,
+ rtmp1,
+ rtmp2,
+ rtmp1,
+ info.targets
+ )
+ }
+ &Inst::LoadExtName {
+ rd,
+ ref name,
+ offset,
+ } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset)
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ // TODO: we really should find a better way to avoid duplication of
+ // this logic between `emit()` and `show_rru()` -- a separate 1-to-N
+ // expansion stage (i.e., legalization, but without the slow edit-in-place
+ // of the existing legalization framework).
+ let (mem_insts, mem) = mem_finalize(0, mem, state);
+ let mut ret = String::new();
+ for inst in mem_insts.into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let (reg, offset) = match mem {
+ AMode::Unscaled(r, simm9) => (r, simm9.value()),
+ AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
+ _ => panic!("Unsupported case for LoadAddr: {:?}", mem),
+ };
+ let abs_offset = if offset < 0 {
+ -offset as u64
+ } else {
+ offset as u64
+ };
+ let alu_op = if offset < 0 {
+ ALUOp::Sub64
+ } else {
+ ALUOp::Add64
+ };
+
+ if offset == 0 {
+ let mov = Inst::mov(rd, reg);
+ ret.push_str(&mov.show_rru(mb_rru));
+ } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
+ let add = Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: reg,
+ imm12,
+ };
+ ret.push_str(&add.show_rru(mb_rru));
+ } else {
+ let tmp = writable_spilltmp_reg();
+ for inst in Inst::load_constant(tmp, abs_offset).into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let add = Inst::AluRRR {
+ alu_op,
+ rd,
+ rn: reg,
+ rm: tmp.to_reg(),
+ };
+ ret.push_str(&add.show_rru(mb_rru));
+ }
+ ret
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ state.virtual_sp_offset += offset;
+ format!("virtual_sp_offset_adjust {}", offset)
+ }
+ &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space),
+ }
+ }
+}
+
+//=============================================================================
+// Label fixups and jump veneers.
+
+/// Different forms of label references for different instruction formats.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+ /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19
+ /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond.
+ Branch19,
+ /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26
+ /// signed bits, in bits 25:0. Used by b, bl.
+ Branch26,
+ /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits,
+ /// in bits 23:5.
+ Ldr19,
+ /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is
+ /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29.
+ Adr21,
+ /// 32-bit PC relative constant offset (from address of constant itself),
+ /// signed. Used in jump tables.
+ PCRel32,
+}
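+
+// Worked example (illustrative): `Branch19` holds a 19-bit signed immediate that
+// the CPU scales by 4, so a conditional branch reaches roughly +/-1 MiB (2^20
+// bytes) from its own address; `Branch26` scales a 26-bit immediate the same
+// way, for roughly +/-128 MiB. `max_pos_range`/`max_neg_range` below report
+// these bounds.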
+
+impl MachInstLabelUse for LabelUse {
+ /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned.
+ const ALIGN: CodeOffset = 4;
+
+ /// Maximum PC-relative range (positive), inclusive.
+ fn max_pos_range(self) -> CodeOffset {
+ match self {
+ // 19-bit immediate, left-shifted by 2, for 21 bits of total range. Signed, so +2^20
+ // from zero. Likewise for two other shifted cases below.
+ LabelUse::Branch19 => (1 << 20) - 1,
+ LabelUse::Branch26 => (1 << 27) - 1,
+ LabelUse::Ldr19 => (1 << 20) - 1,
+ // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total
+ // range.
+ LabelUse::Adr21 => (1 << 20) - 1,
+ LabelUse::PCRel32 => 0x7fffffff,
+ }
+ }
+
+ /// Maximum PC-relative range (negative).
+ fn max_neg_range(self) -> CodeOffset {
+ // All forms are two's-complement signed offsets, so the negative limit is one
+ // more than the positive limit.
+ self.max_pos_range() + 1
+ }
+
+ /// Size of window into code needed to do the patch.
+ fn patch_size(self) -> CodeOffset {
+ // Patch is on one instruction only for all of these label reference types.
+ 4
+ }
+
+ /// Perform the patch.
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
+ let pc_rel = (label_offset as i64) - (use_offset as i64);
+ debug_assert!(pc_rel <= self.max_pos_range() as i64);
+ debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
+ let pc_rel = pc_rel as u32;
+ let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let mask = match self {
+ LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive
+ LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive
+ LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive
+ LabelUse::Adr21 => 0x60ffffe0, // bits 30..29, 25..5 inclusive
+ LabelUse::PCRel32 => 0xffffffff,
+ };
+ let pc_rel_shifted = match self {
+ LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel,
+ _ => {
+ debug_assert!(pc_rel & 3 == 0);
+ pc_rel >> 2
+ }
+ };
+ let pc_rel_inserted = match self {
+ LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5,
+ LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff,
+ LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10,
+ LabelUse::PCRel32 => pc_rel_shifted,
+ };
+ let is_add = match self {
+ LabelUse::PCRel32 => true,
+ _ => false,
+ };
+ let insn_word = if is_add {
+ insn_word.wrapping_add(pc_rel_inserted)
+ } else {
+ (insn_word & !mask) | pc_rel_inserted
+ };
+ buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
+ }
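+
+ // Worked example (illustrative): patching a `b.cond` (`Branch19`) at offset
+ // 0x1000 whose label resolves to 0x1040 gives pc_rel = 0x40; shifting right
+ // by 2 yields 0x10, which is masked to 19 bits and placed in bits 23:5 of the
+ // instruction word (0x10 << 5 = 0x200), leaving the bits outside the mask
+ // untouched.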
+
+ /// Is a veneer supported for this label reference type?
+ fn supports_veneer(self) -> bool {
+ match self {
+ LabelUse::Branch19 => true, // veneer is a Branch26
+ _ => false,
+ }
+ }
+
+ /// How large is the veneer, if supported?
+ fn veneer_size(self) -> CodeOffset {
+ 4
+ }
+
+ /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
+ /// an offset and label-use for the veneer's use of the original label.
+ fn generate_veneer(
+ self,
+ buffer: &mut [u8],
+ veneer_offset: CodeOffset,
+ ) -> (CodeOffset, LabelUse) {
+ match self {
+ LabelUse::Branch19 => {
+ // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't
+ // bother with constructing an Inst.
+ let insn_word = 0b000101 << 26;
+ buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word));
+ (veneer_offset, LabelUse::Branch26)
+ }
+ _ => panic!("Unsupported label-reference type for veneer generation!"),
+ }
+ }
+}
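+// Illustrative sketch, not part of the upstream change: a worked example of the
+// `patch` arithmetic above for a `Branch26` use, plus the `Branch19` veneer. It
+// assumes the `MachInstLabelUse` trait is reachable via this module's imports
+// (as the `impl` above implies).
+#[cfg(test)]
+mod label_use_patch_example {
+    use super::*;
+
+    #[test]
+    fn branch26_patch_and_branch19_veneer() {
+        // An unconditional branch (`b`) placeholder: opcode 0b000101 in bits 31:26, imm26 = 0.
+        let mut buf = u32::to_le_bytes(0b000101 << 26);
+        // Branch at code offset 0x10, label at 0x30: pc-rel = 0x20, so imm26 = 0x20 >> 2 = 8.
+        LabelUse::Branch26.patch(&mut buf, 0x10, 0x30);
+        assert_eq!(u32::from_le_bytes(buf) & 0x03ff_ffff, 8);
+
+        // A Branch19 veneer is just a `b` whose own label use is a Branch26.
+        let mut veneer = [0u8; 4];
+        let (_, label_use) = LabelUse::Branch19.generate_veneer(&mut veneer, 0x100);
+        assert_eq!(u32::from_le_bytes(veneer), 0b000101 << 26);
+        assert!(matches!(label_use, LabelUse::Branch26));
+    }
+}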
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs
new file mode 100644
index 0000000000..0b4babe04a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs
@@ -0,0 +1,351 @@
+//! AArch64 ISA definitions: registers.
+
+use crate::isa::aarch64::inst::OperandSize;
+use crate::isa::aarch64::inst::ScalarSize;
+use crate::isa::aarch64::inst::VectorSize;
+use crate::settings;
+
+use regalloc::{
+ PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
+};
+
+use std::string::{String, ToString};
+
+//=============================================================================
+// Registers, the Universe thereof, and printing
+
+/// The pinned register on this architecture.
+/// It must be the same as SpiderMonkey's HeapReg, which is defined here:
+/// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103
+pub const PINNED_REG: u8 = 21;
+
+#[rustfmt::skip]
+const XREG_INDICES: [u8; 31] = [
+ // X0 - X7
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ // X8 - X15
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ // X16, X17
+ 58, 59,
+ // X18
+ 60,
+ // X19, X20
+ 48, 49,
+ // X21, put aside because it's the pinned register.
+ 57,
+ // X22 - X28
+ 50, 51, 52, 53, 54, 55, 56,
+ // X29 (FP)
+ 61,
+ // X30 (LR)
+ 62,
+];
+
+const ZERO_REG_INDEX: u8 = 63;
+
+const SP_REG_INDEX: u8 = 64;
+
+/// Get a reference to an X-register (integer register).
+pub fn xreg(num: u8) -> Reg {
+ assert!(num < 31);
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ num,
+ /* index = */ XREG_INDICES[num as usize],
+ )
+}
+
+/// Get a writable reference to an X-register.
+pub fn writable_xreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(xreg(num))
+}
+
+/// Get a reference to a V-register (vector/FP register).
+pub fn vreg(num: u8) -> Reg {
+ assert!(num < 32);
+ Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+}
+
+/// Get a writable reference to a V-register.
+pub fn writable_vreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(vreg(num))
+}
+
+/// Get a reference to the zero-register.
+pub fn zero_reg() -> Reg {
+ // This should be the same as what xreg(31) returns, except that
+ // we use the special index into the register index space.
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ 31,
+ /* index = */ ZERO_REG_INDEX,
+ )
+}
+
+/// Get a writable reference to the zero-register (this discards a result).
+pub fn writable_zero_reg() -> Writable<Reg> {
+ Writable::from_reg(zero_reg())
+}
+
+/// Get a reference to the stack-pointer register.
+pub fn stack_reg() -> Reg {
+ // XSP (stack) and XZR (zero) are logically different registers which have
+ // the same hardware encoding, and whose meaning, in real aarch64
+ // instructions, is context-dependent. For convenience of
+ // universe-construction and for correct printing, we make them be two
+ // different real registers.
+ Reg::new_real(
+ RegClass::I64,
+ /* enc = */ 31,
+ /* index = */ SP_REG_INDEX,
+ )
+}
+
+/// Get a writable reference to the stack-pointer register.
+pub fn writable_stack_reg() -> Writable<Reg> {
+ Writable::from_reg(stack_reg())
+}
+
+/// Get a reference to the link register (x30).
+pub fn link_reg() -> Reg {
+ xreg(30)
+}
+
+/// Get a writable reference to the link register.
+pub fn writable_link_reg() -> Writable<Reg> {
+ Writable::from_reg(link_reg())
+}
+
+/// Get a reference to the frame pointer (x29).
+pub fn fp_reg() -> Reg {
+ xreg(29)
+}
+
+/// Get a writable reference to the frame pointer.
+pub fn writable_fp_reg() -> Writable<Reg> {
+ Writable::from_reg(fp_reg())
+}
+
+/// Get a reference to the first temporary register, sometimes called the "spill temporary". This
+/// register is used to compute the address of a spill slot when a direct offset addressing mode
+/// from FP is not sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve
+/// it for this purpose for simplicity; otherwise we would need a multi-stage analysis where we
+/// first determine how many spill slots we have, then perhaps remove the reg from the pool and
+/// recompute regalloc.
+///
+/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is
+/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it
+/// to live through call instructions.
+pub fn spilltmp_reg() -> Reg {
+ xreg(16)
+}
+
+/// Get a writable reference to the spilltmp reg.
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+ Writable::from_reg(spilltmp_reg())
+}
+
+/// Get a reference to the second temp register. We need this in some edge cases
+/// where we need both the spilltmp and another temporary.
+///
+/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is
+/// free to use otherwise.
+pub fn tmp2_reg() -> Reg {
+ xreg(17)
+}
+
+/// Get a writable reference to the tmp2 reg.
+pub fn writable_tmp2_reg() -> Writable<Reg> {
+ Writable::from_reg(tmp2_reg())
+}
+
+/// Create the register universe for AArch64.
+pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ let mut regs = vec![];
+ let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+ // Numbering scheme: we put V-regs first, then X-regs. The allocatable X-regs exclude several
+ // registers: x16 and x17 (the spilltmp/tmp2 scratch registers, aka IP0/IP1), x18 (globally
+ // reserved for platform-specific purposes), x21 (the pinned register, handled separately
+ // below), x29 (frame pointer), x30 (link register), and x31 (stack pointer or zero register,
+ // depending on context).
+
+ let v_reg_base = 0u8; // in contiguous real-register index space
+ let v_reg_count = 32;
+ for i in 0u8..v_reg_count {
+ let reg = Reg::new_real(
+ RegClass::V128,
+ /* enc = */ i,
+ /* index = */ v_reg_base + i,
+ )
+ .to_real_reg();
+ let name = format!("v{}", i);
+ regs.push((reg, name));
+ }
+ let v_reg_last = v_reg_base + v_reg_count - 1;
+
+ // Add the X registers. N.B.: the order here must match the order implied
+ // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
+
+ let x_reg_base = 32u8; // in contiguous real-register index space
+ let mut x_reg_count = 0;
+
+ let uses_pinned_reg = flags.enable_pinned_reg();
+
+ for i in 0u8..32u8 {
+ // See above for excluded registers.
+ if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
+ continue;
+ }
+ let reg = Reg::new_real(
+ RegClass::I64,
+ /* enc = */ i,
+ /* index = */ x_reg_base + x_reg_count,
+ )
+ .to_real_reg();
+ let name = format!("x{}", i);
+ regs.push((reg, name));
+ x_reg_count += 1;
+ }
+ let x_reg_last = x_reg_base + x_reg_count - 1;
+
+ allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+ first: x_reg_base as usize,
+ last: x_reg_last as usize,
+ suggested_scratch: Some(XREG_INDICES[19] as usize),
+ });
+ allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+ first: v_reg_base as usize,
+ last: v_reg_last as usize,
+ suggested_scratch: Some(/* V31: */ 31),
+ });
+
+ // Other regs, not available to the allocator.
+ let allocable = if uses_pinned_reg {
+ // The pinned register is not allocatable in this case, so record the length before adding
+ // it.
+ let len = regs.len();
+ regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string()));
+ len
+ } else {
+ regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string()));
+ regs.len()
+ };
+
+ regs.push((xreg(16).to_real_reg(), "x16".to_string()));
+ regs.push((xreg(17).to_real_reg(), "x17".to_string()));
+ regs.push((xreg(18).to_real_reg(), "x18".to_string()));
+ regs.push((fp_reg().to_real_reg(), "fp".to_string()));
+ regs.push((link_reg().to_real_reg(), "lr".to_string()));
+ regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
+ regs.push((stack_reg().to_real_reg(), "sp".to_string()));
+
+ // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
+ // to 65, which is potentially inconvenient from a compiler performance
+ // standpoint. We could possibly drop back to 64 by "losing" a vector
+ // register in future.
+
+ // Assert sanity: the indices in the register structs must match their
+ // actual indices in the array.
+ for (i, reg) in regs.iter().enumerate() {
+ assert_eq!(i, reg.0.get_index());
+ }
+
+ RealRegUniverse {
+ regs,
+ allocable,
+ allocable_by_class,
+ }
+}
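+// Illustrative sketch, not part of the upstream change: the numbering scheme in
+// practice. Default flags are assumed here, so the pinned register stays
+// allocatable and is named plain "x21".
+#[cfg(test)]
+mod universe_example {
+    use super::*;
+
+    #[test]
+    fn x0_follows_the_vector_block() {
+        let flags = settings::Flags::new(settings::builder());
+        let universe = create_reg_universe(&flags);
+        // v0..v31 occupy indices 0..=31, so the first X-reg, x0, lands at index 32,
+        // exactly as XREG_INDICES records.
+        assert_eq!(universe.regs[32].1, "x0");
+        assert_eq!(xreg(0).to_real_reg().get_index(), 32);
+        // With the pinned register left allocatable, x21 closes the allocatable prefix.
+        assert_eq!(universe.regs[universe.allocable - 1].1, "x21");
+    }
+}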
+
+/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
+/// its name at the 32-bit size.
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
+ let mut s = reg.show_rru(mb_rru);
+ if reg.get_class() != RegClass::I64 || !size.is32() {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (eg) "x42" into "w42" as appropriate
+ if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
+ s = "w".to_string() + &s[1..];
+ }
+ } else {
+ // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
+ if reg.get_class() == RegClass::I64 && size.is32() {
+ s.push('w');
+ }
+ }
+ s
+}
+
+/// Show a vector register used in a scalar context.
+pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
+ let mut s = reg.show_rru(mb_rru);
+ if reg.get_class() != RegClass::V128 {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (eg) "v0" into "d0".
+ if s.starts_with("v") {
+ let replacement = match size {
+ ScalarSize::Size8 => "b",
+ ScalarSize::Size16 => "h",
+ ScalarSize::Size32 => "s",
+ ScalarSize::Size64 => "d",
+ ScalarSize::Size128 => "q",
+ };
+ s.replace_range(0..1, replacement);
+ }
+ } else {
+ // Add a "d" suffix to RegClass::V128 vregs.
+ if reg.get_class() == RegClass::V128 {
+ s.push('d');
+ }
+ }
+ s
+}
+
+/// Show a vector register.
+pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
+ assert_eq!(RegClass::V128, reg.get_class());
+ let mut s = reg.show_rru(mb_rru);
+
+ let suffix = match size {
+ VectorSize::Size8x8 => ".8b",
+ VectorSize::Size8x16 => ".16b",
+ VectorSize::Size16x4 => ".4h",
+ VectorSize::Size16x8 => ".8h",
+ VectorSize::Size32x2 => ".2s",
+ VectorSize::Size32x4 => ".4s",
+ VectorSize::Size64x2 => ".2d",
+ };
+
+ s.push_str(suffix);
+ s
+}
+
+/// Show an indexed vector element.
+pub fn show_vreg_element(
+ reg: Reg,
+ mb_rru: Option<&RealRegUniverse>,
+ idx: u8,
+ size: VectorSize,
+) -> String {
+ assert_eq!(RegClass::V128, reg.get_class());
+ let mut s = reg.show_rru(mb_rru);
+
+ let suffix = match size {
+ VectorSize::Size8x8 => "b",
+ VectorSize::Size8x16 => "b",
+ VectorSize::Size16x4 => "h",
+ VectorSize::Size16x8 => "h",
+ VectorSize::Size32x2 => "s",
+ VectorSize::Size32x4 => "s",
+ VectorSize::Size64x2 => "d",
+ };
+
+ s.push_str(&format!(".{}[{}]", suffix, idx));
+ s
+}
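+// Illustrative sketch, not part of the upstream change: how the `show_*` helpers
+// render sized register names. The universe built above (with default flags,
+// assumed here) supplies the base names.
+#[cfg(test)]
+mod show_reg_example {
+    use super::*;
+
+    #[test]
+    fn sized_register_names() {
+        let flags = settings::Flags::new(settings::builder());
+        let universe = create_reg_universe(&flags);
+        // A 32-bit view of x3 prints as "w3".
+        assert_eq!(show_ireg_sized(xreg(3), Some(&universe), OperandSize::Size32), "w3");
+        // v7 viewed as sixteen byte lanes prints as "v7.16b".
+        assert_eq!(show_vreg_vector(vreg(7), Some(&universe), VectorSize::Size8x16), "v7.16b");
+    }
+}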
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs
new file mode 100644
index 0000000000..698e094795
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs
@@ -0,0 +1,201 @@
+use super::*;
+use crate::isa::aarch64::inst::{args::PairAMode, imms::Imm12, regs, ALUOp, Inst};
+use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
+use crate::machinst::UnwindInfoContext;
+use crate::result::CodegenResult;
+use alloc::vec::Vec;
+use regalloc::Reg;
+
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
+
+pub struct AArch64UnwindInfo;
+
+impl UnwindInfoGenerator<Inst> for AArch64UnwindInfo {
+ fn create_unwind_info(
+ context: UnwindInfoContext<Inst>,
+ ) -> CodegenResult<Option<UnwindInfo<Reg>>> {
+ let word_size = 8u8;
+ let pair_size = word_size * 2;
+ let mut codes = Vec::new();
+
+ for i in context.prologue.clone() {
+ let i = i as usize;
+ let inst = &context.insts[i];
+ let offset = context.insts_layout[i];
+
+ match inst {
+ Inst::StoreP64 {
+ rt,
+ rt2,
+ mem: PairAMode::PreIndexed(rn, imm7),
+ ..
+ } if *rt == regs::fp_reg()
+ && *rt2 == regs::link_reg()
+ && *rn == regs::writable_stack_reg()
+ && imm7.value == -(pair_size as i16) =>
+ {
+ // stp fp (x29), lr (x30), [sp, #-16]!
+ codes.push((
+ offset,
+ UnwindCode::StackAlloc {
+ size: pair_size as u32,
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt,
+ stack_offset: 0,
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt2,
+ stack_offset: word_size as u32,
+ },
+ ));
+ }
+ Inst::StoreP64 {
+ rt,
+ rt2,
+ mem: PairAMode::PreIndexed(rn, imm7),
+ ..
+ } if rn.to_reg() == regs::stack_reg() && imm7.value % (pair_size as i16) == 0 => {
+ // stp r1, r2, [sp, #(i * #16)]
+ let stack_offset = imm7.value as u32;
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt,
+ stack_offset,
+ },
+ ));
+ if *rt2 != regs::zero_reg() {
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *rt2,
+ stack_offset: stack_offset + word_size as u32,
+ },
+ ));
+ }
+ }
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd,
+ rn,
+ imm12:
+ Imm12 {
+ bits: 0,
+ shift12: false,
+ },
+ } if *rd == regs::writable_fp_reg() && *rn == regs::stack_reg() => {
+ // mov fp (x29), sp.
+ codes.push((offset, UnwindCode::SetFramePointer { reg: rd.to_reg() }));
+ }
+ Inst::VirtualSPOffsetAdj { offset: adj } if offset > 0 => {
+ codes.push((offset, UnwindCode::StackAlloc { size: *adj as u32 }));
+ }
+ _ => {}
+ }
+ }
+
+ // TODO epilogues
+
+ let prologue_size = if context.prologue.is_empty() {
+ 0
+ } else {
+ context.insts_layout[context.prologue.end as usize - 1]
+ };
+
+ Ok(Some(UnwindInfo {
+ prologue_size,
+ prologue_unwind_codes: codes,
+ epilogues_unwind_codes: vec![],
+ function_size: context.len,
+ word_size,
+ initial_sp_offset: 0,
+ }))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{ExternalName, Function, InstBuilder, Signature, StackSlotData, StackSlotKind};
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let result = context.mach_compile_result.unwrap();
+ let unwind_info = result.unwind_info.unwrap();
+
+ assert_eq!(
+ unwind_info,
+ UnwindInfo {
+ prologue_size: 12,
+ prologue_unwind_codes: vec![
+ (4, UnwindCode::StackAlloc { size: 16 }),
+ (
+ 4,
+ UnwindCode::SaveRegister {
+ reg: regs::fp_reg(),
+ stack_offset: 0
+ }
+ ),
+ (
+ 4,
+ UnwindCode::SaveRegister {
+ reg: regs::link_reg(),
+ stack_offset: 8
+ }
+ ),
+ (
+ 8,
+ UnwindCode::SetFramePointer {
+ reg: regs::fp_reg()
+ }
+ )
+ ],
+ epilogues_unwind_codes: vec![],
+ function_size: 24,
+ word_size: 8,
+ initial_sp_offset: 0,
+ }
+ );
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs
new file mode 100644
index 0000000000..b988314b1b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs
@@ -0,0 +1,158 @@
+//! Unwind information for System V ABI (AArch64).
+
+use crate::isa::aarch64::inst::regs;
+use crate::isa::unwind::input;
+use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
+use crate::result::CodegenResult;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
+use regalloc::{Reg, RegClass};
+
+/// Creates a new aarch64 common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry {
+ use gimli::write::CallFrameInstruction;
+
+ let mut entry = CommonInformationEntry::new(
+ Encoding {
+ address_size: 8,
+ format: Format::Dwarf32,
+ version: 1,
+ },
+ 4, // Code alignment factor
+ -8, // Data alignment factor
+ Register(regs::link_reg().get_hw_encoding().into()),
+ );
+
+ // Every frame will start with the call frame address (CFA) at SP
+ let sp = Register(regs::stack_reg().get_hw_encoding().into());
+ entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
+
+ entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
+ match reg.get_class() {
+ RegClass::I64 => Ok(Register(reg.get_hw_encoding().into())),
+ _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
+ }
+}
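+// Illustrative sketch, not part of the upstream change: for integer registers the
+// DWARF register number is simply the AArch64 hardware encoding, so x5 maps to 5.
+#[cfg(test)]
+mod map_reg_example {
+    use super::*;
+
+    #[test]
+    fn x5_maps_to_dwarf_register_5() {
+        assert!(matches!(map_reg(regs::xreg(5)), Ok(Register(5))));
+    }
+}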
+
+pub(crate) fn create_unwind_info(
+ unwind: input::UnwindInfo<Reg>,
+) -> CodegenResult<Option<UnwindInfo>> {
+ struct RegisterMapper;
+ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
+ fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+ Ok(map_reg(reg)?.0)
+ }
+ fn sp(&self) -> u16 {
+ regs::stack_reg().get_hw_encoding().into()
+ }
+ }
+ let map = RegisterMapper;
+ Ok(Some(UnwindInfo::build(unwind, &map)?))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
+ StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use gimli::write::Address;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(1234))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("aarch64"))
+ .expect("expect aarch64 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(4321))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 40, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+ }
+
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/lower.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/lower.rs
new file mode 100644
index 0000000000..17555c1bd2
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/lower.rs
@@ -0,0 +1,1196 @@
+//! Lowering rules for AArch64.
+//!
+//! TODO: opportunities for better code generation:
+//!
+//! - Smarter use of addressing modes. Recognize a+SCALE*b patterns. Recognize
+//! pre/post-index opportunities.
+//!
+//! - Floating-point immediates (FIMM instruction).
+
+use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::types::*;
+use crate::ir::Inst as IRInst;
+use crate::ir::{Opcode, Type};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::CodegenResult;
+
+use crate::isa::aarch64::inst::*;
+use crate::isa::aarch64::AArch64Backend;
+
+use super::lower_inst;
+
+use crate::data_value::DataValue;
+use log::{debug, trace};
+use regalloc::{Reg, RegClass, Writable};
+use smallvec::SmallVec;
+
+//============================================================================
+// Result enum types.
+//
+// Lowering of a given value results in one of these enums, depending on the
+// modes in which we can accept the value.
+
+/// A lowering result: register, register-shift. An SSA value can always be
+/// lowered into one of these options; the register form is the fallback.
+#[derive(Clone, Debug)]
+enum ResultRS {
+ Reg(Reg),
+ RegShift(Reg, ShiftOpAndAmt),
+}
+
+/// A lowering result: register, register-shift, register-extend. An SSA value can always be
+/// lowered into one of these options; the register form is the fallback.
+#[derive(Clone, Debug)]
+enum ResultRSE {
+ Reg(Reg),
+ RegShift(Reg, ShiftOpAndAmt),
+ RegExtend(Reg, ExtendOp),
+}
+
+impl ResultRSE {
+ fn from_rs(rs: ResultRS) -> ResultRSE {
+ match rs {
+ ResultRS::Reg(r) => ResultRSE::Reg(r),
+ ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s),
+ }
+ }
+}
+
+/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form.
+/// An SSA value can always be lowered into one of these options; the register form is the
+/// fallback.
+#[derive(Clone, Debug)]
+pub(crate) enum ResultRSEImm12 {
+ Reg(Reg),
+ RegShift(Reg, ShiftOpAndAmt),
+ RegExtend(Reg, ExtendOp),
+ Imm12(Imm12),
+}
+
+impl ResultRSEImm12 {
+ fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
+ match rse {
+ ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
+ ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
+ ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
+ }
+ }
+}
+
+/// A lowering result: register, register-shift, or logical immediate form.
+/// An SSA value can always be lowered into one of these options; the register form is the
+/// fallback.
+#[derive(Clone, Debug)]
+pub(crate) enum ResultRSImmLogic {
+ Reg(Reg),
+ RegShift(Reg, ShiftOpAndAmt),
+ ImmLogic(ImmLogic),
+}
+
+impl ResultRSImmLogic {
+ fn from_rs(rse: ResultRS) -> ResultRSImmLogic {
+ match rse {
+ ResultRS::Reg(r) => ResultRSImmLogic::Reg(r),
+ ResultRS::RegShift(r, s) => ResultRSImmLogic::RegShift(r, s),
+ }
+ }
+}
+
+/// A lowering result: register or immediate shift amount (arg to a shift op).
+/// An SSA value can always be lowered into one of these options; the register form is the
+/// fallback.
+#[derive(Clone, Debug)]
+pub(crate) enum ResultRegImmShift {
+ Reg(Reg),
+ ImmShift(ImmShift),
+}
+
+//============================================================================
+// Lowering: convert instruction inputs to forms that we can use.
+
+/// Lower an instruction input to a 64-bit constant, if possible.
+pub(crate) fn input_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
+ let input = ctx.get_input(input.insn, input.input);
+ input.constant
+}
+
+/// Lower an instruction input to a constant register-shift amount, if possible.
+pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+) -> Option<ShiftOpShiftImm> {
+ input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
+}
+
+pub(crate) fn const_param_to_u128<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ inst: IRInst,
+) -> Option<u128> {
+ match ctx.get_immediate(inst) {
+ Some(DataValue::V128(bytes)) => Some(u128::from_le_bytes(bytes)),
+ _ => None,
+ }
+}
+
+/// How to handle narrow values loaded into registers; see note on `narrow_mode`
+/// parameter to `put_input_in_*` below.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub(crate) enum NarrowValueMode {
+ None,
+ /// Zero-extend to 32 bits if original is < 32 bits.
+ ZeroExtend32,
+ /// Sign-extend to 32 bits if original is < 32 bits.
+ SignExtend32,
+ /// Zero-extend to 64 bits if original is < 64 bits.
+ ZeroExtend64,
+ /// Sign-extend to 64 bits if original is < 64 bits.
+ SignExtend64,
+}
+
+impl NarrowValueMode {
+ fn is_32bit(&self) -> bool {
+ match self {
+ NarrowValueMode::None => false,
+ NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
+ NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
+ }
+ }
+}
+
+/// Lower an instruction input to a reg.
+///
+/// The given register will be extended appropriately, according to
+/// `narrow_mode` and the input's type: to 32 bits for the `*Extend32` modes
+/// and to 64 bits for the `*Extend64` modes.
+pub(crate) fn put_input_in_reg<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ narrow_mode: NarrowValueMode,
+) -> Reg {
+ debug!("put_input_in_reg: input {:?}", input);
+ let ty = ctx.input_ty(input.insn, input.input);
+ let from_bits = ty_bits(ty) as u8;
+ let inputs = ctx.get_input(input.insn, input.input);
+ let in_reg = if let Some(c) = inputs.constant {
+ // Generate constants fresh at each use to minimize long-range register pressure.
+ let masked = if from_bits < 64 {
+ c & ((1u64 << from_bits) - 1)
+ } else {
+ c
+ };
+ let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
+ for inst in Inst::gen_constant(to_reg, masked, ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ })
+ .into_iter()
+ {
+ ctx.emit(inst);
+ }
+ to_reg.to_reg()
+ } else {
+ ctx.use_input_reg(inputs);
+ inputs.reg
+ };
+
+ match (narrow_mode, from_bits) {
+ (NarrowValueMode::None, _) => in_reg,
+ (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rn: in_reg,
+ signed: false,
+ from_bits,
+ to_bits: 32,
+ });
+ tmp.to_reg()
+ }
+ (NarrowValueMode::SignExtend32, n) if n < 32 => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rn: in_reg,
+ signed: true,
+ from_bits,
+ to_bits: 32,
+ });
+ tmp.to_reg()
+ }
+ (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
+
+ (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
+ if inputs.constant.is_some() {
+ // Constants are zero-extended to full 64-bit width on load already.
+ in_reg
+ } else {
+ let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rn: in_reg,
+ signed: false,
+ from_bits,
+ to_bits: 64,
+ });
+ tmp.to_reg()
+ }
+ }
+ (NarrowValueMode::SignExtend64, n) if n < 64 => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rn: in_reg,
+ signed: true,
+ from_bits,
+ to_bits: 64,
+ });
+ tmp.to_reg()
+ }
+ (_, 64) => in_reg,
+ (_, 128) => in_reg,
+
+ _ => panic!(
+ "Unsupported input width: input ty {} bits {} mode {:?}",
+ ty, from_bits, narrow_mode
+ ),
+ }
+}
+
+/// Lower an instruction input to a reg or reg/shift operand.
+///
+/// The `narrow_mode` flag indicates whether the consumer of this value needs
+/// the high bits clear. For many operations, such as an add/sub/mul or any
+/// bitwise logical operation, the low-bit results depend only on the low-bit
+/// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
+/// value is stored in the low 8 bits of the register and the high 24 bits are
+/// undefined. If the op truly needs the high N bits clear (such as for a
+/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
+/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
+/// register will be provided the extended value.
+fn put_input_in_rs<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ narrow_mode: NarrowValueMode,
+) -> ResultRS {
+ let inputs = ctx.get_input(input.insn, input.input);
+ if let Some((insn, 0)) = inputs.inst {
+ let op = ctx.data(insn).opcode();
+
+ if op == Opcode::Ishl {
+ let shiftee = InsnInput { insn, input: 0 };
+ let shift_amt = InsnInput { insn, input: 1 };
+
+ // Can we get the shift amount as an immediate?
+ if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
+ let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
+ if shiftee_bits <= std::u8::MAX as usize {
+ let shiftimm = shiftimm.mask(shiftee_bits as u8);
+ let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
+ return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
+ }
+ }
+ }
+ }
+
+ ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
+}
+
+/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
+/// This does not actually codegen the source instruction; it just uses the
+/// vreg into which the source instruction will generate its value.
+///
+/// See note on `put_input_in_rs` for a description of `narrow_mode`.
+fn put_input_in_rse<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ narrow_mode: NarrowValueMode,
+) -> ResultRSE {
+ let inputs = ctx.get_input(input.insn, input.input);
+ if let Some((insn, 0)) = inputs.inst {
+ let op = ctx.data(insn).opcode();
+ let out_ty = ctx.output_ty(insn, 0);
+ let out_bits = ty_bits(out_ty);
+
+ // Is this a zero-extend or sign-extend and can we handle that with a register-mode operator?
+ if op == Opcode::Uextend || op == Opcode::Sextend {
+ let sign_extend = op == Opcode::Sextend;
+ let inner_ty = ctx.input_ty(insn, 0);
+ let inner_bits = ty_bits(inner_ty);
+ assert!(inner_bits < out_bits);
+ if match (sign_extend, narrow_mode) {
+ // A single zero-extend or sign-extend is equal to itself.
+ (_, NarrowValueMode::None) => true,
+ // Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
+ (false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => {
+ true
+ }
+ (true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => {
+ true
+ }
+ // A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
+ (false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
+ false
+ }
+ (true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => {
+ false
+ }
+ } {
+ let extendop = match (sign_extend, inner_bits) {
+ (true, 8) => ExtendOp::SXTB,
+ (false, 8) => ExtendOp::UXTB,
+ (true, 16) => ExtendOp::SXTH,
+ (false, 16) => ExtendOp::UXTH,
+ (true, 32) => ExtendOp::SXTW,
+ (false, 32) => ExtendOp::UXTW,
+ _ => unreachable!(),
+ };
+ let reg =
+ put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
+ return ResultRSE::RegExtend(reg, extendop);
+ }
+ }
+
+ // If `out_ty` is smaller than 32 bits and we need to zero- or sign-extend,
+ // then get the result into a register and return an Extend-mode operand on
+ // that register.
+ if narrow_mode != NarrowValueMode::None
+ && ((narrow_mode.is_32bit() && out_bits < 32)
+ || (!narrow_mode.is_32bit() && out_bits < 64))
+ {
+ let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
+ let extendop = match (narrow_mode, out_bits) {
+ (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
+ ExtendOp::SXTB
+ }
+ (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
+ ExtendOp::UXTB
+ }
+ (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
+ ExtendOp::SXTB
+ }
+ (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
+ ExtendOp::UXTB
+ }
+ (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
+ ExtendOp::SXTH
+ }
+ (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
+ ExtendOp::UXTH
+ }
+ (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
+ (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
+ _ => unreachable!(),
+ };
+ return ResultRSE::RegExtend(reg, extendop);
+ }
+ }
+
+ ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
+}
+
+pub(crate) fn put_input_in_rse_imm12<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ narrow_mode: NarrowValueMode,
+) -> ResultRSEImm12 {
+ if let Some(imm_value) = input_to_const(ctx, input) {
+ if let Some(i) = Imm12::maybe_from_u64(imm_value) {
+ return ResultRSEImm12::Imm12(i);
+ }
+ }
+
+ ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
+}
+
+/// Like `put_input_in_rse_imm12` above, except that it is allowed to negate the
+/// argument (assuming a two's-complement representation with the given bit
+/// width) if doing so allows the use of a 12-bit immediate. Used to flip `add`s
+/// with negative immediates to `sub`s (and vice versa).
+pub(crate) fn put_input_in_rse_imm12_maybe_negated<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ twos_complement_bits: usize,
+ narrow_mode: NarrowValueMode,
+) -> (ResultRSEImm12, bool) {
+ assert!(twos_complement_bits <= 64);
+ if let Some(imm_value) = input_to_const(ctx, input) {
+ if let Some(i) = Imm12::maybe_from_u64(imm_value) {
+ return (ResultRSEImm12::Imm12(i), false);
+ }
+ let sign_extended =
+ ((imm_value as i64) << (64 - twos_complement_bits)) >> (64 - twos_complement_bits);
+ let inverted = sign_extended.wrapping_neg();
+ if let Some(i) = Imm12::maybe_from_u64(inverted as u64) {
+ return (ResultRSEImm12::Imm12(i), true);
+ }
+ }
+
+ (
+ ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode)),
+ false,
+ )
+}
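+// Illustrative sketch, not part of the upstream change: the negation trick in
+// isolation. An add of the 32-bit constant 0xffff_ffff (i.e. -1) has no 12-bit
+// encoding, but negating the sign-extended value yields 1, which does fit, so
+// the add can be flipped to a sub.
+#[cfg(test)]
+mod imm12_negation_example {
+    use super::*;
+
+    #[test]
+    fn negated_constant_fits_in_imm12() {
+        let imm_value: u64 = 0xffff_ffff;
+        assert!(Imm12::maybe_from_u64(imm_value).is_none());
+        let twos_complement_bits = 32;
+        let sign_extended =
+            ((imm_value as i64) << (64 - twos_complement_bits)) >> (64 - twos_complement_bits);
+        let negated = sign_extended.wrapping_neg();
+        assert_eq!(negated, 1);
+        assert!(Imm12::maybe_from_u64(negated as u64).is_some());
+    }
+}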
+
+pub(crate) fn put_input_in_rs_immlogic<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ narrow_mode: NarrowValueMode,
+) -> ResultRSImmLogic {
+ if let Some(imm_value) = input_to_const(ctx, input) {
+ let ty = ctx.input_ty(input.insn, input.input);
+ let ty = if ty_bits(ty) < 32 { I32 } else { ty };
+ if let Some(i) = ImmLogic::maybe_from_u64(imm_value, ty) {
+ return ResultRSImmLogic::ImmLogic(i);
+ }
+ }
+
+ ResultRSImmLogic::from_rs(put_input_in_rs(ctx, input, narrow_mode))
+}
+
+pub(crate) fn put_input_in_reg_immshift<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ shift_width_bits: usize,
+) -> ResultRegImmShift {
+ if let Some(imm_value) = input_to_const(ctx, input) {
+ let imm_value = imm_value & ((shift_width_bits - 1) as u64);
+ if let Some(immshift) = ImmShift::maybe_from_u64(imm_value) {
+ return ResultRegImmShift::ImmShift(immshift);
+ }
+ }
+
+ ResultRegImmShift::Reg(put_input_in_reg(ctx, input, NarrowValueMode::None))
+}
+
+//============================================================================
+// ALU instruction constructors.
+
+pub(crate) fn alu_inst_imm12(op: ALUOp, rd: Writable<Reg>, rn: Reg, rm: ResultRSEImm12) -> Inst {
+ match rm {
+ ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
+ alu_op: op,
+ rd,
+ rn,
+ imm12,
+ },
+ ResultRSEImm12::Reg(rm) => Inst::AluRRR {
+ alu_op: op,
+ rd,
+ rn,
+ rm,
+ },
+ ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
+ alu_op: op,
+ rd,
+ rn,
+ rm,
+ shiftop,
+ },
+ ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
+ alu_op: op,
+ rd,
+ rn,
+ rm,
+ extendop,
+ },
+ }
+}
+
+pub(crate) fn alu_inst_immlogic(
+ op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: ResultRSImmLogic,
+) -> Inst {
+ match rm {
+ ResultRSImmLogic::ImmLogic(imml) => Inst::AluRRImmLogic {
+ alu_op: op,
+ rd,
+ rn,
+ imml,
+ },
+ ResultRSImmLogic::Reg(rm) => Inst::AluRRR {
+ alu_op: op,
+ rd,
+ rn,
+ rm,
+ },
+ ResultRSImmLogic::RegShift(rm, shiftop) => Inst::AluRRRShift {
+ alu_op: op,
+ rd,
+ rn,
+ rm,
+ shiftop,
+ },
+ }
+}
+
+pub(crate) fn alu_inst_immshift(
+ op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: ResultRegImmShift,
+) -> Inst {
+ match rm {
+ ResultRegImmShift::ImmShift(immshift) => Inst::AluRRImmShift {
+ alu_op: op,
+ rd,
+ rn,
+ immshift,
+ },
+ ResultRegImmShift::Reg(rm) => Inst::AluRRR {
+ alu_op: op,
+ rd,
+ rn,
+ rm,
+ },
+ }
+}
+
+//============================================================================
+// Lowering: addressing mode support. Takes instruction directly, rather
+// than an `InsnInput`, to do more introspection.
+
+/// 32-bit addends that make up an address: an input, and an extension mode on that
+/// input.
+type AddressAddend32List = SmallVec<[(Reg, ExtendOp); 4]>;
+/// 64-bit addends that make up an address: just an input.
+type AddressAddend64List = SmallVec<[Reg; 4]>;
+
+/// Collect all addends that feed into an address computation, with extend-modes
+/// on each. Note that a load/store may have multiple address components (and
+/// the CLIF semantics are that these components are added to form the final
+/// address), but sometimes the CLIF that we receive still has arguments that
+/// refer to `iadd` instructions. We also want to handle uextend/sextend below
+/// the add(s).
+///
+/// We match any 64-bit add (and descend into its inputs), and we match any
+/// 32-to-64-bit sign or zero extension. The returned addend lists encode how to
+/// extend each input:
+///
+/// - an entry in the 64-bit list is used as-is; no extension is needed;
+/// - an entry in the 32-bit list carries an `ExtendOp` (`UXTW` for a
+///   zero-extension, `SXTW` for a sign-extension) that says how to widen the
+///   associated 32-bit input to 64 bits.
+///
+/// We do not descend further into the inputs of extensions (unless it is a constant),
+/// because supporting (e.g.) a 32-bit add that is later extended would require
+/// additional masking of high-order bits, which is too complex. So, in essence, we
+/// descend any number of adds from the roots, collecting all 64-bit address addends;
+/// then possibly support extensions at these leaves.
+fn collect_address_addends<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ roots: &[InsnInput],
+) -> (AddressAddend64List, AddressAddend32List, i64) {
+ let mut result32: AddressAddend32List = SmallVec::new();
+ let mut result64: AddressAddend64List = SmallVec::new();
+ let mut offset: i64 = 0;
+
+ let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();
+
+ while let Some(input) = workqueue.pop() {
+ debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
+ if let Some((op, insn)) = maybe_input_insn_multi(
+ ctx,
+ input,
+ &[
+ Opcode::Uextend,
+ Opcode::Sextend,
+ Opcode::Iadd,
+ Opcode::Iconst,
+ ],
+ ) {
+ match op {
+ Opcode::Uextend | Opcode::Sextend if ty_bits(ctx.input_ty(insn, 0)) == 32 => {
+ let extendop = if op == Opcode::Uextend {
+ ExtendOp::UXTW
+ } else {
+ ExtendOp::SXTW
+ };
+ let extendee_input = InsnInput { insn, input: 0 };
+ // If the input is a zero-extension of a constant, add the value to the known
+ // offset.
+ // Only do this for zero-extension, as generating a sign-extended
+ // constant may be more instructions than using the 'SXTW' addressing mode.
+ if let (Some(insn), ExtendOp::UXTW) = (
+ maybe_input_insn(ctx, extendee_input, Opcode::Iconst),
+ extendop,
+ ) {
+ let value = (ctx.get_constant(insn).unwrap() & 0xFFFF_FFFF_u64) as i64;
+ offset += value;
+ } else {
+ let reg = put_input_in_reg(ctx, extendee_input, NarrowValueMode::None);
+ result32.push((reg, extendop));
+ }
+ }
+ Opcode::Uextend | Opcode::Sextend => {
+ let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
+ result64.push(reg);
+ }
+ Opcode::Iadd => {
+ for input in 0..ctx.num_inputs(insn) {
+ let addend = InsnInput { insn, input };
+ workqueue.push(addend);
+ }
+ }
+ Opcode::Iconst => {
+ let value: i64 = ctx.get_constant(insn).unwrap() as i64;
+ offset += value;
+ }
+ _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
+ }
+ } else {
+ let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
+ result64.push(reg);
+ }
+ }
+
+ (result64, result32, offset)
+}
+
+/// Lower the address of a load or store.
+pub(crate) fn lower_address<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ elem_ty: Type,
+ roots: &[InsnInput],
+ offset: i32,
+) -> AMode {
+ // TODO: support base_reg + scale * index_reg. For this, we would need to pattern-match shl or
+ // mul instructions (Load/StoreComplex don't include scale factors).
+
+ // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
+ // extends and addition ops. We update these as we consume address
+ // components, so they represent the remaining addends not yet handled.
+ let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+ let mut offset = args_offset + (offset as i64);
+
+ trace!(
+ "lower_address: addends64 {:?}, addends32 {:?}, offset {}",
+ addends64,
+ addends32,
+ offset
+ );
+
+ // First, decide what the `AMode` will be. Take one extendee and one 64-bit
+ // reg, or two 64-bit regs, or a 64-bit reg and a 32-bit reg with extension,
+ // or some other combination as appropriate.
+ let memarg = if addends64.len() > 0 {
+ if addends32.len() > 0 {
+ let (reg32, extendop) = addends32.pop().unwrap();
+ let reg64 = addends64.pop().unwrap();
+ AMode::RegExtended(reg64, reg32, extendop)
+ } else if offset > 0 && offset < 0x1000 {
+ let reg64 = addends64.pop().unwrap();
+ let off = offset;
+ offset = 0;
+ AMode::RegOffset(reg64, off, elem_ty)
+ } else if addends64.len() >= 2 {
+ let reg1 = addends64.pop().unwrap();
+ let reg2 = addends64.pop().unwrap();
+ AMode::RegReg(reg1, reg2)
+ } else {
+ let reg1 = addends64.pop().unwrap();
+ AMode::reg(reg1)
+ }
+ } else
+ /* addends64.len() == 0 */
+ {
+ if addends32.len() > 0 {
+ let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+ let (reg1, extendop) = addends32.pop().unwrap();
+ let signed = match extendop {
+ ExtendOp::SXTW => true,
+ ExtendOp::UXTW => false,
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rn: reg1,
+ signed,
+ from_bits: 32,
+ to_bits: 64,
+ });
+ if let Some((reg2, extendop)) = addends32.pop() {
+ AMode::RegExtended(tmp.to_reg(), reg2, extendop)
+ } else {
+ AMode::reg(tmp.to_reg())
+ }
+ } else
+ /* addends32.len() == 0 */
+ {
+ let off_reg = ctx.alloc_tmp(RegClass::I64, I64);
+ lower_constant_u64(ctx, off_reg, offset as u64);
+ offset = 0;
+ AMode::reg(off_reg.to_reg())
+ }
+ };
+
+ // At this point, if we have any remaining components, we need to allocate a
+ // temp, replace one of the registers in the AMode with the temp, and emit
+ // instructions to add together the remaining components. Return immediately
+ // if this is *not* the case.
+ if offset == 0 && addends32.len() == 0 && addends64.len() == 0 {
+ return memarg;
+ }
+
+ // Allocate the temp and shoehorn it into the AMode.
+ let addr = ctx.alloc_tmp(RegClass::I64, I64);
+ let (reg, memarg) = match memarg {
+ AMode::RegExtended(r1, r2, extendop) => {
+ (r1, AMode::RegExtended(addr.to_reg(), r2, extendop))
+ }
+ AMode::RegOffset(r, off, ty) => (r, AMode::RegOffset(addr.to_reg(), off, ty)),
+ AMode::RegReg(r1, r2) => (r2, AMode::RegReg(addr.to_reg(), r1)),
+ AMode::UnsignedOffset(r, imm) => (r, AMode::UnsignedOffset(addr.to_reg(), imm)),
+ _ => unreachable!(),
+ };
+
+ // If there is any offset, load that first into `addr`, and add the `reg`
+ // that we kicked out of the `AMode`; otherwise, start with that reg.
+ if offset != 0 {
+ // If we can fit offset or -offset in an imm12, use an add-imm
+ // to combine the reg and offset. Otherwise, load value first then add.
+ if let Some(imm12) = Imm12::maybe_from_u64(offset as u64) {
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: ALUOp::Add64,
+ rd: addr,
+ rn: reg,
+ imm12,
+ });
+ } else if let Some(imm12) = Imm12::maybe_from_u64(offset.wrapping_neg() as u64) {
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub64,
+ rd: addr,
+ rn: reg,
+ imm12,
+ });
+ } else {
+ lower_constant_u64(ctx, addr, offset as u64);
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: addr,
+ rn: addr.to_reg(),
+ rm: reg,
+ });
+ }
+ } else {
+ ctx.emit(Inst::gen_move(addr, reg, I64));
+ }
+
+ // Now handle reg64 and reg32-extended components.
+ for reg in addends64 {
+ // If the register is the stack reg, we must move it to another reg
+ // before adding it.
+ let reg = if reg == stack_reg() {
+ let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+ ctx.emit(Inst::gen_move(tmp, stack_reg(), I64));
+ tmp.to_reg()
+ } else {
+ reg
+ };
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: addr,
+ rn: addr.to_reg(),
+ rm: reg,
+ });
+ }
+ for (reg, extendop) in addends32 {
+ assert!(reg != stack_reg());
+ ctx.emit(Inst::AluRRRExtend {
+ alu_op: ALUOp::Add64,
+ rd: addr,
+ rn: addr.to_reg(),
+ rm: reg,
+ extendop,
+ });
+ }
+
+ memarg
+}
+
+pub(crate) fn lower_constant_u64<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ rd: Writable<Reg>,
+ value: u64,
+) {
+ for inst in Inst::load_constant(rd, value) {
+ ctx.emit(inst);
+ }
+}
+
+pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ rd: Writable<Reg>,
+ value: f32,
+) {
+ let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
+
+ for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
+ ctx.emit(inst);
+ }
+}
+
+pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ rd: Writable<Reg>,
+ value: f64,
+) {
+ let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
+
+ for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
+ ctx.emit(inst);
+ }
+}
+
+pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ rd: Writable<Reg>,
+ value: u128,
+) {
+ if value == 0 {
+ // Fast-track a common case. The general case, viz. calling `Inst::load_fp_constant128`,
+ // is potentially expensive.
+ ctx.emit(Inst::VecDupImm {
+ rd,
+ imm: ASIMDMovModImm::zero(),
+ invert: false,
+ size: VectorSize::Size8x16,
+ });
+ } else {
+ let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
+ for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
+ ctx.emit(inst);
+ }
+ }
+}
+
+pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ rd: Writable<Reg>,
+ value: u64,
+ size: VectorSize,
+) {
+ let (value, narrow_size) = match size.lane_size() {
+ ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
+ ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
+ ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
+ ScalarSize::Size64 => (value, ScalarSize::Size32),
+ _ => unreachable!(),
+ };
+ let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) {
+ Some((value, lane_size)) => (
+ value,
+ VectorSize::from_lane_size(lane_size, size.is_128bits()),
+ ),
+ None => (value, size),
+ };
+ let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
+
+ for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) {
+ ctx.emit(inst);
+ }
+}
+
+pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
+ match cc {
+ IntCC::Equal => Cond::Eq,
+ IntCC::NotEqual => Cond::Ne,
+ IntCC::SignedGreaterThanOrEqual => Cond::Ge,
+ IntCC::SignedGreaterThan => Cond::Gt,
+ IntCC::SignedLessThanOrEqual => Cond::Le,
+ IntCC::SignedLessThan => Cond::Lt,
+ IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
+ IntCC::UnsignedGreaterThan => Cond::Hi,
+ IntCC::UnsignedLessThanOrEqual => Cond::Ls,
+ IntCC::UnsignedLessThan => Cond::Lo,
+ IntCC::Overflow => Cond::Vs,
+ IntCC::NotOverflow => Cond::Vc,
+ }
+}
+
+pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
+ // Refer to `codegen/shared/src/condcodes.rs` and to the `FCMP` AArch64 docs.
+ // The FCMP instruction sets the NZCV flags as follows:
+ // - PSTATE.NZCV = 0011 on UN (unordered),
+ //                 0110 on EQ,
+ //                 1000 on LT,
+ //                 0010 on GT.
+ match cc {
+ // EQ | LT | GT. Vc => V clear.
+ FloatCC::Ordered => Cond::Vc,
+ // UN. Vs => V set.
+ FloatCC::Unordered => Cond::Vs,
+ // EQ. Eq => Z set.
+ FloatCC::Equal => Cond::Eq,
+ // UN | LT | GT. Ne => Z clear.
+ FloatCC::NotEqual => Cond::Ne,
+ // LT | GT.
+ FloatCC::OrderedNotEqual => unimplemented!(),
+ // UN | EQ
+ FloatCC::UnorderedOrEqual => unimplemented!(),
+ // LT. Mi => N set.
+ FloatCC::LessThan => Cond::Mi,
+ // LT | EQ. Ls => C clear or Z set.
+ FloatCC::LessThanOrEqual => Cond::Ls,
+ // GT. Gt => Z clear, N = V.
+ FloatCC::GreaterThan => Cond::Gt,
+ // GT | EQ. Ge => N = V.
+ FloatCC::GreaterThanOrEqual => Cond::Ge,
+ // UN | LT
+ FloatCC::UnorderedOrLessThan => unimplemented!(),
+ // UN | LT | EQ
+ FloatCC::UnorderedOrLessThanOrEqual => unimplemented!(),
+ // UN | GT
+ FloatCC::UnorderedOrGreaterThan => unimplemented!(),
+ // UN | GT | EQ
+ FloatCC::UnorderedOrGreaterThanOrEqual => unimplemented!(),
+ }
+}
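+// Illustrative sketch, not part of the upstream change: two of the mappings
+// above spelled out against the NZCV table.
+#[cfg(test)]
+mod fp_condcode_example {
+    use super::*;
+
+    #[test]
+    fn ordered_and_less_than() {
+        // "Ordered" holds on EQ | LT | GT, i.e. whenever V is clear.
+        assert!(matches!(lower_fp_condcode(FloatCC::Ordered), Cond::Vc));
+        // "LessThan" corresponds to the 1000 row: N set.
+        assert!(matches!(lower_fp_condcode(FloatCC::LessThan), Cond::Mi));
+    }
+}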
+
+pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ rd: Writable<Reg>,
+ mut rn: Reg,
+ mut rm: Reg,
+ ty: Type,
+ cond: Cond,
+) -> CodegenResult<()> {
+ let is_float = match ty {
+ F32X4 | F64X2 => true,
+ _ => false,
+ };
+ let size = VectorSize::from_ty(ty);
+ // 'Less than' operations are implemented by swapping
+ // the order of operands and using the 'greater than'
+ // instructions.
+ // 'Not equal' is implemented with 'equal' and inverting
+ // the result.
+ let (alu_op, swap) = match (is_float, cond) {
+ (false, Cond::Eq) => (VecALUOp::Cmeq, false),
+ (false, Cond::Ne) => (VecALUOp::Cmeq, false),
+ (false, Cond::Ge) => (VecALUOp::Cmge, false),
+ (false, Cond::Gt) => (VecALUOp::Cmgt, false),
+ (false, Cond::Le) => (VecALUOp::Cmge, true),
+ (false, Cond::Lt) => (VecALUOp::Cmgt, true),
+ (false, Cond::Hs) => (VecALUOp::Cmhs, false),
+ (false, Cond::Hi) => (VecALUOp::Cmhi, false),
+ (false, Cond::Ls) => (VecALUOp::Cmhs, true),
+ (false, Cond::Lo) => (VecALUOp::Cmhi, true),
+ (true, Cond::Eq) => (VecALUOp::Fcmeq, false),
+ (true, Cond::Ne) => (VecALUOp::Fcmeq, false),
+ (true, Cond::Mi) => (VecALUOp::Fcmgt, true),
+ (true, Cond::Ls) => (VecALUOp::Fcmge, true),
+ (true, Cond::Ge) => (VecALUOp::Fcmge, false),
+ (true, Cond::Gt) => (VecALUOp::Fcmgt, false),
+ _ => unreachable!(),
+ };
+
+ if swap {
+ std::mem::swap(&mut rn, &mut rm);
+ }
+
+ ctx.emit(Inst::VecRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ size,
+ });
+
+ if cond == Cond::Ne {
+ ctx.emit(Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd,
+ rn: rd.to_reg(),
+ size,
+ });
+ }
+
+ Ok(())
+}
+
+/// Determines whether this condcode interprets inputs as signed or unsigned. See the
+/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
+/// for further insights into this.
+pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
+ match cc {
+ IntCC::Equal
+ | IntCC::UnsignedGreaterThanOrEqual
+ | IntCC::UnsignedGreaterThan
+ | IntCC::UnsignedLessThanOrEqual
+ | IntCC::UnsignedLessThan
+ | IntCC::NotEqual => false,
+ IntCC::SignedGreaterThanOrEqual
+ | IntCC::SignedGreaterThan
+ | IntCC::SignedLessThanOrEqual
+ | IntCC::SignedLessThan
+ | IntCC::Overflow
+ | IntCC::NotOverflow => true,
+ }
+}
+
+//=============================================================================
+// Helpers for instruction lowering.
+
+pub(crate) fn choose_32_64<T: Copy>(ty: Type, op32: T, op64: T) -> T {
+ let bits = ty_bits(ty);
+ if bits <= 32 {
+ op32
+ } else if bits == 64 {
+ op64
+ } else {
+ panic!("choose_32_64 on > 64 bits!")
+ }
+}
+
+/// Checks for an instance of `op` feeding the given input.
+pub(crate) fn maybe_input_insn<C: LowerCtx<I = Inst>>(
+ c: &mut C,
+ input: InsnInput,
+ op: Opcode,
+) -> Option<IRInst> {
+ let inputs = c.get_input(input.insn, input.input);
+ debug!(
+ "maybe_input_insn: input {:?} has options {:?}; looking for op {:?}",
+ input, inputs, op
+ );
+ if let Some((src_inst, _)) = inputs.inst {
+ let data = c.data(src_inst);
+ debug!(" -> input inst {:?}", data);
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ }
+ None
+}
+
+/// Checks for an instance of any one of `ops` feeding the given input.
+pub(crate) fn maybe_input_insn_multi<C: LowerCtx<I = Inst>>(
+ c: &mut C,
+ input: InsnInput,
+ ops: &[Opcode],
+) -> Option<(Opcode, IRInst)> {
+ for &op in ops {
+ if let Some(inst) = maybe_input_insn(c, input, op) {
+ return Some((op, inst));
+ }
+ }
+ None
+}
+
+/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
+/// Bint or a bitcast).
+///
+/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
+/// a bit more generic.
+pub(crate) fn maybe_input_insn_via_conv<C: LowerCtx<I = Inst>>(
+ c: &mut C,
+ input: InsnInput,
+ op: Opcode,
+ conv: Opcode,
+) -> Option<IRInst> {
+ let inputs = c.get_input(input.insn, input.input);
+ if let Some((src_inst, _)) = inputs.inst {
+ let data = c.data(src_inst);
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ if data.opcode() == conv {
+ let inputs = c.get_input(src_inst, 0);
+ if let Some((src_inst, _)) = inputs.inst {
+ let data = c.data(src_inst);
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ }
+ }
+ }
+ None
+}
+
+pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ insn: IRInst,
+ is_signed: bool,
+) {
+ debug!("lower_icmp_or_ifcmp_to_flags: insn {}", insn);
+ let ty = ctx.input_ty(insn, 0);
+ let bits = ty_bits(ty);
+ let narrow_mode = match (bits <= 32, is_signed) {
+ (true, true) => NarrowValueMode::SignExtend32,
+ (true, false) => NarrowValueMode::ZeroExtend32,
+ (false, true) => NarrowValueMode::SignExtend64,
+ (false, false) => NarrowValueMode::ZeroExtend64,
+ };
+ let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+ let ty = ctx.input_ty(insn, 0);
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
+ debug!("lower_icmp_or_ifcmp_to_flags: rn = {:?} rm = {:?}", rn, rm);
+ let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
+ let rd = writable_zero_reg();
+ ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+}
+
+pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
+ let ty = ctx.input_ty(insn, 0);
+ let bits = ty_bits(ty);
+ let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ match bits {
+ 32 => {
+ ctx.emit(Inst::FpuCmp32 { rn, rm });
+ }
+ 64 => {
+ ctx.emit(Inst::FpuCmp64 { rn, rm });
+ }
+ _ => panic!("Unknown float size"),
+ }
+}
+
+/// Convert a 0 / 1 result, such as from a conditional-set instruction, into a 0
+/// / -1 (all-ones) result as expected for bool operations.
+pub(crate) fn normalize_bool_result<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ insn: IRInst,
+ rd: Writable<Reg>,
+) {
+ // A boolean is 0 / -1; if output width is > 1, negate.
+ if ty_bits(ctx.output_ty(insn, 0)) > 1 {
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd,
+ rn: zero_reg(),
+ rm: rd.to_reg(),
+ });
+ }
+}
+
+//=============================================================================
+// Lowering-backend trait implementation.
+
+impl LowerBackend for AArch64Backend {
+ type MInst = Inst;
+
+ fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
+ lower_inst::lower_insn_to_regs(ctx, ir_inst)
+ }
+
+ fn lower_branch_group<C: LowerCtx<I = Inst>>(
+ &self,
+ ctx: &mut C,
+ branches: &[IRInst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+ ) -> CodegenResult<()> {
+ lower_inst::lower_branch(ctx, branches, targets, fallthrough)
+ }
+
+ fn maybe_pinned_reg(&self) -> Option<Reg> {
+ Some(xreg(PINNED_REG))
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/lower_inst.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/lower_inst.rs
new file mode 100644
index 0000000000..faa89d3b98
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/lower_inst.rs
@@ -0,0 +1,3409 @@
+//! Lower a single Cranelift instruction into vcode.
+
+use crate::binemit::CodeOffset;
+use crate::ir::condcodes::FloatCC;
+use crate::ir::types::*;
+use crate::ir::Inst as IRInst;
+use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::{CodegenError, CodegenResult};
+
+use crate::isa::aarch64::abi::*;
+use crate::isa::aarch64::inst::*;
+
+use regalloc::{RegClass, Writable};
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::convert::TryFrom;
+use smallvec::SmallVec;
+
+use super::lower::*;
+
+/// This is target-word-size dependent, and it excludes booleans and reftypes.
+fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
+ match ty {
+ I8 | I16 | I32 | I64 => true,
+ _ => false,
+ }
+}
+
+/// Actually codegen an instruction's results into registers.
+pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ insn: IRInst,
+) -> CodegenResult<()> {
+ let op = ctx.data(insn).opcode();
+ let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
+ .map(|i| InsnInput { insn, input: i })
+ .collect();
+ let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
+ .map(|i| InsnOutput { insn, output: i })
+ .collect();
+ let ty = if outputs.len() > 0 {
+ Some(ctx.output_ty(insn, 0))
+ } else {
+ None
+ };
+
+ match op {
+ Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
+ let value = ctx.get_constant(insn).unwrap();
+ // Sign extend constant if necessary
+ let value = match ty.unwrap() {
+ I8 => (((value as i64) << 56) >> 56) as u64,
+ I16 => (((value as i64) << 48) >> 48) as u64,
+ I32 => (((value as i64) << 32) >> 32) as u64,
+ I64 | R64 => value,
+ ty if ty.is_bool() => value,
+ ty => unreachable!("Unknown type for const: {}", ty),
+ };
+ let rd = get_output_reg(ctx, outputs[0]);
+ lower_constant_u64(ctx, rd, value);
+ }
+ Opcode::F32const => {
+ let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
+ let rd = get_output_reg(ctx, outputs[0]);
+ lower_constant_f32(ctx, rd, value);
+ }
+ Opcode::F64const => {
+ let value = f64::from_bits(ctx.get_constant(insn).unwrap());
+ let rd = get_output_reg(ctx, outputs[0]);
+ lower_constant_f64(ctx, rd, value);
+ }
+ Opcode::Iadd => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ let mul_insn =
+ if let Some(mul_insn) = maybe_input_insn(ctx, inputs[1], Opcode::Imul) {
+ Some((mul_insn, 0))
+ } else if let Some(mul_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Imul) {
+ Some((mul_insn, 1))
+ } else {
+ None
+ };
+ // If possible combine mul + add into madd.
+ if let Some((insn, addend_idx)) = mul_insn {
+ let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
+ let rn_input = InsnInput { insn, input: 0 };
+ let rm_input = InsnInput { insn, input: 1 };
+
+ let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
+ let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
+
+ ctx.emit(Inst::AluRRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ra,
+ });
+ } else {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
+ ctx,
+ inputs[1],
+ ty_bits(ty),
+ NarrowValueMode::None,
+ );
+ let alu_op = if !negated {
+ choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+ } else {
+ choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
+ };
+ ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+ }
+ } else {
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op: VecALUOp::Add,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+ Opcode::Isub => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
+ ctx,
+ inputs[1],
+ ty_bits(ty),
+ NarrowValueMode::None,
+ );
+ let alu_op = if !negated {
+ choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
+ } else {
+ choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
+ };
+ ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+ } else {
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ ctx.emit(Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op: VecALUOp::Sub,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+ Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
+ // We use the scalar SIMD & FP saturating additions and subtractions
+ // (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
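+ // The non-vector sequence below is: move both operands into FP/SIMD registers, apply
+ // the saturating op there, then move the 64-bit result back to a general-purpose register.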
+ let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
+ let ty = ty.unwrap();
+ let rd = get_output_reg(ctx, outputs[0]);
+ if !ty.is_vector() {
+ let narrow_mode = if is_signed {
+ NarrowValueMode::SignExtend64
+ } else {
+ NarrowValueMode::ZeroExtend64
+ };
+ let fpu_op = match op {
+ Opcode::UaddSat => FPUOp2::Uqadd64,
+ Opcode::SaddSat => FPUOp2::Sqadd64,
+ Opcode::UsubSat => FPUOp2::Uqsub64,
+ Opcode::SsubSat => FPUOp2::Sqsub64,
+ _ => unreachable!(),
+ };
+ let va = ctx.alloc_tmp(RegClass::V128, I128);
+ let vb = ctx.alloc_tmp(RegClass::V128, I128);
+ let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
+ ctx.emit(Inst::MovToFpu {
+ rd: va,
+ rn: ra,
+ size: ScalarSize::Size64,
+ });
+ ctx.emit(Inst::MovToFpu {
+ rd: vb,
+ rn: rb,
+ size: ScalarSize::Size64,
+ });
+ ctx.emit(Inst::FpuRRR {
+ fpu_op,
+ rd: va,
+ rn: va.to_reg(),
+ rm: vb.to_reg(),
+ });
+ ctx.emit(Inst::MovFromVec {
+ rd,
+ rn: va.to_reg(),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ });
+ } else {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+
+ let alu_op = match op {
+ Opcode::UaddSat => VecALUOp::Uqadd,
+ Opcode::SaddSat => VecALUOp::Sqadd,
+ Opcode::UsubSat => VecALUOp::Uqsub,
+ Opcode::SsubSat => VecALUOp::Sqsub,
+ _ => unreachable!(),
+ };
+
+ ctx.emit(Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::Ineg => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ let rn = zero_reg();
+ let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
+ let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
+ ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+ } else {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::VecMisc {
+ op: VecMisc2::Neg,
+ rd,
+ rn,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::Imul => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
+ ctx.emit(Inst::AluRRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ra: zero_reg(),
+ });
+ } else {
+ if ty == I64X2 {
+ let tmp1 = ctx.alloc_tmp(RegClass::V128, I64X2);
+ let tmp2 = ctx.alloc_tmp(RegClass::V128, I64X2);
+
+ // This I64X2 multiplication is performed with several 32-bit
+ // operations.
+
+ // 64-bit numbers x and y, can be represented as:
+ // x = a + 2^32(b)
+ // y = c + 2^32(d)
+
+ // A 64-bit multiplication is:
+ // x * y = ac + 2^32(ad + bc) + 2^64(bd)
+ // note: the `2^64(bd)` term can be ignored; it lies entirely above bit 63
+ // and so cannot affect the 64-bit result.
+
+ // This sequence implements a I64X2 multiply, where the registers
+ // `rn` and `rm` are split up into 32-bit components:
+ // rn = |d|c|b|a|
+ // rm = |h|g|f|e|
+ //
+ // rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
+ //
+ // The sequence is:
+ // rev64 rd.4s, rm.4s
+ // mul rd.4s, rd.4s, rn.4s
+ // xtn tmp1.2s, rn.2d
+ // addp rd.4s, rd.4s, rd.4s
+ // xtn tmp2.2s, rm.2d
+ // shll rd.2d, rd.2s, #32
+ // umlal rd.2d, tmp2.2s, tmp1.2s
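+ //
+ // tmp1 and tmp2 hold the low 32-bit halves of each lane; the final umlal multiplies
+ // them (giving ae and cg) and accumulates into the shifted high-half sums already in rd.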
+
+ // Reverse the 32-bit elements in the 64-bit words.
+ // rd = |g|h|e|f|
+ ctx.emit(Inst::VecMisc {
+ op: VecMisc2::Rev64,
+ rd,
+ rn: rm,
+ size: VectorSize::Size32x4,
+ });
+
+ // Calculate the high half components.
+ // rd = |dg|ch|be|af|
+ //
+ // Note that this 32-bit multiply of the high half
+ // discards the bits that would overflow, same as
+ // if 64-bit operations were used. Also the Shll
+ // below would shift out the overflow bits anyway.
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd,
+ rn: rd.to_reg(),
+ rm: rn,
+ size: VectorSize::Size32x4,
+ });
+
+ // Extract the low half components of rn.
+ // tmp1 = |c|a|
+ ctx.emit(Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Xtn,
+ rd: tmp1,
+ rn,
+ size: VectorSize::Size32x2,
+ high_half: false,
+ });
+
+ // Sum the respective high half components.
+ // rd = |dg+ch|be+af||dg+ch|be+af|
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: rd,
+ rn: rd.to_reg(),
+ rm: rd.to_reg(),
+ size: VectorSize::Size32x4,
+ });
+
+ // Extract the low half components of rm.
+ // tmp2 = |g|e|
+ ctx.emit(Inst::VecMiscNarrow {
+ op: VecMiscNarrowOp::Xtn,
+ rd: tmp2,
+ rn: rm,
+ size: VectorSize::Size32x2,
+ high_half: false,
+ });
+
+ // Shift the high half components, into the high half.
+ // rd = |dg+ch << 32|be+af << 32|
+ ctx.emit(Inst::VecMisc {
+ op: VecMisc2::Shll,
+ rd,
+ rn: rd.to_reg(),
+ size: VectorSize::Size32x2,
+ });
+
+ // Multiply the low components together, and accumulate with the high
+ // half.
+ // rd = |rd[1] + cg|rd[0] + ae|
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Umlal,
+ rd,
+ rn: tmp2.to_reg(),
+ rm: tmp1.to_reg(),
+ size: VectorSize::Size32x2,
+ });
+ } else {
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Mul,
+ rd,
+ rn,
+ rm,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+ }
+
+ Opcode::Umulhi | Opcode::Smulhi => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let is_signed = op == Opcode::Smulhi;
+ let input_ty = ctx.input_ty(insn, 0);
+ assert!(ctx.input_ty(insn, 1) == input_ty);
+ assert!(ctx.output_ty(insn, 0) == input_ty);
+
+ match input_ty {
+ I64 => {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let alu_op = if is_signed {
+ ALUOp::SMulH
+ } else {
+ ALUOp::UMulH
+ };
+ ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+ }
+ I32 | I16 | I8 => {
+ let narrow_mode = if is_signed {
+ NarrowValueMode::SignExtend64
+ } else {
+ NarrowValueMode::ZeroExtend64
+ };
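+ // Extend both operands to 64 bits, form the full product with a 64-bit madd
+ // (zero addend), then shift right by the operand width so the high half ends
+ // up in the low bits of the result.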
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
+ let ra = zero_reg();
+ ctx.emit(Inst::AluRRRR {
+ alu_op: ALUOp3::MAdd64,
+ rd,
+ rn,
+ rm,
+ ra,
+ });
+ let shift_op = if is_signed {
+ ALUOp::Asr64
+ } else {
+ ALUOp::Lsr64
+ };
+ let shift_amt = match input_ty {
+ I32 => 32,
+ I16 => 16,
+ I8 => 8,
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: shift_op,
+ rd,
+ rn: rd.to_reg(),
+ immshift: ImmShift::maybe_from_u64(shift_amt).unwrap(),
+ });
+ }
+ _ => {
+ panic!("Unsupported argument type for umulhi/smulhi: {}", input_ty);
+ }
+ }
+ }
+
+ Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => {
+ let is_signed = match op {
+ Opcode::Udiv | Opcode::Urem => false,
+ Opcode::Sdiv | Opcode::Srem => true,
+ _ => unreachable!(),
+ };
+ let is_rem = match op {
+ Opcode::Udiv | Opcode::Sdiv => false,
+ Opcode::Urem | Opcode::Srem => true,
+ _ => unreachable!(),
+ };
+ let narrow_mode = if is_signed {
+ NarrowValueMode::SignExtend64
+ } else {
+ NarrowValueMode::ZeroExtend64
+ };
+ // TODO: Add SDiv32 to implement 32-bit directly, rather
+ // than extending the input.
+ let div_op = if is_signed {
+ ALUOp::SDiv64
+ } else {
+ ALUOp::UDiv64
+ };
+
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
+ // The div instruction does not trap on divide by zero or signed overflow
+ // so checks are inserted below.
+ //
+ // div rd, rn, rm
+ ctx.emit(Inst::AluRRR {
+ alu_op: div_op,
+ rd,
+ rn,
+ rm,
+ });
+
+ if is_rem {
+ // Remainder (rn % rm) is implemented as:
+ //
+ // tmp = rn / rm
+ // rd = rn - (tmp*rm)
+ //
+ // use 'rd' for tmp and you have:
+ //
+ // div rd, rn, rm ; rd = rn / rm
+ // cbnz rm, #8 ; branch over trap
+ // udf ; divide by zero
+ // msub rd, rd, rm, rn ; rd = rn - rd * rm
+
+ // Check for divide by 0.
+ let trap_code = TrapCode::IntegerDivisionByZero;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Zero(rm),
+ });
+
+ ctx.emit(Inst::AluRRRR {
+ alu_op: ALUOp3::MSub64,
+ rd: rd,
+ rn: rd.to_reg(),
+ rm: rm,
+ ra: rn,
+ });
+ } else {
+ if div_op == ALUOp::SDiv64 {
+ // cbnz rm, #8
+ // udf ; divide by zero
+ // cmn rm, 1
+ // ccmp rn, 1, #nzcv, eq
+ // b.vc #8
+ // udf ; signed overflow
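+ //
+ // The ccmp compares rn with 1 only when the cmn found rm == -1 (otherwise it loads an
+ // all-clear NZCV); V is then set exactly when rn is the minimum value, so the
+ // conditional trap fires only for min_value / -1.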
+
+ // Check for divide by 0.
+ let trap_code = TrapCode::IntegerDivisionByZero;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Zero(rm),
+ });
+
+ // Check for signed overflow. The only case is min_value / -1.
+ let ty = ty.unwrap();
+ // The following checks must be done in 32-bit or 64-bit, depending
+ // on the input type, even though the initial div instruction is
+ // currently always done in 64-bit.
+ let size = OperandSize::from_ty(ty);
+ // Check RHS is -1.
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64),
+ rd: writable_zero_reg(),
+ rn: rm,
+ imm12: Imm12::maybe_from_u64(1).unwrap(),
+ });
+ // Check LHS is min_value, by subtracting 1 and branching if
+ // there is overflow.
+ ctx.emit(Inst::CCmpImm {
+ size,
+ rn,
+ imm: UImm5::maybe_from_u8(1).unwrap(),
+ nzcv: NZCV::new(false, false, false, false),
+ cond: Cond::Eq,
+ });
+ let trap_code = TrapCode::IntegerOverflow;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(Cond::Vs),
+ });
+ } else {
+ // cbnz rm, #8
+ // udf ; divide by zero
+
+ // Check for divide by 0.
+ let trap_code = TrapCode::IntegerDivisionByZero;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Zero(rm),
+ });
+ }
+ }
+ }
+
+ Opcode::Uextend | Opcode::Sextend => {
+ let output_ty = ty.unwrap();
+ let input_ty = ctx.input_ty(insn, 0);
+ let from_bits = ty_bits(input_ty) as u8;
+ let to_bits = ty_bits(output_ty) as u8;
+ let to_bits = std::cmp::max(32, to_bits);
+ assert!(from_bits <= to_bits);
+ if from_bits < to_bits {
+ let signed = op == Opcode::Sextend;
+ let rd = get_output_reg(ctx, outputs[0]);
+
+ if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) {
+ let idx =
+ if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(extract_insn) {
+ *imm
+ } else {
+ unreachable!();
+ };
+ let input = InsnInput {
+ insn: extract_insn,
+ input: 0,
+ };
+ let rn = put_input_in_reg(ctx, input, NarrowValueMode::None);
+ let size = VectorSize::from_ty(ctx.input_ty(extract_insn, 0));
+
+ if signed {
+ let scalar_size = OperandSize::from_ty(output_ty);
+
+ ctx.emit(Inst::MovFromVecSigned {
+ rd,
+ rn,
+ idx,
+ size,
+ scalar_size,
+ });
+ } else {
+ ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
+ }
+ } else {
+ // If we reach this point, we weren't able to incorporate the extend as
+ // a register-mode on another instruction, so we have a 'None'
+ // narrow-value/extend mode here, and we emit the explicit instruction.
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::Extend {
+ rd,
+ rn,
+ signed,
+ from_bits,
+ to_bits,
+ });
+ }
+ }
+ }
+
+ Opcode::Bnot => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
+ let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
+ // NOT rd, rm ==> ORR_NOT rd, zero, rm
+ ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
+ } else {
+ let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::VecMisc {
+ op: VecMisc2::Not,
+ rd,
+ rn: rm,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::Band
+ | Opcode::Bor
+ | Opcode::Bxor
+ | Opcode::BandNot
+ | Opcode::BorNot
+ | Opcode::BxorNot => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
+ let alu_op = match op {
+ Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
+ Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
+ Opcode::Bxor => choose_32_64(ty, ALUOp::Eor32, ALUOp::Eor64),
+ Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
+ Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
+ Opcode::BxorNot => choose_32_64(ty, ALUOp::EorNot32, ALUOp::EorNot64),
+ _ => unreachable!(),
+ };
+ ctx.emit(alu_inst_immlogic(alu_op, rd, rn, rm));
+ } else {
+ let alu_op = match op {
+ Opcode::Band => VecALUOp::And,
+ Opcode::BandNot => VecALUOp::Bic,
+ Opcode::Bor => VecALUOp::Orr,
+ Opcode::Bxor => VecALUOp::Eor,
+ _ => unreachable!(),
+ };
+
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+
+ ctx.emit(Inst::VecRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
+ let ty = ty.unwrap();
+ let rd = get_output_reg(ctx, outputs[0]);
+ if !ty.is_vector() {
+ let size = OperandSize::from_bits(ty_bits(ty));
+ let narrow_mode = match (op, size) {
+ (Opcode::Ishl, _) => NarrowValueMode::None,
+ (Opcode::Ushr, OperandSize::Size64) => NarrowValueMode::ZeroExtend64,
+ (Opcode::Ushr, OperandSize::Size32) => NarrowValueMode::ZeroExtend32,
+ (Opcode::Sshr, OperandSize::Size64) => NarrowValueMode::SignExtend64,
+ (Opcode::Sshr, OperandSize::Size32) => NarrowValueMode::SignExtend32,
+ _ => unreachable!(),
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
+ let alu_op = match op {
+ Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
+ Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
+ Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
+ _ => unreachable!(),
+ };
+ ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
+ } else {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let size = VectorSize::from_ty(ty);
+ let (alu_op, is_right_shift) = match op {
+ Opcode::Ishl => (VecALUOp::Sshl, false),
+ Opcode::Ushr => (VecALUOp::Ushl, true),
+ Opcode::Sshr => (VecALUOp::Sshl, true),
+ _ => unreachable!(),
+ };
+
+ let rm = if is_right_shift {
+ // Right shifts are implemented with a negative left shift.
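+ // sshl/ushl shift right when the per-lane shift amount is negative, so we
+ // negate the requested amount before duplicating it across the lanes.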
+ let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+ let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
+ let rn = zero_reg();
+ ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
+ tmp.to_reg()
+ } else {
+ put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
+ };
+
+ ctx.emit(Inst::VecDup { rd, rn: rm, size });
+
+ ctx.emit(Inst::VecRRR {
+ alu_op,
+ rd,
+ rn,
+ rm: rd.to_reg(),
+ size,
+ });
+ }
+ }
+
+ Opcode::Rotr | Opcode::Rotl => {
+ // aarch64 doesn't have a left-rotate instruction, but a left rotation of K places is
+ // effectively a right rotation of N - K places, where N is the integer's bit size. We
+ // implement left rotations with this trick.
+ //
+ // For a 32-bit or 64-bit rotate-right, we can use the ROR instruction directly.
+ //
+ // For a < 32-bit rotate-right, we synthesize this as:
+ //
+ // rotr rd, rn, rm
+ //
+ // =>
+ //
+ // zero-extend rn, <32-or-64>
+ // and tmp_masked_rm, rm, <bitwidth - 1>
+ // sub tmp1, tmp_masked_rm, <bitwidth>
+ // sub tmp1, zero, tmp1 ; neg
+ // lsr tmp2, rn, tmp_masked_rm
+ // lsl rd, rn, tmp1
+ // orr rd, rd, tmp2
+ //
+ // For a constant amount, we can instead do:
+ //
+ // zero-extend rn, <32-or-64>
+ // lsr tmp2, rn, #<shiftimm>
+ // lsl rd, rn, <bitwidth - shiftimm>
+ // orr rd, rd, tmp2
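+ //
+ // For example, an 8-bit rotl by 3 is lowered as an 8-bit rotr by 5.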
+
+ let is_rotl = op == Opcode::Rotl;
+
+ let ty = ty.unwrap();
+ let ty_bits_size = ty_bits(ty) as u8;
+
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(
+ ctx,
+ inputs[0],
+ if ty_bits_size <= 32 {
+ NarrowValueMode::ZeroExtend32
+ } else {
+ NarrowValueMode::ZeroExtend64
+ },
+ );
+ let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
+
+ if ty_bits_size == 32 || ty_bits_size == 64 {
+ let alu_op = choose_32_64(ty, ALUOp::RotR32, ALUOp::RotR64);
+ match rm {
+ ResultRegImmShift::ImmShift(mut immshift) => {
+ if is_rotl {
+ immshift.imm = ty_bits_size.wrapping_sub(immshift.value());
+ }
+ immshift.imm &= ty_bits_size - 1;
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op,
+ rd,
+ rn,
+ immshift,
+ });
+ }
+
+ ResultRegImmShift::Reg(rm) => {
+ let rm = if is_rotl {
+ // Really ty_bits_size - rm (the rotate amount), but the upper bits of the result are
+ // ignored (because of the implicit masking done by the instruction),
+ // so this is equivalent to negating the input.
+ let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
+ let tmp = ctx.alloc_tmp(RegClass::I64, ty);
+ ctx.emit(Inst::AluRRR {
+ alu_op,
+ rd: tmp,
+ rn: zero_reg(),
+ rm,
+ });
+ tmp.to_reg()
+ } else {
+ rm
+ };
+ ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+ }
+ }
+ } else {
+ debug_assert!(ty_bits_size < 32);
+
+ match rm {
+ ResultRegImmShift::Reg(reg) => {
+ let reg = if is_rotl {
+ // Really ty_bits_size - reg (the rotate amount), but the upper bits of the result are
+ // ignored (because of the implicit masking done by the instruction),
+ // so this is equivalent to negating the input.
+ let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Sub32,
+ rd: tmp,
+ rn: zero_reg(),
+ rm: reg,
+ });
+ tmp.to_reg()
+ } else {
+ reg
+ };
+
+ // Explicitly mask the rotation count.
+ let tmp_masked_rm = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op: ALUOp::And32,
+ rd: tmp_masked_rm,
+ rn: reg,
+ imml: ImmLogic::maybe_from_u64((ty_bits_size - 1) as u64, I32).unwrap(),
+ });
+ let tmp_masked_rm = tmp_masked_rm.to_reg();
+
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub32,
+ rd: tmp1,
+ rn: tmp_masked_rm,
+ imm12: Imm12::maybe_from_u64(ty_bits_size as u64).unwrap(),
+ });
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Sub32,
+ rd: tmp1,
+ rn: zero_reg(),
+ rm: tmp1.to_reg(),
+ });
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Lsr32,
+ rd: tmp2,
+ rn,
+ rm: tmp_masked_rm,
+ });
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Lsl32,
+ rd,
+ rn,
+ rm: tmp1.to_reg(),
+ });
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Orr32,
+ rd,
+ rn: rd.to_reg(),
+ rm: tmp2.to_reg(),
+ });
+ }
+
+ ResultRegImmShift::ImmShift(mut immshift) => {
+ if is_rotl {
+ immshift.imm = ty_bits_size.wrapping_sub(immshift.value());
+ }
+ immshift.imm &= ty_bits_size - 1;
+
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr32,
+ rd: tmp1,
+ rn,
+ immshift: immshift.clone(),
+ });
+
+ let amount = immshift.value() & (ty_bits_size - 1);
+ let opp_shift =
+ ImmShift::maybe_from_u64(ty_bits_size as u64 - amount as u64).unwrap();
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl32,
+ rd,
+ rn,
+ immshift: opp_shift,
+ });
+
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Orr32,
+ rd,
+ rn: rd.to_reg(),
+ rm: tmp1.to_reg(),
+ });
+ }
+ }
+ }
+ }
+
+ Opcode::Bitrev | Opcode::Clz | Opcode::Cls | Opcode::Ctz => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let needs_zext = match op {
+ Opcode::Bitrev | Opcode::Ctz => false,
+ Opcode::Clz | Opcode::Cls => true,
+ _ => unreachable!(),
+ };
+ let ty = ty.unwrap();
+ let narrow_mode = if needs_zext && ty_bits(ty) == 64 {
+ NarrowValueMode::ZeroExtend64
+ } else if needs_zext {
+ NarrowValueMode::ZeroExtend32
+ } else {
+ NarrowValueMode::None
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let op_ty = match ty {
+ I8 | I16 | I32 => I32,
+ I64 => I64,
+ _ => panic!("Unsupported type for Bitrev/Clz/Cls"),
+ };
+ let bitop = match op {
+ Opcode::Clz | Opcode::Cls | Opcode::Bitrev => BitOp::from((op, op_ty)),
+ Opcode::Ctz => BitOp::from((Opcode::Bitrev, op_ty)),
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::BitRR { rd, rn, op: bitop });
+
+ // Both bitrev and ctz use a bit-reverse (rbit) instruction; ctz to reduce the problem
+ // to a clz, and bitrev as the main operation.
+ if op == Opcode::Bitrev || op == Opcode::Ctz {
+ // Reversing an n-bit value (n < 32) with a 32-bit bitrev instruction will place
+ // the reversed result in the highest n bits, so we need to shift them down into
+ // place.
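+ // E.g. reversing an I8 with the 32-bit rbit leaves the result in bits 31..24,
+ // so we shift right by 24.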
+ let right_shift = match ty {
+ I8 => Some(24),
+ I16 => Some(16),
+ I32 => None,
+ I64 => None,
+ _ => panic!("Unsupported type for Bitrev"),
+ };
+ if let Some(s) = right_shift {
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr32,
+ rd,
+ rn: rd.to_reg(),
+ immshift: ImmShift::maybe_from_u64(s).unwrap(),
+ });
+ }
+ }
+
+ if op == Opcode::Ctz {
+ ctx.emit(Inst::BitRR {
+ op: BitOp::from((Opcode::Clz, op_ty)),
+ rd,
+ rn: rd.to_reg(),
+ });
+ }
+ }
+
+ Opcode::Popcnt => {
+ // Lower popcount using the following algorithm:
+ //
+ // x -= (x >> 1) & 0x5555555555555555
+ // x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333)
+ // x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f
+ // x += x << 8
+ // x += x << 16
+ // x += x << 32
+ // x >> 56
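+ //
+ // This is the classic SWAR popcount: after the first three steps each byte holds its
+ // own bit count, the chained shifted adds accumulate all byte counts into the top
+ // byte, and the final shift by 56 extracts it.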
+ let ty = ty.unwrap();
+ let rd = get_output_reg(ctx, outputs[0]);
+ // FIXME(#1537): zero-extend 8/16/32-bit operands only to 32 bits,
+ // and fix the sequence below to work properly for this.
+ let narrow_mode = NarrowValueMode::ZeroExtend64;
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+
+ // If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
+ // the rest of the code is identical to the 64-bit version.
+ // lsr [wx]d, [wx]n, #1
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
+ rd: rd,
+ rn: rn,
+ immshift: ImmShift::maybe_from_u64(1).unwrap(),
+ });
+
+ // and xd, xd, #0x5555555555555555
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: rd,
+ rn: rd.to_reg(),
+ imml: ImmLogic::maybe_from_u64(0x5555555555555555, I64).unwrap(),
+ });
+
+ // sub xd, xn, xd
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd: rd,
+ rn: rn,
+ rm: rd.to_reg(),
+ });
+
+ // and xt, xd, #0x3333333333333333
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: tmp,
+ rn: rd.to_reg(),
+ imml: ImmLogic::maybe_from_u64(0x3333333333333333, I64).unwrap(),
+ });
+
+ // lsr xd, xd, #2
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr64,
+ rd: rd,
+ rn: rd.to_reg(),
+ immshift: ImmShift::maybe_from_u64(2).unwrap(),
+ });
+
+ // and xd, xd, #0x3333333333333333
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: rd,
+ rn: rd.to_reg(),
+ imml: ImmLogic::maybe_from_u64(0x3333333333333333, I64).unwrap(),
+ });
+
+ // add xt, xd, xt
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: rd.to_reg(),
+ rm: tmp.to_reg(),
+ });
+
+ // add xt, xt, xt, LSR #4
+ ctx.emit(Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: tmp.to_reg(),
+ rm: tmp.to_reg(),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSR,
+ ShiftOpShiftImm::maybe_from_shift(4).unwrap(),
+ ),
+ });
+
+ // and xt, xt, #0x0f0f0f0f0f0f0f0f
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd: tmp,
+ rn: tmp.to_reg(),
+ imml: ImmLogic::maybe_from_u64(0x0f0f0f0f0f0f0f0f, I64).unwrap(),
+ });
+
+ // add xt, xt, xt, LSL #8
+ ctx.emit(Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: tmp.to_reg(),
+ rm: tmp.to_reg(),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(8).unwrap(),
+ ),
+ });
+
+ // add xt, xt, xt, LSL #16
+ ctx.emit(Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: tmp.to_reg(),
+ rm: tmp.to_reg(),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(16).unwrap(),
+ ),
+ });
+
+ // add xt, xt, xt, LSL #32
+ ctx.emit(Inst::AluRRRShift {
+ alu_op: ALUOp::Add64,
+ rd: tmp,
+ rn: tmp.to_reg(),
+ rm: tmp.to_reg(),
+ shiftop: ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(32).unwrap(),
+ ),
+ });
+
+ // lsr xd, xt, #56
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsr64,
+ rd: rd,
+ rn: tmp.to_reg(),
+ immshift: ImmShift::maybe_from_u64(56).unwrap(),
+ });
+ }
+
+ Opcode::Load
+ | Opcode::Uload8
+ | Opcode::Sload8
+ | Opcode::Uload16
+ | Opcode::Sload16
+ | Opcode::Uload32
+ | Opcode::Sload32
+ | Opcode::LoadComplex
+ | Opcode::Uload8Complex
+ | Opcode::Sload8Complex
+ | Opcode::Uload16Complex
+ | Opcode::Sload16Complex
+ | Opcode::Uload32Complex
+ | Opcode::Sload32Complex
+ | Opcode::Sload8x8
+ | Opcode::Uload8x8
+ | Opcode::Sload16x4
+ | Opcode::Uload16x4
+ | Opcode::Sload32x2
+ | Opcode::Uload32x2 => {
+ let off = ctx.data(insn).load_store_offset().unwrap();
+ let elem_ty = match op {
+ Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
+ I8
+ }
+ Opcode::Sload16
+ | Opcode::Uload16
+ | Opcode::Sload16Complex
+ | Opcode::Uload16Complex => I16,
+ Opcode::Sload32
+ | Opcode::Uload32
+ | Opcode::Sload32Complex
+ | Opcode::Uload32Complex => I32,
+ Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
+ Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
+ Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
+ Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
+ _ => unreachable!(),
+ };
+ let sign_extend = match op {
+ Opcode::Sload8
+ | Opcode::Sload8Complex
+ | Opcode::Sload16
+ | Opcode::Sload16Complex
+ | Opcode::Sload32
+ | Opcode::Sload32Complex => true,
+ _ => false,
+ };
+ let is_float = ty_has_float_or_vec_representation(elem_ty);
+
+ let mem = lower_address(ctx, elem_ty, &inputs[..], off);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let flags = ctx
+ .memflags(insn)
+ .expect("Load instruction should have memflags");
+
+ ctx.emit(match (ty_bits(elem_ty), sign_extend, is_float) {
+ (1, _, _) => Inst::ULoad8 { rd, mem, flags },
+ (8, false, _) => Inst::ULoad8 { rd, mem, flags },
+ (8, true, _) => Inst::SLoad8 { rd, mem, flags },
+ (16, false, _) => Inst::ULoad16 { rd, mem, flags },
+ (16, true, _) => Inst::SLoad16 { rd, mem, flags },
+ (32, false, false) => Inst::ULoad32 { rd, mem, flags },
+ (32, true, false) => Inst::SLoad32 { rd, mem, flags },
+ (32, _, true) => Inst::FpuLoad32 { rd, mem, flags },
+ (64, _, false) => Inst::ULoad64 { rd, mem, flags },
+ // Note that we treat some of the vector loads as scalar floating-point loads,
+ // which is correct in a little endian environment.
+ (64, _, true) => Inst::FpuLoad64 { rd, mem, flags },
+ (128, _, _) => Inst::FpuLoad128 { rd, mem, flags },
+ _ => panic!("Unsupported size in load"),
+ });
+
+ let vec_extend = match op {
+ Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
+ Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
+ Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
+ Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
+ Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
+ Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
+ _ => None,
+ };
+
+ if let Some(t) = vec_extend {
+ ctx.emit(Inst::VecExtend {
+ t,
+ rd,
+ rn: rd.to_reg(),
+ high_half: false,
+ });
+ }
+ }
+
+ Opcode::LoadSplat => {
+ let off = ctx.data(insn).load_store_offset().unwrap();
+ let ty = ty.unwrap();
+ let mem = lower_address(ctx, ty.lane_type(), &inputs[..], off);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let size = VectorSize::from_ty(ty);
+ let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+
+ ctx.emit(Inst::LoadAddr { rd: tmp, mem });
+ ctx.emit(Inst::VecLoadReplicate {
+ rd,
+ rn: tmp.to_reg(),
+ size,
+ });
+ }
+
+ Opcode::Store
+ | Opcode::Istore8
+ | Opcode::Istore16
+ | Opcode::Istore32
+ | Opcode::StoreComplex
+ | Opcode::Istore8Complex
+ | Opcode::Istore16Complex
+ | Opcode::Istore32Complex => {
+ let off = ctx.data(insn).load_store_offset().unwrap();
+ let elem_ty = match op {
+ Opcode::Istore8 | Opcode::Istore8Complex => I8,
+ Opcode::Istore16 | Opcode::Istore16Complex => I16,
+ Opcode::Istore32 | Opcode::Istore32Complex => I32,
+ Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
+ _ => unreachable!(),
+ };
+ let is_float = ty_has_float_or_vec_representation(elem_ty);
+ let flags = ctx
+ .memflags(insn)
+ .expect("Store instruction should have memflags");
+
+ let mem = lower_address(ctx, elem_ty, &inputs[1..], off);
+ let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+ ctx.emit(match (ty_bits(elem_ty), is_float) {
+ (1, _) | (8, _) => Inst::Store8 { rd, mem, flags },
+ (16, _) => Inst::Store16 { rd, mem, flags },
+ (32, false) => Inst::Store32 { rd, mem, flags },
+ (32, true) => Inst::FpuStore32 { rd, mem, flags },
+ (64, false) => Inst::Store64 { rd, mem, flags },
+ (64, true) => Inst::FpuStore64 { rd, mem, flags },
+ (128, _) => Inst::FpuStore128 { rd, mem, flags },
+ _ => panic!("Unsupported size in store"),
+ });
+ }
+
+ Opcode::StackAddr => {
+ let (stack_slot, offset) = match *ctx.data(insn) {
+ InstructionData::StackLoad {
+ opcode: Opcode::StackAddr,
+ stack_slot,
+ offset,
+ } => (stack_slot, offset),
+ _ => unreachable!(),
+ };
+ let rd = get_output_reg(ctx, outputs[0]);
+ let offset: i32 = offset.into();
+ let inst = ctx
+ .abi()
+ .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd);
+ ctx.emit(inst);
+ }
+
+ Opcode::AtomicRmw => {
+ let r_dst = get_output_reg(ctx, outputs[0]);
+ let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+ // Make sure that both args are in virtual regs, since in effect
+ // we have to do a parallel copy to get them safely to the AtomicRMW input
+ // regs, and that's not guaranteed safe if either is in a real reg.
+ r_addr = ctx.ensure_in_vreg(r_addr, I64);
+ r_arg2 = ctx.ensure_in_vreg(r_arg2, I64);
+ // Move the args to the preordained AtomicRMW input regs
+ ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
+ ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64));
+ // Now the AtomicRMW insn itself
+ let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
+ ctx.emit(Inst::AtomicRMW { ty: ty_access, op });
+ // And finally, copy the preordained AtomicRMW output reg to its destination.
+ ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
+ // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+ }
+
+ Opcode::AtomicCas => {
+ // This is very similar to, but not identical to, the AtomicRmw case. Note
+ // that the AtomicCAS sequence does its own masking, so we don't need to worry
+ // about zero-extending narrow (I8/I16/I32) values here.
+ let r_dst = get_output_reg(ctx, outputs[0]);
+ let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+ // Make sure that all three args are in virtual regs. See corresponding comment
+ // for `Opcode::AtomicRmw` above.
+ r_addr = ctx.ensure_in_vreg(r_addr, I64);
+ r_expected = ctx.ensure_in_vreg(r_expected, I64);
+ r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
+ // Move the args to the preordained AtomicCAS input regs
+ ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(xreg(26)),
+ r_expected,
+ I64,
+ ));
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(xreg(28)),
+ r_replacement,
+ I64,
+ ));
+ // Now the AtomicCAS itself, implemented in the normal way, with an LL-SC loop
+ ctx.emit(Inst::AtomicCAS { ty: ty_access });
+ // And finally, copy the preordained AtomicCAS output reg to its destination.
+ ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
+ // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
+ }
+
+ Opcode::AtomicLoad => {
+ let r_data = get_output_reg(ctx, outputs[0]);
+ let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+ ctx.emit(Inst::AtomicLoad {
+ ty: ty_access,
+ r_data,
+ r_addr,
+ });
+ }
+
+ Opcode::AtomicStore => {
+ let r_data = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty_access = ctx.input_ty(insn, 0);
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+ ctx.emit(Inst::AtomicStore {
+ ty: ty_access,
+ r_data,
+ r_addr,
+ });
+ }
+
+ Opcode::Fence => {
+ ctx.emit(Inst::Fence {});
+ }
+
+ Opcode::StackLoad | Opcode::StackStore => {
+ panic!("Direct stack memory access not supported; should not be used by Wasm");
+ }
+
+ Opcode::HeapAddr => {
+ panic!("heap_addr should have been removed by legalization!");
+ }
+
+ Opcode::TableAddr => {
+ panic!("table_addr should have been removed by legalization!");
+ }
+
+ Opcode::ConstAddr => unimplemented!(),
+
+ Opcode::Nop => {
+ // Nothing.
+ }
+
+ Opcode::Select => {
+ let flag_input = inputs[0];
+ let cond = if let Some(icmp_insn) =
+ maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
+ {
+ let condcode = ctx.data(icmp_insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
+ lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed);
+ cond
+ } else if let Some(fcmp_insn) =
+ maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
+ {
+ let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
+ let cond = lower_fp_condcode(condcode);
+ lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
+ cond
+ } else {
+ let (cmp_op, narrow_mode) = if ty_bits(ctx.input_ty(insn, 0)) > 32 {
+ (ALUOp::SubS64, NarrowValueMode::ZeroExtend64)
+ } else {
+ (ALUOp::SubS32, NarrowValueMode::ZeroExtend32)
+ };
+
+ let rcond = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ // cmp rcond, #0
+ ctx.emit(Inst::AluRRR {
+ alu_op: cmp_op,
+ rd: writable_zero_reg(),
+ rn: rcond,
+ rm: zero_reg(),
+ });
+ Cond::Ne
+ };
+
+ // csel.cond rd, rn, rm
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+ let ty = ctx.output_ty(insn, 0);
+ let bits = ty_bits(ty);
+ let is_float = ty_has_float_or_vec_representation(ty);
+ if is_float && bits == 32 {
+ ctx.emit(Inst::FpuCSel32 { cond, rd, rn, rm });
+ } else if is_float && bits == 64 {
+ ctx.emit(Inst::FpuCSel64 { cond, rd, rn, rm });
+ } else if is_float && bits == 128 {
+ ctx.emit(Inst::VecCSel { cond, rd, rn, rm });
+ } else {
+ ctx.emit(Inst::CSel { cond, rd, rn, rm });
+ }
+ }
+
+ Opcode::Selectif | Opcode::SelectifSpectreGuard => {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
+ // Verification ensures that the input is always a
+ // single-def ifcmp.
+ let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap();
+ lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
+
+ // csel.COND rd, rn, rm
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+ let ty = ctx.output_ty(insn, 0);
+ let bits = ty_bits(ty);
+ let is_float = ty_has_float_or_vec_representation(ty);
+ if is_float && bits == 32 {
+ ctx.emit(Inst::FpuCSel32 { cond, rd, rn, rm });
+ } else if is_float && bits == 64 {
+ ctx.emit(Inst::FpuCSel64 { cond, rd, rn, rm });
+ } else {
+ ctx.emit(Inst::CSel { cond, rd, rn, rm });
+ }
+ }
+
+ Opcode::Bitselect | Opcode::Vselect => {
+ let ty = ty.unwrap();
+ if !ty.is_vector() {
+ debug_assert_ne!(Opcode::Vselect, op);
+ let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+ // AND rTmp, rn, rcond
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::And64,
+ rd: tmp,
+ rn,
+ rm: rcond,
+ });
+ // BIC rd, rm, rcond
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::AndNot64,
+ rd,
+ rn: rm,
+ rm: rcond,
+ });
+ // ORR rd, rd, rTmp
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Orr64,
+ rd,
+ rn: rd.to_reg(),
+ rm: tmp.to_reg(),
+ });
+ } else {
+ let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(rd, rcond, ty));
+
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Bsl,
+ rd,
+ rn,
+ rm,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::Trueif => {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
+ // Verification ensures that the input is always a
+ // single-def ifcmp.
+ let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap();
+ lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::CSet { rd, cond });
+ normalize_bool_result(ctx, insn, rd);
+ }
+
+ Opcode::Trueff => {
+ let condcode = ctx.data(insn).fp_cond_code().unwrap();
+ let cond = lower_fp_condcode(condcode);
+ let ffcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ffcmp).unwrap();
+ lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::CSet { rd, cond });
+ normalize_bool_result(ctx, insn, rd);
+ }
+
+ Opcode::IsNull | Opcode::IsInvalid => {
+ // Null references are represented by the constant value 0; invalid references are
+ // represented by the constant value -1. See `define_reftypes()` in
+ // `meta/src/isa/x86/encodings.rs` to confirm.
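+ // Either way the Z flag is set exactly when the input matches the sentinel
+ // (subs rn, #0 for null; adds rn, #1 for -1), and cset eq materializes the result.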
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ctx.input_ty(insn, 0);
+ let (alu_op, const_value) = match op {
+ Opcode::IsNull => {
+ // cmp rn, #0
+ (choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64), 0)
+ }
+ Opcode::IsInvalid => {
+ // cmn rn, #1
+ (choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64), 1)
+ }
+ _ => unreachable!(),
+ };
+ let const_value = ResultRSEImm12::Imm12(Imm12::maybe_from_u64(const_value).unwrap());
+ ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, const_value));
+ ctx.emit(Inst::CSet { rd, cond: Cond::Eq });
+ normalize_bool_result(ctx, insn, rd);
+ }
+
+ Opcode::Copy => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ctx.input_ty(insn, 0);
+ ctx.emit(Inst::gen_move(rd, rn, ty));
+ }
+
+ Opcode::Breduce | Opcode::Ireduce => {
+ // Smaller integers/booleans are stored with high-order bits
+ // undefined, so we can simply do a copy.
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ ctx.emit(Inst::gen_move(rd, rn, ty));
+ }
+
+ Opcode::Bextend | Opcode::Bmask => {
+ // Bextend and Bmask both simply sign-extend. This works for:
+ // - Bextend, because booleans are stored as 0 / -1, so we
+ // sign-extend the -1 to a -1 in the wider width.
+ // - Bmask, because the resulting integer mask value must be
+ // all-ones (-1) if the argument is true.
+ //
+ // For a sign-extension from a 1-bit value (Case 1 below), we need
+ // to do things a bit specially, because the ISA does not have a
+ // 1-to-N-bit sign extension instruction. For 8-bit or wider
+ // sources (Case 2 below), we do a sign extension normally.
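+ // (Case 1 masks to the LSB first because only the LSB of a 1-bit boolean is
+ // relied upon here; negating the resulting 0 or 1 then yields 0 or the
+ // all-ones mask.)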
+
+ let from_ty = ctx.input_ty(insn, 0);
+ let to_ty = ctx.output_ty(insn, 0);
+ let from_bits = ty_bits(from_ty);
+ let to_bits = ty_bits(to_ty);
+
+ assert!(
+ from_bits <= 64 && to_bits <= 64,
+ "Vector Bextend not supported yet"
+ );
+ assert!(from_bits <= to_bits);
+
+ if from_bits == to_bits {
+ // Nothing.
+ } else if from_bits == 1 {
+ assert!(to_bits >= 8);
+ // Case 1: 1-bit to N-bit extension: AND the LSB of source into
+ // dest, generating a value of 0 or 1, then negate to get
+ // 0x000... or 0xfff...
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ // AND Rdest, Rsource, #1
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op: ALUOp::And64,
+ rd,
+ rn,
+ imml: ImmLogic::maybe_from_u64(1, I64).unwrap(),
+ });
+ // SUB Rdest, XZR, Rdest (i.e., NEG Rdest)
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Sub64,
+ rd,
+ rn: zero_reg(),
+ rm: rd.to_reg(),
+ });
+ } else {
+ // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A
+ // `true` (all ones, or `-1`) will be extended to -1 with the
+ // larger width.
+ assert!(from_bits >= 8);
+ let narrow_mode = if to_bits == 64 {
+ NarrowValueMode::SignExtend64
+ } else {
+ assert!(to_bits <= 32);
+ NarrowValueMode::SignExtend32
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(rd, rn, to_ty));
+ }
+ }
+
+ Opcode::Bint => {
+ // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND with #1 to
+ // keep only the LSB, giving a 0 / 1-valued integer result.
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let output_bits = ty_bits(ctx.output_ty(insn, 0));
+
+ let (imm_ty, alu_op) = if output_bits > 32 {
+ (I64, ALUOp::And64)
+ } else {
+ (I32, ALUOp::And32)
+ };
+ ctx.emit(Inst::AluRRImmLogic {
+ alu_op,
+ rd,
+ rn,
+ imml: ImmLogic::maybe_from_u64(1, imm_ty).unwrap(),
+ });
+ }
+
+ Opcode::Bitcast => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ity = ctx.input_ty(insn, 0);
+ let oty = ctx.output_ty(insn, 0);
+ let ity_vec_reg = ty_has_float_or_vec_representation(ity);
+ let oty_vec_reg = ty_has_float_or_vec_representation(oty);
+ match (ity_vec_reg, oty_vec_reg) {
+ (true, true) => {
+ let narrow_mode = if ty_bits(ity) <= 32 && ty_bits(oty) <= 32 {
+ NarrowValueMode::ZeroExtend32
+ } else {
+ NarrowValueMode::ZeroExtend64
+ };
+ let rm = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ ctx.emit(Inst::gen_move(rd, rm, oty));
+ }
+ (false, false) => {
+ let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::gen_move(rd, rm, oty));
+ }
+ (false, true) => {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
+ ctx.emit(Inst::MovToFpu {
+ rd,
+ rn,
+ size: ScalarSize::Size64,
+ });
+ }
+ (true, false) => {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::MovFromVec {
+ rd,
+ rn,
+ idx: 0,
+ size: VectorSize::Size64x2,
+ });
+ }
+ }
+ }
+
+ Opcode::FallthroughReturn | Opcode::Return => {
+ for (i, input) in inputs.iter().enumerate() {
+ // N.B.: according to the AArch64 ABI, the top bits of a register
+ // (above the bits for the value's type) are undefined, so we
+ // need not extend the return values.
+ let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
+ let retval_reg = ctx.retval(i);
+ let ty = ctx.input_ty(insn, i);
+ ctx.emit(Inst::gen_move(retval_reg, reg, ty));
+ }
+ // N.B.: the Ret itself is generated by the ABI.
+ }
+
+ Opcode::Ifcmp | Opcode::Ffcmp => {
+ // An Ifcmp/Ffcmp is never an isel root: it is only ever lowered as part of its use by a
+ // brif/brff or trueif/trueff instruction. This holds as long as the IR uses the
+ // Ifcmp/Ffcmp from the same block or a dominating block; in other words, the flags
+ // cannot pass through a BB param (phi). The flags pass of the verifier ensures this.
+ panic!("Should never reach ifcmp as isel root!");
+ }
+
+ Opcode::Icmp => {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ let bits = ty_bits(ty);
+ let narrow_mode = match (bits <= 32, is_signed) {
+ (true, true) => NarrowValueMode::SignExtend32,
+ (true, false) => NarrowValueMode::ZeroExtend32,
+ (false, true) => NarrowValueMode::SignExtend64,
+ (false, false) => NarrowValueMode::ZeroExtend64,
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+
+ if !ty.is_vector() {
+ let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
+ let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
+ ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
+ ctx.emit(Inst::CSet { cond, rd });
+ normalize_bool_result(ctx, insn, rd);
+ } else {
+ let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
+ lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
+ }
+ }
+
+ Opcode::Fcmp => {
+ let condcode = ctx.data(insn).fp_cond_code().unwrap();
+ let cond = lower_fp_condcode(condcode);
+ let ty = ctx.input_ty(insn, 0);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+
+ if !ty.is_vector() {
+ match ty_bits(ty) {
+ 32 => {
+ ctx.emit(Inst::FpuCmp32 { rn, rm });
+ }
+ 64 => {
+ ctx.emit(Inst::FpuCmp64 { rn, rm });
+ }
+ _ => panic!("Bad float size"),
+ }
+ ctx.emit(Inst::CSet { cond, rd });
+ normalize_bool_result(ctx, insn, rd);
+ } else {
+ lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
+ }
+ }
+
+ Opcode::JumpTableEntry | Opcode::JumpTableBase => {
+ panic!("Should not appear: we handle BrTable directly");
+ }
+
+ Opcode::Debugtrap => {
+ ctx.emit(Inst::Brk);
+ }
+
+ Opcode::Trap | Opcode::ResumableTrap => {
+ let trap_code = ctx.data(insn).trap_code().unwrap();
+ ctx.emit_safepoint(Inst::Udf { trap_code });
+ }
+
+ Opcode::Trapif | Opcode::Trapff => {
+ let trap_code = ctx.data(insn).trap_code().unwrap();
+
+ let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ // The flags must not have been clobbered by any other
+ // instruction between the iadd_ifcout and this instruction, as
+ // verified by the CLIF validator; so we can simply use the
+ // flags here.
+ cond
+ } else if op == Opcode::Trapif {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
+
+ // Verification ensures that the input is always a single-def ifcmp.
+ let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap();
+ lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
+ cond
+ } else {
+ let condcode = ctx.data(insn).fp_cond_code().unwrap();
+ let cond = lower_fp_condcode(condcode);
+
+ // Verification ensures that the input is always a
+ // single-def ffcmp.
+ let ffcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ffcmp).unwrap();
+ lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
+ cond
+ };
+
+ ctx.emit_safepoint(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(cond),
+ });
+ }
+
+ Opcode::Safepoint => {
+ panic!("safepoint instructions not used by new backend's safepoints!");
+ }
+
+ Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
+ panic!("trapz / trapnz / resumable_trapnz should have been removed by legalization!");
+ }
+
+ Opcode::FuncAddr => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let (extname, _) = ctx.call_target(insn).unwrap();
+ let extname = extname.clone();
+ ctx.emit(Inst::LoadExtName {
+ rd,
+ name: Box::new(extname),
+ offset: 0,
+ });
+ }
+
+ Opcode::GlobalValue => {
+ panic!("global_value should have been removed by legalization!");
+ }
+
+ Opcode::SymbolValue => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
+ let extname = extname.clone();
+ ctx.emit(Inst::LoadExtName {
+ rd,
+ name: Box::new(extname),
+ offset,
+ });
+ }
+
+ Opcode::Call | Opcode::CallIndirect => {
+ let caller_conv = ctx.abi().call_conv();
+ let (mut abi, inputs) = match op {
+ Opcode::Call => {
+ let (extname, dist) = ctx.call_target(insn).unwrap();
+ let extname = extname.clone();
+ let sig = ctx.call_sig(insn).unwrap();
+ assert!(inputs.len() == sig.params.len());
+ assert!(outputs.len() == sig.returns.len());
+ (
+ AArch64ABICaller::from_func(sig, &extname, dist, caller_conv)?,
+ &inputs[..],
+ )
+ }
+ Opcode::CallIndirect => {
+ let ptr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
+ let sig = ctx.call_sig(insn).unwrap();
+ assert!(inputs.len() - 1 == sig.params.len());
+ assert!(outputs.len() == sig.returns.len());
+ (
+ AArch64ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
+ &inputs[1..],
+ )
+ }
+ _ => unreachable!(),
+ };
+
+ abi.emit_stack_pre_adjust(ctx);
+ assert!(inputs.len() == abi.num_args());
+ for (i, input) in inputs.iter().enumerate() {
+ let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None);
+ abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
+ }
+ abi.emit_call(ctx);
+ for (i, output) in outputs.iter().enumerate() {
+ let retval_reg = get_output_reg(ctx, *output);
+ abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
+ }
+ abi.emit_stack_post_adjust(ctx);
+ }
+
+ Opcode::GetPinnedReg => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::mov(rd, xreg(PINNED_REG)));
+ }
+
+ Opcode::SetPinnedReg => {
+ let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ ctx.emit(Inst::mov(writable_xreg(PINNED_REG), rm));
+ }
+
+ Opcode::Spill
+ | Opcode::Fill
+ | Opcode::FillNop
+ | Opcode::Regmove
+ | Opcode::CopySpecial
+ | Opcode::CopyToSsa
+ | Opcode::CopyNop
+ | Opcode::AdjustSpDown
+ | Opcode::AdjustSpUpImm
+ | Opcode::AdjustSpDownImm
+ | Opcode::IfcmpSp
+ | Opcode::Regspill
+ | Opcode::Regfill => {
+ panic!("Unused opcode should not be encountered.");
+ }
+
+ Opcode::Jump
+ | Opcode::Fallthrough
+ | Opcode::Brz
+ | Opcode::Brnz
+ | Opcode::BrIcmp
+ | Opcode::Brif
+ | Opcode::Brff
+ | Opcode::IndirectJumpTableBr
+ | Opcode::BrTable => {
+ panic!("Branch opcode reached non-branch lowering logic!");
+ }
+
+ Opcode::Vconst => {
+ let value = const_param_to_u128(ctx, insn).expect("Invalid immediate bytes");
+ let rd = get_output_reg(ctx, outputs[0]);
+ lower_constant_f128(ctx, rd, value);
+ }
+
+ Opcode::RawBitcast => {
+ let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ ctx.emit(Inst::gen_move(rd, rm, ty));
+ }
+
+ Opcode::Extractlane => {
+ if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(insn) {
+ let idx = *imm;
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
+ let ty = ty.unwrap();
+
+ if ty_has_int_representation(ty) {
+ ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
+ // Plain moves are faster on some processors.
+ } else if idx == 0 {
+ ctx.emit(Inst::gen_move(rd, rn, ty));
+ } else {
+ ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
+ }
+ } else {
+ unreachable!();
+ }
+ }
+
+ Opcode::Insertlane => {
+ let idx = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
+ *imm
+ } else {
+ unreachable!();
+ };
+ let input_ty = ctx.input_ty(insn, 1);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ let size = VectorSize::from_ty(ty);
+
+ ctx.emit(Inst::gen_move(rd, rm, ty));
+
+ if ty_has_int_representation(input_ty) {
+ ctx.emit(Inst::MovToVec { rd, rn, idx, size });
+ } else {
+ ctx.emit(Inst::VecMovElement {
+ rd,
+ rn,
+ dest_idx: idx,
+ src_idx: 0,
+ size,
+ });
+ }
+ }
+
+ Opcode::Splat => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let size = VectorSize::from_ty(ty.unwrap());
+
+ if let Some((_, insn)) = maybe_input_insn_multi(
+ ctx,
+ inputs[0],
+ &[
+ Opcode::Bconst,
+ Opcode::F32const,
+ Opcode::F64const,
+ Opcode::Iconst,
+ ],
+ ) {
+ lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
+ } else if let Some(insn) =
+ maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Iconst, Opcode::Ireduce)
+ {
+ lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
+ } else if let Some(insn) =
+ maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Bconst, Opcode::Breduce)
+ {
+ lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
+ } else {
+ let input_ty = ctx.input_ty(insn, 0);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let inst = if ty_has_int_representation(input_ty) {
+ Inst::VecDup { rd, rn, size }
+ } else {
+ Inst::VecDupFromFpu { rd, rn, size }
+ };
+
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::ScalarToVector => {
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let input_ty = ctx.input_ty(insn, 0);
+ if (input_ty == I32 && ty.unwrap() == I32X4)
+ || (input_ty == I64 && ty.unwrap() == I64X2)
+ {
+ ctx.emit(Inst::MovToFpu {
+ rd,
+ rn,
+ size: ScalarSize::from_ty(input_ty),
+ });
+ } else {
+ return Err(CodegenError::Unsupported(format!(
+ "ScalarToVector: unsupported types {:?} -> {:?}",
+ input_ty, ty
+ )));
+ }
+ }
+
+ Opcode::VanyTrue | Opcode::VallTrue => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty.unwrap());
+
+ // This operation is implemented by using umaxp or uminv to
+ // create a scalar value, which is then compared against zero.
+ //
+ // umaxp vn.16b, vm.16b, vm.16b / uminv bn, vm.16b
+ // mov xm, vn.d[0]
+ // cmp xm, #0
+ // cset xm, ne
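+ //
+ // For VanyTrue, umaxp folds the lanes pairwise, so the low 64 bits of the
+ // result are non-zero iff any input lane is non-zero; for VallTrue, uminv
+ // takes the minimum across all lanes, which is non-zero iff every lane is
+ // non-zero.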
+
+ let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
+
+ if op == Opcode::VanyTrue {
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Umaxp,
+ rd: tmp,
+ rn: rm,
+ rm: rm,
+ size,
+ });
+ } else {
+ ctx.emit(Inst::VecLanes {
+ op: VecLanesOp::Uminv,
+ rd: tmp,
+ rn: rm,
+ size,
+ });
+ };
+
+ ctx.emit(Inst::MovFromVec {
+ rd,
+ rn: tmp.to_reg(),
+ idx: 0,
+ size: VectorSize::Size64x2,
+ });
+
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS64,
+ rd: writable_zero_reg(),
+ rn: rd.to_reg(),
+ imm12: Imm12::zero(),
+ });
+
+ ctx.emit(Inst::CSet { rd, cond: Cond::Ne });
+ normalize_bool_result(ctx, insn, rd);
+ }
+
+ Opcode::VhighBits => {
+ let dst_r = get_output_reg(ctx, outputs[0]);
+ let src_v = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ctx.input_ty(insn, 0);
+ // All three sequences use one integer temporary and two vector temporaries. The
+ // shift is done early so as to give the register allocator the possibility of using
+ // the same reg for `tmp_v1` and `src_v` in the case that this is the last use of
+ // `src_v`. See https://github.com/WebAssembly/simd/pull/201 for the background and
+ // derivation of these sequences. Alternative sequences are discussed in
+ // https://github.com/bytecodealliance/wasmtime/issues/2296, although they are not
+ // used here.
+ // Also, FIXME: when https://github.com/bytecodealliance/wasmtime/pull/2310 is
+ // merged, use `lower_splat_constant` instead to generate the constants.
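+ //
+ // In outline: each lane is arithmetic-shifted right so that it becomes all
+ // ones or all zeros, an AND with a per-lane power-of-two mask leaves each
+ // lane holding only its own bit, and a horizontal add (addv) then collapses
+ // the lanes into the final bitmask in `dst_r`. The I8X16 case additionally
+ // interleaves the two 8-byte halves (ext/zip1) so that the low and high
+ // bytes of the 16-bit mask end up in the right places.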
+ let tmp_r0 = ctx.alloc_tmp(RegClass::I64, I64);
+ let tmp_v0 = ctx.alloc_tmp(RegClass::V128, I8X16);
+ let tmp_v1 = ctx.alloc_tmp(RegClass::V128, I8X16);
+ match ty {
+ I8X16 => {
+ // sshr tmp_v1.16b, src_v.16b, #7
+ // mov tmp_r0, #0x0201
+ // movk tmp_r0, #0x0804, lsl 16
+ // movk tmp_r0, #0x2010, lsl 32
+ // movk tmp_r0, #0x8040, lsl 48
+ // dup tmp_v0.2d, tmp_r0
+ // and tmp_v1.16b, tmp_v1.16b, tmp_v0.16b
+ // ext tmp_v0.16b, tmp_v1.16b, tmp_v1.16b, #8
+ // zip1 tmp_v0.16b, tmp_v1.16b, tmp_v0.16b
+ // addv tmp_v0h, tmp_v0.8h
+ // mov dst_r, tmp_v0.h[0]
+ ctx.emit(Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: tmp_v1,
+ rn: src_v,
+ size: VectorSize::Size8x16,
+ imm: 7,
+ });
+ lower_constant_u64(ctx, tmp_r0, 0x8040201008040201u64);
+ ctx.emit(Inst::VecDup {
+ rd: tmp_v0,
+ rn: tmp_r0.to_reg(),
+ size: VectorSize::Size64x2,
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::And,
+ rd: tmp_v1,
+ rn: tmp_v1.to_reg(),
+ rm: tmp_v0.to_reg(),
+ size: VectorSize::Size8x16,
+ });
+ ctx.emit(Inst::VecExtract {
+ rd: tmp_v0,
+ rn: tmp_v1.to_reg(),
+ rm: tmp_v1.to_reg(),
+ imm4: 8,
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Zip1,
+ rd: tmp_v0,
+ rn: tmp_v1.to_reg(),
+ rm: tmp_v0.to_reg(),
+ size: VectorSize::Size8x16,
+ });
+ ctx.emit(Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: tmp_v0,
+ rn: tmp_v0.to_reg(),
+ size: VectorSize::Size16x8,
+ });
+ ctx.emit(Inst::MovFromVec {
+ rd: dst_r,
+ rn: tmp_v0.to_reg(),
+ idx: 0,
+ size: VectorSize::Size16x8,
+ });
+ }
+ I16X8 => {
+ // sshr tmp_v1.8h, src_v.8h, #15
+ // mov tmp_r0, #0x1
+ // movk tmp_r0, #0x2, lsl 16
+ // movk tmp_r0, #0x4, lsl 32
+ // movk tmp_r0, #0x8, lsl 48
+ // dup tmp_v0.2d, tmp_r0
+ // shl tmp_r0, tmp_r0, #4
+ // mov tmp_v0.d[1], tmp_r0
+ // and tmp_v0.16b, tmp_v1.16b, tmp_v0.16b
+ // addv tmp_v0h, tmp_v0.8h
+ // mov dst_r, tmp_v0.h[0]
+ ctx.emit(Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: tmp_v1,
+ rn: src_v,
+ size: VectorSize::Size16x8,
+ imm: 15,
+ });
+ lower_constant_u64(ctx, tmp_r0, 0x0008000400020001u64);
+ ctx.emit(Inst::VecDup {
+ rd: tmp_v0,
+ rn: tmp_r0.to_reg(),
+ size: VectorSize::Size64x2,
+ });
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: tmp_r0,
+ rn: tmp_r0.to_reg(),
+ immshift: ImmShift { imm: 4 },
+ });
+ ctx.emit(Inst::MovToVec {
+ rd: tmp_v0,
+ rn: tmp_r0.to_reg(),
+ idx: 1,
+ size: VectorSize::Size64x2,
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::And,
+ rd: tmp_v0,
+ rn: tmp_v1.to_reg(),
+ rm: tmp_v0.to_reg(),
+ size: VectorSize::Size8x16,
+ });
+ ctx.emit(Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: tmp_v0,
+ rn: tmp_v0.to_reg(),
+ size: VectorSize::Size16x8,
+ });
+ ctx.emit(Inst::MovFromVec {
+ rd: dst_r,
+ rn: tmp_v0.to_reg(),
+ idx: 0,
+ size: VectorSize::Size16x8,
+ });
+ }
+ I32X4 => {
+ // sshr tmp_v1.4s, src_v.4s, #31
+ // mov tmp_r0, #0x1
+ // movk tmp_r0, #0x2, lsl 32
+ // dup tmp_v0.2d, tmp_r0
+ // shl tmp_r0, tmp_r0, #2
+ // mov tmp_v0.d[1], tmp_r0
+ // and tmp_v0.16b, tmp_v1.16b, tmp_v0.16b
+ // addv tmp_v0s, tmp_v0.4s
+ // mov dst_r, tmp_v0.s[0]
+ ctx.emit(Inst::VecShiftImm {
+ op: VecShiftImmOp::Sshr,
+ rd: tmp_v1,
+ rn: src_v,
+ size: VectorSize::Size32x4,
+ imm: 31,
+ });
+ lower_constant_u64(ctx, tmp_r0, 0x0000000200000001u64);
+ ctx.emit(Inst::VecDup {
+ rd: tmp_v0,
+ rn: tmp_r0.to_reg(),
+ size: VectorSize::Size64x2,
+ });
+ ctx.emit(Inst::AluRRImmShift {
+ alu_op: ALUOp::Lsl64,
+ rd: tmp_r0,
+ rn: tmp_r0.to_reg(),
+ immshift: ImmShift { imm: 2 },
+ });
+ ctx.emit(Inst::MovToVec {
+ rd: tmp_v0,
+ rn: tmp_r0.to_reg(),
+ idx: 1,
+ size: VectorSize::Size64x2,
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::And,
+ rd: tmp_v0,
+ rn: tmp_v1.to_reg(),
+ rm: tmp_v0.to_reg(),
+ size: VectorSize::Size8x16,
+ });
+ ctx.emit(Inst::VecLanes {
+ op: VecLanesOp::Addv,
+ rd: tmp_v0,
+ rn: tmp_v0.to_reg(),
+ size: VectorSize::Size32x4,
+ });
+ ctx.emit(Inst::MovFromVec {
+ rd: dst_r,
+ rn: tmp_v0.to_reg(),
+ idx: 0,
+ size: VectorSize::Size32x4,
+ });
+ }
+ _ => panic!("arm64 isel: VhighBits unhandled, ty = {:?}", ty),
+ }
+ }
+
+ Opcode::Shuffle => {
+ let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ // Two-register table vector lookups (TBL) require consecutive table registers;
+ // we satisfy this constraint by hardcoding the use of v29 and v30.
+ let temp = writable_vreg(29);
+ let temp2 = writable_vreg(30);
+ let input_ty = ctx.input_ty(insn, 0);
+ assert_eq!(input_ty, ctx.input_ty(insn, 1));
+ // Make sure that both inputs are in virtual registers, since it is
+ // not guaranteed that we can get them safely to the temporaries if
+ // either is in a real register.
+ let rn = ctx.ensure_in_vreg(rn, input_ty);
+ let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
+
+ lower_constant_f128(ctx, rd, mask);
+ ctx.emit(Inst::gen_move(temp, rn, input_ty));
+ ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
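+ // rd holds the shuffle mask (loaded above) and serves as the index operand
+ // of the two-register TBL; v29/v30 hold the table entries.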
+ ctx.emit(Inst::VecTbl2 {
+ rd,
+ rn: temp.to_reg(),
+ rn2: temp2.to_reg(),
+ rm: rd.to_reg(),
+ is_extension: false,
+ });
+ }
+
+ Opcode::Swizzle => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+ ctx.emit(Inst::VecTbl {
+ rd,
+ rn,
+ rm,
+ is_extension: false,
+ });
+ }
+
+ Opcode::Vsplit
+ | Opcode::Vconcat
+ | Opcode::Uload8x8Complex
+ | Opcode::Sload8x8Complex
+ | Opcode::Uload16x4Complex
+ | Opcode::Sload16x4Complex
+ | Opcode::Uload32x2Complex
+ | Opcode::Sload32x2Complex => {
+ // TODO
+ panic!("Vector ops not implemented.");
+ }
+
+ Opcode::Isplit | Opcode::Iconcat => panic!("Vector ops not supported."),
+
+ Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => {
+ let alu_op = match op {
+ Opcode::Umin => VecALUOp::Umin,
+ Opcode::Imin => VecALUOp::Smin,
+ Opcode::Umax => VecALUOp::Umax,
+ Opcode::Imax => VecALUOp::Smax,
+ _ => unreachable!(),
+ };
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::VecRRR {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+
+ Opcode::WideningPairwiseDotProductS => {
+ let r_y = get_output_reg(ctx, outputs[0]);
+ let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ if ty == I32X4 {
+ let tmp = ctx.alloc_tmp(RegClass::V128, I8X16);
+ // The args have type I16X8.
+ // "y = i32x4.dot_i16x8_s(a, b)"
+ // => smull tmp, a, b
+ // smull2 y, a, b
+ // addp y, tmp, y
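+ //
+ // smull/smull2 form the widened 32-bit products of the low and high halves
+ // of the inputs; addp then adds adjacent products, so each output lane is
+ // a[2i]*b[2i] + a[2i+1]*b[2i+1].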
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Smull,
+ rd: tmp,
+ rn: r_a,
+ rm: r_b,
+ size: VectorSize::Size16x8,
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Smull2,
+ rd: r_y,
+ rn: r_a,
+ rm: r_b,
+ size: VectorSize::Size16x8,
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Addp,
+ rd: r_y,
+ rn: tmp.to_reg(),
+ rm: r_y.to_reg(),
+ size: VectorSize::Size32x4,
+ });
+ } else {
+ return Err(CodegenError::Unsupported(format!(
+ "Opcode::WideningPairwiseDotProductS: unsupported laneage: {:?}",
+ ty
+ )));
+ }
+ }
+
+ Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
+ let ty = ty.unwrap();
+ let bits = ty_bits(ty);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ if !ty.is_vector() {
+ let fpu_op = match (op, bits) {
+ (Opcode::Fadd, 32) => FPUOp2::Add32,
+ (Opcode::Fadd, 64) => FPUOp2::Add64,
+ (Opcode::Fsub, 32) => FPUOp2::Sub32,
+ (Opcode::Fsub, 64) => FPUOp2::Sub64,
+ (Opcode::Fmul, 32) => FPUOp2::Mul32,
+ (Opcode::Fmul, 64) => FPUOp2::Mul64,
+ (Opcode::Fdiv, 32) => FPUOp2::Div32,
+ (Opcode::Fdiv, 64) => FPUOp2::Div64,
+ (Opcode::Fmin, 32) => FPUOp2::Min32,
+ (Opcode::Fmin, 64) => FPUOp2::Min64,
+ (Opcode::Fmax, 32) => FPUOp2::Max32,
+ (Opcode::Fmax, 64) => FPUOp2::Max64,
+ _ => panic!("Unknown op/bits combination"),
+ };
+ ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
+ } else {
+ let alu_op = match op {
+ Opcode::Fadd => VecALUOp::Fadd,
+ Opcode::Fsub => VecALUOp::Fsub,
+ Opcode::Fdiv => VecALUOp::Fdiv,
+ Opcode::Fmax => VecALUOp::Fmax,
+ Opcode::Fmin => VecALUOp::Fmin,
+ Opcode::Fmul => VecALUOp::Fmul,
+ _ => unreachable!(),
+ };
+
+ ctx.emit(Inst::VecRRR {
+ rd,
+ rn,
+ rm,
+ alu_op,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::FminPseudo | Opcode::FmaxPseudo => {
+ let ty = ctx.input_ty(insn, 0);
+ if ty == F32X4 || ty == F64X2 {
+ // pmin(a,b) => bitsel(b, a, cmpgt(a, b))
+ // pmax(a,b) => bitsel(b, a, cmpgt(b, a))
+ let r_dst = get_output_reg(ctx, outputs[0]);
+ let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ // Since we're going to write the output register `r_dst` anyway, we might as
+ // well first use it to hold the comparison result. This has the slightly unusual
+ // effect that we modify the output register in the first instruction (`fcmgt`)
+ // but read both the inputs again in the second instruction (`bsl`), which means
+ // that the output register can't be either of the input registers. Regalloc
+ // should handle this correctly, nevertheless.
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Fcmgt,
+ rd: r_dst,
+ rn: if op == Opcode::FminPseudo { r_a } else { r_b },
+ rm: if op == Opcode::FminPseudo { r_b } else { r_a },
+ size: if ty == F32X4 {
+ VectorSize::Size32x4
+ } else {
+ VectorSize::Size64x2
+ },
+ });
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Bsl,
+ rd: r_dst,
+ rn: r_b,
+ rm: r_a,
+ size: VectorSize::Size8x16,
+ });
+ } else {
+ panic!("Opcode::FminPseudo | Opcode::FmaxPseudo: unhandled type");
+ }
+ }
+
+ Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
+ let ty = ty.unwrap();
+ let bits = ty_bits(ty);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ if !ty.is_vector() {
+ let fpu_op = match (op, bits) {
+ (Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
+ (Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
+ (Opcode::Fneg, 32) => FPUOp1::Neg32,
+ (Opcode::Fneg, 64) => FPUOp1::Neg64,
+ (Opcode::Fabs, 32) => FPUOp1::Abs32,
+ (Opcode::Fabs, 64) => FPUOp1::Abs64,
+ (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
+ (Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
+ (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
+ (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
+ _ => panic!("Unknown op/bits combination"),
+ };
+ ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
+ } else {
+ let op = match op {
+ Opcode::Fabs => VecMisc2::Fabs,
+ Opcode::Fneg => VecMisc2::Fneg,
+ Opcode::Sqrt => VecMisc2::Fsqrt,
+ _ => unimplemented!(),
+ };
+
+ ctx.emit(Inst::VecMisc {
+ op,
+ rd,
+ rn,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ }
+
+ Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {
+ let ty = ctx.output_ty(insn, 0);
+ if !ty.is_vector() {
+ let bits = ty_bits(ty);
+ let op = match (op, bits) {
+ (Opcode::Ceil, 32) => FpuRoundMode::Plus32,
+ (Opcode::Ceil, 64) => FpuRoundMode::Plus64,
+ (Opcode::Floor, 32) => FpuRoundMode::Minus32,
+ (Opcode::Floor, 64) => FpuRoundMode::Minus64,
+ (Opcode::Trunc, 32) => FpuRoundMode::Zero32,
+ (Opcode::Trunc, 64) => FpuRoundMode::Zero64,
+ (Opcode::Nearest, 32) => FpuRoundMode::Nearest32,
+ (Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
+ _ => panic!("Unknown op/bits combination (scalar)"),
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::FpuRound { op, rd, rn });
+ } else {
+ let (op, size) = match (op, ty) {
+ (Opcode::Ceil, F32X4) => (VecMisc2::Frintp, VectorSize::Size32x4),
+ (Opcode::Ceil, F64X2) => (VecMisc2::Frintp, VectorSize::Size64x2),
+ (Opcode::Floor, F32X4) => (VecMisc2::Frintm, VectorSize::Size32x4),
+ (Opcode::Floor, F64X2) => (VecMisc2::Frintm, VectorSize::Size64x2),
+ (Opcode::Trunc, F32X4) => (VecMisc2::Frintz, VectorSize::Size32x4),
+ (Opcode::Trunc, F64X2) => (VecMisc2::Frintz, VectorSize::Size64x2),
+ (Opcode::Nearest, F32X4) => (VecMisc2::Frintn, VectorSize::Size32x4),
+ (Opcode::Nearest, F64X2) => (VecMisc2::Frintn, VectorSize::Size64x2),
+ _ => panic!("Unknown op/ty combination (vector){:?}", ty),
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::VecMisc { op, rd, rn, size });
+ }
+ }
+
+ Opcode::Fma => {
+ let bits = ty_bits(ctx.output_ty(insn, 0));
+ let fpu_op = match bits {
+ 32 => FPUOp3::MAdd32,
+ 64 => FPUOp3::MAdd64,
+ _ => panic!("Unknown op size"),
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ra = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::FpuRRRR {
+ fpu_op,
+ rn,
+ rm,
+ ra,
+ rd,
+ });
+ }
+
+ Opcode::Fcopysign => {
+ // Copy the sign bit from inputs[1] onto inputs[0]. This is a scalar Fcopysign;
+ // it uses scalar NEON operations for 64-bit values and vector operations (2S)
+ // for 32-bit values. The sequence is:
+ //
+ // mov vd, vn
+ // ushr vtmp, vm, #63 / #31
+ // sli vd, vtmp, #63 / #31
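+ //
+ // ushr moves the sign bit of rm down to bit 0 of the temporary; sli shifts
+ // it back up to the top bit and inserts it into rd, leaving the remaining
+ // bits (copied from rn) untouched.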
+
+ let ty = ctx.output_ty(insn, 0);
+ let bits = ty_bits(ty) as u8;
+ assert!(bits == 32 || bits == 64);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+ let tmp = ctx.alloc_tmp(RegClass::V128, F64);
+
+ // Copy LHS to rd.
+ ctx.emit(Inst::FpuMove64 { rd, rn });
+
+ // Copy the sign bit to the lowest bit in tmp.
+ let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
+ ctx.emit(Inst::FpuRRI {
+ fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
+ rd: tmp,
+ rn: rm,
+ });
+
+ // Insert the bit from tmp into the sign bit of rd.
+ let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
+ ctx.emit(Inst::FpuRRI {
+ fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
+ rd,
+ rn: tmp.to_reg(),
+ });
+ }
+
+ Opcode::FcvtToUint | Opcode::FcvtToSint => {
+ let in_bits = ty_bits(ctx.input_ty(insn, 0));
+ let out_bits = ty_bits(ctx.output_ty(insn, 0));
+ let signed = op == Opcode::FcvtToSint;
+ let op = match (signed, in_bits, out_bits) {
+ (false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
+ (true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
+ (false, 32, 64) => FpuToIntOp::F32ToU64,
+ (true, 32, 64) => FpuToIntOp::F32ToI64,
+ (false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
+ (true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
+ (false, 64, 64) => FpuToIntOp::F64ToU64,
+ (true, 64, 64) => FpuToIntOp::F64ToI64,
+ _ => panic!("Unknown input/output-bits combination"),
+ };
+
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+
+ // Ordering matters here: per wasm semantics, the NaN check must be performed
+ // before the in-bounds (overflow) checks.
+
+ // Check that the input is not a NaN.
+ if in_bits == 32 {
+ ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
+ } else {
+ ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
+ }
+ let trap_code = TrapCode::BadConversionToInteger;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
+ });
+
+ let tmp = ctx.alloc_tmp(RegClass::V128, I128);
+
+ // Check that the input is in range, with "truncate towards zero" semantics. This means
+ // we allow values that are slightly out of range:
+ // - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
+ // can be represented), and strictly less than INT_MAX+1 (when this can be
+ // represented).
+ // - for unsigned conversions, we allow values strictly greater than -1, and strictly
+ // less than UINT_MAX+1 (when this can be represented).
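+ //
+ // Where INT_MIN - 1 is not exactly representable in the source float type,
+ // the lower bound becomes INT_MIN itself and the comparison >= rather than
+ // > (see the per-case comments below).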
+
+ if in_bits == 32 {
+ // From float32.
+ let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
+ (true, 8) => (
+ i8::min_value() as f32 - 1.,
+ FloatCC::GreaterThan,
+ i8::max_value() as f32 + 1.,
+ ),
+ (true, 16) => (
+ i16::min_value() as f32 - 1.,
+ FloatCC::GreaterThan,
+ i16::max_value() as f32 + 1.,
+ ),
+ (true, 32) => (
+ i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
+ FloatCC::GreaterThanOrEqual,
+ i32::max_value() as f32 + 1.,
+ ),
+ (true, 64) => (
+ i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
+ FloatCC::GreaterThanOrEqual,
+ i64::max_value() as f32 + 1.,
+ ),
+ (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
+ (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
+ (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
+ (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
+ _ => panic!("Unknown input/output-bits combination"),
+ };
+
+ // >= low_bound
+ lower_constant_f32(ctx, tmp, low_bound);
+ ctx.emit(Inst::FpuCmp32 {
+ rn,
+ rm: tmp.to_reg(),
+ });
+ let trap_code = TrapCode::IntegerOverflow;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
+ });
+
+ // <= high_bound
+ lower_constant_f32(ctx, tmp, high_bound);
+ ctx.emit(Inst::FpuCmp32 {
+ rn,
+ rm: tmp.to_reg(),
+ });
+ let trap_code = TrapCode::IntegerOverflow;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
+ });
+ } else {
+ // From float64.
+ let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
+ (true, 8) => (
+ i8::min_value() as f64 - 1.,
+ FloatCC::GreaterThan,
+ i8::max_value() as f64 + 1.,
+ ),
+ (true, 16) => (
+ i16::min_value() as f64 - 1.,
+ FloatCC::GreaterThan,
+ i16::max_value() as f64 + 1.,
+ ),
+ (true, 32) => (
+ i32::min_value() as f64 - 1.,
+ FloatCC::GreaterThan,
+ i32::max_value() as f64 + 1.,
+ ),
+ (true, 64) => (
+ i64::min_value() as f64, // I64_MIN - 1 isn't precisely representable as an f64.
+ FloatCC::GreaterThanOrEqual,
+ i64::max_value() as f64 + 1.,
+ ),
+ (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
+ (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
+ (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
+ (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
+ _ => panic!("Unknown input/output-bits combination"),
+ };
+
+ // >= low_bound
+ lower_constant_f64(ctx, tmp, low_bound);
+ ctx.emit(Inst::FpuCmp64 {
+ rn,
+ rm: tmp.to_reg(),
+ });
+ let trap_code = TrapCode::IntegerOverflow;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
+ });
+
+ // <= high_bound
+ lower_constant_f64(ctx, tmp, high_bound);
+ ctx.emit(Inst::FpuCmp64 {
+ rn,
+ rm: tmp.to_reg(),
+ });
+ let trap_code = TrapCode::IntegerOverflow;
+ ctx.emit(Inst::TrapIf {
+ trap_code,
+ kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
+ });
+ };
+
+ // Do the conversion.
+ ctx.emit(Inst::FpuToInt { op, rd, rn });
+ }
+
+ Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
+ let ty = ty.unwrap();
+ let signed = op == Opcode::FcvtFromSint;
+ let rd = get_output_reg(ctx, outputs[0]);
+
+ if ty.is_vector() {
+ let op = if signed {
+ VecMisc2::Scvtf
+ } else {
+ VecMisc2::Ucvtf
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+ ctx.emit(Inst::VecMisc {
+ op,
+ rd,
+ rn,
+ size: VectorSize::from_ty(ty),
+ });
+ } else {
+ let in_bits = ty_bits(ctx.input_ty(insn, 0));
+ let out_bits = ty_bits(ty);
+ let op = match (signed, in_bits, out_bits) {
+ (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
+ (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
+ (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
+ (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
+ (false, 64, 32) => IntToFpuOp::U64ToF32,
+ (true, 64, 32) => IntToFpuOp::I64ToF32,
+ (false, 64, 64) => IntToFpuOp::U64ToF64,
+ (true, 64, 64) => IntToFpuOp::I64ToF64,
+ _ => panic!("Unknown input/output-bits combination"),
+ };
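+ // 8- and 16-bit inputs must first be extended in a GPR, since the
+ // conversion forms chosen above operate on 32- or 64-bit registers.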
+ let narrow_mode = match (signed, in_bits) {
+ (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
+ (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
+ (false, 64) => NarrowValueMode::ZeroExtend64,
+ (true, 64) => NarrowValueMode::SignExtend64,
+ _ => panic!("Unknown input size"),
+ };
+ let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+ ctx.emit(Inst::IntToFpu { op, rd, rn });
+ }
+ }
+
+ Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
+ let ty = ty.unwrap();
+ let out_signed = op == Opcode::FcvtToSintSat;
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = get_output_reg(ctx, outputs[0]);
+
+ if ty.is_vector() {
+ let op = if out_signed {
+ VecMisc2::Fcvtzs
+ } else {
+ VecMisc2::Fcvtzu
+ };
+
+ ctx.emit(Inst::VecMisc {
+ op,
+ rd,
+ rn,
+ size: VectorSize::from_ty(ty),
+ });
+ } else {
+ let in_ty = ctx.input_ty(insn, 0);
+ let in_bits = ty_bits(in_ty);
+ let out_bits = ty_bits(ty);
+ // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
+ // FMIN Vtmp2, Vin, Vtmp1
+ // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
+ // FMAX Vtmp2, Vtmp2, Vtmp1
+ // (if signed) FIMM Vtmp1, 0
+ // FCMP Vin, Vin
+ // FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
+ // convert Rout, Vtmp2
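+ //
+ // In effect: clamp the input to [min, max] with fmin/fmax, force NaN inputs
+ // to zero via the fcsel, and then convert; the clamped value is always in
+ // range, so the final conversion yields the saturated result.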
+
+ assert!(in_bits == 32 || in_bits == 64);
+ assert!(out_bits == 32 || out_bits == 64);
+
+ let min: f64 = match (out_bits, out_signed) {
+ (32, true) => std::i32::MIN as f64,
+ (32, false) => 0.0,
+ (64, true) => std::i64::MIN as f64,
+ (64, false) => 0.0,
+ _ => unreachable!(),
+ };
+
+ let max = match (out_bits, out_signed) {
+ (32, true) => std::i32::MAX as f64,
+ (32, false) => std::u32::MAX as f64,
+ (64, true) => std::i64::MAX as f64,
+ (64, false) => std::u64::MAX as f64,
+ _ => unreachable!(),
+ };
+
+ let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
+ let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
+
+ if in_bits == 32 {
+ lower_constant_f32(ctx, rtmp1, max as f32);
+ } else {
+ lower_constant_f64(ctx, rtmp1, max);
+ }
+ ctx.emit(Inst::FpuRRR {
+ fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
+ rd: rtmp2,
+ rn: rn,
+ rm: rtmp1.to_reg(),
+ });
+ if in_bits == 32 {
+ lower_constant_f32(ctx, rtmp1, min as f32);
+ } else {
+ lower_constant_f64(ctx, rtmp1, min);
+ }
+ ctx.emit(Inst::FpuRRR {
+ fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
+ rd: rtmp2,
+ rn: rtmp2.to_reg(),
+ rm: rtmp1.to_reg(),
+ });
+ if out_signed {
+ if in_bits == 32 {
+ lower_constant_f32(ctx, rtmp1, 0.0);
+ } else {
+ lower_constant_f64(ctx, rtmp1, 0.0);
+ }
+ }
+ if in_bits == 32 {
+ ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
+ ctx.emit(Inst::FpuCSel32 {
+ rd: rtmp2,
+ rn: rtmp1.to_reg(),
+ rm: rtmp2.to_reg(),
+ cond: Cond::Ne,
+ });
+ } else {
+ ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
+ ctx.emit(Inst::FpuCSel64 {
+ rd: rtmp2,
+ rn: rtmp1.to_reg(),
+ rm: rtmp2.to_reg(),
+ cond: Cond::Ne,
+ });
+ }
+
+ let cvt = match (in_bits, out_bits, out_signed) {
+ (32, 32, false) => FpuToIntOp::F32ToU32,
+ (32, 32, true) => FpuToIntOp::F32ToI32,
+ (32, 64, false) => FpuToIntOp::F32ToU64,
+ (32, 64, true) => FpuToIntOp::F32ToI64,
+ (64, 32, false) => FpuToIntOp::F64ToU32,
+ (64, 32, true) => FpuToIntOp::F64ToI32,
+ (64, 64, false) => FpuToIntOp::F64ToU64,
+ (64, 64, true) => FpuToIntOp::F64ToI64,
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::FpuToInt {
+ op: cvt,
+ rd,
+ rn: rtmp2.to_reg(),
+ });
+ }
+ }
+
+ Opcode::IaddIfcout => {
+ // This is a two-output instruction that is needed for the
+ // legalizer's explicit heap-check sequence, among other possible
+ // uses. Its second output is a flags output, only ever meant to
+ // check for overflow using the
+ // `backend.unsigned_add_overflow_condition()` condition.
+ //
+ // Note that the CLIF validation will ensure that no flag-setting
+ // operation comes between this IaddIfcout and its use (e.g., a
+ // Trapif). Thus, we can rely on implicit communication through the
+ // processor flags rather than explicitly generating flags into a
+ // register. We simply use the variant of the add instruction that
+ // sets flags (`adds`) here.
+
+ // Ensure that the second output isn't directly called for: it
+ // should only be used by a flags-consuming op, which will directly
+ // understand this instruction and merge the comparison.
+ assert!(!ctx.is_reg_needed(insn, ctx.get_output(insn, 1).to_reg()));
+
+ // Now handle the iadd as above, except use an AddS opcode that sets
+ // flags.
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64);
+ ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+ }
+
+ Opcode::IaddImm
+ | Opcode::ImulImm
+ | Opcode::UdivImm
+ | Opcode::SdivImm
+ | Opcode::UremImm
+ | Opcode::SremImm
+ | Opcode::IrsubImm
+ | Opcode::IaddCin
+ | Opcode::IaddIfcin
+ | Opcode::IaddCout
+ | Opcode::IaddCarry
+ | Opcode::IaddIfcarry
+ | Opcode::IsubBin
+ | Opcode::IsubIfbin
+ | Opcode::IsubBout
+ | Opcode::IsubIfbout
+ | Opcode::IsubBorrow
+ | Opcode::IsubIfborrow
+ | Opcode::BandImm
+ | Opcode::BorImm
+ | Opcode::BxorImm
+ | Opcode::RotlImm
+ | Opcode::RotrImm
+ | Opcode::IshlImm
+ | Opcode::UshrImm
+ | Opcode::SshrImm
+ | Opcode::IcmpImm
+ | Opcode::IfcmpImm => {
+ panic!("ALU+imm and ALU+carry ops should not appear here!");
+ }
+
+ #[cfg(feature = "x86")]
+ Opcode::X86Udivmodx
+ | Opcode::X86Sdivmodx
+ | Opcode::X86Umulx
+ | Opcode::X86Smulx
+ | Opcode::X86Cvtt2si
+ | Opcode::X86Fmin
+ | Opcode::X86Fmax
+ | Opcode::X86Push
+ | Opcode::X86Pop
+ | Opcode::X86Bsr
+ | Opcode::X86Bsf
+ | Opcode::X86Pblendw
+ | Opcode::X86Pshufd
+ | Opcode::X86Pshufb
+ | Opcode::X86Pextr
+ | Opcode::X86Pinsr
+ | Opcode::X86Insertps
+ | Opcode::X86Movsd
+ | Opcode::X86Movlhps
+ | Opcode::X86Palignr
+ | Opcode::X86Psll
+ | Opcode::X86Psrl
+ | Opcode::X86Psra
+ | Opcode::X86Ptest
+ | Opcode::X86Pmaxs
+ | Opcode::X86Pmaxu
+ | Opcode::X86Pmins
+ | Opcode::X86Pminu
+ | Opcode::X86Pmullq
+ | Opcode::X86Pmuludq
+ | Opcode::X86Punpckh
+ | Opcode::X86Punpckl
+ | Opcode::X86Vcvtudq2ps
+ | Opcode::X86ElfTlsGetAddr
+ | Opcode::X86MachoTlsGetAddr => {
+ panic!("x86-specific opcode in supposedly arch-neutral IR!");
+ }
+
+ Opcode::DummySargT => unreachable!(),
+
+ Opcode::Iabs => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::VecMisc {
+ op: VecMisc2::Abs,
+ rd,
+ rn,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+ Opcode::AvgRound => {
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::VecRRR {
+ alu_op: VecALUOp::Urhadd,
+ rd,
+ rn,
+ rm,
+ size: VectorSize::from_ty(ty),
+ });
+ }
+
+ Opcode::Snarrow | Opcode::Unarrow => {
+ let op = if op == Opcode::Snarrow {
+ VecMiscNarrowOp::Sqxtn
+ } else {
+ VecMiscNarrowOp::Sqxtun
+ };
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+ let ty = ty.unwrap();
+
+ ctx.emit(Inst::VecMiscNarrow {
+ op,
+ rd,
+ rn,
+ size: VectorSize::from_ty(ty),
+ high_half: false,
+ });
+ ctx.emit(Inst::VecMiscNarrow {
+ op,
+ rd,
+ rn: rn2,
+ size: VectorSize::from_ty(ty),
+ high_half: true,
+ });
+ }
+
+ Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
+ let lane_type = ty.unwrap().lane_type();
+ let rd = get_output_reg(ctx, outputs[0]);
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+ let (t, high_half) = match (lane_type, op) {
+ (I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
+ (I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
+ (I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
+ (I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
+ (I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
+ (I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
+ (I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
+ (I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
+ _ => {
+ return Err(CodegenError::Unsupported(format!(
+ "Unsupported SIMD vector lane type: {:?}",
+ lane_type
+ )));
+ }
+ };
+
+ ctx.emit(Inst::VecExtend {
+ t,
+ rd,
+ rn,
+ high_half,
+ });
+ }
+
+ Opcode::TlsValue => unimplemented!("tls_value"),
+ }
+
+ Ok(())
+}
+
+pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ branches: &[IRInst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+) -> CodegenResult<()> {
+ // A block should end with at most two branches. The first may be a
+ // conditional branch; a conditional branch can be followed only by an
+ // unconditional branch or fallthrough. Otherwise, if only one branch,
+ // it may be an unconditional branch, a fallthrough, a return, or a
+ // trap. These conditions are verified by `is_ebb_basic()` during the
+ // verifier pass.
+ assert!(branches.len() <= 2);
+
+ if branches.len() == 2 {
+ // Must be a conditional branch followed by an unconditional branch.
+ let op0 = ctx.data(branches[0]).opcode();
+ let op1 = ctx.data(branches[1]).opcode();
+
+ assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
+ let taken = BranchTarget::Label(targets[0]);
+ let not_taken = match op1 {
+ Opcode::Jump => BranchTarget::Label(targets[1]),
+ Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
+ _ => unreachable!(), // assert above.
+ };
+
+ match op0 {
+ Opcode::Brz | Opcode::Brnz => {
+ let flag_input = InsnInput {
+ insn: branches[0],
+ input: 0,
+ };
+ if let Some(icmp_insn) =
+ maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
+ {
+ let condcode = ctx.data(icmp_insn).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
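+ // Brz branches when the value is zero, i.e. when the comparison that
+ // produced it was false, so the condition must be inverted in that case.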
+ let negated = op0 == Opcode::Brz;
+ let cond = if negated { cond.invert() } else { cond };
+
+ lower_icmp_or_ifcmp_to_flags(ctx, icmp_insn, is_signed);
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind: CondBrKind::Cond(cond),
+ });
+ } else if let Some(fcmp_insn) =
+ maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
+ {
+ let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
+ let cond = lower_fp_condcode(condcode);
+ let negated = op0 == Opcode::Brz;
+ let cond = if negated { cond.invert() } else { cond };
+
+ lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind: CondBrKind::Cond(cond),
+ });
+ } else {
+ let rt = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ NarrowValueMode::ZeroExtend64,
+ );
+ let kind = match op0 {
+ Opcode::Brz => CondBrKind::Zero(rt),
+ Opcode::Brnz => CondBrKind::NotZero(rt),
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ });
+ }
+ }
+ Opcode::BrIcmp => {
+ let condcode = ctx.data(branches[0]).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let kind = CondBrKind::Cond(cond);
+
+ let is_signed = condcode_is_signed(condcode);
+ let ty = ctx.input_ty(branches[0], 0);
+ let bits = ty_bits(ty);
+ let narrow_mode = match (bits <= 32, is_signed) {
+ (true, true) => NarrowValueMode::SignExtend32,
+ (true, false) => NarrowValueMode::ZeroExtend32,
+ (false, true) => NarrowValueMode::SignExtend64,
+ (false, false) => NarrowValueMode::ZeroExtend64,
+ };
+ let rn = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ narrow_mode,
+ );
+ let rm = put_input_in_rse_imm12(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 1,
+ },
+ narrow_mode,
+ );
+
+ let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
+ let rd = writable_zero_reg();
+ ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ });
+ }
+
+ Opcode::Brif => {
+ let condcode = ctx.data(branches[0]).cond_code().unwrap();
+ let cond = lower_condcode(condcode);
+ let kind = CondBrKind::Cond(cond);
+
+ let is_signed = condcode_is_signed(condcode);
+ let flag_input = InsnInput {
+ insn: branches[0],
+ input: 0,
+ };
+ if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) {
+ lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ });
+ } else {
+ // If the ifcmp result is actually placed in a
+ // register, we need to move it back into the flags.
+ let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
+ ctx.emit(Inst::MovToNZCV { rn });
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ });
+ }
+ }
+
+ Opcode::Brff => {
+ let condcode = ctx.data(branches[0]).fp_cond_code().unwrap();
+ let cond = lower_fp_condcode(condcode);
+ let kind = CondBrKind::Cond(cond);
+ let flag_input = InsnInput {
+ insn: branches[0],
+ input: 0,
+ };
+ if let Some(ffcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ffcmp) {
+ lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ });
+ } else {
+ // If the ffcmp result is actually placed in a
+ // register, we need to move it back into the flags.
+ let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
+ ctx.emit(Inst::MovToNZCV { rn });
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ kind,
+ });
+ }
+ }
+
+ _ => unimplemented!(),
+ }
+ } else {
+ // Must be an unconditional branch or an indirect branch.
+ let op = ctx.data(branches[0]).opcode();
+ match op {
+ Opcode::Jump | Opcode::Fallthrough => {
+ assert!(branches.len() == 1);
+ // In the Fallthrough case, the machine-independent driver
+ // fills in `targets[0]` with our fallthrough block, so this
+ // is valid for both Jump and Fallthrough.
+ ctx.emit(Inst::Jump {
+ dest: BranchTarget::Label(targets[0]),
+ });
+ }
+
+ Opcode::BrTable => {
+ // Expand `br_table index, default, JT` to:
+ //
+ // emit_island // this forces an island at this point
+ // // if the jumptable would push us past
+ // // the deadline
+ // subs idx, #jt_size
+ // b.hs default
+ // adr vTmp1, PC+16
+ // ldr vTmp2, [vTmp1, idx, lsl #2]
+ // add vTmp2, vTmp2, vTmp1
+ // br vTmp2
+ // [jumptable offsets relative to JT base]
+ let jt_size = targets.len() - 1;
+ assert!(jt_size <= std::u32::MAX as usize);
+
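+ // Reserve enough space for the whole compound jump-table sequence plus one
+ // 32-bit offset per entry, so that no constant-pool island can be emitted
+ // in the middle of it.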
+ ctx.emit(Inst::EmitIsland {
+ needed_space: 4 * (6 + jt_size) as CodeOffset,
+ });
+
+ let ridx = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ NarrowValueMode::ZeroExtend32,
+ );
+
+ let rtmp1 = ctx.alloc_tmp(RegClass::I64, I32);
+ let rtmp2 = ctx.alloc_tmp(RegClass::I64, I32);
+
+ // Bounds-check, leaving condition codes for JTSequence's
+ // branch to default target below.
+ if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: ALUOp::SubS32,
+ rd: writable_zero_reg(),
+ rn: ridx,
+ imm12,
+ });
+ } else {
+ lower_constant_u64(ctx, rtmp1, jt_size as u64);
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::SubS32,
+ rd: writable_zero_reg(),
+ rn: ridx,
+ rm: rtmp1.to_reg(),
+ });
+ }
+
+ // Emit the compound instruction that does:
+ //
+ // b.hs default
+ // adr rA, jt
+ // ldrsw rB, [rA, rIndex, UXTW 2]
+ // add rA, rA, rB
+ // br rA
+ // [jt entries]
+ //
+ // This must be *one* instruction in the vcode because
+ // we cannot allow regalloc to insert any spills/fills
+ // in the middle of the sequence; otherwise, the ADR's
+ // PC-rel offset to the jumptable would be incorrect.
+ // (The alternative is to introduce a relocation pass
+ // for inlined jumptables, which is much worse, IMHO.)
+
+ let jt_targets: Vec<BranchTarget> = targets
+ .iter()
+ .skip(1)
+ .map(|bix| BranchTarget::Label(*bix))
+ .collect();
+ let default_target = BranchTarget::Label(targets[0]);
+ let targets_for_term: Vec<MachLabel> = targets.to_vec();
+ ctx.emit(Inst::JTSequence {
+ ridx,
+ rtmp1,
+ rtmp2,
+ info: Box::new(JTSequenceInfo {
+ targets: jt_targets,
+ default_target,
+ targets_for_term,
+ }),
+ });
+ }
+
+ _ => panic!("Unknown branch type!"),
+ }
+ }
+
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/mod.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/mod.rs
new file mode 100644
index 0000000000..c3c56632d3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/mod.rs
@@ -0,0 +1,274 @@
+//! ARM 64-bit Instruction Set Architecture.
+
+use crate::ir::condcodes::IntCC;
+use crate::ir::Function;
+use crate::isa::Builder as IsaBuilder;
+use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
+use crate::result::CodegenResult;
+use crate::settings;
+
+use alloc::boxed::Box;
+
+use regalloc::{PrettyPrint, RealRegUniverse};
+use target_lexicon::{Aarch64Architecture, Architecture, Triple};
+
+// New backend:
+mod abi;
+pub(crate) mod inst;
+mod lower;
+mod lower_inst;
+
+use inst::create_reg_universe;
+
+use self::inst::EmitInfo;
+
+/// An AArch64 backend.
+pub struct AArch64Backend {
+ triple: Triple,
+ flags: settings::Flags,
+ reg_universe: RealRegUniverse,
+}
+
+impl AArch64Backend {
+ /// Create a new AArch64 backend with the given (shared) flags.
+ pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> AArch64Backend {
+ let reg_universe = create_reg_universe(&flags);
+ AArch64Backend {
+ triple,
+ flags,
+ reg_universe,
+ }
+ }
+
+ /// This performs lowering to VCode, register-allocates the code, computes block layout and
+ /// finalizes branches. The result is ready for binary emission.
+ fn compile_vcode(
+ &self,
+ func: &Function,
+ flags: settings::Flags,
+ ) -> CodegenResult<VCode<inst::Inst>> {
+ let emit_info = EmitInfo::new(flags.clone());
+ let abi = Box::new(abi::AArch64ABICallee::new(func, flags)?);
+ compile::compile::<AArch64Backend>(func, self, abi, emit_info)
+ }
+}
+
+impl MachBackend for AArch64Backend {
+ fn compile_function(
+ &self,
+ func: &Function,
+ want_disasm: bool,
+ ) -> CodegenResult<MachCompileResult> {
+ let flags = self.flags();
+ let vcode = self.compile_vcode(func, flags.clone())?;
+
+ let buffer = vcode.emit();
+ let frame_size = vcode.frame_size();
+ let unwind_info = vcode.unwind_info()?;
+
+ let disasm = if want_disasm {
+ Some(vcode.show_rru(Some(&create_reg_universe(flags))))
+ } else {
+ None
+ };
+
+ let buffer = buffer.finish();
+
+ Ok(MachCompileResult {
+ buffer,
+ frame_size,
+ disasm,
+ unwind_info,
+ })
+ }
+
+ fn name(&self) -> &'static str {
+ "aarch64"
+ }
+
+ fn triple(&self) -> Triple {
+ self.triple.clone()
+ }
+
+ fn flags(&self) -> &settings::Flags {
+ &self.flags
+ }
+
+ fn reg_universe(&self) -> &RealRegUniverse {
+ &self.reg_universe
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> IntCC {
+ // Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on
+ // overflow of an add.
+ IntCC::UnsignedGreaterThanOrEqual
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> IntCC {
+ // unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on
+ // underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the
+ // opposite of x86).
+ IntCC::UnsignedLessThan
+ }
+
+ #[cfg(feature = "unwind")]
+ fn emit_unwind_info(
+ &self,
+ result: &MachCompileResult,
+ kind: crate::machinst::UnwindInfoKind,
+ ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+ use crate::isa::unwind::UnwindInfo;
+ use crate::machinst::UnwindInfoKind;
+ Ok(match (result.unwind_info.as_ref(), kind) {
+ (Some(info), UnwindInfoKind::SystemV) => {
+ inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
+ }
+ (Some(_info), UnwindInfoKind::Windows) => {
+ // TODO: support Windows unwind info on AArch64
+ None
+ }
+ _ => None,
+ })
+ }
+
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ Some(inst::unwind::systemv::create_cie())
+ }
+}
+
+/// Create a new `isa::Builder`.
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+ assert!(triple.architecture == Architecture::Aarch64(Aarch64Architecture::Aarch64));
+ IsaBuilder {
+ triple,
+ setup: settings::builder(),
+ constructor: |triple, shared_flags, _| {
+ let backend = AArch64Backend::new_with_flags(triple, shared_flags);
+ Box::new(TargetIsaAdapter::new(backend))
+ },
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::types::*;
+ use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+ use crate::isa::CallConv;
+ use crate::settings;
+ use crate::settings::Configurable;
+ use core::str::FromStr;
+ use target_lexicon::Triple;
+
+ #[test]
+ fn test_compile_function() {
+ let name = ExternalName::testcase("test0");
+ let mut sig = Signature::new(CallConv::SystemV);
+ sig.params.push(AbiParam::new(I32));
+ sig.returns.push(AbiParam::new(I32));
+ let mut func = Function::with_name_signature(name, sig);
+
+ let bb0 = func.dfg.make_block();
+ let arg0 = func.dfg.append_block_param(bb0, I32);
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(bb0);
+ let v0 = pos.ins().iconst(I32, 0x1234);
+ let v1 = pos.ins().iadd(arg0, v0);
+ pos.ins().return_(&[v1]);
+
+ let mut shared_flags = settings::builder();
+ shared_flags.set("opt_level", "none").unwrap();
+ let backend = AArch64Backend::new_with_flags(
+ Triple::from_str("aarch64").unwrap(),
+ settings::Flags::new(shared_flags),
+ );
+ let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
+ let code = &buffer.data[..];
+
+ // stp x29, x30, [sp, #-16]!
+ // mov x29, sp
+ // mov x1, #0x1234
+ // add w0, w0, w1
+ // mov sp, x29
+ // ldp x29, x30, [sp], #16
+ // ret
+ let golden = vec![
+ 0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
+ 0x01, 0x0b, 0xbf, 0x03, 0x00, 0x91, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
+ ];
+
+ assert_eq!(code, &golden[..]);
+ }
+
+ #[test]
+ fn test_branch_lowering() {
+ let name = ExternalName::testcase("test0");
+ let mut sig = Signature::new(CallConv::SystemV);
+ sig.params.push(AbiParam::new(I32));
+ sig.returns.push(AbiParam::new(I32));
+ let mut func = Function::with_name_signature(name, sig);
+
+ let bb0 = func.dfg.make_block();
+ let arg0 = func.dfg.append_block_param(bb0, I32);
+ let bb1 = func.dfg.make_block();
+ let bb2 = func.dfg.make_block();
+ let bb3 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(bb0);
+ let v0 = pos.ins().iconst(I32, 0x1234);
+ let v1 = pos.ins().iadd(arg0, v0);
+ pos.ins().brnz(v1, bb1, &[]);
+ pos.ins().jump(bb2, &[]);
+ pos.insert_block(bb1);
+ pos.ins().brnz(v1, bb2, &[]);
+ pos.ins().jump(bb3, &[]);
+ pos.insert_block(bb2);
+ let v2 = pos.ins().iadd(v1, v0);
+ pos.ins().brnz(v2, bb2, &[]);
+ pos.ins().jump(bb1, &[]);
+ pos.insert_block(bb3);
+ let v3 = pos.ins().isub(v1, v0);
+ pos.ins().return_(&[v3]);
+
+ let mut shared_flags = settings::builder();
+ shared_flags.set("opt_level", "none").unwrap();
+ let backend = AArch64Backend::new_with_flags(
+ Triple::from_str("aarch64").unwrap(),
+ settings::Flags::new(shared_flags),
+ );
+ let result = backend
+ .compile_function(&mut func, /* want_disasm = */ false)
+ .unwrap();
+ let code = &result.buffer.data[..];
+
+ // stp x29, x30, [sp, #-16]!
+ // mov x29, sp
+ // mov x1, #0x1234 // #4660
+ // add w0, w0, w1
+ // mov w1, w0
+ // cbnz x1, 0x28
+ // mov x1, #0x1234 // #4660
+ // add w1, w0, w1
+ // mov w1, w1
+ // cbnz x1, 0x18
+ // mov w1, w0
+ // cbnz x1, 0x18
+ // mov x1, #0x1234 // #4660
+ // sub w0, w0, w1
+ // mov sp, x29
+ // ldp x29, x30, [sp], #16
+ // ret
+ let golden = vec![
+ 253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
+ 0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
+ 0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 191, 3, 0, 145, 253, 123,
+ 193, 168, 192, 3, 95, 214,
+ ];
+
+ assert_eq!(code, &golden[..]);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/abi.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/abi.rs
new file mode 100644
index 0000000000..edf1792e52
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/abi.rs
@@ -0,0 +1,471 @@
+//! Implementation of the 32-bit ARM ABI.
+
+use crate::ir;
+use crate::ir::types::*;
+use crate::isa;
+use crate::isa::arm32::inst::*;
+use crate::machinst::*;
+use crate::settings;
+use crate::{CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use regalloc::{RealReg, Reg, RegClass, Set, Writable};
+use smallvec::SmallVec;
+
+/// Support for the ARM ABI from the callee side (within a function body).
+pub(crate) type Arm32ABICallee = ABICalleeImpl<Arm32MachineDeps>;
+
+/// Support for the ARM ABI from the caller side (at a callsite).
+pub(crate) type Arm32ABICaller = ABICallerImpl<Arm32MachineDeps>;
+
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+
+/// ARM-specific ABI behavior. This struct just serves as an implementation
+/// point for the trait; it is never actually instantiated.
+pub(crate) struct Arm32MachineDeps;
+
+impl Into<AMode> for StackAMode {
+ fn into(self) -> AMode {
+ match self {
+ StackAMode::FPOffset(off, ty) => AMode::FPOffset(off, ty),
+ StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset(off, ty),
+ StackAMode::SPOffset(off, ty) => AMode::SPOffset(off, ty),
+ }
+ }
+}
+
+impl ABIMachineSpec for Arm32MachineDeps {
+ type I = Inst;
+
+ fn word_bits() -> u32 {
+ 32
+ }
+
+ /// Return required stack alignment in bytes.
+ fn stack_align(_call_conv: isa::CallConv) -> u32 {
+ 8
+ }
+
+ fn compute_arg_locs(
+ _call_conv: isa::CallConv,
+ params: &[ir::AbiParam],
+ args_or_rets: ArgsOrRets,
+ add_ret_area_ptr: bool,
+ ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+ let mut next_rreg = 0;
+ let mut next_stack: u64 = 0;
+ let mut ret = vec![];
+ let mut stack_args = vec![];
+
+ let max_reg_val = 4; // r0-r3
+
+ for i in 0..params.len() {
+ let param = params[i];
+
+ // Validate "purpose".
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext
+ | &ir::ArgumentPurpose::Normal
+ | &ir::ArgumentPurpose::StackLimit
+ | &ir::ArgumentPurpose::SignatureId => {}
+ _ => panic!(
+ "Unsupported argument purpose {:?} in signature: {:?}",
+ param.purpose, params
+ ),
+ }
+ assert!(param.value_type.bits() <= 32);
+
+ if next_rreg < max_reg_val {
+ let reg = rreg(next_rreg);
+
+ ret.push(ABIArg::Reg(
+ reg.to_real_reg(),
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ next_rreg += 1;
+ } else {
+ // Arguments are stored on the stack in reverse order.
+ // https://static.docs.arm.com/ihi0042/g/aapcs32.pdf
+
+ // Stack offset is not known yet. Store param info for later.
+ stack_args.push((param.value_type, param.extension, param.purpose));
+ next_stack += 4;
+ }
+ }
+
+ let extra_arg = if add_ret_area_ptr {
+ debug_assert!(args_or_rets == ArgsOrRets::Args);
+ if next_rreg < max_reg_val {
+ ret.push(ABIArg::Reg(
+ rreg(next_rreg).to_real_reg(),
+ I32,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ } else {
+ stack_args.push((
+ I32,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ next_stack += 4;
+ }
+ Some(ret.len() - 1)
+ } else {
+ None
+ };
+
+ // Now we can assign proper stack offsets to params.
+ let max_stack = next_stack;
+ for (ty, ext, purpose) in stack_args.into_iter().rev() {
+ next_stack -= 4;
+ ret.push(ABIArg::Stack(
+ (max_stack - next_stack) as i64,
+ ty,
+ ext,
+ purpose,
+ ));
+ }
+ assert_eq!(next_stack, 0);
+
+ next_stack = (next_stack + 7) & !7;
+
+ // To avoid overflow issues, limit the arg/return size to something
+ // reasonable -- here, 128 MB.
+ if next_stack > STACK_ARG_RET_SIZE_LIMIT {
+ return Err(CodegenError::ImplLimitExceeded);
+ }
+
+ Ok((ret, next_stack as i64, extra_arg))
+ }
+
+ fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
+ 8 // frame pointer and link register
+ }
+
+ fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
+ Inst::gen_load(into_reg, mem.into(), ty)
+ }
+
+ fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
+ Inst::gen_store(from_reg, mem.into(), ty)
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+ Inst::gen_move(to_reg, from_reg, ty)
+ }
+
+ fn gen_extend(
+ to_reg: Writable<Reg>,
+ from_reg: Reg,
+ is_signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ ) -> Inst {
+ assert!(to_bits == 32);
+ assert!(from_bits < 32);
+ Inst::Extend {
+ rd: to_reg,
+ rm: from_reg,
+ signed: is_signed,
+ from_bits,
+ }
+ }
+
+ fn gen_ret() -> Inst {
+ Inst::Ret
+ }
+
+ fn gen_epilogue_placeholder() -> Inst {
+ Inst::EpiloguePlaceholder
+ }
+
+ fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Inst; 4]> {
+ let mut insts = SmallVec::new();
+
+ if let Some(imm12) = UImm12::maybe_from_i64(imm as i64) {
+ insts.push(Inst::AluRRImm12 {
+ alu_op: ALUOp::Add,
+ rd: into_reg,
+ rn: from_reg,
+ imm12,
+ });
+ } else {
+ let scratch2 = writable_tmp2_reg();
+ insts.extend(Inst::load_constant(scratch2, imm));
+ insts.push(Inst::AluRRRShift {
+ alu_op: ALUOp::Add,
+ rd: into_reg,
+ rn: from_reg,
+ rm: scratch2.to_reg(),
+ shift: None,
+ });
+ }
+ insts
+ }
+
+ fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
+ let mut insts = SmallVec::new();
+ insts.push(Inst::Cmp {
+ rn: sp_reg(),
+ rm: limit_reg,
+ });
+ insts.push(Inst::TrapIf {
+ trap_info: ir::TrapCode::StackOverflow,
+ // Here `Lo` == "less than" when interpreting the two
+ // operands as unsigned integers.
+ cond: Cond::Lo,
+ });
+ insts
+ }
+
+ fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
+ let mem = mem.into();
+ Inst::LoadAddr { rd: into_reg, mem }
+ }
+
+ fn get_stacklimit_reg() -> Reg {
+ ip_reg()
+ }
+
+ fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
+ let mem = AMode::RegOffset(base, offset as i64);
+ Inst::gen_load(into_reg, mem, ty)
+ }
+
+ fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
+ let mem = AMode::RegOffset(base, offset as i64);
+ Inst::gen_store(from_reg, mem, ty)
+ }
+
+ fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Inst; 2]> {
+ let mut ret = SmallVec::new();
+
+ if amount == 0 {
+ return ret;
+ }
+ let (amount, is_sub) = if amount > 0 {
+ (amount, false)
+ } else {
+ (-amount, true)
+ };
+
+ let alu_op = if is_sub { ALUOp::Sub } else { ALUOp::Add };
+
+ if let Some(imm12) = UImm12::maybe_from_i64(amount as i64) {
+ ret.push(Inst::AluRRImm12 {
+ alu_op,
+ rd: writable_sp_reg(),
+ rn: sp_reg(),
+ imm12,
+ });
+ } else {
+ let tmp = writable_ip_reg();
+ ret.extend(Inst::load_constant(tmp, amount as u32));
+ ret.push(Inst::AluRRRShift {
+ alu_op,
+ rd: writable_sp_reg(),
+ rn: sp_reg(),
+ rm: tmp.to_reg(),
+ shift: None,
+ });
+ }
+ ret
+ }
+
+ fn gen_nominal_sp_adj(offset: i32) -> Inst {
+ let offset = i64::from(offset);
+ Inst::VirtualSPOffsetAdj { offset }
+ }
+
+ fn gen_prologue_frame_setup() -> SmallVec<[Inst; 2]> {
+ let mut ret = SmallVec::new();
+ let reg_list = vec![fp_reg(), lr_reg()];
+ ret.push(Inst::Push { reg_list });
+ ret.push(Inst::Mov {
+ rd: writable_fp_reg(),
+ rm: sp_reg(),
+ });
+ ret
+ }
+
+ fn gen_epilogue_frame_restore() -> SmallVec<[Inst; 2]> {
+ let mut ret = SmallVec::new();
+ ret.push(Inst::Mov {
+ rd: writable_sp_reg(),
+ rm: fp_reg(),
+ });
+ let reg_list = vec![writable_fp_reg(), writable_lr_reg()];
+ ret.push(Inst::Pop { reg_list });
+ ret
+ }
+
+ /// Returns stack bytes used as well as instructions. Does not adjust
+ /// nominal SP offset; caller will do that.
+ fn gen_clobber_save(
+ _call_conv: isa::CallConv,
+ _flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> (u64, SmallVec<[Inst; 16]>) {
+ let mut insts = SmallVec::new();
+ if fixed_frame_storage_size > 0 {
+ insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)).into_iter());
+ }
+ let clobbered_vec = get_callee_saves(clobbers);
+ let mut clobbered_vec: Vec<_> = clobbered_vec
+ .into_iter()
+ .map(|r| r.to_reg().to_reg())
+ .collect();
+ if clobbered_vec.len() % 2 == 1 {
+ // For alignment purposes.
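+ // (An even number of 4-byte registers keeps the stack 8-byte
+ // aligned; see `stack_align` above.)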
+ clobbered_vec.push(ip_reg());
+ }
+ let stack_used = clobbered_vec.len() * 4;
+ if !clobbered_vec.is_empty() {
+ insts.push(Inst::Push {
+ reg_list: clobbered_vec,
+ });
+ }
+
+ (stack_used as u64, insts)
+ }
+
+ fn gen_clobber_restore(
+ _call_conv: isa::CallConv,
+ _flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ _fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> SmallVec<[Inst; 16]> {
+ let mut insts = SmallVec::new();
+ let clobbered_vec = get_callee_saves(clobbers);
+ let mut clobbered_vec: Vec<_> = clobbered_vec
+ .into_iter()
+ .map(|r| Writable::from_reg(r.to_reg().to_reg()))
+ .collect();
+ if clobbered_vec.len() % 2 == 1 {
+ clobbered_vec.push(writable_ip_reg());
+ }
+ if !clobbered_vec.is_empty() {
+ insts.push(Inst::Pop {
+ reg_list: clobbered_vec,
+ });
+ }
+ insts
+ }
+
+ fn gen_call(
+ dest: &CallDest,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: ir::Opcode,
+ tmp: Writable<Reg>,
+ _callee_conv: isa::CallConv,
+ _caller_conv: isa::CallConv,
+ ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
+ let mut insts = SmallVec::new();
+ match &dest {
+ &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
+ InstIsSafepoint::Yes,
+ Inst::Call {
+ info: Box::new(CallInfo {
+ dest: name.clone(),
+ uses,
+ defs,
+ opcode,
+ }),
+ },
+ )),
+ &CallDest::ExtName(ref name, RelocDistance::Far) => {
+ insts.push((
+ InstIsSafepoint::No,
+ Inst::LoadExtName {
+ rt: tmp,
+ name: Box::new(name.clone()),
+ offset: 0,
+ },
+ ));
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rm: tmp.to_reg(),
+ uses,
+ defs,
+ opcode,
+ }),
+ },
+ ));
+ }
+ &CallDest::Reg(reg) => insts.push((
+ InstIsSafepoint::Yes,
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rm: *reg,
+ uses,
+ defs,
+ opcode,
+ }),
+ },
+ )),
+ }
+
+ insts
+ }
+
+ fn get_number_of_spillslots_for_value(rc: RegClass, _ty: Type) -> u32 {
+ match rc {
+ RegClass::I32 => 1,
+ _ => panic!("Unexpected register class!"),
+ }
+ }
+
+ fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
+ s.virtual_sp_offset
+ }
+
+ fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
+ s.nominal_sp_to_fp
+ }
+
+ fn get_regs_clobbered_by_call(_: isa::CallConv) -> Vec<Writable<Reg>> {
+ let mut caller_saved = Vec::new();
+ for i in 0..15 {
+ let r = writable_rreg(i);
+ if is_reg_clobbered_by_call(r.to_reg().to_real_reg()) {
+ caller_saved.push(r);
+ }
+ }
+ caller_saved
+ }
+}
+
+fn is_callee_save(r: RealReg) -> bool {
+ let enc = r.get_hw_encoding();
+ 4 <= enc && enc <= 10
+}
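+// Hardware encodings 4..=10 correspond to r4-r10 here; together with fp and lr
+// (saved separately in the prologue), this roughly matches the AAPCS
+// callee-saved register set.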
+
+fn get_callee_saves(regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
+ let mut ret = Vec::new();
+ for &reg in regs.iter() {
+ if is_callee_save(reg.to_reg()) {
+ ret.push(reg);
+ }
+ }
+
+ // Sort registers for deterministic code output.
+ ret.sort_by_key(|r| r.to_reg().get_index());
+ ret
+}
+
+fn is_reg_clobbered_by_call(r: RealReg) -> bool {
+ let enc = r.get_hw_encoding();
+ enc <= 3
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/args.rs
new file mode 100644
index 0000000000..2c1b8e97d6
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/args.rs
@@ -0,0 +1,335 @@
+//! 32-bit ARM ISA definitions: instruction arguments.
+
+use crate::isa::arm32::inst::*;
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg};
+
+use std::string::String;
+
+/// A shift operator for a register or immediate.
+#[derive(Clone, Copy, Debug)]
+#[repr(u8)]
+pub enum ShiftOp {
+ LSL = 0b00,
+ LSR = 0b01,
+ ASR = 0b10,
+ ROR = 0b11,
+}
+
+impl ShiftOp {
+ /// Get the encoding of this shift op.
+ pub fn bits(self) -> u8 {
+ self as u8
+ }
+}
+
+/// A shift operator amount.
+#[derive(Clone, Copy, Debug)]
+pub struct ShiftOpShiftImm(u8);
+
+impl ShiftOpShiftImm {
+ /// Maximum shift for shifted-register operands.
+ pub const MAX_SHIFT: u32 = 31;
+
+ /// Create a new shiftop shift amount, if possible.
+ pub fn maybe_from_shift(shift: u32) -> Option<ShiftOpShiftImm> {
+ if shift <= Self::MAX_SHIFT {
+ Some(ShiftOpShiftImm(shift as u8))
+ } else {
+ None
+ }
+ }
+
+ /// Return the shift amount.
+ pub fn value(self) -> u8 {
+ self.0
+ }
+}
+
+/// A shift operator with an amount, guaranteed to be within range.
+#[derive(Clone, Debug)]
+pub struct ShiftOpAndAmt {
+ op: ShiftOp,
+ shift: ShiftOpShiftImm,
+}
+
+impl ShiftOpAndAmt {
+ pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt {
+ ShiftOpAndAmt { op, shift }
+ }
+
+ /// Get the shift op.
+ pub fn op(&self) -> ShiftOp {
+ self.op
+ }
+
+ /// Get the shift amount.
+ pub fn amt(&self) -> ShiftOpShiftImm {
+ self.shift
+ }
+}
+
+/// An unsigned 8-bit immediate.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm8 {
+ /// The value.
+ value: u8,
+}
+
+impl UImm8 {
+ pub fn maybe_from_i64(value: i64) -> Option<UImm8> {
+ if 0 <= value && value < (1 << 8) {
+ Some(UImm8 { value: value as u8 })
+ } else {
+ None
+ }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ u32::from(self.value)
+ }
+}
+
+/// An unsigned 12-bit immediate.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm12 {
+ /// The value.
+ value: u16,
+}
+
+impl UImm12 {
+ pub fn maybe_from_i64(value: i64) -> Option<UImm12> {
+ if 0 <= value && value < (1 << 12) {
+ Some(UImm12 {
+ value: value as u16,
+ })
+ } else {
+ None
+ }
+ }
+
+ /// Bits for encoding.
+ pub fn bits(&self) -> u32 {
+ u32::from(self.value)
+ }
+}
+
+/// An addressing mode specified for a load/store operation.
+#[derive(Clone, Debug)]
+pub enum AMode {
+ // Real addressing modes
+ /// Register plus register offset, which can be shifted left by imm2.
+ RegReg(Reg, Reg, u8),
+
+ /// Unsigned 12-bit immediate offset from reg.
+ RegOffset12(Reg, UImm12),
+
+ /// Immediate offset from program counter aligned to 4.
+ /// Cannot be used by store instructions.
+ PCRel(i32),
+
+ // Virtual addressing modes that are lowered at emission time:
+ /// Immediate offset from reg.
+ RegOffset(Reg, i64),
+
+ /// Signed immediate offset from stack pointer.
+ SPOffset(i64, Type),
+
+ /// Offset from the frame pointer.
+ FPOffset(i64, Type),
+
+ /// Signed immediate offset from "nominal stack pointer".
+ NominalSPOffset(i64, Type),
+}
+
+impl AMode {
+ /// Memory reference using the sum of two registers as an address.
+ pub fn reg_plus_reg(reg1: Reg, reg2: Reg, shift_amt: u8) -> AMode {
+ assert!(shift_amt <= 3);
+ AMode::RegReg(reg1, reg2, shift_amt)
+ }
+
+ /// Memory reference using the sum of a register and an immediate offset
+ /// as an address.
+ pub fn reg_plus_imm(reg: Reg, offset: i64) -> AMode {
+ AMode::RegOffset(reg, offset)
+ }
+}
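+// Example (illustrative): `AMode::reg_plus_imm(r1, off)` stays a virtual
+// `RegOffset` until emission, where `mem_finalize` (emit.rs) turns it into
+// `RegOffset12` if `off` fits in 12 bits, or loads `off` into `ip` and uses
+// `RegReg` otherwise.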
+
+/// Condition for conditional branches.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum Cond {
+ Eq = 0,
+ Ne = 1,
+ Hs = 2,
+ Lo = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ Hi = 8,
+ Ls = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+}
+
+impl Cond {
+ /// Return the inverted condition.
+ pub fn invert(self) -> Cond {
+ match self {
+ Cond::Eq => Cond::Ne,
+ Cond::Ne => Cond::Eq,
+
+ Cond::Hs => Cond::Lo,
+ Cond::Lo => Cond::Hs,
+
+ Cond::Mi => Cond::Pl,
+ Cond::Pl => Cond::Mi,
+
+ Cond::Vs => Cond::Vc,
+ Cond::Vc => Cond::Vs,
+
+ Cond::Hi => Cond::Ls,
+ Cond::Ls => Cond::Hi,
+
+ Cond::Ge => Cond::Lt,
+ Cond::Lt => Cond::Ge,
+
+ Cond::Gt => Cond::Le,
+ Cond::Le => Cond::Gt,
+
+ Cond::Al => panic!("Cannot invert {:?} condition", self),
+ }
+ }
+
+ /// Return the machine encoding of this condition.
+ pub fn bits(self) -> u16 {
+ self as u16
+ }
+}
+
+/// A branch target. Either unresolved (basic-block index) or resolved (offset
+/// from end of current instruction).
+#[derive(Clone, Copy, Debug)]
+pub enum BranchTarget {
+ /// An unresolved reference to a Label.
+ Label(MachLabel),
+ /// A fixed PC offset.
+ ResolvedOffset(i32),
+}
+
+impl BranchTarget {
+ /// Return the target's label, if it is a label-based target.
+ pub fn as_label(self) -> Option<MachLabel> {
+ match self {
+ BranchTarget::Label(l) => Some(l),
+ _ => None,
+ }
+ }
+
+ // Ready for embedding in instruction.
+ fn as_offset(self, inst_16_bit: bool) -> i32 {
+ match self {
+ BranchTarget::ResolvedOffset(off) => {
+ if inst_16_bit {
+ // pc is equal to end of the current inst + 2.
+ (off - 2) >> 1
+ } else {
+ // pc points to end of the current inst.
+ off >> 1
+ }
+ }
+ _ => 0,
+ }
+ }
+
+ /// For 32-bit unconditional jump.
+ pub fn as_off24(self) -> u32 {
+ let off = self.as_offset(false);
+ assert!(off < (1 << 24));
+ assert!(off >= -(1 << 24));
+ (off as u32) & ((1 << 24) - 1)
+ }
+
+ /// For 32-bit conditional jump.
+ pub fn as_off20(self) -> u32 {
+ let off = self.as_offset(false);
+ assert!(off < (1 << 20));
+ assert!(off >= -(1 << 20));
+ (off as u32) & ((1 << 20) - 1)
+ }
+}
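+// Illustrative example: `BranchTarget::ResolvedOffset(4).as_off24()` encodes as
+// `2`, since offsets are counted in halfwords from the end of the (32-bit)
+// branch instruction.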
+
+impl PrettyPrint for ShiftOpAndAmt {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let op = match self.op() {
+ ShiftOp::LSL => "lsl",
+ ShiftOp::LSR => "lsr",
+ ShiftOp::ASR => "asr",
+ ShiftOp::ROR => "ror",
+ };
+ format!("{} #{}", op, self.amt().value())
+ }
+}
+
+impl PrettyPrint for UImm8 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for UImm12 {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ format!("#{}", self.value)
+ }
+}
+
+impl PrettyPrint for AMode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &AMode::RegReg(rn, rm, imm2) => {
+ let shift = if imm2 != 0 {
+ format!(", lsl #{}", imm2)
+ } else {
+ "".to_string()
+ };
+ format!(
+ "[{}, {}{}]",
+ rn.show_rru(mb_rru),
+ rm.show_rru(mb_rru),
+ shift
+ )
+ }
+ &AMode::RegOffset12(rn, off) => {
+ format!("[{}, {}]", rn.show_rru(mb_rru), off.show_rru(mb_rru))
+ }
+ &AMode::PCRel(off) => format!("[pc, #{}]", off),
+ &AMode::RegOffset(..)
+ | &AMode::SPOffset(..)
+ | &AMode::FPOffset(..)
+ | &AMode::NominalSPOffset(..) => panic!("unexpected mem mode"),
+ }
+ }
+}
+
+impl PrettyPrint for Cond {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ let mut s = format!("{:?}", self);
+ s.make_ascii_lowercase();
+ s
+ }
+}
+
+impl PrettyPrint for BranchTarget {
+ fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ &BranchTarget::Label(label) => format!("label{:?}", label.get()),
+ &BranchTarget::ResolvedOffset(off) => format!("{}", off),
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit.rs
new file mode 100644
index 0000000000..5e4a412e96
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit.rs
@@ -0,0 +1,829 @@
+//! 32-bit ARM ISA: binary code emission.
+
+use crate::binemit::{Reloc, StackMap};
+use crate::ir::SourceLoc;
+use crate::isa::arm32::inst::*;
+
+use core::convert::TryFrom;
+use log::debug;
+
+/// Memory addressing mode finalization: convert "special" modes (e.g.,
+/// nominal stack offset) into real addressing modes, possibly by
+/// emitting some helper instructions that come immediately before the use
+/// of this amode.
+pub fn mem_finalize(mem: &AMode, state: &EmitState) -> (SmallVec<[Inst; 4]>, AMode) {
+ match mem {
+ &AMode::RegOffset(_, off)
+ | &AMode::SPOffset(off, _)
+ | &AMode::FPOffset(off, _)
+ | &AMode::NominalSPOffset(off, _) => {
+ let basereg = match mem {
+ &AMode::RegOffset(reg, _) => reg,
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => sp_reg(),
+ &AMode::FPOffset(..) => fp_reg(),
+ _ => unreachable!(),
+ };
+ let adj = match mem {
+ &AMode::NominalSPOffset(..) => {
+ debug!(
+ "mem_finalize: nominal SP offset {} + adj {} -> {}",
+ off,
+ state.virtual_sp_offset,
+ off + state.virtual_sp_offset
+ );
+ state.virtual_sp_offset
+ }
+ _ => 0,
+ };
+ let off = off + adj;
+
+ assert!(-(1 << 31) <= off && off <= (1 << 32));
+
+ if let Some(off) = UImm12::maybe_from_i64(off) {
+ let mem = AMode::RegOffset12(basereg, off);
+ (smallvec![], mem)
+ } else {
+ let tmp = writable_ip_reg();
+ let const_insts = Inst::load_constant(tmp, off as u32);
+ let mem = AMode::reg_plus_reg(basereg, tmp.to_reg(), 0);
+ (const_insts, mem)
+ }
+ }
+ // Just assert immediate is valid here.
+ _ => (smallvec![], mem.clone()),
+ }
+}
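+// Illustrative example: with `state.virtual_sp_offset == 16`, finalizing
+// `AMode::NominalSPOffset(8, I32)` yields no helper instructions and the real
+// mode `RegOffset12(sp, 24)`; an offset that does not fit in 12 bits instead
+// loads the constant into `ip` and returns `RegReg(base, ip, 0)`.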
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+fn machreg_to_gpr(m: Reg) -> u16 {
+ assert_eq!(m.get_class(), RegClass::I32);
+ u16::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_gpr_lo(m: Reg) -> u16 {
+ let gpr_lo = machreg_to_gpr(m);
+ assert!(gpr_lo < 8);
+ gpr_lo
+}
+
+fn machreg_is_lo(m: Reg) -> bool {
+ machreg_to_gpr(m) < 8
+}
+
+fn enc_16_rr(bits_15_6: u16, rd: Reg, rm: Reg) -> u16 {
+ (bits_15_6 << 6) | machreg_to_gpr_lo(rd) | (machreg_to_gpr_lo(rm) << 3)
+}
+
+fn enc_16_rr_any(bits_15_8: u16, rd: Reg, rm: Reg) -> u16 {
+ let rd = machreg_to_gpr(rd);
+ (bits_15_8 << 8) | (rd & 0x7) | ((rd >> 3) << 7) | (machreg_to_gpr(rm) << 3)
+}
+
+fn enc_16_mov(rd: Writable<Reg>, rm: Reg) -> u16 {
+ enc_16_rr_any(0b01000110, rd.to_reg(), rm)
+}
+
+fn enc_16_it(cond: Cond, insts: &Vec<CondInst>) -> u16 {
+ let cond = cond.bits();
+ let mut mask: u16 = 0;
+ for inst in insts.iter().skip(1) {
+ if inst.then {
+ mask |= cond & 0x1;
+ } else {
+ mask |= (cond & 0x1) ^ 0x1;
+ }
+ mask <<= 1;
+ }
+ mask |= 0x1;
+ mask <<= 4 - insts.len();
+ 0b1011_1111_0000_0000 | (cond << 4) | mask
+}
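+// Illustrative mask derivation: for an ITE block (two instructions, the second
+// on the "else" path) with `cond == Cond::Eq` (bits 0b0000), the loop adds
+// `!cond[0] == 1`, the trailing `1` marks the end of the block, and the final
+// shift left-aligns everything, giving `mask == 0b1100` as in the Thumb IT
+// encoding.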
+
+fn enc_32_regs(
+ mut inst: u32,
+ reg_0: Option<Reg>,
+ reg_8: Option<Reg>,
+ reg_12: Option<Reg>,
+ reg_16: Option<Reg>,
+) -> u32 {
+ if let Some(reg_0) = reg_0 {
+ inst |= u32::from(machreg_to_gpr(reg_0));
+ }
+ if let Some(reg_8) = reg_8 {
+ inst |= u32::from(machreg_to_gpr(reg_8)) << 8;
+ }
+ if let Some(reg_12) = reg_12 {
+ inst |= u32::from(machreg_to_gpr(reg_12)) << 12;
+ }
+ if let Some(reg_16) = reg_16 {
+ inst |= u32::from(machreg_to_gpr(reg_16)) << 16;
+ }
+ inst
+}
+
+fn enc_32_reg_shift(inst: u32, shift: &Option<ShiftOpAndAmt>) -> u32 {
+ match shift {
+ Some(shift) => {
+ let op = u32::from(shift.op().bits());
+ let amt = u32::from(shift.amt().value());
+ let imm2 = amt & 0x3;
+ let imm3 = (amt >> 2) & 0x7;
+
+ inst | (op << 4) | (imm2 << 6) | (imm3 << 12)
+ }
+ None => inst,
+ }
+}
+
+fn enc_32_r_imm16(bits_31_20: u32, rd: Reg, imm16: u16) -> u32 {
+ let imm16 = u32::from(imm16);
+ let imm8 = imm16 & 0xff;
+ let imm3 = (imm16 >> 8) & 0x7;
+ let i = (imm16 >> 11) & 0x1;
+ let imm4 = (imm16 >> 12) & 0xf;
+
+ let inst = ((bits_31_20 << 20) & !(1 << 26)) | imm8 | (imm3 << 12) | (imm4 << 16) | (i << 26);
+ enc_32_regs(inst, None, Some(rd), None, None)
+}
+
+fn enc_32_rrr(bits_31_20: u32, bits_15_12: u32, bits_7_4: u32, rd: Reg, rm: Reg, rn: Reg) -> u32 {
+ let inst = (bits_31_20 << 20) | (bits_15_12 << 12) | (bits_7_4 << 4);
+ enc_32_regs(inst, Some(rm), Some(rd), None, Some(rn))
+}
+
+fn enc_32_imm12(inst: u32, imm12: UImm12) -> u32 {
+ let imm12 = imm12.bits();
+ let imm8 = imm12 & 0xff;
+ let imm3 = (imm12 >> 8) & 0x7;
+ let i = (imm12 >> 11) & 0x1;
+ inst | imm8 | (imm3 << 12) | (i << 26)
+}
+
+fn enc_32_mem_r(bits_24_20: u32, rt: Reg, rn: Reg, rm: Reg, imm2: u8) -> u32 {
+ let imm2 = u32::from(imm2);
+ let inst = (imm2 << 4) | (bits_24_20 << 20) | (0b11111 << 27);
+ enc_32_regs(inst, Some(rm), None, Some(rt), Some(rn))
+}
+
+fn enc_32_mem_off12(bits_24_20: u32, rt: Reg, rn: Reg, off12: UImm12) -> u32 {
+ let off12 = off12.bits();
+ let inst = off12 | (bits_24_20 << 20) | (0b11111 << 27);
+ enc_32_regs(inst, None, None, Some(rt), Some(rn))
+}
+
+fn enc_32_jump(target: BranchTarget) -> u32 {
+ let off24 = target.as_off24();
+ let imm11 = off24 & 0x7ff;
+ let imm10 = (off24 >> 11) & 0x3ff;
+ let i2 = (off24 >> 21) & 0x1;
+ let i1 = (off24 >> 22) & 0x1;
+ let s = (off24 >> 23) & 0x1;
+ let j1 = (i1 ^ s) ^ 1;
+ let j2 = (i2 ^ s) ^ 1;
+
+ 0b11110_0_0000000000_10_0_1_0_00000000000
+ | imm11
+ | (j2 << 11)
+ | (j1 << 13)
+ | (imm10 << 16)
+ | (s << 26)
+}
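+// In the Thumb-2 B (T4) encoding the branch offset is S:I1:I2:imm10:imm11:'0',
+// with I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S); the `(i1 ^ s) ^ 1` terms
+// above recover J1/J2 from the sign bit and the top bits of the 24-bit offset.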
+
+fn enc_32_cond_branch(cond: Cond, target: BranchTarget) -> u32 {
+ let cond = u32::from(cond.bits());
+ let off20 = target.as_off20();
+ let imm11 = off20 & 0x7ff;
+ let imm6 = (off20 >> 11) & 0x3f;
+ let j1 = (off20 >> 17) & 0x1;
+ let j2 = (off20 >> 18) & 0x1;
+ let s = (off20 >> 19) & 0x1;
+
+ 0b11110_0_0000_000000_10_0_0_0_00000000000
+ | imm11
+ | (j2 << 11)
+ | (j1 << 13)
+ | (imm6 << 16)
+ | (cond << 22)
+ | (s << 26)
+}
+
+fn u32_swap_halfwords(x: u32) -> u32 {
+ (x >> 16) | (x << 16)
+}
+
+fn emit_32(inst: u32, sink: &mut MachBuffer<Inst>) {
+ let inst_hi = (inst >> 16) as u16;
+ let inst_lo = (inst & 0xffff) as u16;
+ sink.put2(inst_hi);
+ sink.put2(inst_lo);
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+ /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+ /// program point.
+ pub(crate) virtual_sp_offset: i64,
+ /// Offset of FP from nominal-SP.
+ pub(crate) nominal_sp_to_fp: i64,
+ /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+ stack_map: Option<StackMap>,
+ /// Source location of next machine code instruction to be emitted.
+ cur_srcloc: SourceLoc,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+ fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+ EmitState {
+ virtual_sp_offset: 0,
+ nominal_sp_to_fp: abi.frame_size() as i64,
+ stack_map: None,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ fn pre_safepoint(&mut self, stack_map: StackMap) {
+ self.stack_map = Some(stack_map);
+ }
+
+ fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+}
+
+impl EmitState {
+ fn take_stack_map(&mut self) -> Option<StackMap> {
+ self.stack_map.take()
+ }
+
+ fn clear_post_insn(&mut self) {
+ self.stack_map = None;
+ }
+
+ fn cur_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+}
+
+pub struct EmitInfo {
+ flags: settings::Flags,
+}
+
+impl EmitInfo {
+ pub(crate) fn new(flags: settings::Flags) -> Self {
+ EmitInfo { flags }
+ }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+ fn flags(&self) -> &settings::Flags {
+ &self.flags
+ }
+}
+
+impl MachInstEmit for Inst {
+ type Info = EmitInfo;
+ type State = EmitState;
+ type UnwindInfo = super::unwind::Arm32UnwindInfo;
+
+ fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+ let start_off = sink.cur_offset();
+
+ match self {
+ &Inst::Nop0 | &Inst::EpiloguePlaceholder => {}
+ &Inst::Nop2 => {
+ sink.put2(0b1011_1111_0000_0000);
+ }
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let (bits_31_20, bits_15_12, bits_7_4) = match alu_op {
+ ALUOp::Lsl => (0b111110100000, 0b1111, 0b0000),
+ ALUOp::Lsr => (0b111110100010, 0b1111, 0b0000),
+ ALUOp::Asr => (0b111110100100, 0b1111, 0b0000),
+ ALUOp::Ror => (0b111110100110, 0b1111, 0b0000),
+ ALUOp::Qadd => (0b111110101000, 0b1111, 0b1000),
+ ALUOp::Qsub => (0b111110101000, 0b1111, 0b1010),
+ ALUOp::Mul => (0b111110110000, 0b1111, 0b0000),
+ ALUOp::Udiv => (0b111110111011, 0b1111, 0b1111),
+ ALUOp::Sdiv => (0b111110111001, 0b1111, 0b1111),
+ _ => panic!("Invalid ALUOp {:?} in RRR form!", alu_op),
+ };
+ emit_32(
+ enc_32_rrr(bits_31_20, bits_15_12, bits_7_4, rd.to_reg(), rm, rn),
+ sink,
+ );
+ }
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shift,
+ } => {
+ let bits_31_24 = 0b111_0101;
+ let bits_24_20 = match alu_op {
+ ALUOp::And => 0b00000,
+ ALUOp::Bic => 0b00010,
+ ALUOp::Orr => 0b00100,
+ ALUOp::Orn => 0b00110,
+ ALUOp::Eor => 0b01000,
+ ALUOp::Add => 0b10000,
+ ALUOp::Adds => 0b10001,
+ ALUOp::Adc => 0b10100,
+ ALUOp::Adcs => 0b10101,
+ ALUOp::Sbc => 0b10110,
+ ALUOp::Sbcs => 0b10111,
+ ALUOp::Sub => 0b11010,
+ ALUOp::Subs => 0b11011,
+ ALUOp::Rsb => 0b11100,
+ _ => panic!("Invalid ALUOp {:?} in RRRShift form!", alu_op),
+ };
+ let bits_31_20 = (bits_31_24 << 5) | bits_24_20;
+ let inst = enc_32_rrr(bits_31_20, 0, 0, rd.to_reg(), rm, rn);
+ let inst = enc_32_reg_shift(inst, shift);
+ emit_32(inst, sink);
+ }
+ &Inst::AluRRShift {
+ alu_op,
+ rd,
+ rm,
+ ref shift,
+ } => {
+ let bits_24_21 = match alu_op {
+ ALUOp1::Mvn => 0b0011,
+ ALUOp1::Mov => 0b0010,
+ };
+ let inst = 0b1110101_0000_0_1111_0_000_0000_00_00_0000 | (bits_24_21 << 21);
+ let inst = enc_32_regs(inst, Some(rm), Some(rd.to_reg()), None, None);
+ let inst = enc_32_reg_shift(inst, shift);
+ emit_32(inst, sink);
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd_hi,
+ rd_lo,
+ rn,
+ rm,
+ } => {
+ let (bits_22_20, bits_7_4) = match alu_op {
+ ALUOp::Smull => (0b000, 0b0000),
+ ALUOp::Umull => (0b010, 0b0000),
+ _ => panic!("Invalid ALUOp {:?} in RRRR form!", alu_op),
+ };
+ let inst = (0b111110111 << 23) | (bits_22_20 << 20) | (bits_7_4 << 4);
+ let inst = enc_32_regs(
+ inst,
+ Some(rm),
+ Some(rd_hi.to_reg()),
+ Some(rd_lo.to_reg()),
+ Some(rn),
+ );
+ emit_32(inst, sink);
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ imm12,
+ } => {
+ let bits_24_20 = match alu_op {
+ ALUOp::Add => 0b00000,
+ ALUOp::Sub => 0b01010,
+ _ => panic!("Invalid ALUOp {:?} in RRImm12 form!", alu_op),
+ };
+ let inst = (0b11110_0_1 << 25) | (bits_24_20 << 20);
+ let inst = enc_32_regs(inst, None, Some(rd.to_reg()), None, Some(rn));
+ let inst = enc_32_imm12(inst, imm12);
+ emit_32(inst, sink);
+ }
+ &Inst::AluRRImm8 {
+ alu_op,
+ rd,
+ rn,
+ imm8,
+ } => {
+ let bits_24_20 = match alu_op {
+ ALUOp::And => 0b00000,
+ ALUOp::Bic => 0b00010,
+ ALUOp::Orr => 0b00100,
+ ALUOp::Orn => 0b00110,
+ ALUOp::Eor => 0b01000,
+ ALUOp::Add => 0b10000,
+ ALUOp::Adds => 0b10001,
+ ALUOp::Adc => 0b10100,
+ ALUOp::Adcs => 0b10101,
+ ALUOp::Sbc => 0b10110,
+ ALUOp::Sbcs => 0b10111,
+ ALUOp::Sub => 0b11010,
+ ALUOp::Subs => 0b11011,
+ ALUOp::Rsb => 0b11100,
+ _ => panic!("Invalid ALUOp {:?} in RRImm8 form!", alu_op),
+ };
+ let imm8 = imm8.bits();
+ let inst = 0b11110_0_0_00000_0000_0_000_0000_00000000 | imm8 | (bits_24_20 << 20);
+ let inst = enc_32_regs(inst, None, Some(rd.to_reg()), None, Some(rn));
+ emit_32(inst, sink);
+ }
+ &Inst::AluRImm8 { alu_op, rd, imm8 } => {
+ let bits_24_20 = match alu_op {
+ ALUOp1::Mvn => 0b00110,
+ ALUOp1::Mov => 0b00100,
+ };
+ let imm8 = imm8.bits();
+ let inst = 0b11110_0_0_00000_1111_0_000_0000_00000000 | imm8 | (bits_24_20 << 20);
+ let inst = enc_32_regs(inst, None, Some(rd.to_reg()), None, None);
+ emit_32(inst, sink);
+ }
+ &Inst::BitOpRR { bit_op, rd, rm } => {
+ let (bits_22_20, bits_7_4) = match bit_op {
+ BitOp::Rbit => (0b001, 0b1010),
+ BitOp::Rev => (0b001, 0b1000),
+ BitOp::Clz => (0b011, 0b1000),
+ };
+ let inst =
+ 0b111110101_000_0000_1111_0000_0000_0000 | (bits_22_20 << 20) | (bits_7_4 << 4);
+ let inst = enc_32_regs(inst, Some(rm), Some(rd.to_reg()), None, Some(rm));
+ emit_32(inst, sink);
+ }
+ &Inst::Mov { rd, rm } => {
+ sink.put2(enc_16_mov(rd, rm));
+ }
+ &Inst::MovImm16 { rd, imm16 } => {
+ emit_32(enc_32_r_imm16(0b11110_0_100100, rd.to_reg(), imm16), sink);
+ }
+ &Inst::Movt { rd, imm16 } => {
+ emit_32(enc_32_r_imm16(0b11110_0_101100, rd.to_reg(), imm16), sink);
+ }
+ &Inst::Cmp { rn, rm } => {
+ // Check which 16-bit encoding is allowed.
+ if machreg_is_lo(rn) && machreg_is_lo(rm) {
+ sink.put2(enc_16_rr(0b0100001010, rn, rm));
+ } else {
+ sink.put2(enc_16_rr_any(0b01000101, rn, rm));
+ }
+ }
+ &Inst::CmpImm8 { rn, imm8 } => {
+ let inst = 0b11110_0_011011_0000_0_000_1111_00000000 | u32::from(imm8);
+ let inst = enc_32_regs(inst, None, None, None, Some(rn));
+ emit_32(inst, sink);
+ }
+ &Inst::Store { rt, ref mem, bits } => {
+ let (mem_insts, mem) = mem_finalize(mem, state);
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ // Register the offset at which the store instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ match mem {
+ AMode::RegReg(rn, rm, imm2) => {
+ let bits_24_20 = match bits {
+ 32 => 0b00100,
+ 16 => 0b00010,
+ 8 => 0b00000,
+ _ => panic!("Unsupported store case {:?}", self),
+ };
+ emit_32(enc_32_mem_r(bits_24_20, rt, rn, rm, imm2), sink);
+ }
+ AMode::RegOffset12(rn, off12) => {
+ let bits_24_20 = match bits {
+ 32 => 0b01100,
+ 16 => 0b01010,
+ 8 => 0b01000,
+ _ => panic!("Unsupported store case {:?}", self),
+ };
+ emit_32(enc_32_mem_off12(bits_24_20, rt, rn, off12), sink);
+ }
+ AMode::PCRel(_) => panic!("Unsupported store case {:?}", self),
+ _ => unreachable!(),
+ }
+ }
+ &Inst::Load {
+ rt,
+ ref mem,
+ bits,
+ sign_extend,
+ } => {
+ let (mem_insts, mem) = mem_finalize(mem, state);
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() {
+ // Register the offset at which the load instruction starts.
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+ match mem {
+ AMode::RegReg(rn, rm, imm2) => {
+ let bits_24_20 = match (bits, sign_extend) {
+ (32, _) => 0b00101,
+ (16, true) => 0b10011,
+ (16, false) => 0b00011,
+ (8, true) => 0b10001,
+ (8, false) => 0b00001,
+ _ => panic!("Unsupported load case {:?}", self),
+ };
+ emit_32(enc_32_mem_r(bits_24_20, rt.to_reg(), rn, rm, imm2), sink);
+ }
+ AMode::RegOffset12(rn, off12) => {
+ let bits_24_20 = match (bits, sign_extend) {
+ (32, _) => 0b01101,
+ (16, true) => 0b11011,
+ (16, false) => 0b01011,
+ (8, true) => 0b11001,
+ (8, false) => 0b01001,
+ _ => panic!("Unsupported load case {:?}", self),
+ };
+ emit_32(enc_32_mem_off12(bits_24_20, rt.to_reg(), rn, off12), sink);
+ }
+ AMode::PCRel(off12) => {
+ let mut bits_24_20 = match (bits, sign_extend) {
+ (32, _) => 0b00101,
+ (16, true) => 0b10011,
+ (16, false) => 0b00011,
+ (8, true) => 0b10001,
+ (8, false) => 0b00001,
+ _ => panic!("Unsupported load case {:?}", self),
+ };
+ let (u, off12) = if off12 > 0 { (1, off12) } else { (0, -off12) };
+ let off12 = UImm12::maybe_from_i64(i64::from(off12)).unwrap();
+ bits_24_20 |= u << 3;
+
+ emit_32(
+ enc_32_mem_off12(bits_24_20, rt.to_reg(), pc_reg(), off12),
+ sink,
+ );
+ }
+ _ => unreachable!(),
+ }
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ let (mem_insts, mem) = mem_finalize(mem, state);
+ for inst in mem_insts.into_iter() {
+ inst.emit(sink, emit_info, state);
+ }
+ let inst = match mem {
+ AMode::RegReg(reg1, reg2, shift) => {
+ let shift = u32::from(shift);
+ let shift_amt = ShiftOpShiftImm::maybe_from_shift(shift).unwrap();
+ let shift = ShiftOpAndAmt::new(ShiftOp::LSL, shift_amt);
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add,
+ rd,
+ rn: reg1,
+ rm: reg2,
+ shift: Some(shift),
+ }
+ }
+ AMode::RegOffset12(reg, imm12) => Inst::AluRRImm12 {
+ alu_op: ALUOp::Add,
+ rd,
+ rn: reg,
+ imm12,
+ },
+ AMode::PCRel(off12) => {
+ let (off12, alu_op) = if off12 > 0 {
+ (off12, ALUOp::Add)
+ } else {
+ (-off12, ALUOp::Sub)
+ };
+ let imm12 = UImm12::maybe_from_i64(i64::from(off12)).unwrap();
+ Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn: pc_reg(),
+ imm12,
+ }
+ }
+ _ => unreachable!(),
+ };
+ inst.emit(sink, emit_info, state);
+ }
+ &Inst::Extend {
+ rd,
+ rm,
+ from_bits,
+ signed,
+ } if from_bits >= 8 => {
+ let rd = rd.to_reg();
+ if machreg_is_lo(rd) && machreg_is_lo(rm) {
+ let bits_15_9 = match (from_bits, signed) {
+ (16, true) => 0b1011001000,
+ (16, false) => 0b1011001010,
+ (8, true) => 0b1011001001,
+ (8, false) => 0b1011001011,
+ _ => panic!("Unsupported Extend case: {:?}", self),
+ };
+ sink.put2(enc_16_rr(bits_15_9, rd, rm));
+ } else {
+ let bits_22_20 = match (from_bits, signed) {
+ (16, true) => 0b000,
+ (16, false) => 0b001,
+ (8, true) => 0b100,
+ (8, false) => 0b101,
+ _ => panic!("Unsupported Extend case: {:?}", self),
+ };
+ let inst = 0b111110100_000_11111111_0000_1000_0000 | (bits_22_20 << 20);
+ let inst = enc_32_regs(inst, Some(rm), Some(rd), None, None);
+ emit_32(inst, sink);
+ }
+ }
+ &Inst::Extend {
+ rd,
+ rm,
+ from_bits,
+ signed,
+ } if from_bits == 1 => {
+ let inst = Inst::AluRRImm8 {
+ alu_op: ALUOp::And,
+ rd,
+ rn: rm,
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ };
+ inst.emit(sink, emit_info, state);
+
+ if signed {
+ let inst = Inst::AluRRImm8 {
+ alu_op: ALUOp::Rsb,
+ rd,
+ rn: rd.to_reg(),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ };
+ inst.emit(sink, emit_info, state);
+ }
+ }
+ &Inst::Extend { .. } => {
+ panic!("Unsupported extend variant");
+ }
+ &Inst::It { cond, ref insts } => {
+ assert!(1 <= insts.len() && insts.len() <= 4);
+ assert!(insts[0].then);
+
+ sink.put2(enc_16_it(cond, insts));
+ for inst in insts.iter() {
+ inst.inst.emit(sink, emit_info, state);
+ }
+ }
+ &Inst::Push { ref reg_list } => match reg_list.len() {
+ 0 => panic!("Unsupported Push case: {:?}", self),
+ 1 => {
+ let reg = u32::from(machreg_to_gpr(reg_list[0]));
+ let inst: u32 = 0b1111100001001101_0000_110100000100 | (reg << 12);
+ emit_32(inst, sink);
+ }
+ _ => {
+ let mut inst: u32 = 0b1110100100101101 << 16;
+ for reg in reg_list {
+ inst |= 1 << machreg_to_gpr(*reg);
+ }
+ if inst & ((1 << 13) | (1 << 15)) != 0 {
+ panic!("Unsupported Push case: {:?}", self);
+ }
+ emit_32(inst, sink);
+ }
+ },
+ &Inst::Pop { ref reg_list } => match reg_list.len() {
+ 0 => panic!("Unsupported Pop case: {:?}", self),
+ 1 => {
+ let reg = u32::from(machreg_to_gpr(reg_list[0].to_reg()));
+ let inst: u32 = 0b1111100001011101_0000_101100000100 | (reg << 12);
+ emit_32(inst, sink);
+ }
+ _ => {
+ let mut inst: u32 = 0b1110100010111101 << 16;
+ for reg in reg_list {
+ inst |= 1 << machreg_to_gpr(reg.to_reg());
+ }
+ if (inst & (1 << 14) != 0) && (inst & (1 << 15) != 0) {
+ panic!("Unsupported Pop case: {:?}", self);
+ }
+ emit_32(inst, sink);
+ }
+ },
+ &Inst::Call { ref info } => {
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, Reloc::Arm32Call, &info.dest, 0);
+ emit_32(0b11110_0_0000000000_11_0_1_0_00000000000, sink);
+ if info.opcode.is_call() {
+ sink.add_call_site(srcloc, info.opcode);
+ }
+ }
+ &Inst::CallInd { ref info } => {
+ let srcloc = state.cur_srcloc();
+ sink.put2(0b01000111_1_0000_000 | (machreg_to_gpr(info.rm) << 3));
+ if info.opcode.is_call() {
+ sink.add_call_site(srcloc, info.opcode);
+ }
+ }
+ &Inst::LoadExtName {
+ rt,
+ ref name,
+ offset,
+ } => {
+ // maybe nop2 (0|2) bytes (pc is now 4-aligned)
+ // ldr rt, [pc, #4] 4 bytes
+ // b continue 4 bytes
+ // addr 4 bytes
+ // continue:
+ //
+ if start_off & 0x3 != 0 {
+ Inst::Nop2.emit(sink, emit_info, state);
+ }
+ assert_eq!(sink.cur_offset() & 0x3, 0);
+
+ let mem = AMode::PCRel(4);
+ let inst = Inst::Load {
+ rt,
+ mem,
+ bits: 32,
+ sign_extend: false,
+ };
+ inst.emit(sink, emit_info, state);
+
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(4),
+ };
+ inst.emit(sink, emit_info, state);
+
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, Reloc::Abs4, name, offset.into());
+ sink.put4(0);
+ }
+ &Inst::Ret => {
+ sink.put2(0b010001110_1110_000); // bx lr
+ }
+ &Inst::Jump { dest } => {
+ let off = sink.cur_offset();
+ // Indicate that the jump uses a label, if so, so that a fixup can occur later.
+ if let Some(l) = dest.as_label() {
+ sink.use_label_at_offset(off, l, LabelUse::Branch24);
+ sink.add_uncond_branch(off, off + 4, l);
+ }
+ emit_32(enc_32_jump(dest), sink);
+ }
+ &Inst::CondBr {
+ taken,
+ not_taken,
+ cond,
+ } => {
+ // Conditional part first.
+ let cond_off = sink.cur_offset();
+ if let Some(l) = taken.as_label() {
+ let label_use = LabelUse::Branch20;
+ sink.use_label_at_offset(cond_off, l, label_use);
+ let inverted = enc_32_cond_branch(cond.invert(), taken);
+ let inverted = u32_swap_halfwords(inverted).to_le_bytes();
+ sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]);
+ }
+ emit_32(enc_32_cond_branch(cond, taken), sink);
+
+ // Unconditional part.
+ let uncond_off = sink.cur_offset();
+ if let Some(l) = not_taken.as_label() {
+ sink.use_label_at_offset(uncond_off, l, LabelUse::Branch24);
+ sink.add_uncond_branch(uncond_off, uncond_off + 4, l);
+ }
+ emit_32(enc_32_jump(not_taken), sink);
+ }
+ &Inst::IndirectBr { rm, .. } => {
+ let inst = 0b010001110_0000_000 | (machreg_to_gpr(rm) << 3);
+ sink.put2(inst);
+ }
+ &Inst::Udf { trap_info } => {
+ let srcloc = state.cur_srcloc();
+ let code = trap_info;
+ sink.add_trap(srcloc, code);
+ sink.put2(0b11011110_00000000);
+ }
+ &Inst::Bkpt => {
+ sink.put2(0b10111110_00000000);
+ }
+ &Inst::TrapIf { cond, trap_info } => {
+ let cond = cond.invert();
+ let dest = BranchTarget::ResolvedOffset(2);
+ emit_32(enc_32_cond_branch(cond, dest), sink);
+
+ let trap = Inst::Udf { trap_info };
+ trap.emit(sink, emit_info, state);
+ }
+ &Inst::VirtualSPOffsetAdj { offset } => {
+ debug!(
+ "virtual sp offset adjusted by {} -> {}",
+ offset,
+ state.virtual_sp_offset + offset,
+ );
+ state.virtual_sp_offset += offset;
+ }
+ }
+
+ let end_off = sink.cur_offset();
+ debug_assert!((end_off - start_off) <= Inst::worst_case_size());
+ }
+
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ self.print_with_state(mb_rru, state)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit_tests.rs
new file mode 100644
index 0000000000..73269be999
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/emit_tests.rs
@@ -0,0 +1,1959 @@
+use crate::isa::arm32::inst::*;
+use crate::isa::test_utils;
+use crate::settings;
+
+use alloc::vec::Vec;
+
+#[test]
+fn test_arm32_emit() {
+ let flags = settings::Flags::new(settings::builder());
+ let mut insns = Vec::<(Inst, &str, &str)>::new();
+
+ // Expected encodings are given in little-endian byte order.
+ insns.push((Inst::Nop0, "", "nop-zero-len"));
+ insns.push((Inst::Nop2, "00BF", "nop"));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "01FA02F0",
+ "lsl r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsl,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "09FA0AF8",
+ "lsl r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "21FA02F0",
+ "lsr r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Lsr,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "29FA0AF8",
+ "lsr r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "41FA02F0",
+ "asr r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Asr,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "49FA0AF8",
+ "asr r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Ror,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "61FA02F0",
+ "ror r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Ror,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "69FA0AF8",
+ "ror r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Qadd,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "81FA82F0",
+ "qadd r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Qadd,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "89FA8AF8",
+ "qadd r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Qsub,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "81FAA2F0",
+ "qsub r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Qsub,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "89FAAAF8",
+ "qsub r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Mul,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "01FB02F0",
+ "mul r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Mul,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "09FB0AF8",
+ "mul r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Udiv,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "B1FBF2F0",
+ "udiv r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Udiv,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "B9FBFAF8",
+ "udiv r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sdiv,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ },
+ "91FBF2F0",
+ "sdiv r0, r1, r2",
+ ));
+ insns.push((
+ Inst::AluRRR {
+ alu_op: ALUOp::Sdiv,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ },
+ "99FBFAF8",
+ "sdiv r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "01EAC250",
+ "and r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::And,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "09EA0A08",
+ "and r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Bic,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "21EAC250",
+ "bic r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Bic,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "29EA0A08",
+ "bic r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "41EAC250",
+ "orr r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orr,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "49EA0A08",
+ "orr r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orn,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "61EAC250",
+ "orn r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Orn,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "69EA0A08",
+ "orn r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "81EAC250",
+ "eor r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Eor,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "89EA0A08",
+ "eor r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "01EBC250",
+ "add r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "09EB0A08",
+ "add r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Adds,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "11EBC250",
+ "adds r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Adds,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "19EB0A08",
+ "adds r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Adc,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "41EBC250",
+ "adc r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Adc,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "49EB0A08",
+ "adc r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Adcs,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "51EBC250",
+ "adcs r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Adcs,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "59EB0A08",
+ "adcs r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sbc,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "61EBC250",
+ "sbc r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sbc,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "69EB0A08",
+ "sbc r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sbcs,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "71EBC250",
+ "sbcs r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sbcs,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "79EB0A08",
+ "sbcs r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "A1EBC250",
+ "sub r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Sub,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "A9EB0A08",
+ "sub r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Subs,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "B1EBC250",
+ "subs r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Subs,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "B9EB0A08",
+ "subs r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Rsb,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ rm: rreg(2),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(23).unwrap(),
+ )),
+ },
+ "C1EBC250",
+ "rsb r0, r1, r2, lsl #23",
+ ));
+ insns.push((
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Rsb,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ rm: rreg(10),
+ shift: None,
+ },
+ "C9EB0A08",
+ "rsb r8, r9, r10",
+ ));
+ insns.push((
+ Inst::AluRRShift {
+ alu_op: ALUOp1::Mvn,
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(11).unwrap(),
+ )),
+ },
+ "6FEAC120",
+ "mvn r0, r1, lsl #11",
+ ));
+ insns.push((
+ Inst::AluRRShift {
+ alu_op: ALUOp1::Mvn,
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ shift: None,
+ },
+ "6FEA0908",
+ "mvn r8, r9",
+ ));
+ insns.push((
+ Inst::AluRRShift {
+ alu_op: ALUOp1::Mov,
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(11).unwrap(),
+ )),
+ },
+ "4FEAC120",
+ "mov r0, r1, lsl #11",
+ ));
+ insns.push((
+ Inst::AluRRShift {
+ alu_op: ALUOp1::Mov,
+ rd: writable_rreg(2),
+ rm: rreg(8),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::LSR,
+ ShiftOpShiftImm::maybe_from_shift(27).unwrap(),
+ )),
+ },
+ "4FEAD862",
+ "mov r2, r8, lsr #27",
+ ));
+ insns.push((
+ Inst::AluRRShift {
+ alu_op: ALUOp1::Mov,
+ rd: writable_rreg(9),
+ rm: rreg(3),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::ASR,
+ ShiftOpShiftImm::maybe_from_shift(3).unwrap(),
+ )),
+ },
+ "4FEAE309",
+ "mov r9, r3, asr #3",
+ ));
+ insns.push((
+ Inst::AluRRShift {
+ alu_op: ALUOp1::Mov,
+ rd: writable_rreg(10),
+ rm: rreg(11),
+ shift: Some(ShiftOpAndAmt::new(
+ ShiftOp::ROR,
+ ShiftOpShiftImm::maybe_from_shift(7).unwrap(),
+ )),
+ },
+ "4FEAFB1A",
+ "mov r10, fp, ror #7",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp::Smull,
+ rd_lo: writable_rreg(0),
+ rd_hi: writable_rreg(1),
+ rn: rreg(2),
+ rm: rreg(3),
+ },
+ "82FB0301",
+ "smull r0, r1, r2, r3",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp::Smull,
+ rd_lo: writable_rreg(8),
+ rd_hi: writable_rreg(9),
+ rn: rreg(10),
+ rm: rreg(11),
+ },
+ "8AFB0B89",
+ "smull r8, r9, r10, fp",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp::Umull,
+ rd_lo: writable_rreg(0),
+ rd_hi: writable_rreg(1),
+ rn: rreg(2),
+ rm: rreg(3),
+ },
+ "A2FB0301",
+ "umull r0, r1, r2, r3",
+ ));
+ insns.push((
+ Inst::AluRRRR {
+ alu_op: ALUOp::Umull,
+ rd_lo: writable_rreg(8),
+ rd_hi: writable_rreg(9),
+ rn: rreg(10),
+ rm: rreg(11),
+ },
+ "AAFB0B89",
+ "umull r8, r9, r10, fp",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm12: UImm12::maybe_from_i64(4095).unwrap(),
+ },
+ "01F6FF70",
+ "add r0, r1, #4095",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Add,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm12: UImm12::maybe_from_i64(0).unwrap(),
+ },
+ "09F20008",
+ "add r8, r9, #0",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm12: UImm12::maybe_from_i64(1999).unwrap(),
+ },
+ "A1F2CF70",
+ "sub r0, r1, #1999",
+ ));
+ insns.push((
+ Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm12: UImm12::maybe_from_i64(101).unwrap(),
+ },
+ "A9F26508",
+ "sub r8, r9, #101",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::And,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "01F0FF00",
+ "and r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::And,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "09F00108",
+ "and r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Bic,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "21F0FF00",
+ "bic r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Bic,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "29F00108",
+ "bic r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Orr,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "41F0FF00",
+ "orr r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Orr,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "49F00108",
+ "orr r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Orn,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "61F0FF00",
+ "orn r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Orn,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "69F00108",
+ "orn r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Eor,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "81F0FF00",
+ "eor r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Eor,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "89F00108",
+ "eor r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Add,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "01F1FF00",
+ "add r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Add,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "09F10108",
+ "add r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Adds,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "11F1FF00",
+ "adds r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Adds,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "19F10108",
+ "adds r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Adc,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "41F1FF00",
+ "adc r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Adc,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "49F10108",
+ "adc r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Adcs,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "51F1FF00",
+ "adcs r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Adcs,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "59F10108",
+ "adcs r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Sbc,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "61F1FF00",
+ "sbc r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Sbc,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "69F10108",
+ "sbc r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Sbcs,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "71F1FF00",
+ "sbcs r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Sbcs,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "79F10108",
+ "sbcs r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Sub,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "A1F1FF00",
+ "sub r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Sub,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "A9F10108",
+ "sub r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Subs,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "B1F1FF00",
+ "subs r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Subs,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "B9F10108",
+ "subs r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Rsb,
+ rd: writable_rreg(0),
+ rn: rreg(1),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "C1F1FF00",
+ "rsb r0, r1, #255",
+ ));
+ insns.push((
+ Inst::AluRRImm8 {
+ alu_op: ALUOp::Rsb,
+ rd: writable_rreg(8),
+ rn: rreg(9),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "C9F10108",
+ "rsb r8, r9, #1",
+ ));
+ insns.push((
+ Inst::AluRImm8 {
+ alu_op: ALUOp1::Mvn,
+ rd: writable_rreg(0),
+ imm8: UImm8::maybe_from_i64(255).unwrap(),
+ },
+ "6FF0FF00",
+ "mvn r0, #255",
+ ));
+ insns.push((
+ Inst::AluRImm8 {
+ alu_op: ALUOp1::Mvn,
+ rd: writable_rreg(8),
+ imm8: UImm8::maybe_from_i64(1).unwrap(),
+ },
+ "6FF00108",
+ "mvn r8, #1",
+ ));
+ insns.push((
+ Inst::AluRImm8 {
+ alu_op: ALUOp1::Mov,
+ rd: writable_rreg(0),
+ imm8: UImm8::maybe_from_i64(0).unwrap(),
+ },
+ "4FF00000",
+ "mov r0, #0",
+ ));
+ insns.push((
+ Inst::AluRImm8 {
+ alu_op: ALUOp1::Mov,
+ rd: writable_rreg(8),
+ imm8: UImm8::maybe_from_i64(176).unwrap(),
+ },
+ "4FF0B008",
+ "mov r8, #176",
+ ));
+ insns.push((
+ Inst::BitOpRR {
+ bit_op: BitOp::Rbit,
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ },
+ "91FAA1F0",
+ "rbit r0, r1",
+ ));
+ insns.push((
+ Inst::BitOpRR {
+ bit_op: BitOp::Rbit,
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ },
+ "99FAA9F8",
+ "rbit r8, r9",
+ ));
+ insns.push((
+ Inst::BitOpRR {
+ bit_op: BitOp::Rev,
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ },
+ "91FA81F0",
+ "rev r0, r1",
+ ));
+ insns.push((
+ Inst::BitOpRR {
+ bit_op: BitOp::Rev,
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ },
+ "99FA89F8",
+ "rev r8, r9",
+ ));
+ insns.push((
+ Inst::BitOpRR {
+ bit_op: BitOp::Clz,
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ },
+ "B1FA81F0",
+ "clz r0, r1",
+ ));
+ insns.push((
+ Inst::BitOpRR {
+ bit_op: BitOp::Clz,
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ },
+ "B9FA89F8",
+ "clz r8, r9",
+ ));
+ insns.push((
+ Inst::Mov {
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ },
+ "0846",
+ "mov r0, r1",
+ ));
+ insns.push((
+ Inst::Mov {
+ rd: writable_rreg(2),
+ rm: rreg(8),
+ },
+ "4246",
+ "mov r2, r8",
+ ));
+ insns.push((
+ Inst::Mov {
+ rd: writable_rreg(9),
+ rm: rreg(3),
+ },
+ "9946",
+ "mov r9, r3",
+ ));
+ insns.push((
+ Inst::Mov {
+ rd: writable_rreg(10),
+ rm: rreg(11),
+ },
+ "DA46",
+ "mov r10, fp",
+ ));
+ insns.push((
+ Inst::MovImm16 {
+ rd: writable_rreg(0),
+ imm16: 0,
+ },
+ "40F20000",
+ "mov r0, #0",
+ ));
+ insns.push((
+ Inst::MovImm16 {
+ rd: writable_rreg(1),
+ imm16: 15,
+ },
+ "40F20F01",
+ "mov r1, #15",
+ ));
+ insns.push((
+ Inst::MovImm16 {
+ rd: writable_rreg(2),
+ imm16: 255,
+ },
+ "40F2FF02",
+ "mov r2, #255",
+ ));
+ insns.push((
+ Inst::MovImm16 {
+ rd: writable_rreg(8),
+ imm16: 4095,
+ },
+ "40F6FF78",
+ "mov r8, #4095",
+ ));
+ insns.push((
+ Inst::MovImm16 {
+ rd: writable_rreg(9),
+ imm16: 65535,
+ },
+ "4FF6FF79",
+ "mov r9, #65535",
+ ));
+ insns.push((
+ Inst::Movt {
+ rd: writable_rreg(0),
+ imm16: 0,
+ },
+ "C0F20000",
+ "movt r0, #0",
+ ));
+ insns.push((
+ Inst::Movt {
+ rd: writable_rreg(1),
+ imm16: 15,
+ },
+ "C0F20F01",
+ "movt r1, #15",
+ ));
+ insns.push((
+ Inst::Movt {
+ rd: writable_rreg(2),
+ imm16: 255,
+ },
+ "C0F2FF02",
+ "movt r2, #255",
+ ));
+ insns.push((
+ Inst::Movt {
+ rd: writable_rreg(8),
+ imm16: 4095,
+ },
+ "C0F6FF78",
+ "movt r8, #4095",
+ ));
+ insns.push((
+ Inst::Movt {
+ rd: writable_rreg(9),
+ imm16: 65535,
+ },
+ "CFF6FF79",
+ "movt r9, #65535",
+ ));
+ insns.push((
+ Inst::Cmp {
+ rn: rreg(0),
+ rm: rreg(1),
+ },
+ "8842",
+ "cmp r0, r1",
+ ));
+ insns.push((
+ Inst::Cmp {
+ rn: rreg(2),
+ rm: rreg(8),
+ },
+ "4245",
+ "cmp r2, r8",
+ ));
+ insns.push((
+ Inst::Cmp {
+ rn: rreg(9),
+ rm: rreg(3),
+ },
+ "9945",
+ "cmp r9, r3",
+ ));
+ insns.push((
+ Inst::Cmp {
+ rn: rreg(10),
+ rm: rreg(11),
+ },
+ "DA45",
+ "cmp r10, fp",
+ ));
+ insns.push((
+ Inst::CmpImm8 {
+ rn: rreg(0),
+ imm8: 255,
+ },
+ "B0F1FF0F",
+ "cmp r0, #255",
+ ));
+ insns.push((
+ Inst::CmpImm8 {
+ rn: rreg(1),
+ imm8: 0,
+ },
+ "B1F1000F",
+ "cmp r1, #0",
+ ));
+ insns.push((
+ Inst::CmpImm8 {
+ rn: rreg(8),
+ imm8: 1,
+ },
+ "B8F1010F",
+ "cmp r8, #1",
+ ));
+
+ insns.push((
+ Inst::Store {
+ rt: rreg(0),
+ mem: AMode::reg_plus_reg(rreg(1), rreg(2), 0),
+ bits: 32,
+ },
+ "41F80200",
+ "str r0, [r1, r2]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(8),
+ mem: AMode::reg_plus_reg(rreg(9), rreg(10), 3),
+ bits: 32,
+ },
+ "49F83A80",
+ "str r8, [r9, r10, lsl #3]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(0),
+ mem: AMode::RegOffset(rreg(1), 4095),
+ bits: 32,
+ },
+ "C1F8FF0F",
+ "str r0, [r1, #4095]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(8),
+ mem: AMode::RegOffset(rreg(9), 0),
+ bits: 32,
+ },
+ "C9F80080",
+ "str r8, [r9, #0]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(7),
+ mem: AMode::RegOffset(rreg(11), 65535),
+ bits: 32,
+ },
+ "4FF6FF7C4BF80C70",
+ "mov ip, #65535 ; str r7, [fp, ip]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(10),
+ mem: AMode::RegOffset(rreg(4), 16777215),
+ bits: 32,
+ },
+ "4FF6FF7CC0F2FF0C44F80CA0",
+ "mov ip, #65535 ; movt ip, #255 ; str r10, [r4, ip]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(0),
+ mem: AMode::reg_plus_reg(rreg(1), rreg(2), 0),
+ bits: 16,
+ },
+ "21F80200",
+ "strh r0, [r1, r2]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(8),
+ mem: AMode::reg_plus_reg(rreg(9), rreg(10), 2),
+ bits: 16,
+ },
+ "29F82A80",
+ "strh r8, [r9, r10, lsl #2]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(0),
+ mem: AMode::RegOffset(rreg(1), 3210),
+ bits: 16,
+ },
+ "A1F88A0C",
+ "strh r0, [r1, #3210]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(8),
+ mem: AMode::RegOffset(rreg(9), 1),
+ bits: 16,
+ },
+ "A9F80180",
+ "strh r8, [r9, #1]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(7),
+ mem: AMode::RegOffset(rreg(11), 65535),
+ bits: 16,
+ },
+ "4FF6FF7C2BF80C70",
+ "mov ip, #65535 ; strh r7, [fp, ip]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(10),
+ mem: AMode::RegOffset(rreg(4), 16777215),
+ bits: 16,
+ },
+ "4FF6FF7CC0F2FF0C24F80CA0",
+ "mov ip, #65535 ; movt ip, #255 ; strh r10, [r4, ip]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(0),
+ mem: AMode::reg_plus_reg(rreg(1), rreg(2), 0),
+ bits: 8,
+ },
+ "01F80200",
+ "strb r0, [r1, r2]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(8),
+ mem: AMode::reg_plus_reg(rreg(9), rreg(10), 1),
+ bits: 8,
+ },
+ "09F81A80",
+ "strb r8, [r9, r10, lsl #1]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(0),
+ mem: AMode::RegOffset(rreg(1), 4),
+ bits: 8,
+ },
+ "81F80400",
+ "strb r0, [r1, #4]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(8),
+ mem: AMode::RegOffset(rreg(9), 777),
+ bits: 8,
+ },
+ "89F80983",
+ "strb r8, [r9, #777]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(7),
+ mem: AMode::RegOffset(rreg(11), 65535),
+ bits: 8,
+ },
+ "4FF6FF7C0BF80C70",
+ "mov ip, #65535 ; strb r7, [fp, ip]",
+ ));
+ insns.push((
+ Inst::Store {
+ rt: rreg(10),
+ mem: AMode::RegOffset(rreg(4), 16777215),
+ bits: 8,
+ },
+ "4FF6FF7CC0F2FF0C04F80CA0",
+ "mov ip, #65535 ; movt ip, #255 ; strb r10, [r4, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::reg_plus_reg(rreg(1), rreg(2), 0),
+ bits: 32,
+ sign_extend: false,
+ },
+ "51F80200",
+ "ldr r0, [r1, r2]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::reg_plus_reg(rreg(9), rreg(10), 1),
+ bits: 32,
+ sign_extend: false,
+ },
+ "59F81A80",
+ "ldr r8, [r9, r10, lsl #1]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::RegOffset(rreg(1), 55),
+ bits: 32,
+ sign_extend: false,
+ },
+ "D1F83700",
+ "ldr r0, [r1, #55]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::RegOffset(rreg(9), 1234),
+ bits: 32,
+ sign_extend: false,
+ },
+ "D9F8D284",
+ "ldr r8, [r9, #1234]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(7),
+ mem: AMode::RegOffset(rreg(11), 9876),
+ bits: 32,
+ sign_extend: false,
+ },
+ "42F2946C5BF80C70",
+ "mov ip, #9876 ; ldr r7, [fp, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(10),
+ mem: AMode::RegOffset(rreg(4), 252645135),
+ bits: 32,
+ sign_extend: false,
+ },
+ "40F60F7CC0F60F7C54F80CA0",
+ "mov ip, #3855 ; movt ip, #3855 ; ldr r10, [r4, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::PCRel(-56),
+ bits: 32,
+ sign_extend: false,
+ },
+ "5FF83800",
+ "ldr r0, [pc, #-56]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::PCRel(1024),
+ bits: 32,
+ sign_extend: false,
+ },
+ "DFF80084",
+ "ldr r8, [pc, #1024]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::reg_plus_reg(rreg(1), rreg(2), 0),
+ bits: 16,
+ sign_extend: true,
+ },
+ "31F90200",
+ "ldrsh r0, [r1, r2]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::reg_plus_reg(rreg(9), rreg(10), 2),
+ bits: 16,
+ sign_extend: false,
+ },
+ "39F82A80",
+ "ldrh r8, [r9, r10, lsl #2]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::RegOffset(rreg(1), 55),
+ bits: 16,
+ sign_extend: false,
+ },
+ "B1F83700",
+ "ldrh r0, [r1, #55]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::RegOffset(rreg(9), 1234),
+ bits: 16,
+ sign_extend: true,
+ },
+ "B9F9D284",
+ "ldrsh r8, [r9, #1234]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(7),
+ mem: AMode::RegOffset(rreg(11), 9876),
+ bits: 16,
+ sign_extend: true,
+ },
+ "42F2946C3BF90C70",
+ "mov ip, #9876 ; ldrsh r7, [fp, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(10),
+ mem: AMode::RegOffset(rreg(4), 252645135),
+ bits: 16,
+ sign_extend: false,
+ },
+ "40F60F7CC0F60F7C34F80CA0",
+ "mov ip, #3855 ; movt ip, #3855 ; ldrh r10, [r4, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::PCRel(56),
+ bits: 16,
+ sign_extend: false,
+ },
+ "BFF83800",
+ "ldrh r0, [pc, #56]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::PCRel(-1000),
+ bits: 16,
+ sign_extend: true,
+ },
+ "3FF9E883",
+ "ldrsh r8, [pc, #-1000]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::reg_plus_reg(rreg(1), rreg(2), 0),
+ bits: 8,
+ sign_extend: true,
+ },
+ "11F90200",
+ "ldrsb r0, [r1, r2]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::reg_plus_reg(rreg(9), rreg(10), 3),
+ bits: 8,
+ sign_extend: false,
+ },
+ "19F83A80",
+ "ldrb r8, [r9, r10, lsl #3]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::RegOffset(rreg(1), 55),
+ bits: 8,
+ sign_extend: false,
+ },
+ "91F83700",
+ "ldrb r0, [r1, #55]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::RegOffset(rreg(9), 1234),
+ bits: 8,
+ sign_extend: true,
+ },
+ "99F9D284",
+ "ldrsb r8, [r9, #1234]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(7),
+ mem: AMode::RegOffset(rreg(11), 9876),
+ bits: 8,
+ sign_extend: true,
+ },
+ "42F2946C1BF90C70",
+ "mov ip, #9876 ; ldrsb r7, [fp, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(10),
+ mem: AMode::RegOffset(rreg(4), 252645135),
+ bits: 8,
+ sign_extend: false,
+ },
+ "40F60F7CC0F60F7C14F80CA0",
+ "mov ip, #3855 ; movt ip, #3855 ; ldrb r10, [r4, ip]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(0),
+ mem: AMode::PCRel(72),
+ bits: 8,
+ sign_extend: false,
+ },
+ "9FF84800",
+ "ldrb r0, [pc, #72]",
+ ));
+ insns.push((
+ Inst::Load {
+ rt: writable_rreg(8),
+ mem: AMode::PCRel(-1234),
+ bits: 8,
+ sign_extend: true,
+ },
+ "1FF9D284",
+ "ldrsb r8, [pc, #-1234]",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ from_bits: 16,
+ signed: false,
+ },
+ "88B2",
+ "uxth r0, r1",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ from_bits: 16,
+ signed: false,
+ },
+ "1FFA89F8",
+ "uxth r8, r9",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ from_bits: 8,
+ signed: false,
+ },
+ "C8B2",
+ "uxtb r0, r1",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ from_bits: 8,
+ signed: false,
+ },
+ "5FFA89F8",
+ "uxtb r8, r9",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ from_bits: 16,
+ signed: true,
+ },
+ "08B2",
+ "sxth r0, r1",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ from_bits: 16,
+ signed: true,
+ },
+ "0FFA89F8",
+ "sxth r8, r9",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(0),
+ rm: rreg(1),
+ from_bits: 8,
+ signed: true,
+ },
+ "48B2",
+ "sxtb r0, r1",
+ ));
+ insns.push((
+ Inst::Extend {
+ rd: writable_rreg(8),
+ rm: rreg(9),
+ from_bits: 8,
+ signed: true,
+ },
+ "4FFA89F8",
+ "sxtb r8, r9",
+ ));
+ insns.push((
+ Inst::It {
+ cond: Cond::Eq,
+ insts: vec![CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), true)],
+ },
+ "08BF0046",
+ "it eq ; mov r0, r0",
+ ));
+ insns.push((
+ Inst::It {
+ cond: Cond::Ne,
+ insts: vec![
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), true),
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), false),
+ ],
+ },
+ "14BF00460046",
+ "ite ne ; mov r0, r0 ; mov r0, r0",
+ ));
+ insns.push((
+ Inst::It {
+ cond: Cond::Lt,
+ insts: vec![
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), true),
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), false),
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), true),
+ ],
+ },
+ "B6BF004600460046",
+ "itet lt ; mov r0, r0 ; mov r0, r0 ; mov r0, r0",
+ ));
+ insns.push((
+ Inst::It {
+ cond: Cond::Hs,
+ insts: vec![
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), true),
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), true),
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), false),
+ CondInst::new(Inst::mov(writable_rreg(0), rreg(0)), false),
+ ],
+ },
+ "27BF0046004600460046",
+ "ittee hs ; mov r0, r0 ; mov r0, r0 ; mov r0, r0 ; mov r0, r0",
+ ));
+ insns.push((
+ Inst::Push {
+ reg_list: vec![rreg(0)],
+ },
+ "4DF8040D",
+ "push {r0}",
+ ));
+ insns.push((
+ Inst::Push {
+ reg_list: vec![rreg(8)],
+ },
+ "4DF8048D",
+ "push {r8}",
+ ));
+ insns.push((
+ Inst::Push {
+ reg_list: vec![rreg(0), rreg(1), rreg(2), rreg(6), rreg(8)],
+ },
+ "2DE94701",
+ "push {r0, r1, r2, r6, r8}",
+ ));
+ insns.push((
+ Inst::Push {
+ reg_list: vec![rreg(8), rreg(9), rreg(10)],
+ },
+ "2DE90007",
+ "push {r8, r9, r10}",
+ ));
+ insns.push((
+ Inst::Pop {
+ reg_list: vec![writable_rreg(0)],
+ },
+ "5DF8040B",
+ "pop {r0}",
+ ));
+ insns.push((
+ Inst::Pop {
+ reg_list: vec![writable_rreg(8)],
+ },
+ "5DF8048B",
+ "pop {r8}",
+ ));
+ insns.push((
+ Inst::Pop {
+ reg_list: vec![
+ writable_rreg(0),
+ writable_rreg(1),
+ writable_rreg(2),
+ writable_rreg(6),
+ writable_rreg(8),
+ ],
+ },
+ "BDE84701",
+ "pop {r0, r1, r2, r6, r8}",
+ ));
+ insns.push((
+ Inst::Pop {
+ reg_list: vec![writable_rreg(8), writable_rreg(9), writable_rreg(10)],
+ },
+ "BDE80007",
+ "pop {r8, r9, r10}",
+ ));
+ insns.push((
+ Inst::Call {
+ info: Box::new(CallInfo {
+ dest: ExternalName::testcase("test0"),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ loc: SourceLoc::default(),
+ opcode: Opcode::Call,
+ }),
+ },
+ "00F000D0",
+ "bl 0",
+ ));
+ insns.push((
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rm: rreg(0),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ loc: SourceLoc::default(),
+ opcode: Opcode::CallIndirect,
+ }),
+ },
+ "8047",
+ "blx r0",
+ ));
+ insns.push((
+ Inst::CallInd {
+ info: Box::new(CallIndInfo {
+ rm: rreg(8),
+ uses: Vec::new(),
+ defs: Vec::new(),
+ loc: SourceLoc::default(),
+ opcode: Opcode::CallIndirect,
+ }),
+ },
+ "C047",
+ "blx r8",
+ ));
+ insns.push((Inst::Ret, "7047", "bx lr"));
+ insns.push((
+ Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(32),
+ },
+ "00F010B8",
+ "b 32",
+ ));
+ insns.push((
+ Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(0xfffff4),
+ },
+ "FFF3FA97",
+ "b 16777204",
+ ));
+ insns.push((
+ Inst::CondBr {
+ taken: BranchTarget::ResolvedOffset(20),
+ not_taken: BranchTarget::ResolvedOffset(68),
+ cond: Cond::Eq,
+ },
+ "00F00A8000F022B8",
+ "beq 20 ; b 68",
+ ));
+ insns.push((
+ Inst::CondBr {
+ taken: BranchTarget::ResolvedOffset(6),
+ not_taken: BranchTarget::ResolvedOffset(100),
+ cond: Cond::Gt,
+ },
+ "00F3038000F032B8",
+ "bgt 6 ; b 100",
+ ));
+ insns.push((
+ Inst::IndirectBr {
+ rm: rreg(0),
+ targets: vec![],
+ },
+ "0047",
+ "bx r0",
+ ));
+ insns.push((
+ Inst::IndirectBr {
+ rm: rreg(8),
+ targets: vec![],
+ },
+ "4047",
+ "bx r8",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ cond: Cond::Eq,
+ trap_info: TrapCode::Interrupt,
+ },
+ "40F0018000DE",
+ "bne 2 ; udf #0",
+ ));
+ insns.push((
+ Inst::TrapIf {
+ cond: Cond::Hs,
+ trap_info: TrapCode::Interrupt,
+ },
+ "C0F0018000DE",
+ "blo 2 ; udf #0",
+ ));
+ insns.push((
+ Inst::Udf {
+ trap_info: TrapCode::Interrupt,
+ },
+ "00DE",
+ "udf #0",
+ ));
+ insns.push((Inst::Bkpt, "00BE", "bkpt #0"));
+
+ // ========================================================
+ // Run the tests
+ let rru = regs::create_reg_universe();
+ for (insn, expected_encoding, expected_printing) in insns {
+ // Check the printed text is as expected.
+ let actual_printing = insn.show_rru(Some(&rru));
+ assert_eq!(expected_printing, actual_printing);
+ let mut sink = test_utils::TestCodeSink::new();
+ let mut buffer = MachBuffer::new();
+ insn.emit(&mut buffer, &flags, &mut Default::default());
+ let buffer = buffer.finish();
+ buffer.emit(&mut sink);
+ let actual_encoding = &sink.stringify();
+ assert_eq!(expected_encoding, actual_encoding, "{}", expected_printing);
+ }
+}
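
The expected-encoding strings in the tests above are the emitted bytes printed as uppercase hex; the machine code itself is a sequence of little-endian 16-bit Thumb-2 halfwords. The standalone sketch below (illustrative only, not part of the vendored diff; the helper name is ours) reproduces that formatting for two entries from the table above.

fn hex_of_halfwords(halfwords: &[u16]) -> String {
    halfwords
        .iter()
        .flat_map(|hw| hw.to_le_bytes())
        .map(|b| format!("{:02X}", b))
        .collect()
}

fn main() {
    // "cmp r8, #1" above is the halfword pair 0xF1B8, 0x0F01, printed as "B8F1010F".
    assert_eq!(hex_of_halfwords(&[0xF1B8, 0x0F01]), "B8F1010F");
    // "bx lr" is the single halfword 0x4770, printed as "7047".
    assert_eq!(hex_of_halfwords(&[0x4770]), "7047");
}
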
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/mod.rs
new file mode 100644
index 0000000000..fff01b7d82
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/mod.rs
@@ -0,0 +1,1358 @@
+//! This module defines 32-bit ARM-specific machine instruction types.
+
+#![allow(dead_code)]
+
+use crate::binemit::CodeOffset;
+use crate::ir::types::{B1, B16, B32, B8, I16, I32, I8, IFLAGS};
+use crate::ir::{ExternalName, Opcode, TrapCode, Type};
+use crate::machinst::*;
+use crate::{settings, CodegenError, CodegenResult};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
+use regalloc::{RegUsageCollector, RegUsageMapper};
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use smallvec::{smallvec, SmallVec};
+use std::string::{String, ToString};
+
+mod args;
+pub use self::args::*;
+mod emit;
+pub use self::emit::*;
+mod regs;
+pub use self::regs::*;
+pub mod unwind;
+
+#[cfg(test)]
+mod emit_tests;
+
+//=============================================================================
+// Instructions (top level): definition
+
+/// An ALU operation. This can be paired with several instruction formats
+/// below (see `Inst`) in any combination.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp {
+ Add,
+ Adds,
+ Adc,
+ Adcs,
+ Qadd,
+ Sub,
+ Subs,
+ Sbc,
+ Sbcs,
+ Rsb,
+ Qsub,
+ Mul,
+ Smull,
+ Umull,
+ Udiv,
+ Sdiv,
+ And,
+ Orr,
+ Orn,
+ Eor,
+ Bic,
+ Lsl,
+ Lsr,
+ Asr,
+ Ror,
+}
+
+/// An ALU operation with one argument.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum ALUOp1 {
+ Mvn,
+ Mov,
+}
+
+/// An operation on the bits of a register.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum BitOp {
+ Rbit,
+ Rev,
+ Clz,
+}
+
+/// Additional information for (direct) Call instructions, kept out of line to reduce the size of
+/// the Inst enum.
+#[derive(Clone, Debug)]
+pub struct CallInfo {
+ pub dest: ExternalName,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+}
+
+/// Additional information for CallInd instructions, kept out of line to reduce the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct CallIndInfo {
+ pub rm: Reg,
+ pub uses: Vec<Reg>,
+ pub defs: Vec<Writable<Reg>>,
+ pub opcode: Opcode,
+}
+
+/// Instruction formats.
+#[derive(Clone, Debug)]
+pub enum Inst {
+ /// A no-op of zero size.
+ Nop0,
+
+ /// A no-op that is two bytes large.
+ Nop2,
+
+ /// An ALU operation with two register sources and one register destination.
+ AluRRR {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+
+    /// An ALU operation with two register sources, one of which can optionally be shifted,
+ /// and one register destination.
+ AluRRRShift {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ shift: Option<ShiftOpAndAmt>,
+ },
+
+    /// An ALU operation with one register source, which can optionally be shifted,
+ /// and one register destination.
+ AluRRShift {
+ alu_op: ALUOp1,
+ rd: Writable<Reg>,
+ rm: Reg,
+ shift: Option<ShiftOpAndAmt>,
+ },
+
+ /// An ALU operation with two register sources and two register destinations.
+ AluRRRR {
+ alu_op: ALUOp,
+ rd_hi: Writable<Reg>,
+ rd_lo: Writable<Reg>,
+ rn: Reg,
+ rm: Reg,
+ },
+
+ /// An ALU operation with a register source and a 12-bit immediate source,
+ /// and a register destination.
+ AluRRImm12 {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imm12: UImm12,
+ },
+
+    /// An ALU operation with a register source and an 8-bit immediate source,
+ /// and a register destination.
+ ///
+ /// In fact these instructions take a `modified immediate constant` operand,
+ /// which is encoded as a 12-bit immediate. The only case used here
+    /// is when the high 4 bits of that 12-bit immediate are zero;
+    /// the operand is then a simple 8-bit immediate.
+ /// For all possible operands see
+ /// https://static.docs.arm.com/ddi0406/c/DDI0406C_C_arm_architecture_reference_manual.pdf#G10.4954509
+ AluRRImm8 {
+ alu_op: ALUOp,
+ rd: Writable<Reg>,
+ rn: Reg,
+ imm8: UImm8,
+ },
+
+    /// An ALU operation with an 8-bit immediate and a register destination.
+ /// See `AluRRImm8` description above.
+ AluRImm8 {
+ alu_op: ALUOp1,
+ rd: Writable<Reg>,
+ imm8: UImm8,
+ },
+
+ /// A bit operation with a register source and a register destination.
+ BitOpRR {
+ bit_op: BitOp,
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A mov instruction with a GPR source and a GPR destination.
+ Mov {
+ rd: Writable<Reg>,
+ rm: Reg,
+ },
+
+ /// A move instruction with a 16-bit immediate source and a register destination.
+ MovImm16 {
+ rd: Writable<Reg>,
+ imm16: u16,
+ },
+
+    /// A move top instruction, which writes a 16-bit immediate to the top
+ /// halfword of the destination register.
+ Movt {
+ rd: Writable<Reg>,
+ imm16: u16,
+ },
+
+ /// A compare instruction with two register arguments.
+ Cmp {
+ rn: Reg,
+ rm: Reg,
+ },
+
+    /// A compare instruction with a register operand and an 8-bit immediate operand.
+ CmpImm8 {
+ rn: Reg,
+ imm8: u8,
+ },
+
+    /// A store instruction, which stores an 8-, 16- or 32-bit operand to memory.
+ Store {
+ rt: Reg,
+ mem: AMode,
+ bits: u8,
+ },
+
+    /// A load instruction, which loads an 8-, 16- or 32-bit operand from memory
+    /// and can sign- or zero-extend it.
+ Load {
+ rt: Writable<Reg>,
+ mem: AMode,
+ bits: u8,
+ sign_extend: bool,
+ },
+
+ /// Load address referenced by `mem` into `rd`.
+ LoadAddr {
+ rd: Writable<Reg>,
+ mem: AMode,
+ },
+
+ /// A sign- or zero-extend operation.
+ Extend {
+ rd: Writable<Reg>,
+ rm: Reg,
+ from_bits: u8,
+ signed: bool,
+ },
+
+    /// An If-Then instruction, which makes up to four instructions conditional.
+ It {
+ cond: Cond,
+ insts: Vec<CondInst>,
+ },
+
+    /// A push instruction, which stores registers to the stack and updates sp.
+ Push {
+ reg_list: Vec<Reg>,
+ },
+
+    /// A pop instruction, which loads registers from the stack and updates sp.
+ Pop {
+ reg_list: Vec<Writable<Reg>>,
+ },
+
+ /// A machine call instruction.
+ Call {
+ info: Box<CallInfo>,
+ },
+
+ /// A machine indirect-call instruction.
+ CallInd {
+ info: Box<CallIndInfo>,
+ },
+
+ /// Load an inline symbol reference.
+ LoadExtName {
+ rt: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i32,
+ },
+
+ /// A return instruction, which is encoded as `bx lr`.
+ Ret,
+
+ /// An unconditional branch.
+ Jump {
+ dest: BranchTarget,
+ },
+
+ /// A conditional branch.
+ CondBr {
+ taken: BranchTarget,
+ not_taken: BranchTarget,
+ cond: Cond,
+ },
+
+    /// An indirect branch through a register, augmented with the set of all
+ /// possible successors.
+ IndirectBr {
+ rm: Reg,
+ targets: Vec<MachLabel>,
+ },
+
+ /// A conditional trap: execute a `udf` if the condition is true. This is
+ /// one VCode instruction because it uses embedded control flow; it is
+ /// logically a single-in, single-out region, but needs to appear as one
+ /// unit to the register allocator.
+ TrapIf {
+ cond: Cond,
+ trap_info: TrapCode,
+ },
+
+ /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
+ /// runtime.
+ Udf {
+ trap_info: TrapCode,
+ },
+
+ /// A "breakpoint" instruction, used for e.g. traps and debug breakpoints.
+ Bkpt,
+
+ /// Marker, no-op in generated code: SP "virtual offset" is adjusted.
+ VirtualSPOffsetAdj {
+ offset: i64,
+ },
+
+ /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+ /// inserted there.
+ EpiloguePlaceholder,
+}
+
+/// An instruction inside an it block.
+#[derive(Clone, Debug)]
+pub struct CondInst {
+ inst: Inst,
+    // Whether the instruction is executed:
+    // true => when the `it` condition is met,
+    // false => otherwise.
+ then: bool,
+}
+
+impl CondInst {
+ pub fn new(inst: Inst, then: bool) -> Self {
+ match inst {
+ Inst::It { .. }
+ | Inst::Ret { .. }
+ | Inst::Jump { .. }
+ | Inst::CondBr { .. }
+ | Inst::TrapIf { .. }
+ | Inst::EpiloguePlaceholder { .. }
+ | Inst::LoadExtName { .. } => panic!("Instruction {:?} cannot occur in it block", inst),
+ _ => Self { inst, then },
+ }
+ }
+}
+
+impl Inst {
+ /// Create a move instruction.
+ pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ Inst::Mov {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ }
+
+ /// Create an instruction that loads a constant.
+ pub fn load_constant(rd: Writable<Reg>, value: u32) -> SmallVec<[Inst; 4]> {
+ let mut insts = smallvec![];
+ let imm_lo = (value & 0xffff) as u16;
+ let imm_hi = (value >> 16) as u16;
+
+ if imm_lo != 0 || imm_hi == 0 {
+            // When imm_lo == 0 && imm_hi == 0 we still have to overwrite the register with 0.
+ insts.push(Inst::MovImm16 { rd, imm16: imm_lo });
+ }
+ if imm_hi != 0 {
+ insts.push(Inst::Movt { rd, imm16: imm_hi });
+ }
+
+ insts
+ }
+
+ /// Generic constructor for a load (zero-extending where appropriate).
+ pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type) -> Inst {
+ assert!(ty.bits() <= 32);
+ // Load 8 bits for B1.
+ let bits = std::cmp::max(ty.bits(), 8) as u8;
+
+ Inst::Load {
+ rt: into_reg,
+ mem,
+ bits,
+ sign_extend: false,
+ }
+ }
+
+ /// Generic constructor for a store.
+ pub fn gen_store(from_reg: Reg, mem: AMode, ty: Type) -> Inst {
+ assert!(ty.bits() <= 32);
+ // Store 8 bits for B1.
+ let bits = std::cmp::max(ty.bits(), 8) as u8;
+
+ Inst::Store {
+ rt: from_reg,
+ mem,
+ bits,
+ }
+ }
+}
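
The `mov`/`movt` split performed by `Inst::load_constant` above is what produces emit-test expectations such as `mov ip, #65535 ; movt ip, #255` for the offset 16777215. The standalone sketch below mirrors that logic (illustrative only, not part of the vendored source; the function name is ours).

fn movw_movt_split(value: u32) -> (Option<u16>, Option<u16>) {
    let imm_lo = (value & 0xffff) as u16;
    let imm_hi = (value >> 16) as u16;
    // `mov` is skipped only when the high halfword alone fully defines the value.
    let mov = if imm_lo != 0 || imm_hi == 0 { Some(imm_lo) } else { None };
    let movt = if imm_hi != 0 { Some(imm_hi) } else { None };
    (mov, movt)
}

fn main() {
    // Matches the emit test above: 16777215 becomes "mov ip, #65535 ; movt ip, #255".
    assert_eq!(movw_movt_split(16_777_215), (Some(65535), Some(255)));
    // A constant of zero still needs a single `mov rd, #0`.
    assert_eq!(movw_movt_split(0), (Some(0), None));
    // 0x0001_0000 needs only the `movt`.
    assert_eq!(movw_movt_split(0x0001_0000), (None, Some(1)));
}
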
+
+//=============================================================================
+// Instructions: get_regs
+
+fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) {
+ match memarg {
+ &AMode::RegReg(rn, rm, ..) => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &AMode::RegOffset12(rn, ..) | &AMode::RegOffset(rn, _) => {
+ collector.add_use(rn);
+ }
+ &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {
+ collector.add_use(sp_reg());
+ }
+ &AMode::FPOffset(..) => {
+ collector.add_use(fp_reg());
+ }
+ &AMode::PCRel(_) => {}
+ }
+}
+
+fn arm32_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+ match inst {
+ &Inst::Nop0
+ | &Inst::Nop2
+ | &Inst::Ret
+ | &Inst::VirtualSPOffsetAdj { .. }
+ | &Inst::EpiloguePlaceholder
+ | &Inst::Jump { .. }
+ | &Inst::CondBr { .. }
+ | &Inst::Bkpt
+ | &Inst::Udf { .. }
+ | &Inst::TrapIf { .. } => {}
+ &Inst::AluRRR { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRShift { rd, rn, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRShift { rd, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRRR {
+ rd_hi,
+ rd_lo,
+ rn,
+ rm,
+ ..
+ } => {
+ collector.add_def(rd_hi);
+ collector.add_def(rd_lo);
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::AluRRImm12 { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRRImm8 { rd, rn, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rn);
+ }
+ &Inst::AluRImm8 { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::BitOpRR { rd, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::Mov { rd, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::MovImm16 { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::Movt { rd, .. } => {
+ collector.add_def(rd);
+ }
+ &Inst::Cmp { rn, rm } => {
+ collector.add_use(rn);
+ collector.add_use(rm);
+ }
+ &Inst::CmpImm8 { rn, .. } => {
+ collector.add_use(rn);
+ }
+ &Inst::Store { rt, ref mem, .. } => {
+ collector.add_use(rt);
+ memarg_regs(mem, collector);
+ }
+ &Inst::Load { rt, ref mem, .. } => {
+ collector.add_def(rt);
+ memarg_regs(mem, collector);
+ }
+ &Inst::LoadAddr { rd, mem: _ } => {
+ collector.add_def(rd);
+ }
+ &Inst::Extend { rd, rm, .. } => {
+ collector.add_def(rd);
+ collector.add_use(rm);
+ }
+ &Inst::It { ref insts, .. } => {
+ for inst in insts.iter() {
+ arm32_get_regs(&inst.inst, collector);
+ }
+ }
+ &Inst::Push { ref reg_list } => {
+ for reg in reg_list {
+ collector.add_use(*reg);
+ }
+ }
+ &Inst::Pop { ref reg_list } => {
+ for reg in reg_list {
+ collector.add_def(*reg);
+ }
+ }
+ &Inst::Call { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ }
+ &Inst::CallInd { ref info, .. } => {
+ collector.add_uses(&*info.uses);
+ collector.add_defs(&*info.defs);
+ collector.add_use(info.rm);
+ }
+ &Inst::LoadExtName { rt, .. } => {
+ collector.add_def(rt);
+ }
+ &Inst::IndirectBr { rm, .. } => {
+ collector.add_use(rm);
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: map_regs
+
+fn arm32_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
+ fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
+ if r.is_virtual() {
+ let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg();
+ *r = new;
+ }
+ }
+
+ fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if r.to_reg().is_virtual() {
+ let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+ }
+
+ fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) {
+ match mem {
+ &mut AMode::RegReg(ref mut rn, ref mut rm, ..) => {
+ map_use(m, rn);
+ map_use(m, rm);
+ }
+ &mut AMode::RegOffset12(ref mut rn, ..) | &mut AMode::RegOffset(ref mut rn, ..) => {
+ map_use(m, rn)
+ }
+ &mut AMode::SPOffset(..)
+ | &mut AMode::FPOffset(..)
+ | &mut AMode::NominalSPOffset(..)
+ | &mut AMode::PCRel(_) => {}
+ };
+ }
+
+ match inst {
+ &mut Inst::Nop0
+ | &mut Inst::Nop2
+ | &mut Inst::Ret
+ | &mut Inst::VirtualSPOffsetAdj { .. }
+ | &mut Inst::EpiloguePlaceholder
+ | &mut Inst::Jump { .. }
+ | &mut Inst::CondBr { .. }
+ | &mut Inst::Bkpt
+ | &mut Inst::Udf { .. }
+ | &mut Inst::TrapIf { .. } => {}
+ &mut Inst::AluRRR {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRShift {
+ ref mut rd,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRShift {
+ ref mut rd,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRRR {
+ ref mut rd_hi,
+ ref mut rd_lo,
+ ref mut rn,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd_hi);
+ map_def(mapper, rd_lo);
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::AluRRImm12 {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRRImm8 {
+ ref mut rd,
+ ref mut rn,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rn);
+ }
+ &mut Inst::AluRImm8 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::BitOpRR {
+ ref mut rd,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::Mov {
+ ref mut rd,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::MovImm16 { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Movt { ref mut rd, .. } => {
+ map_def(mapper, rd);
+ }
+ &mut Inst::Cmp {
+ ref mut rn,
+ ref mut rm,
+ } => {
+ map_use(mapper, rn);
+ map_use(mapper, rm);
+ }
+ &mut Inst::CmpImm8 { ref mut rn, .. } => {
+ map_use(mapper, rn);
+ }
+ &mut Inst::Store {
+ ref mut rt,
+ ref mut mem,
+ ..
+ } => {
+ map_use(mapper, rt);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Load {
+ ref mut rt,
+ ref mut mem,
+ ..
+ } => {
+ map_def(mapper, rt);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::LoadAddr {
+ ref mut rd,
+ ref mut mem,
+ } => {
+ map_def(mapper, rd);
+ map_mem(mapper, mem);
+ }
+ &mut Inst::Extend {
+ ref mut rd,
+ ref mut rm,
+ ..
+ } => {
+ map_def(mapper, rd);
+ map_use(mapper, rm);
+ }
+ &mut Inst::It { ref mut insts, .. } => {
+ for inst in insts.iter_mut() {
+ arm32_map_regs(&mut inst.inst, mapper);
+ }
+ }
+ &mut Inst::Push { ref mut reg_list } => {
+ for reg in reg_list {
+ map_use(mapper, reg);
+ }
+ }
+ &mut Inst::Pop { ref mut reg_list } => {
+ for reg in reg_list {
+ map_def(mapper, reg);
+ }
+ }
+ &mut Inst::Call { ref mut info } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ }
+ &mut Inst::CallInd { ref mut info, .. } => {
+ for r in info.uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in info.defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ map_use(mapper, &mut info.rm);
+ }
+ &mut Inst::LoadExtName { ref mut rt, .. } => {
+ map_def(mapper, rt);
+ }
+ &mut Inst::IndirectBr { ref mut rm, .. } => {
+ map_use(mapper, rm);
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: misc functions and external interface
+
+impl MachInst for Inst {
+ type LabelUse = LabelUse;
+
+ fn get_regs(&self, collector: &mut RegUsageCollector) {
+ arm32_get_regs(self, collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ arm32_map_regs(self, mapper);
+ }
+
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
+ match self {
+ &Inst::Mov { rd, rm } => Some((rd, rm)),
+ _ => None,
+ }
+ }
+
+ fn is_epilogue_placeholder(&self) -> bool {
+ if let Inst::EpiloguePlaceholder = self {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_term<'a>(&'a self) -> MachTerminator<'a> {
+ match self {
+ &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
+ &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()),
+ &Inst::CondBr {
+ taken, not_taken, ..
+ } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()),
+ &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]),
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, _ty: Type) -> Inst {
+ assert_eq!(from_reg.get_class(), RegClass::I32);
+ assert_eq!(to_reg.to_reg().get_class(), from_reg.get_class());
+
+ Inst::mov(to_reg, from_reg)
+ }
+
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ _alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ match ty {
+ B1 | I8 | B8 | I16 | B16 | I32 | B32 => {
+ let v: i64 = value as i64;
+
+ if v >= (1 << 32) || v < -(1 << 32) {
+ panic!("Cannot load constant value {}", value)
+ }
+ Inst::load_constant(to_reg, value as u32)
+ }
+ _ => unimplemented!(),
+ }
+ }
+
+ fn gen_zero_len_nop() -> Inst {
+ Inst::Nop0
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ assert!(preferred_size >= 2);
+ Inst::Nop2
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
+ match ty {
+ I8 | I16 | I32 | B1 | B8 | B16 | B32 => Ok(RegClass::I32),
+ IFLAGS => Ok(RegClass::I32),
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(target: MachLabel) -> Inst {
+ Inst::Jump {
+ dest: BranchTarget::Label(target),
+ }
+ }
+
+ fn reg_universe(_flags: &settings::Flags) -> RealRegUniverse {
+ create_reg_universe()
+ }
+
+ fn worst_case_size() -> CodeOffset {
+        // Worst case: an `it` instruction followed by four 32-bit instructions.
+ 2 + 4 * 4
+ }
+
+ fn ref_type_regclass(_: &settings::Flags) -> RegClass {
+ RegClass::I32
+ }
+}
+
+//=============================================================================
+// Pretty-printing of instructions.
+
+fn mem_finalize_for_show(
+ mem: &AMode,
+ mb_rru: Option<&RealRegUniverse>,
+ state: &EmitState,
+) -> (String, AMode) {
+ let (mem_insts, mem) = mem_finalize(mem, state);
+ let mut mem_str = mem_insts
+ .into_iter()
+ .map(|inst| inst.show_rru(mb_rru))
+ .collect::<Vec<_>>()
+ .join(" ; ");
+ if !mem_str.is_empty() {
+ mem_str += " ; ";
+ }
+
+ (mem_str, mem)
+}
+
+impl PrettyPrint for Inst {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.pretty_print(mb_rru, &mut EmitState::default())
+ }
+}
+
+impl Inst {
+ fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
+ fn op_name(alu_op: ALUOp) -> &'static str {
+ match alu_op {
+ ALUOp::Add => "add",
+ ALUOp::Adds => "adds",
+ ALUOp::Adc => "adc",
+ ALUOp::Adcs => "adcs",
+ ALUOp::Qadd => "qadd",
+ ALUOp::Sub => "sub",
+ ALUOp::Subs => "subs",
+ ALUOp::Sbc => "sbc",
+ ALUOp::Sbcs => "sbcs",
+ ALUOp::Rsb => "rsb",
+ ALUOp::Qsub => "qsub",
+ ALUOp::Mul => "mul",
+ ALUOp::Smull => "smull",
+ ALUOp::Umull => "umull",
+ ALUOp::Udiv => "udiv",
+ ALUOp::Sdiv => "sdiv",
+ ALUOp::And => "and",
+ ALUOp::Orr => "orr",
+ ALUOp::Orn => "orn",
+ ALUOp::Eor => "eor",
+ ALUOp::Bic => "bic",
+ ALUOp::Lsl => "lsl",
+ ALUOp::Lsr => "lsr",
+ ALUOp::Asr => "asr",
+ ALUOp::Ror => "ror",
+ }
+ }
+
+ fn reg_shift_str(
+ shift: &Option<ShiftOpAndAmt>,
+ mb_rru: Option<&RealRegUniverse>,
+ ) -> String {
+ if let Some(ref shift) = shift {
+ format!(", {}", shift.show_rru(mb_rru))
+ } else {
+ "".to_string()
+ }
+ }
+
+ match self {
+ &Inst::Nop0 => "nop-zero-len".to_string(),
+ &Inst::Nop2 => "nop".to_string(),
+ &Inst::AluRRR { alu_op, rd, rn, rm } => {
+ let op = op_name(alu_op);
+ let rd = rd.show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, rm)
+ }
+ &Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ ref shift,
+ } => {
+ let op = op_name(alu_op);
+ let rd = rd.show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ let shift = reg_shift_str(shift, mb_rru);
+ format!("{} {}, {}, {}{}", op, rd, rn, rm, shift)
+ }
+ &Inst::AluRRShift {
+ alu_op,
+ rd,
+ rm,
+ ref shift,
+ } => {
+ let op = match alu_op {
+ ALUOp1::Mvn => "mvn",
+ ALUOp1::Mov => "mov",
+ };
+ let rd = rd.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ let shift = reg_shift_str(shift, mb_rru);
+ format!("{} {}, {}{}", op, rd, rm, shift)
+ }
+ &Inst::AluRRRR {
+ alu_op,
+ rd_hi,
+ rd_lo,
+ rn,
+ rm,
+ } => {
+ let op = op_name(alu_op);
+ let rd_hi = rd_hi.show_rru(mb_rru);
+ let rd_lo = rd_lo.show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("{} {}, {}, {}, {}", op, rd_lo, rd_hi, rn, rm)
+ }
+ &Inst::AluRRImm12 {
+ alu_op,
+ rd,
+ rn,
+ imm12,
+ } => {
+ let op = op_name(alu_op);
+ let rd = rd.show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let imm = imm12.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm)
+ }
+ &Inst::AluRRImm8 {
+ alu_op,
+ rd,
+ rn,
+ imm8,
+ } => {
+ let op = op_name(alu_op);
+ let rd = rd.show_rru(mb_rru);
+ let rn = rn.show_rru(mb_rru);
+ let imm = imm8.show_rru(mb_rru);
+ format!("{} {}, {}, {}", op, rd, rn, imm)
+ }
+ &Inst::AluRImm8 { alu_op, rd, imm8 } => {
+ let op = match alu_op {
+ ALUOp1::Mvn => "mvn",
+ ALUOp1::Mov => "mov",
+ };
+ let rd = rd.show_rru(mb_rru);
+ let imm = imm8.show_rru(mb_rru);
+ format!("{} {}, {}", op, rd, imm)
+ }
+ &Inst::BitOpRR { bit_op, rd, rm } => {
+ let op = match bit_op {
+ BitOp::Rbit => "rbit",
+ BitOp::Rev => "rev",
+ BitOp::Clz => "clz",
+ };
+ let rd = rd.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("{} {}, {}", op, rd, rm)
+ }
+ &Inst::Mov { rd, rm } => {
+ let rd = rd.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("mov {}, {}", rd, rm)
+ }
+ &Inst::MovImm16 { rd, imm16 } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("mov {}, #{}", rd, imm16)
+ }
+ &Inst::Movt { rd, imm16 } => {
+ let rd = rd.show_rru(mb_rru);
+ format!("movt {}, #{}", rd, imm16)
+ }
+ &Inst::Cmp { rn, rm } => {
+ let rn = rn.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("cmp {}, {}", rn, rm)
+ }
+ &Inst::CmpImm8 { rn, imm8 } => {
+ let rn = rn.show_rru(mb_rru);
+ format!("cmp {}, #{}", rn, imm8)
+ }
+ &Inst::Store {
+ rt, ref mem, bits, ..
+ } => {
+ let op = match bits {
+ 32 => "str",
+ 16 => "strh",
+ 8 => "strb",
+ _ => panic!("Invalid bit amount {}", bits),
+ };
+ let rt = rt.show_rru(mb_rru);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rt, mem)
+ }
+ &Inst::Load {
+ rt,
+ ref mem,
+ bits,
+ sign_extend,
+ ..
+ } => {
+ let op = match (bits, sign_extend) {
+ (32, _) => "ldr",
+ (16, true) => "ldrsh",
+ (16, false) => "ldrh",
+ (8, true) => "ldrsb",
+ (8, false) => "ldrb",
+ (_, _) => panic!("Invalid bit amount {}", bits),
+ };
+ let rt = rt.show_rru(mb_rru);
+ let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state);
+ let mem = mem.show_rru(mb_rru);
+ format!("{}{} {}, {}", mem_str, op, rt, mem)
+ }
+ &Inst::LoadAddr { rd, ref mem } => {
+ let mut ret = String::new();
+ let (mem_insts, mem) = mem_finalize(mem, state);
+ for inst in mem_insts.into_iter() {
+ ret.push_str(&inst.show_rru(mb_rru));
+ }
+ let inst = match mem {
+ AMode::RegReg(rn, rm, shift) => {
+ let shift = u32::from(shift);
+ let shift_amt = ShiftOpShiftImm::maybe_from_shift(shift).unwrap();
+ let shift = ShiftOpAndAmt::new(ShiftOp::LSL, shift_amt);
+ Inst::AluRRRShift {
+ alu_op: ALUOp::Add,
+ rd,
+ rn,
+ rm,
+ shift: Some(shift),
+ }
+ }
+ AMode::RegOffset12(reg, imm12) => Inst::AluRRImm12 {
+ alu_op: ALUOp::Add,
+ rd,
+ rn: reg,
+ imm12,
+ },
+ _ => unreachable!(),
+ };
+ ret.push_str(&inst.show_rru(mb_rru));
+ ret
+ }
+ &Inst::Extend {
+ rd,
+ rm,
+ from_bits,
+ signed,
+ } => {
+ let op = match (from_bits, signed) {
+ (16, true) => "sxth",
+ (16, false) => "uxth",
+ (8, true) => "sxtb",
+ (8, false) => "uxtb",
+ _ => panic!("Unsupported extend case: {:?}", self),
+ };
+ let rd = rd.show_rru(mb_rru);
+ let rm = rm.show_rru(mb_rru);
+ format!("{} {}, {}", op, rd, rm)
+ }
+ &Inst::It { cond, ref insts } => {
+ let te: String = insts
+ .iter()
+ .skip(1)
+ .map(|i| if i.then { "t" } else { "e" })
+ .collect();
+ let cond = cond.show_rru(mb_rru);
+ let mut ret = format!("it{} {}", te, cond);
+ for inst in insts.into_iter() {
+ ret.push_str(" ; ");
+ ret.push_str(&inst.inst.show_rru(mb_rru));
+ }
+ ret
+ }
+ &Inst::Push { ref reg_list } => {
+ assert!(!reg_list.is_empty());
+ let first_reg = reg_list[0].show_rru(mb_rru);
+ let regs: String = reg_list
+ .iter()
+ .skip(1)
+ .map(|r| [",", &r.show_rru(mb_rru)].join(" "))
+ .collect();
+ format!("push {{{}{}}}", first_reg, regs)
+ }
+ &Inst::Pop { ref reg_list } => {
+ assert!(!reg_list.is_empty());
+ let first_reg = reg_list[0].show_rru(mb_rru);
+ let regs: String = reg_list
+ .iter()
+ .skip(1)
+ .map(|r| [",", &r.show_rru(mb_rru)].join(" "))
+ .collect();
+ format!("pop {{{}{}}}", first_reg, regs)
+ }
+ &Inst::Call { .. } => format!("bl 0"),
+ &Inst::CallInd { ref info, .. } => {
+ let rm = info.rm.show_rru(mb_rru);
+ format!("blx {}", rm)
+ }
+ &Inst::LoadExtName {
+ rt,
+ ref name,
+ offset,
+ } => {
+ let rt = rt.show_rru(mb_rru);
+ format!("ldr {}, [pc, #4] ; b 4 ; data {:?} + {}", rt, name, offset)
+ }
+ &Inst::Ret => "bx lr".to_string(),
+ &Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
+ &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
+ &Inst::Jump { ref dest } => {
+ let dest = dest.show_rru(mb_rru);
+ format!("b {}", dest)
+ }
+ &Inst::CondBr {
+ ref taken,
+ ref not_taken,
+ ref cond,
+ } => {
+ let taken = taken.show_rru(mb_rru);
+ let not_taken = not_taken.show_rru(mb_rru);
+ let c = cond.show_rru(mb_rru);
+ format!("b{} {} ; b {}", c, taken, not_taken)
+ }
+ &Inst::IndirectBr { rm, .. } => {
+ let rm = rm.show_rru(mb_rru);
+ format!("bx {}", rm)
+ }
+ &Inst::Udf { .. } => "udf #0".to_string(),
+ &Inst::Bkpt => "bkpt #0".to_string(),
+ &Inst::TrapIf { cond, .. } => {
+ let c = cond.invert().show_rru(mb_rru);
+ format!("b{} 2 ; udf #0", c)
+ }
+ }
+ }
+}
+
+//=============================================================================
+// Label fixups and jump veneers.
+
+/// Different forms of label references for different instruction formats.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+ /// 20-bit branch offset used by 32-bit conditional jumps.
+ Branch20,
+
+    /// 24-bit branch offset used by the 32-bit unconditional jump instruction.
+ Branch24,
+}
+
+impl MachInstLabelUse for LabelUse {
+    /// Alignment for veneer code. Every Thumb-2 instruction must be 2-byte-aligned.
+ const ALIGN: CodeOffset = 2;
+
+    // Branch ranges:
+ // 20-bit sign-extended immediate gives us range [-(2^19), 2^19 - 1].
+ // Left-shifted by 1 => [-(2^20), 2^20 - 2].
+ // PC is start of this instruction + 4 bytes => [-(2^20) + 4, 2^20 + 2].
+ // Likewise for Branch24.
+
+ /// Maximum PC-relative range (positive), inclusive.
+ fn max_pos_range(self) -> CodeOffset {
+ match self {
+ LabelUse::Branch20 => (1 << 20) + 2,
+ LabelUse::Branch24 => (1 << 24) + 2,
+ }
+ }
+
+ /// Maximum PC-relative range (negative).
+ fn max_neg_range(self) -> CodeOffset {
+ match self {
+ LabelUse::Branch20 => (1 << 20) - 4,
+ LabelUse::Branch24 => (1 << 24) - 4,
+ }
+ }
+
+ /// Size of window into code needed to do the patch.
+ fn patch_size(self) -> CodeOffset {
+ 4
+ }
+
+ /// Perform the patch.
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
+ let off = (label_offset as i64) - (use_offset as i64);
+ debug_assert!(off <= self.max_pos_range() as i64);
+ debug_assert!(off >= -(self.max_neg_range() as i64));
+ let off = off - 4;
+ match self {
+ LabelUse::Branch20 => {
+ let off = off as u32 >> 1;
+ let imm11 = (off & 0x7ff) as u16;
+ let imm6 = ((off >> 11) & 0x3f) as u16;
+ let j1 = ((off >> 17) & 0x1) as u16;
+ let j2 = ((off >> 18) & 0x1) as u16;
+ let s = ((off >> 19) & 0x1) as u16;
+ let insn_fst = u16::from_le_bytes([buffer[0], buffer[1]]);
+ let insn_fst = (insn_fst & !0x43f) | imm6 | (s << 10);
+ let insn_snd = u16::from_le_bytes([buffer[2], buffer[3]]);
+ let insn_snd = (insn_snd & !0x2fff) | imm11 | (j2 << 11) | (j1 << 13);
+ buffer[0..2].clone_from_slice(&u16::to_le_bytes(insn_fst));
+ buffer[2..4].clone_from_slice(&u16::to_le_bytes(insn_snd));
+ }
+ LabelUse::Branch24 => {
+ let off = off as u32 >> 1;
+ let imm11 = (off & 0x7ff) as u16;
+ let imm10 = ((off >> 11) & 0x3ff) as u16;
+ let s = ((off >> 23) & 0x1) as u16;
+ let j1 = (((off >> 22) & 0x1) as u16 ^ s) ^ 0x1;
+ let j2 = (((off >> 21) & 0x1) as u16 ^ s) ^ 0x1;
+ let insn_fst = u16::from_le_bytes([buffer[0], buffer[1]]);
+ let insn_fst = (insn_fst & !0x07ff) | imm10 | (s << 10);
+ let insn_snd = u16::from_le_bytes([buffer[2], buffer[3]]);
+ let insn_snd = (insn_snd & !0x2fff) | imm11 | (j2 << 11) | (j1 << 13);
+ buffer[0..2].clone_from_slice(&u16::to_le_bytes(insn_fst));
+ buffer[2..4].clone_from_slice(&u16::to_le_bytes(insn_snd));
+ }
+ }
+ }
+
+ fn supports_veneer(self) -> bool {
+ false
+ }
+
+ fn veneer_size(self) -> CodeOffset {
+ 0
+ }
+
+ fn generate_veneer(
+ self,
+ _buffer: &mut [u8],
+ _veneer_offset: CodeOffset,
+ ) -> (CodeOffset, LabelUse) {
+ panic!("Veneer not supported yet.")
+ }
+}
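
To make the `Branch20` bit layout in `patch` above easier to follow, here is a standalone decoding sketch (illustrative only, not part of the vendored source; the function name is ours). It reassembles the S:J2:J1:imm6:imm11 fields from the two halfwords and undoes the implicit right-shift by one, matching the `patch_branch20` test below.

fn decode_branch20(first: u16, second: u16) -> i32 {
    let imm6 = (first & 0x3f) as u32;
    let s = ((first >> 10) & 0x1) as u32;
    let imm11 = (second & 0x7ff) as u32;
    let j2 = ((second >> 11) & 0x1) as u32;
    let j1 = ((second >> 13) & 0x1) as u32;
    let off20 = (s << 19) | (j2 << 18) | (j1 << 17) | (imm6 << 11) | imm11;
    // Sign-extend the 20-bit field and restore the implicit low zero bit.
    (((off20 << 12) as i32) >> 12) << 1
}

fn main() {
    // `patch_branch20` below patches a branch to `max_pos_range()`; after the -4 PC
    // adjustment the stored offset is (1 << 20) - 2, encoded as halfwords 0xF03F, 0xAFFF.
    assert_eq!(decode_branch20(0xF03F, 0xAFFF), (1 << 20) - 2);
}
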
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn patch_branch20() {
+ let label_use = LabelUse::Branch20;
+ let mut buffer = 0x8000_f000_u32.to_le_bytes(); // beq
+ let use_offset: CodeOffset = 0;
+ let label_offset: CodeOffset = label_use.max_pos_range();
+ label_use.patch(&mut buffer, use_offset, label_offset);
+ assert_eq!(u16::from_le_bytes([buffer[0], buffer[1]]), 0xf03f);
+ assert_eq!(u16::from_le_bytes([buffer[2], buffer[3]]), 0xafff);
+
+ let mut buffer = 0x8000_f000_u32.to_le_bytes(); // beq
+ let use_offset = label_use.max_neg_range();
+ let label_offset: CodeOffset = 0;
+ label_use.patch(&mut buffer, use_offset, label_offset);
+ assert_eq!(u16::from_le_bytes([buffer[0], buffer[1]]), 0xf400);
+ assert_eq!(u16::from_le_bytes([buffer[2], buffer[3]]), 0x8000);
+ }
+
+ #[test]
+ fn patch_branch24() {
+ let label_use = LabelUse::Branch24;
+ let mut buffer = 0x9000_f000_u32.to_le_bytes(); // b
+ let use_offset: CodeOffset = 0;
+ let label_offset: CodeOffset = label_use.max_pos_range();
+ label_use.patch(&mut buffer, use_offset, label_offset);
+ assert_eq!(u16::from_le_bytes([buffer[0], buffer[1]]), 0xf3ff);
+ assert_eq!(u16::from_le_bytes([buffer[2], buffer[3]]), 0x97ff);
+
+ let mut buffer = 0x9000_f000_u32.to_le_bytes(); // b
+ let use_offset = label_use.max_neg_range();
+ let label_offset: CodeOffset = 0;
+ label_use.patch(&mut buffer, use_offset, label_offset);
+ assert_eq!(u16::from_le_bytes([buffer[0], buffer[1]]), 0xf400);
+ assert_eq!(u16::from_le_bytes([buffer[2], buffer[3]]), 0x9000);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/regs.rs
new file mode 100644
index 0000000000..55df5c8db3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/regs.rs
@@ -0,0 +1,128 @@
+//! 32-bit ARM ISA definitions: registers.
+
+use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
+
+use std::string::ToString;
+
+/// Get a reference to a GPR.
+pub fn rreg(num: u8) -> Reg {
+ assert!(num < 16);
+ Reg::new_real(RegClass::I32, num, num)
+}
+
+/// Get a writable reference to a GPR.
+pub fn writable_rreg(num: u8) -> Writable<Reg> {
+ Writable::from_reg(rreg(num))
+}
+
+/// Get a reference to the program counter (r15).
+pub fn pc_reg() -> Reg {
+ rreg(15)
+}
+
+/// Get a writable reference to the program counter.
+pub fn writable_pc_reg() -> Writable<Reg> {
+ Writable::from_reg(pc_reg())
+}
+
+/// Get a reference to the link register (r14).
+pub fn lr_reg() -> Reg {
+ rreg(14)
+}
+
+/// Get a writable reference to the link register.
+pub fn writable_lr_reg() -> Writable<Reg> {
+ Writable::from_reg(lr_reg())
+}
+
+/// Get a reference to the stack pointer (r13).
+pub fn sp_reg() -> Reg {
+ rreg(13)
+}
+
+/// Get a writable reference to the stack pointer.
+pub fn writable_sp_reg() -> Writable<Reg> {
+ Writable::from_reg(sp_reg())
+}
+
+/// Get a reference to the intra-procedure-call scratch register (r12),
+/// which is used as a temporary register.
+pub fn ip_reg() -> Reg {
+ rreg(12)
+}
+
+/// Get a writable reference to the Intra-Procedure-call scratch register.
+pub fn writable_ip_reg() -> Writable<Reg> {
+ Writable::from_reg(ip_reg())
+}
+
+/// Get a reference to the frame pointer register (r11).
+pub fn fp_reg() -> Reg {
+ rreg(11)
+}
+
+/// Get a writable reference to the frame-pointer register.
+pub fn writable_fp_reg() -> Writable<Reg> {
+ Writable::from_reg(fp_reg())
+}
+
+/// Get a reference to the second temp register. We need this in some edge cases
+/// where we need both the ip and another temporary.
+///
+/// We use r10 for this role.
+pub fn tmp2_reg() -> Reg {
+ rreg(10)
+}
+
+/// Get a writable reference to the tmp2 reg.
+pub fn writable_tmp2_reg() -> Writable<Reg> {
+ Writable::from_reg(tmp2_reg())
+}
+
+/// Create the register universe.
+/// Only GPRs are used for now.
+pub fn create_reg_universe() -> RealRegUniverse {
+ let mut regs = vec![];
+ let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+ let r_reg_base = 0u8;
+ let r_reg_count = 10; // to exclude r10, fp, ip, sp, lr and pc.
+ for i in 0..r_reg_count {
+ let reg = Reg::new_real(
+ RegClass::I32,
+ /* enc = */ i,
+ /* index = */ r_reg_base + i,
+ )
+ .to_real_reg();
+ let name = format!("r{}", i);
+ regs.push((reg, name));
+ }
+ let r_reg_last = r_reg_base + r_reg_count - 1;
+
+ allocable_by_class[RegClass::I32.rc_to_usize()] = Some(RegClassInfo {
+ first: r_reg_base as usize,
+ last: r_reg_last as usize,
+ suggested_scratch: None,
+ });
+
+ // Other regs, not available to the allocator.
+ let allocable = regs.len();
+ regs.push((tmp2_reg().to_real_reg(), "r10".to_string()));
+ regs.push((fp_reg().to_real_reg(), "fp".to_string()));
+ regs.push((ip_reg().to_real_reg(), "ip".to_string()));
+ regs.push((sp_reg().to_real_reg(), "sp".to_string()));
+ regs.push((lr_reg().to_real_reg(), "lr".to_string()));
+ regs.push((pc_reg().to_real_reg(), "pc".to_string()));
+
+ // The indices in the register structs must match their
+ // actual indices in the array.
+ for (i, reg) in regs.iter().enumerate() {
+ assert_eq!(i, reg.0.get_index());
+ }
+
+ RealRegUniverse {
+ regs,
+ allocable,
+ allocable_by_class,
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/unwind.rs
new file mode 100644
index 0000000000..b9ffeba0cf
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/inst/unwind.rs
@@ -0,0 +1,14 @@
+use super::*;
+use crate::isa::unwind::input::UnwindInfo;
+use crate::result::CodegenResult;
+
+pub struct Arm32UnwindInfo;
+
+impl UnwindInfoGenerator<Inst> for Arm32UnwindInfo {
+ fn create_unwind_info(
+ _context: UnwindInfoContext<Inst>,
+ ) -> CodegenResult<Option<UnwindInfo<Reg>>> {
+ // TODO
+ Ok(None)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/lower.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/lower.rs
new file mode 100644
index 0000000000..7c11ae95ba
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/lower.rs
@@ -0,0 +1,240 @@
+//! Lowering rules for 32-bit ARM.
+
+use crate::ir::condcodes::IntCC;
+use crate::ir::types::*;
+use crate::ir::Inst as IRInst;
+use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::CodegenResult;
+
+use crate::isa::arm32::inst::*;
+use crate::isa::arm32::Arm32Backend;
+
+use super::lower_inst;
+
+use regalloc::{Reg, RegClass, Writable};
+
+//============================================================================
+// Lowering: convert instruction outputs to result types.
+
+/// Lower an instruction output to a 32-bit constant, if possible.
+pub(crate) fn output_to_const<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Option<u64> {
+ if out.output > 0 {
+ None
+ } else {
+ let inst_data = ctx.data(out.insn);
+ if inst_data.opcode() == Opcode::Null {
+ Some(0)
+ } else {
+ match inst_data {
+ &InstructionData::UnaryImm { opcode: _, imm } => {
+ // Only has Into for i64; we use u64 elsewhere, so we cast.
+ let imm: i64 = imm.into();
+ Some(imm as u64)
+ }
+ &InstructionData::UnaryBool { opcode: _, imm } => Some(u64::from(imm)),
+ &InstructionData::UnaryIeee32 { .. } | &InstructionData::UnaryIeee64 { .. } => {
+ unimplemented!()
+ }
+ _ => None,
+ }
+ }
+ }
+}
+
+/// How to handle narrow values loaded into registers.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub(crate) enum NarrowValueMode {
+ None,
+ /// Zero-extend to 32 bits if original is < 32 bits.
+ ZeroExtend,
+ /// Sign-extend to 32 bits if original is < 32 bits.
+ SignExtend,
+}
+
+/// Lower an instruction output to a reg.
+pub(crate) fn output_to_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, out: InsnOutput) -> Writable<Reg> {
+ ctx.get_output(out.insn, out.output)
+}
+
+/// Lower an instruction input to a reg.
+///
+/// The given register will be extended appropriately, according to `narrow_mode`.
+pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ narrow_mode: NarrowValueMode,
+) -> Reg {
+ let ty = ctx.input_ty(input.insn, input.input);
+ let from_bits = ty.bits() as u8;
+ let inputs = ctx.get_input(input.insn, input.input);
+ let in_reg = if let Some(c) = inputs.constant {
+ let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
+ for inst in Inst::gen_constant(to_reg, c, ty, |reg_class, ty| ctx.alloc_tmp(reg_class, ty))
+ .into_iter()
+ {
+ ctx.emit(inst);
+ }
+ to_reg.to_reg()
+ } else {
+ ctx.use_input_reg(inputs);
+ inputs.reg
+ };
+
+ match (narrow_mode, from_bits) {
+ (NarrowValueMode::None, _) => in_reg,
+ (NarrowValueMode::ZeroExtend, 1) => {
+ let tmp = ctx.alloc_tmp(RegClass::I32, I32);
+ ctx.emit(Inst::AluRRImm8 {
+ alu_op: ALUOp::And,
+ rd: tmp,
+ rn: in_reg,
+ imm8: UImm8::maybe_from_i64(0x1).unwrap(),
+ });
+ tmp.to_reg()
+ }
+ (NarrowValueMode::ZeroExtend, n) if n < 32 => {
+ let tmp = ctx.alloc_tmp(RegClass::I32, I32);
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rm: in_reg,
+ signed: false,
+ from_bits: n,
+ });
+ tmp.to_reg()
+ }
+ (NarrowValueMode::SignExtend, n) if n < 32 => {
+ let tmp = ctx.alloc_tmp(RegClass::I32, I32);
+ ctx.emit(Inst::Extend {
+ rd: tmp,
+ rm: in_reg,
+ signed: true,
+ from_bits: n,
+ });
+ tmp.to_reg()
+ }
+ (NarrowValueMode::ZeroExtend, 32) | (NarrowValueMode::SignExtend, 32) => in_reg,
+ _ => panic!(
+ "Unsupported input width: input ty {} bits {} mode {:?}",
+ ty, from_bits, narrow_mode
+ ),
+ }
+}
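
For reference, the extensions that `input_to_reg` requests via `NarrowValueMode` compute the following on a value already held in a 32-bit register; this standalone sketch (illustrative only, not part of the vendored source; the function names are ours) spells out the arithmetic behind the emitted `and`, `uxt*` and `sxt*` instructions.

fn zero_extend(from_bits: u32, x: u32) -> u32 {
    if from_bits >= 32 {
        x
    } else {
        x & ((1u32 << from_bits) - 1)
    }
}

fn sign_extend(from_bits: u32, x: u32) -> u32 {
    debug_assert!(from_bits >= 1 && from_bits <= 32);
    let shift = 32 - from_bits;
    (((x << shift) as i32) >> shift) as u32
}

fn main() {
    assert_eq!(zero_extend(1, 0b11), 1); // the `and rd, rn, #1` case used for B1
    assert_eq!(zero_extend(8, 0xFFFF_FF80), 0x80); // uxtb
    assert_eq!(sign_extend(8, 0x0000_0080), 0xFFFF_FF80); // sxtb
    assert_eq!(sign_extend(16, 0x0000_7FFF), 0x7FFF); // sxth keeps positive values unchanged
}
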
+
+pub(crate) fn lower_constant<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u64) {
+    // The high word must be all zeros or all ones (i.e. a sign extension of the low word).
+ assert!((value >> 32) == 0x0 || (value >> 32) == (1 << 32) - 1);
+
+ for inst in Inst::load_constant(rd, (value & ((1 << 32) - 1)) as u32) {
+ ctx.emit(inst);
+ }
+}
+
+pub(crate) fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
+ let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+
+ ctx.emit(Inst::Cmp { rn, rm });
+}
+
+pub(crate) fn lower_condcode(cc: IntCC) -> Cond {
+ match cc {
+ IntCC::Equal => Cond::Eq,
+ IntCC::NotEqual => Cond::Ne,
+ IntCC::SignedGreaterThanOrEqual => Cond::Ge,
+ IntCC::SignedGreaterThan => Cond::Gt,
+ IntCC::SignedLessThanOrEqual => Cond::Le,
+ IntCC::SignedLessThan => Cond::Lt,
+ IntCC::UnsignedGreaterThanOrEqual => Cond::Hs,
+ IntCC::UnsignedGreaterThan => Cond::Hi,
+ IntCC::UnsignedLessThanOrEqual => Cond::Ls,
+ IntCC::UnsignedLessThan => Cond::Lo,
+ IntCC::Overflow => Cond::Vs,
+ IntCC::NotOverflow => Cond::Vc,
+ }
+}
+
+/// Determines whether this condcode interprets inputs as signed or unsigned.
+pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
+ match cc {
+ IntCC::Equal => false,
+ IntCC::NotEqual => false,
+ IntCC::SignedGreaterThanOrEqual => true,
+ IntCC::SignedGreaterThan => true,
+ IntCC::SignedLessThanOrEqual => true,
+ IntCC::SignedLessThan => true,
+ IntCC::UnsignedGreaterThanOrEqual => false,
+ IntCC::UnsignedGreaterThan => false,
+ IntCC::UnsignedLessThanOrEqual => false,
+ IntCC::UnsignedLessThan => false,
+ IntCC::Overflow => true,
+ IntCC::NotOverflow => true,
+ }
+}
+
+//=============================================================================
+// Helpers for instruction lowering.
+
+pub(crate) fn ldst_offset(data: &InstructionData) -> Option<i32> {
+ match data {
+ &InstructionData::Load { offset, .. }
+ | &InstructionData::StackLoad { offset, .. }
+ | &InstructionData::LoadComplex { offset, .. }
+ | &InstructionData::Store { offset, .. }
+ | &InstructionData::StackStore { offset, .. }
+ | &InstructionData::StoreComplex { offset, .. } => Some(offset.into()),
+ _ => None,
+ }
+}
+
+pub(crate) fn inst_condcode(data: &InstructionData) -> Option<IntCC> {
+ match data {
+ &InstructionData::IntCond { cond, .. }
+ | &InstructionData::BranchIcmp { cond, .. }
+ | &InstructionData::IntCompare { cond, .. }
+ | &InstructionData::IntCondTrap { cond, .. }
+ | &InstructionData::BranchInt { cond, .. }
+ | &InstructionData::IntSelect { cond, .. }
+ | &InstructionData::IntCompareImm { cond, .. } => Some(cond),
+ _ => None,
+ }
+}
+
+pub(crate) fn inst_trapcode(data: &InstructionData) -> Option<TrapCode> {
+ match data {
+ &InstructionData::Trap { code, .. }
+ | &InstructionData::CondTrap { code, .. }
+ | &InstructionData::IntCondTrap { code, .. } => Some(code),
+ &InstructionData::FloatCondTrap { code, .. } => {
+ panic!("Unexpected float cond trap {:?}", code)
+ }
+ _ => None,
+ }
+}
+
+//=============================================================================
+// Lowering-backend trait implementation.
+
+impl LowerBackend for Arm32Backend {
+ type MInst = Inst;
+
+ fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
+ lower_inst::lower_insn_to_regs(ctx, ir_inst)
+ }
+
+ fn lower_branch_group<C: LowerCtx<I = Inst>>(
+ &self,
+ ctx: &mut C,
+ branches: &[IRInst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+ ) -> CodegenResult<()> {
+ lower_inst::lower_branch(ctx, branches, targets, fallthrough)
+ }
+
+ fn maybe_pinned_reg(&self) -> Option<Reg> {
+ None
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/lower_inst.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/lower_inst.rs
new file mode 100644
index 0000000000..05256b2540
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/lower_inst.rs
@@ -0,0 +1,608 @@
+//! Lower a single Cranelift instruction into vcode.
+
+use crate::ir::types::*;
+use crate::ir::Inst as IRInst;
+use crate::ir::Opcode;
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::CodegenResult;
+
+use crate::isa::arm32::abi::*;
+use crate::isa::arm32::inst::*;
+
+use regalloc::RegClass;
+use smallvec::SmallVec;
+
+use super::lower::*;
+
+/// Actually codegen an instruction's results into registers.
+pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ insn: IRInst,
+) -> CodegenResult<()> {
+ let op = ctx.data(insn).opcode();
+ let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
+ .map(|i| InsnInput { insn, input: i })
+ .collect();
+ let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
+ .map(|i| InsnOutput { insn, output: i })
+ .collect();
+ let ty = if outputs.len() > 0 {
+ let ty = ctx.output_ty(insn, 0);
+ if ty.bits() > 32 || ty.is_float() {
+ panic!("Cannot lower inst with type {}!", ty);
+ }
+ Some(ty)
+ } else {
+ None
+ };
+
+ match op {
+ Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
+ let value = output_to_const(ctx, outputs[0]).unwrap();
+ let rd = output_to_reg(ctx, outputs[0]);
+ lower_constant(ctx, rd, value);
+ }
+ Opcode::Iadd
+ | Opcode::IaddIfcin
+ | Opcode::IaddIfcout
+ | Opcode::IaddIfcarry
+ | Opcode::Isub
+ | Opcode::IsubIfbin
+ | Opcode::IsubIfbout
+ | Opcode::IsubIfborrow
+ | Opcode::Band
+ | Opcode::Bor
+ | Opcode::Bxor
+ | Opcode::BandNot
+ | Opcode::BorNot => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+
+ let alu_op = match op {
+ Opcode::Iadd => ALUOp::Add,
+ Opcode::IaddIfcin => ALUOp::Adc,
+ Opcode::IaddIfcout => ALUOp::Adds,
+ Opcode::IaddIfcarry => ALUOp::Adcs,
+ Opcode::Isub => ALUOp::Sub,
+ Opcode::IsubIfbin => ALUOp::Sbc,
+ Opcode::IsubIfbout => ALUOp::Subs,
+ Opcode::IsubIfborrow => ALUOp::Sbcs,
+ Opcode::Band => ALUOp::And,
+ Opcode::Bor => ALUOp::Orr,
+ Opcode::Bxor => ALUOp::Eor,
+ Opcode::BandNot => ALUOp::Bic,
+ Opcode::BorNot => ALUOp::Orn,
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::AluRRRShift {
+ alu_op,
+ rd,
+ rn,
+ rm,
+ shift: None,
+ });
+ }
+ Opcode::SaddSat | Opcode::SsubSat | Opcode::Imul | Opcode::Udiv | Opcode::Sdiv => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+
+ let alu_op = match op {
+ Opcode::SaddSat => ALUOp::Qadd,
+ Opcode::SsubSat => ALUOp::Qsub,
+ Opcode::Imul => ALUOp::Mul,
+ Opcode::Udiv => ALUOp::Udiv,
+ Opcode::Sdiv => ALUOp::Sdiv,
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+ }
+ Opcode::Ineg => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+
+ ctx.emit(Inst::AluRRImm8 {
+ alu_op: ALUOp::Rsb,
+ rd,
+ rn,
+ imm8: UImm8::maybe_from_i64(0).unwrap(),
+ });
+ }
+ Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
+ let (alu_op, ext) = match op {
+ Opcode::Ishl => (ALUOp::Lsl, NarrowValueMode::None),
+ Opcode::Ushr => (ALUOp::Lsr, NarrowValueMode::ZeroExtend),
+ Opcode::Sshr => (ALUOp::Asr, NarrowValueMode::SignExtend),
+ _ => unreachable!(),
+ };
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], ext);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::ZeroExtend);
+ ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+ }
+ Opcode::Rotr => {
+ if ty.unwrap().bits() != 32 {
+ unimplemented!()
+ }
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Ror,
+ rd,
+ rn,
+ rm,
+ });
+ }
+ Opcode::Rotl => {
+ if ty.unwrap().bits() != 32 {
+ unimplemented!()
+ }
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+ let tmp = ctx.alloc_tmp(RegClass::I32, I32);
+
+ // ror rd, rn, 32 - (rm & 31)
+ ctx.emit(Inst::AluRRImm8 {
+ alu_op: ALUOp::And,
+ rd: tmp,
+ rn: rm,
+ imm8: UImm8::maybe_from_i64(31).unwrap(),
+ });
+ ctx.emit(Inst::AluRRImm8 {
+ alu_op: ALUOp::Rsb,
+ rd: tmp,
+ rn: tmp.to_reg(),
+ imm8: UImm8::maybe_from_i64(32).unwrap(),
+ });
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Ror,
+ rd,
+ rn,
+ rm: tmp.to_reg(),
+ });
+ }
+ Opcode::Smulhi | Opcode::Umulhi => {
+ let ty = ty.unwrap();
+ let is_signed = op == Opcode::Smulhi;
+ match ty {
+ I32 => {
+ let rd_hi = output_to_reg(ctx, outputs[0]);
+ let rd_lo = ctx.alloc_tmp(RegClass::I32, ty);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+
+ let alu_op = if is_signed {
+ ALUOp::Smull
+ } else {
+ ALUOp::Umull
+ };
+ ctx.emit(Inst::AluRRRR {
+ alu_op,
+ rd_hi,
+ rd_lo,
+ rn,
+ rm,
+ });
+ }
+ I16 | I8 => {
+ let narrow_mode = if is_signed {
+ NarrowValueMode::SignExtend
+ } else {
+ NarrowValueMode::ZeroExtend
+ };
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], narrow_mode);
+ let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+
+ ctx.emit(Inst::AluRRR {
+ alu_op: ALUOp::Mul,
+ rd,
+ rn,
+ rm,
+ });
+ let shift_amt = if ty == I16 { 16 } else { 8 };
+ let imm8 = UImm8::maybe_from_i64(shift_amt).unwrap();
+ let alu_op = if is_signed { ALUOp::Asr } else { ALUOp::Lsr };
+
+ ctx.emit(Inst::AluRRImm8 {
+ alu_op,
+ rd,
+ rn: rd.to_reg(),
+ imm8,
+ });
+ }
+ _ => panic!("Unexpected type {} in lower {}!", ty, op),
+ }
+ }
+ Opcode::Bnot => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+
+ ctx.emit(Inst::AluRRShift {
+ alu_op: ALUOp1::Mvn,
+ rd,
+ rm,
+ shift: None,
+ });
+ }
+ Opcode::Clz | Opcode::Ctz => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend);
+ let ty = ctx.output_ty(insn, 0);
+
+ let in_reg = if op == Opcode::Ctz {
+ ctx.emit(Inst::BitOpRR {
+ bit_op: BitOp::Rbit,
+ rd,
+ rm,
+ });
+ rd.to_reg()
+ } else {
+ rm
+ };
+ ctx.emit(Inst::BitOpRR {
+ bit_op: BitOp::Clz,
+ rd,
+ rm: in_reg,
+ });
+
+ if ty.bits() < 32 {
+ let imm12 = UImm12::maybe_from_i64(32 - ty.bits() as i64).unwrap();
+ ctx.emit(Inst::AluRRImm12 {
+ alu_op: ALUOp::Sub,
+ rd,
+ rn: rd.to_reg(),
+ imm12,
+ });
+ }
+ }
+ Opcode::Bitrev => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ctx.output_ty(insn, 0);
+ let bit_op = BitOp::Rbit;
+
+ match ty.bits() {
+ 32 => ctx.emit(Inst::BitOpRR { bit_op, rd, rm }),
+ n if n < 32 => {
+ let shift = ShiftOpAndAmt::new(
+ ShiftOp::LSL,
+ ShiftOpShiftImm::maybe_from_shift(32 - n as u32).unwrap(),
+ );
+ ctx.emit(Inst::AluRRShift {
+ alu_op: ALUOp1::Mov,
+ rd,
+ rm,
+ shift: Some(shift),
+ });
+ ctx.emit(Inst::BitOpRR {
+ bit_op,
+ rd,
+ rm: rd.to_reg(),
+ });
+ }
+ _ => panic!("Unexpected output type {}", ty),
+ }
+ }
+ Opcode::Icmp | Opcode::Ifcmp => {
+ let condcode = inst_condcode(ctx.data(insn)).unwrap();
+ let cond = lower_condcode(condcode);
+ let is_signed = condcode_is_signed(condcode);
+
+ let narrow_mode = if is_signed {
+ NarrowValueMode::SignExtend
+ } else {
+ NarrowValueMode::ZeroExtend
+ };
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], narrow_mode);
+ let rm = input_to_reg(ctx, inputs[1], narrow_mode);
+
+ ctx.emit(Inst::Cmp { rn, rm });
+
+ if op == Opcode::Icmp {
+ let mut it_insts = vec![];
+ it_insts.push(CondInst::new(Inst::MovImm16 { rd, imm16: 1 }, true));
+ it_insts.push(CondInst::new(Inst::MovImm16 { rd, imm16: 0 }, false));
+ ctx.emit(Inst::It {
+ cond,
+ insts: it_insts,
+ });
+ }
+ }
+ Opcode::Trueif => {
+ let cmp_insn = ctx
+ .get_input(inputs[0].insn, inputs[0].input)
+ .inst
+ .unwrap()
+ .0;
+ debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
+ emit_cmp(ctx, cmp_insn);
+
+ let condcode = inst_condcode(ctx.data(insn)).unwrap();
+ let cond = lower_condcode(condcode);
+ let rd = output_to_reg(ctx, outputs[0]);
+
+ let mut it_insts = vec![];
+ it_insts.push(CondInst::new(Inst::MovImm16 { rd, imm16: 1 }, true));
+ it_insts.push(CondInst::new(Inst::MovImm16 { rd, imm16: 0 }, false));
+
+ ctx.emit(Inst::It {
+ cond,
+ insts: it_insts,
+ });
+ }
+ Opcode::Select | Opcode::Selectif => {
+ let cond = if op == Opcode::Select {
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend);
+ ctx.emit(Inst::CmpImm8 { rn, imm8: 0 });
+ Cond::Ne
+ } else {
+ // Verification ensures that the input is always a single-def ifcmp.
+ let cmp_insn = ctx
+ .get_input(inputs[0].insn, inputs[0].input)
+ .inst
+ .unwrap()
+ .0;
+ debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
+ emit_cmp(ctx, cmp_insn);
+
+ let condcode = inst_condcode(ctx.data(insn)).unwrap();
+ lower_condcode(condcode)
+ };
+ let r1 = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+ let r2 = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
+ let out_reg = output_to_reg(ctx, outputs[0]);
+
+ let mut it_insts = vec![];
+ it_insts.push(CondInst::new(Inst::mov(out_reg, r1), true));
+ it_insts.push(CondInst::new(Inst::mov(out_reg, r2), false));
+
+ ctx.emit(Inst::It {
+ cond,
+ insts: it_insts,
+ });
+ }
+ Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
+ let off = ldst_offset(ctx.data(insn)).unwrap();
+ let elem_ty = match op {
+ Opcode::Istore8 => I8,
+ Opcode::Istore16 => I16,
+ Opcode::Istore32 => I32,
+ Opcode::Store => ctx.input_ty(insn, 0),
+ _ => unreachable!(),
+ };
+ if elem_ty.bits() > 32 {
+ unimplemented!()
+ }
+ let bits = elem_ty.bits() as u8;
+
+ assert_eq!(inputs.len(), 2, "only one input for store memory operands");
+ let rt = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let base = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+
+ let mem = AMode::RegOffset(base, i64::from(off));
+
+ ctx.emit(Inst::Store { rt, mem, bits });
+ }
+ Opcode::Load
+ | Opcode::Uload8
+ | Opcode::Sload8
+ | Opcode::Uload16
+ | Opcode::Sload16
+ | Opcode::Uload32
+ | Opcode::Sload32 => {
+ let off = ldst_offset(ctx.data(insn)).unwrap();
+ let elem_ty = match op {
+ Opcode::Sload8 | Opcode::Uload8 => I8,
+ Opcode::Sload16 | Opcode::Uload16 => I16,
+ Opcode::Sload32 | Opcode::Uload32 => I32,
+ Opcode::Load => ctx.output_ty(insn, 0),
+ _ => unreachable!(),
+ };
+ if elem_ty.bits() > 32 {
+ unimplemented!()
+ }
+ let bits = elem_ty.bits() as u8;
+
+ let sign_extend = match op {
+ Opcode::Sload8 | Opcode::Sload16 | Opcode::Sload32 => true,
+ _ => false,
+ };
+ let out_reg = output_to_reg(ctx, outputs[0]);
+
+ assert_eq!(inputs.len(), 2, "only one input for load memory operands");
+ let base = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+ let mem = AMode::RegOffset(base, i64::from(off));
+
+ ctx.emit(Inst::Load {
+ rt: out_reg,
+ mem,
+ bits,
+ sign_extend,
+ });
+ }
+ Opcode::Uextend | Opcode::Sextend => {
+ let output_ty = ty.unwrap();
+ let input_ty = ctx.input_ty(insn, 0);
+ let from_bits = input_ty.bits() as u8;
+ let to_bits = 32;
+ let signed = op == Opcode::Sextend;
+
+ let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let rd = output_to_reg(ctx, outputs[0]);
+
+ if output_ty.bits() > 32 {
+ panic!("Unexpected output type {}", output_ty);
+ }
+ if from_bits < to_bits {
+ ctx.emit(Inst::Extend {
+ rd,
+ rm,
+ from_bits,
+ signed,
+ });
+ }
+ }
+ Opcode::Bint | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => {
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend);
+ let rd = output_to_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+
+ ctx.emit(Inst::gen_move(rd, rn, ty));
+ }
+ Opcode::Copy => {
+ let rd = output_to_reg(ctx, outputs[0]);
+ let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+ let ty = ctx.input_ty(insn, 0);
+
+ ctx.emit(Inst::gen_move(rd, rn, ty));
+ }
+ Opcode::Debugtrap => {
+ ctx.emit(Inst::Bkpt);
+ }
+ Opcode::Trap => {
+ let trap_info = inst_trapcode(ctx.data(insn)).unwrap();
+ ctx.emit(Inst::Udf { trap_info })
+ }
+ Opcode::Trapif => {
+ let cmp_insn = ctx
+ .get_input(inputs[0].insn, inputs[0].input)
+ .inst
+ .unwrap()
+ .0;
+ debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
+ emit_cmp(ctx, cmp_insn);
+
+ let trap_info = inst_trapcode(ctx.data(insn)).unwrap();
+ let condcode = inst_condcode(ctx.data(insn)).unwrap();
+ let cond = lower_condcode(condcode);
+
+ ctx.emit(Inst::TrapIf { cond, trap_info });
+ }
+ Opcode::FallthroughReturn | Opcode::Return => {
+ for (i, input) in inputs.iter().enumerate() {
+ let reg = input_to_reg(ctx, *input, NarrowValueMode::None);
+ let retval_reg = ctx.retval(i);
+ let ty = ctx.input_ty(insn, i);
+
+ ctx.emit(Inst::gen_move(retval_reg, reg, ty));
+ }
+ }
+ Opcode::Call | Opcode::CallIndirect => {
+ let caller_conv = ctx.abi().call_conv();
+ let (mut abi, inputs) = match op {
+ Opcode::Call => {
+ let (extname, dist) = ctx.call_target(insn).unwrap();
+ let extname = extname.clone();
+ let sig = ctx.call_sig(insn).unwrap();
+ assert_eq!(inputs.len(), sig.params.len());
+ assert_eq!(outputs.len(), sig.returns.len());
+ (
+ Arm32ABICaller::from_func(sig, &extname, dist, caller_conv)?,
+ &inputs[..],
+ )
+ }
+ Opcode::CallIndirect => {
+ let ptr = input_to_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend);
+ let sig = ctx.call_sig(insn).unwrap();
+ assert_eq!(inputs.len() - 1, sig.params.len());
+ assert_eq!(outputs.len(), sig.returns.len());
+ (
+ Arm32ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
+ &inputs[1..],
+ )
+ }
+ _ => unreachable!(),
+ };
+ assert_eq!(inputs.len(), abi.num_args());
+ for (i, input) in inputs.iter().enumerate().filter(|(i, _)| *i <= 3) {
+ let arg_reg = input_to_reg(ctx, *input, NarrowValueMode::None);
+ abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
+ }
+ abi.emit_call(ctx);
+ for (i, output) in outputs.iter().enumerate() {
+ let retval_reg = output_to_reg(ctx, *output);
+ abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
+ }
+ }
+ _ => panic!("lowering {} unimplemented!", op),
+ }
+
+ Ok(())
+}
+
+pub(crate) fn lower_branch<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ branches: &[IRInst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+) -> CodegenResult<()> {
+ // A block should end with at most two branches. The first may be a
+ // conditional branch; a conditional branch can be followed only by an
+ // unconditional branch or fallthrough. Otherwise, if only one branch,
+ // it may be an unconditional branch, a fallthrough, a return, or a
+ // trap. These conditions are verified by `is_ebb_basic()` during the
+ // verifier pass.
+ assert!(branches.len() <= 2);
+
+ if branches.len() == 2 {
+ // Must be a conditional branch followed by an unconditional branch.
+ let op0 = ctx.data(branches[0]).opcode();
+ let op1 = ctx.data(branches[1]).opcode();
+
+ assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
+ let taken = BranchTarget::Label(targets[0]);
+ let not_taken = match op1 {
+ Opcode::Jump => BranchTarget::Label(targets[1]),
+ Opcode::Fallthrough => BranchTarget::Label(fallthrough.unwrap()),
+ _ => unreachable!(), // assert above.
+ };
+ match op0 {
+ Opcode::Brz | Opcode::Brnz => {
+ let rn = input_to_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ NarrowValueMode::ZeroExtend,
+ );
+ let cond = if op0 == Opcode::Brz {
+ Cond::Eq
+ } else {
+ Cond::Ne
+ };
+
+ ctx.emit(Inst::CmpImm8 { rn, imm8: 0 });
+ ctx.emit(Inst::CondBr {
+ taken,
+ not_taken,
+ cond,
+ });
+ }
+ _ => unimplemented!(),
+ }
+ } else {
+ // Must be an unconditional branch or an indirect branch.
+ let op = ctx.data(branches[0]).opcode();
+ match op {
+ Opcode::Jump | Opcode::Fallthrough => {
+ assert_eq!(branches.len(), 1);
+ // In the Fallthrough case, the machine-independent driver
+ // fills in `targets[0]` with our fallthrough block, so this
+ // is valid for both Jump and Fallthrough.
+ ctx.emit(Inst::Jump {
+ dest: BranchTarget::Label(targets[0]),
+ });
+ }
+ _ => unimplemented!(),
+ }
+ }
+
+ Ok(())
+}
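// Editor's note (illustrative, not part of the vendored patch above): for a two-instruction
// branch group such as `brz v0, block1; jump block2`, the Brz/Brnz arm above emits two vcode
// instructions,
//
//     Inst::CmpImm8 { rn, imm8: 0 }
//     Inst::CondBr { taken: block1, not_taken: block2, cond: Cond::Eq }
//
// which encode to roughly `cmp rN, #0; beq block1; b block2`; the final unconditional branch
// may later be elided when block2 is the fallthrough block. Register and block names here are
// placeholders chosen for the example.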
diff --git a/third_party/rust/cranelift-codegen/src/isa/arm32/mod.rs b/third_party/rust/cranelift-codegen/src/isa/arm32/mod.rs
new file mode 100644
index 0000000000..4b9701fd1d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/arm32/mod.rs
@@ -0,0 +1,123 @@
+//! 32-bit ARM Instruction Set Architecture.
+
+use crate::ir::condcodes::IntCC;
+use crate::ir::Function;
+use crate::isa::Builder as IsaBuilder;
+use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
+use crate::result::CodegenResult;
+use crate::settings;
+
+use alloc::boxed::Box;
+use regalloc::{PrettyPrint, RealRegUniverse};
+use target_lexicon::{Architecture, ArmArchitecture, Triple};
+
+// New backend:
+mod abi;
+mod inst;
+mod lower;
+mod lower_inst;
+
+use inst::{create_reg_universe, EmitInfo};
+
+/// An ARM32 backend.
+pub struct Arm32Backend {
+ triple: Triple,
+ flags: settings::Flags,
+ reg_universe: RealRegUniverse,
+}
+
+impl Arm32Backend {
+ /// Create a new ARM32 backend with the given (shared) flags.
+ pub fn new_with_flags(triple: Triple, flags: settings::Flags) -> Arm32Backend {
+ let reg_universe = create_reg_universe();
+ Arm32Backend {
+ triple,
+ flags,
+ reg_universe,
+ }
+ }
+
+ fn compile_vcode(
+ &self,
+ func: &Function,
+ flags: settings::Flags,
+ ) -> CodegenResult<VCode<inst::Inst>> {
+ // This performs lowering to VCode, register-allocates the code, computes
+ // block layout and finalizes branches. The result is ready for binary emission.
+ let emit_info = EmitInfo::new(flags.clone());
+ let abi = Box::new(abi::Arm32ABICallee::new(func, flags)?);
+ compile::compile::<Arm32Backend>(func, self, abi, emit_info)
+ }
+}
+
+impl MachBackend for Arm32Backend {
+ fn compile_function(
+ &self,
+ func: &Function,
+ want_disasm: bool,
+ ) -> CodegenResult<MachCompileResult> {
+ let flags = self.flags();
+ let vcode = self.compile_vcode(func, flags.clone())?;
+ let buffer = vcode.emit();
+ let frame_size = vcode.frame_size();
+
+ let disasm = if want_disasm {
+ Some(vcode.show_rru(Some(&create_reg_universe())))
+ } else {
+ None
+ };
+
+ let buffer = buffer.finish();
+
+ Ok(MachCompileResult {
+ buffer,
+ frame_size,
+ disasm,
+ unwind_info: None,
+ })
+ }
+
+ fn name(&self) -> &'static str {
+ "arm32"
+ }
+
+ fn triple(&self) -> Triple {
+ self.triple.clone()
+ }
+
+ fn flags(&self) -> &settings::Flags {
+ &self.flags
+ }
+
+ fn reg_universe(&self) -> &RealRegUniverse {
+ &self.reg_universe
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> IntCC {
+ // Carry flag set.
+ IntCC::UnsignedGreaterThanOrEqual
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> IntCC {
+ // Carry flag clear.
+ IntCC::UnsignedLessThan
+ }
+}
+
+/// Create a new `isa::Builder`.
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+ assert!(match triple.architecture {
+ Architecture::Arm(ArmArchitecture::Arm)
+ | Architecture::Arm(ArmArchitecture::Armv7)
+ | Architecture::Arm(ArmArchitecture::Armv6) => true,
+ _ => false,
+ });
+ IsaBuilder {
+ triple,
+ setup: settings::builder(),
+ constructor: |triple, shared_flags, _| {
+ let backend = Arm32Backend::new_with_flags(triple, shared_flags);
+ Box::new(TargetIsaAdapter::new(backend))
+ },
+ }
+}
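// Editor's sketch (illustrative, not part of the vendored patch above): a minimal driver for
// the backend defined in this file, written as if it lived at the bottom of this module. It
// assumes a `Function` built elsewhere; the triple string is an illustrative choice.
fn _arm32_compile_sketch(func: &Function) -> CodegenResult<MachCompileResult> {
    use core::str::FromStr;

    let flags = settings::Flags::new(settings::builder());
    let triple = Triple::from_str("armv7-unknown-linux-gnueabihf").expect("valid arm triple");
    let backend = Arm32Backend::new_with_flags(triple, flags);
    // `compile_function` comes from the `MachBackend` impl above; no disassembly is requested.
    backend.compile_function(func, /* want_disasm = */ false)
}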
diff --git a/third_party/rust/cranelift-codegen/src/isa/call_conv.rs b/third_party/rust/cranelift-codegen/src/isa/call_conv.rs
new file mode 100644
index 0000000000..61a94e5a43
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/call_conv.rs
@@ -0,0 +1,106 @@
+use crate::settings::{self, LibcallCallConv};
+use core::fmt;
+use core::str;
+use target_lexicon::{CallingConvention, Triple};
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// Calling convention identifiers.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum CallConv {
+ /// Best performance, not ABI-stable
+ Fast,
+ /// Smallest caller code size, not ABI-stable
+ Cold,
+ /// System V-style convention used on many platforms
+ SystemV,
+ /// Windows "fastcall" convention, also used for x64 and ARM
+ WindowsFastcall,
+ /// SpiderMonkey WebAssembly convention on systems that natively use SystemV
+ BaldrdashSystemV,
+ /// SpiderMonkey WebAssembly convention on Windows
+ BaldrdashWindows,
+ /// SpiderMonkey WebAssembly convention for "ABI-2020", with extra TLS
+ /// register slots in the frame.
+ Baldrdash2020,
+ /// Specialized convention for the probestack function
+ Probestack,
+}
+
+impl CallConv {
+ /// Return the default calling convention for the given target triple.
+ pub fn triple_default(triple: &Triple) -> Self {
+ match triple.default_calling_convention() {
+ // Default to System V for unknown targets because most everything
+ // uses System V.
+ Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV,
+ Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall,
+ Ok(unimp) => unimplemented!("calling convention: {:?}", unimp),
+ }
+ }
+
+ /// Returns the calling convention used for libcalls according to the current flags.
+ pub fn for_libcall(flags: &settings::Flags, default_call_conv: CallConv) -> Self {
+ match flags.libcall_call_conv() {
+ LibcallCallConv::IsaDefault => default_call_conv,
+ LibcallCallConv::Fast => Self::Fast,
+ LibcallCallConv::Cold => Self::Cold,
+ LibcallCallConv::SystemV => Self::SystemV,
+ LibcallCallConv::WindowsFastcall => Self::WindowsFastcall,
+ LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV,
+ LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows,
+ LibcallCallConv::Baldrdash2020 => Self::Baldrdash2020,
+ LibcallCallConv::Probestack => Self::Probestack,
+ }
+ }
+
+ /// Is the calling convention extending the Windows Fastcall ABI?
+ pub fn extends_windows_fastcall(self) -> bool {
+ match self {
+ Self::WindowsFastcall | Self::BaldrdashWindows => true,
+ _ => false,
+ }
+ }
+
+ /// Is the calling convention extending the Baldrdash ABI?
+ pub fn extends_baldrdash(self) -> bool {
+ match self {
+ Self::BaldrdashSystemV | Self::BaldrdashWindows | Self::Baldrdash2020 => true,
+ _ => false,
+ }
+ }
+}
+
+impl fmt::Display for CallConv {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str(match *self {
+ Self::Fast => "fast",
+ Self::Cold => "cold",
+ Self::SystemV => "system_v",
+ Self::WindowsFastcall => "windows_fastcall",
+ Self::BaldrdashSystemV => "baldrdash_system_v",
+ Self::BaldrdashWindows => "baldrdash_windows",
+ Self::Baldrdash2020 => "baldrdash_2020",
+ Self::Probestack => "probestack",
+ })
+ }
+}
+
+impl str::FromStr for CallConv {
+ type Err = ();
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ match s {
+ "fast" => Ok(Self::Fast),
+ "cold" => Ok(Self::Cold),
+ "system_v" => Ok(Self::SystemV),
+ "windows_fastcall" => Ok(Self::WindowsFastcall),
+ "baldrdash_system_v" => Ok(Self::BaldrdashSystemV),
+ "baldrdash_windows" => Ok(Self::BaldrdashWindows),
+ "baldrdash_2020" => Ok(Self::Baldrdash2020),
+ "probestack" => Ok(Self::Probestack),
+ _ => Err(()),
+ }
+ }
+}
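// Editor's sketch (illustrative, not part of the vendored patch above): exercising the
// conversions defined in this file. The triple string is an illustrative choice.
fn _call_conv_sketch() {
    use core::str::FromStr;

    assert_eq!(CallConv::from_str("baldrdash_2020"), Ok(CallConv::Baldrdash2020));
    assert!(CallConv::Baldrdash2020.extends_baldrdash());
    assert!(!CallConv::SystemV.extends_windows_fastcall());

    // Unknown or SystemV-style triples default to the System V convention, per triple_default.
    let triple = Triple::from_str("x86_64-unknown-linux-gnu").expect("valid triple");
    assert_eq!(CallConv::triple_default(&triple), CallConv::SystemV);
}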
diff --git a/third_party/rust/cranelift-codegen/src/isa/constraints.rs b/third_party/rust/cranelift-codegen/src/isa/constraints.rs
new file mode 100644
index 0000000000..c87c3bd9d4
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/constraints.rs
@@ -0,0 +1,207 @@
+//! Register constraints for instruction operands.
+//!
+//! An encoding recipe specifies how an instruction is encoded as binary machine code, but it only
+//! works if the operands and results satisfy certain constraints. Constraints on immediate
+//! operands are checked by instruction predicates when the recipe is chosen.
+//!
+//! It is the register allocator's job to make sure that the register constraints on value operands
+//! are satisfied.
+
+use crate::binemit::CodeOffset;
+use crate::ir::{Function, Inst, ValueLoc};
+use crate::isa::{RegClass, RegUnit};
+use crate::regalloc::RegDiversions;
+
+/// Register constraint for a single value operand or instruction result.
+#[derive(PartialEq, Debug)]
+pub struct OperandConstraint {
+ /// The kind of constraint.
+ pub kind: ConstraintKind,
+
+ /// The register class of the operand.
+ ///
+ /// This applies to all kinds of constraints, but with slightly different meaning.
+ pub regclass: RegClass,
+}
+
+impl OperandConstraint {
+ /// Check if this operand constraint is satisfied by the given value location.
+ /// For tied constraints, this only checks the register class, not that the
+ /// counterpart operand has the same value location.
+ pub fn satisfied(&self, loc: ValueLoc) -> bool {
+ match self.kind {
+ ConstraintKind::Reg | ConstraintKind::Tied(_) => {
+ if let ValueLoc::Reg(reg) = loc {
+ self.regclass.contains(reg)
+ } else {
+ false
+ }
+ }
+ ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => {
+ loc == ValueLoc::Reg(reg) && self.regclass.contains(reg)
+ }
+ ConstraintKind::Stack => {
+ if let ValueLoc::Stack(_) = loc {
+ true
+ } else {
+ false
+ }
+ }
+ }
+ }
+}
+
+/// The different kinds of operand constraints.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum ConstraintKind {
+ /// This operand or result must be a register from the given register class.
+ Reg,
+
+ /// This operand or result must be a fixed register.
+ ///
+ /// The constraint's `regclass` field is the top-level register class containing the fixed
+ /// register.
+ FixedReg(RegUnit),
+
+ /// This result value must use the same register as an input value operand.
+ ///
+ /// The associated number is the index of the input value operand this result is tied to. The
+ /// constraint's `regclass` field is the same as the tied operand's register class.
+ ///
+ /// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and
+ /// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for
+ /// the out operand is `Tied(in)`.
+ Tied(u8),
+
+ /// This operand must be a fixed register, and it has a tied counterpart.
+ ///
+ /// This works just like `FixedReg`, but additionally indicates that there are identical
+ /// input/output operands for this fixed register. For an input operand, this means that the
+ /// value will be clobbered by the instruction.
+ FixedTied(RegUnit),
+
+ /// This operand must be a value in a stack slot.
+ ///
+ /// The constraint's `regclass` field is the register class that would normally be used to load
+ /// and store values of this type.
+ Stack,
+}
+
+/// Value operand constraints for an encoding recipe.
+#[derive(PartialEq, Clone)]
+pub struct RecipeConstraints {
+ /// Constraints for the instruction's fixed value operands.
+ ///
+ /// If the instruction takes a variable number of operands, the register constraints for those
+ /// operands must be computed dynamically.
+ ///
+ /// - For branches and jumps, block arguments must match the expectations of the destination block.
+ /// - For calls and returns, the calling convention ABI specifies constraints.
+ pub ins: &'static [OperandConstraint],
+
+ /// Constraints for the instruction's fixed results.
+ ///
+ /// If the instruction produces a variable number of results, it's probably a call and the
+ /// constraints must be derived from the calling convention ABI.
+ pub outs: &'static [OperandConstraint],
+
+ /// Are any of the input constraints `FixedReg` or `FixedTied`?
+ pub fixed_ins: bool,
+
+ /// Are any of the output constraints `FixedReg` or `FixedTied`?
+ pub fixed_outs: bool,
+
+ /// Are any of the input/output constraints `Tied` (but not `FixedTied`)?
+ pub tied_ops: bool,
+
+ /// Does this instruction clobber the CPU flags?
+ ///
+ /// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction.
+ pub clobbers_flags: bool,
+}
+
+impl RecipeConstraints {
+ /// Check that these constraints are satisfied by the operands on `inst`.
+ pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
+ for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
+ let loc = divert.get(arg, &func.locations);
+
+ if let ConstraintKind::Tied(out_index) = constraint.kind {
+ let out_val = func.dfg.inst_results(inst)[out_index as usize];
+ let out_loc = func.locations[out_val];
+ if loc != out_loc {
+ return false;
+ }
+ }
+
+ if !constraint.satisfied(loc) {
+ return false;
+ }
+ }
+
+ for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
+ let loc = divert.get(arg, &func.locations);
+ if !constraint.satisfied(loc) {
+ return false;
+ }
+ }
+
+ true
+ }
+}
+
+/// Constraints on the range of a branch instruction.
+///
+/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
+/// The origin depends on the ISA and the specific instruction:
+///
+/// - RISC-V and ARM Aarch64 use the address of the branch instruction, `origin = 0`.
+/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte
+/// branch instruction.
+/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
+#[derive(Clone, Copy, Debug)]
+pub struct BranchRange {
+ /// Offset in bytes from the address of the branch instruction to the origin used for computing
+ /// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
+ pub origin: u8,
+
+ /// Number of bits in the signed byte displacement encoded in the instruction. This does not
+ /// account for branches that can only target aligned addresses.
+ pub bits: u8,
+}
+
+impl BranchRange {
+ /// Determine if this branch range can represent the range from `branch` to `dest`, where
+ /// `branch` is the code offset of the branch instruction itself and `dest` is the code offset
+ /// of the destination block header.
+ ///
+ /// This method does not detect if the range is larger than 2 GB.
+ pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool {
+ let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32;
+ let s = 32 - self.bits;
+ d == d << s >> s
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn branch_range() {
+ // ARM T1 branch.
+ let t1 = BranchRange { origin: 4, bits: 9 };
+ assert!(t1.contains(0, 0));
+ assert!(t1.contains(0, 2));
+ assert!(t1.contains(2, 0));
+ assert!(t1.contains(1000, 1000));
+
+ // Forward limit.
+ assert!(t1.contains(1000, 1258));
+ assert!(!t1.contains(1000, 1260));
+
+ // Backward limit
+ assert!(t1.contains(1000, 748));
+ assert!(!t1.contains(1000, 746));
+ }
+}
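// Editor's note (illustrative, not part of the vendored patch above): the T1 figures in the
// test follow directly from `contains`. With `origin = 4` and `bits = 9`, the displacement
// `d = dest - (branch + 4)` must fit in 9 signed bits, i.e. -256 <= d <= 255. For
// branch = 1000: dest = 1258 gives d = 254 (in range) while dest = 1260 gives d = 256 (out of
// range), and dest = 748 gives d = -256 (in range) while dest = 746 gives d = -258 (out).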
diff --git a/third_party/rust/cranelift-codegen/src/isa/enc_tables.rs b/third_party/rust/cranelift-codegen/src/isa/enc_tables.rs
new file mode 100644
index 0000000000..e21557497e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/enc_tables.rs
@@ -0,0 +1,292 @@
+//! Support types for generated encoding tables.
+//!
+//! This module contains types and functions for working with the encoding tables generated by
+//! `cranelift-codegen/meta/src/gen_encodings.rs`.
+
+use crate::constant_hash::{probe, Table};
+use crate::ir::{Function, InstructionData, Opcode, Type};
+use crate::isa::{Encoding, Legalize};
+use crate::settings::PredicateView;
+use core::ops::Range;
+
+/// A recipe predicate.
+///
+/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
+///
+/// A None predicate is always satisfied.
+pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
+
+/// An instruction predicate.
+///
+/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
+/// can't depend on ISA settings.
+pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
+
+/// Legalization action to perform when no encoding can be found for an instruction.
+///
+/// This is an index into an ISA-specific table of legalization actions.
+pub type LegalizeCode = u8;
+
+/// Level 1 hash table entry.
+///
+/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
+/// variable, using `INVALID` for non-polymorphic instructions.
+///
+/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
+/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
+/// have a power-of-two size.
+///
+/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
+/// size of the `LEVEL2` table.
+///
+/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range.
+/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of
+/// bounds.
+pub struct Level1Entry<OffT: Into<u32> + Copy> {
+ pub ty: Type,
+ pub log2len: u8,
+ pub legalize: LegalizeCode,
+ pub offset: OffT,
+}
+
+impl<OffT: Into<u32> + Copy> Level1Entry<OffT> {
+ /// Get the level 2 table range indicated by this entry.
+ fn range(&self) -> Range<usize> {
+ let b = self.offset.into() as usize;
+ b..b + (1 << self.log2len)
+ }
+}
+
+impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
+ fn len(&self) -> usize {
+ self.len()
+ }
+
+ fn key(&self, idx: usize) -> Option<Type> {
+ if self[idx].log2len != !0 {
+ Some(self[idx].ty)
+ } else {
+ None
+ }
+ }
+}
+
+/// Level 2 hash table entry.
+///
+/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
+/// table where the encoding recipes for the instruction are stored.
+///
+/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
+/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
+/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
+/// bits.
+///
+/// Empty entries are encoded with a `None` `opcode` field.
+pub struct Level2Entry<OffT: Into<u32> + Copy> {
+ pub opcode: Option<Opcode>,
+ pub offset: OffT,
+}
+
+impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
+ fn len(&self) -> usize {
+ self.len()
+ }
+
+ fn key(&self, idx: usize) -> Option<Opcode> {
+ self[idx].opcode
+ }
+}
+
+/// Two-level hash table lookup and iterator construction.
+///
+/// Given the controlling type variable and instruction opcode, find the corresponding encoding
+/// list.
+///
+/// Returns an iterator that produces legal encodings for `inst`.
+pub fn lookup_enclist<'a, OffT1, OffT2>(
+ ctrl_typevar: Type,
+ inst: &'a InstructionData,
+ func: &'a Function,
+ level1_table: &'static [Level1Entry<OffT1>],
+ level2_table: &'static [Level2Entry<OffT2>],
+ enclist: &'static [EncListEntry],
+ legalize_actions: &'static [Legalize],
+ recipe_preds: &'static [RecipePredicate],
+ inst_preds: &'static [InstPredicate],
+ isa_preds: PredicateView<'a>,
+) -> Encodings<'a>
+where
+ OffT1: Into<u32> + Copy,
+ OffT2: Into<u32> + Copy,
+{
+ let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) {
+ Err(l1idx) => {
+ // No level 1 entry found for the type.
+ // We have a sentinel entry with the default legalization code.
+ (!0, level1_table[l1idx].legalize)
+ }
+ Ok(l1idx) => {
+ // We have a valid level 1 entry for this type.
+ let l1ent = &level1_table[l1idx];
+ let offset = match level2_table.get(l1ent.range()) {
+ Some(l2tab) => {
+ let opcode = inst.opcode();
+ match probe(l2tab, opcode, opcode as usize) {
+ Ok(l2idx) => l2tab[l2idx].offset.into() as usize,
+ Err(_) => !0,
+ }
+ }
+ // The l1ent range is invalid. This means that we just have a customized
+ // legalization code for this type. The level 2 table is empty.
+ None => !0,
+ };
+ (offset, l1ent.legalize)
+ }
+ };
+
+ // Now we have an offset into `enclist` that is `!0` when no encoding list could be found.
+ // The default legalization code is always valid.
+ Encodings::new(
+ offset,
+ legalize,
+ inst,
+ func,
+ enclist,
+ legalize_actions,
+ recipe_preds,
+ inst_preds,
+ isa_preds,
+ )
+}
+
+/// Encoding list entry.
+///
+/// Encoding lists are represented as sequences of u16 words.
+pub type EncListEntry = u16;
+
+/// Number of bits used to represent a predicate. c.f. `meta/src/gen_encodings.rs`.
+const PRED_BITS: u8 = 12;
+const PRED_MASK: usize = (1 << PRED_BITS) - 1;
+/// First code word representing a predicate check. c.f. `meta/src/gen_encodings.rs`.
+const PRED_START: usize = 0x1000;
+
+/// An iterator over legal encodings for the instruction.
+pub struct Encodings<'a> {
+ // Current offset into `enclist`, or out of bounds after we've reached the end.
+ offset: usize,
+ // Legalization code to use if no encoding is found.
+ legalize: LegalizeCode,
+ inst: &'a InstructionData,
+ func: &'a Function,
+ enclist: &'static [EncListEntry],
+ legalize_actions: &'static [Legalize],
+ recipe_preds: &'static [RecipePredicate],
+ inst_preds: &'static [InstPredicate],
+ isa_preds: PredicateView<'a>,
+}
+
+impl<'a> Encodings<'a> {
+ /// Creates a new instance of `Encodings`.
+ ///
+ /// This iterator searches for encodings that apply to the given instruction. The encoding
+ /// lists are laid out such that the first call to `next` returns a valid entry in the list
+ /// or `None`.
+ pub fn new(
+ offset: usize,
+ legalize: LegalizeCode,
+ inst: &'a InstructionData,
+ func: &'a Function,
+ enclist: &'static [EncListEntry],
+ legalize_actions: &'static [Legalize],
+ recipe_preds: &'static [RecipePredicate],
+ inst_preds: &'static [InstPredicate],
+ isa_preds: PredicateView<'a>,
+ ) -> Self {
+ Encodings {
+ offset,
+ inst,
+ func,
+ legalize,
+ isa_preds,
+ recipe_preds,
+ inst_preds,
+ enclist,
+ legalize_actions,
+ }
+ }
+
+ /// Get the legalization action that caused the enumeration of encodings to stop.
+ /// This can be the default legalization action for the type or a custom code for the
+ /// instruction.
+ ///
+ /// This method must only be called after the iterator returns `None`.
+ pub fn legalize(&self) -> Legalize {
+ debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
+ self.legalize_actions[self.legalize as usize]
+ }
+
+ /// Check if the `rpred` recipe predicate is satisfied.
+ fn check_recipe(&self, rpred: RecipePredicate) -> bool {
+ match rpred {
+ Some(p) => p(self.isa_preds, self.inst),
+ None => true,
+ }
+ }
+
+ /// Check an instruction or isa predicate.
+ fn check_pred(&self, pred: usize) -> bool {
+ if let Some(&p) = self.inst_preds.get(pred) {
+ p(self.func, self.inst)
+ } else {
+ let pred = pred - self.inst_preds.len();
+ self.isa_preds.test(pred)
+ }
+ }
+}
+
+impl<'a> Iterator for Encodings<'a> {
+ type Item = Encoding;
+
+ fn next(&mut self) -> Option<Encoding> {
+ while let Some(entryref) = self.enclist.get(self.offset) {
+ let entry = *entryref as usize;
+
+ // Check for "recipe+bits".
+ let recipe = entry >> 1;
+ if let Some(&rpred) = self.recipe_preds.get(recipe) {
+ let bits = self.offset + 1;
+ if entry & 1 == 0 {
+ self.offset += 2; // Next entry.
+ } else {
+ self.offset = !0; // Stop.
+ }
+ if self.check_recipe(rpred) {
+ return Some(Encoding::new(recipe as u16, self.enclist[bits]));
+ }
+ continue;
+ }
+
+ // Check for "stop with legalize".
+ if entry < PRED_START {
+ self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
+ self.offset = !0; // Stop.
+ return None;
+ }
+
+ // Finally, this must be a predicate entry.
+ let pred_entry = entry - PRED_START;
+ let skip = pred_entry >> PRED_BITS;
+ let pred = pred_entry & PRED_MASK;
+
+ if self.check_pred(pred) {
+ self.offset += 1;
+ } else if skip == 0 {
+ self.offset = !0; // Stop.
+ return None;
+ } else {
+ self.offset += 1 + skip;
+ }
+ }
+ None
+ }
+}
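// Editor's note (an illustrative reading of the iterator above, not part of the vendored
// patch): each `EncListEntry` word is interpreted as follows.
//
//  * If `entry >> 1` indexes into `recipe_preds`, the word names recipe `entry >> 1`; the next
//    word holds the encoding bits, and bit 0 of `entry` marks the last candidate in the list.
//  * Otherwise, if `entry < PRED_START`, the list ends with legalization code
//    `entry - 2 * recipe_preds.len()`.
//  * Otherwise the word is a predicate check: the low `PRED_BITS` bits of `entry - PRED_START`
//    select the predicate, and the remaining bits give the number of following words to skip
//    when the predicate fails (a skip of 0 stops the search).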
diff --git a/third_party/rust/cranelift-codegen/src/isa/encoding.rs b/third_party/rust/cranelift-codegen/src/isa/encoding.rs
new file mode 100644
index 0000000000..99894cab2c
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/encoding.rs
@@ -0,0 +1,163 @@
+//! The `Encoding` struct.
+
+use crate::binemit::CodeOffset;
+use crate::ir::{Function, Inst};
+use crate::isa::constraints::{BranchRange, RecipeConstraints};
+use crate::regalloc::RegDiversions;
+use core::fmt;
+
+/// Bits needed to encode an instruction as binary machine code.
+///
+/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
+/// encoding *bits*. The recipe determines the native instruction format and the mapping of
+/// operands to encoded bits. The encoding bits provide additional information to the recipe,
+/// typically parts of the opcode.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Encoding {
+ recipe: u16,
+ bits: u16,
+}
+
+impl Encoding {
+ /// Create a new `Encoding` containing `(recipe, bits)`.
+ pub fn new(recipe: u16, bits: u16) -> Self {
+ Self { recipe, bits }
+ }
+
+ /// Get the recipe number in this encoding.
+ pub fn recipe(self) -> usize {
+ self.recipe as usize
+ }
+
+ /// Get the recipe-specific encoding bits.
+ pub fn bits(self) -> u16 {
+ self.bits
+ }
+
+ /// Is this a legal encoding, or the default placeholder?
+ pub fn is_legal(self) -> bool {
+ self != Self::default()
+ }
+}
+
+/// The default encoding is the illegal one.
+impl Default for Encoding {
+ fn default() -> Self {
+ Self::new(0xffff, 0xffff)
+ }
+}
+
+/// ISA-independent display of an encoding.
+impl fmt::Display for Encoding {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ if self.is_legal() {
+ write!(f, "{}#{:02x}", self.recipe, self.bits)
+ } else {
+ write!(f, "-")
+ }
+ }
+}
+
+/// Temporary object that holds enough context to properly display an encoding.
+/// This is meant to be created by `EncInfo::display()`.
+pub struct DisplayEncoding {
+ pub encoding: Encoding,
+ pub recipe_names: &'static [&'static str],
+}
+
+impl fmt::Display for DisplayEncoding {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ if self.encoding.is_legal() {
+ write!(
+ f,
+ "{}#{:02x}",
+ self.recipe_names[self.encoding.recipe()],
+ self.encoding.bits
+ )
+ } else {
+ write!(f, "-")
+ }
+ }
+}
+
+type SizeCalculatorFn = fn(&RecipeSizing, Encoding, Inst, &RegDiversions, &Function) -> u8;
+
+/// Returns the base size of the recipe, assuming it is fixed. This is the default for most
+/// encodings; others can be variable and longer than this base size, depending on the registers
+/// they use, in which case a platform-specific `compute_size` function is used instead.
+pub fn base_size(
+ sizing: &RecipeSizing,
+ _: Encoding,
+ _: Inst,
+ _: &RegDiversions,
+ _: &Function,
+) -> u8 {
+ sizing.base_size
+}
+
+/// Code size information for an encoding recipe.
+///
+/// Encoding recipes may have runtime-determined instruction size.
+pub struct RecipeSizing {
+ /// Minimum size in bytes of instructions encoded with this recipe.
+ pub base_size: u8,
+
+ /// Method computing the instruction's real size, given inputs and outputs.
+ pub compute_size: SizeCalculatorFn,
+
+ /// Allowed branch range in this recipe, if any.
+ ///
+ /// All encoding recipes for branches have exact branch range information.
+ pub branch_range: Option<BranchRange>,
+}
+
+/// Information about all the encodings in this ISA.
+#[derive(Clone)]
+pub struct EncInfo {
+ /// Constraints on value operands per recipe.
+ pub constraints: &'static [RecipeConstraints],
+
+ /// Code size information per recipe.
+ pub sizing: &'static [RecipeSizing],
+
+ /// Names of encoding recipes.
+ pub names: &'static [&'static str],
+}
+
+impl EncInfo {
+ /// Get the value operand constraints for `enc` if it is a legal encoding.
+ pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
+ self.constraints.get(enc.recipe())
+ }
+
+ /// Create an object that can display an ISA-dependent encoding properly.
+ pub fn display(&self, enc: Encoding) -> DisplayEncoding {
+ DisplayEncoding {
+ encoding: enc,
+ recipe_names: self.names,
+ }
+ }
+
+ /// Get the size in bytes of `inst`, if it were encoded with `enc`.
+ ///
+ /// Returns 0 for illegal encodings.
+ pub fn byte_size(
+ &self,
+ enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+ ) -> CodeOffset {
+ self.sizing.get(enc.recipe()).map_or(0, |s| {
+ let compute_size = s.compute_size;
+ CodeOffset::from(compute_size(&s, enc, inst, divert, func))
+ })
+ }
+
+ /// Get the branch range that is supported by `enc`, if any.
+ ///
+ /// This will never return `None` for a legal branch encoding.
+ pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
+ self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
+ }
+}
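// Editor's sketch (illustrative, not part of the vendored patch above): `Encoding` is a plain
// (recipe, bits) pair whose default value is the "illegal" placeholder.
fn _encoding_sketch() {
    let enc = Encoding::new(3, 0x2c);
    assert!(enc.is_legal());
    assert_eq!(enc.recipe(), 3);
    assert_eq!(enc.bits(), 0x2c);
    assert!(!Encoding::default().is_legal());
}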
diff --git a/third_party/rust/cranelift-codegen/src/isa/mod.rs b/third_party/rust/cranelift-codegen/src/isa/mod.rs
new file mode 100644
index 0000000000..2e56c025d0
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/mod.rs
@@ -0,0 +1,447 @@
+//! Instruction Set Architectures.
+//!
+//! The `isa` module provides a `TargetIsa` trait which provides the behavior specialization needed
+//! by the ISA-independent code generator. The sub-modules of this module provide definitions for
+//! the instruction sets that Cranelift can target. Each sub-module has its own implementation of
+//! `TargetIsa`.
+//!
+//! # Constructing a `TargetIsa` instance
+//!
+//! The target ISA is built from the following information:
+//!
+//! - The name of the target ISA as a string. Cranelift is a cross-compiler, so the ISA to target
+//! can be selected dynamically. Individual ISAs can be left out when Cranelift is compiled, so a
+//! string is used to identify the proper sub-module.
+//! - Values for settings that apply to all ISAs. This is represented by a `settings::Flags`
+//! instance.
+//! - Values for ISA-specific settings.
+//!
+//! The `isa::lookup()` function is the main entry point which returns an `isa::Builder`
+//! appropriate for the requested ISA:
+//!
+//! ```
+//! # extern crate cranelift_codegen;
+//! # #[macro_use] extern crate target_lexicon;
+//! use cranelift_codegen::isa;
+//! use cranelift_codegen::settings::{self, Configurable};
+//! use std::str::FromStr;
+//! use target_lexicon::Triple;
+//!
+//! let shared_builder = settings::builder();
+//! let shared_flags = settings::Flags::new(shared_builder);
+//!
+//! match isa::lookup(triple!("riscv32")) {
+//! Err(_) => {
+//! // The RISC-V target ISA is not available.
+//! }
+//! Ok(mut isa_builder) => {
+//! isa_builder.set("supports_m", "on");
+//! let isa = isa_builder.finish(shared_flags);
+//! }
+//! }
+//! ```
+//!
+//! The configured target ISA trait object is a `Box<TargetIsa>` which can be used for multiple
+//! concurrent function compilations.
+
+pub use crate::isa::call_conv::CallConv;
+pub use crate::isa::constraints::{
+ BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
+};
+pub use crate::isa::enc_tables::Encodings;
+pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
+pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
+pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
+
+use crate::binemit;
+use crate::flowgraph;
+use crate::ir;
+#[cfg(feature = "unwind")]
+use crate::isa::unwind::systemv::RegisterMappingError;
+use crate::machinst::MachBackend;
+use crate::regalloc;
+use crate::result::CodegenResult;
+use crate::settings;
+use crate::settings::SetResult;
+use crate::timing;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::any::Any;
+use core::fmt;
+use core::fmt::{Debug, Formatter};
+use target_lexicon::{triple, Architecture, PointerWidth, Triple};
+use thiserror::Error;
+
+#[cfg(feature = "riscv")]
+mod riscv;
+
+#[cfg(feature = "x86")]
+mod x86;
+
+#[cfg(feature = "x64")]
+mod x64;
+
+#[cfg(feature = "arm32")]
+mod arm32;
+
+#[cfg(feature = "arm64")]
+pub(crate) mod aarch64;
+
+pub mod unwind;
+
+mod call_conv;
+mod constraints;
+mod enc_tables;
+mod encoding;
+pub mod registers;
+mod stack;
+
+#[cfg(test)]
+mod test_utils;
+
+/// Returns a builder that can create a corresponding `TargetIsa`
+/// or `Err(LookupError::SupportDisabled)` if not enabled.
+macro_rules! isa_builder {
+ ($name: ident, $feature: tt, $triple: ident) => {{
+ #[cfg(feature = $feature)]
+ {
+ Ok($name::isa_builder($triple))
+ }
+ #[cfg(not(feature = $feature))]
+ {
+ Err(LookupError::SupportDisabled)
+ }
+ }};
+}
+
+/// Look for an ISA for the given `triple`.
+/// Return a builder that can create a corresponding `TargetIsa`.
+pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
+ match triple.architecture {
+ Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => {
+ isa_builder!(riscv, "riscv", triple)
+ }
+ Architecture::X86_32 { .. } | Architecture::X86_64 => {
+ if cfg!(feature = "x64") {
+ isa_builder!(x64, "x64", triple)
+ } else {
+ isa_builder!(x86, "x86", triple)
+ }
+ }
+ Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
+ Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
+ _ => Err(LookupError::Unsupported),
+ }
+}
+
+/// Look for a supported ISA with the given `name`.
+/// Return a builder that can create a corresponding `TargetIsa`.
+pub fn lookup_by_name(name: &str) -> Result<Builder, LookupError> {
+ use alloc::str::FromStr;
+ lookup(triple!(name))
+}
+
+/// Describes reason for target lookup failure
+#[derive(Error, PartialEq, Eq, Copy, Clone, Debug)]
+pub enum LookupError {
+ /// Support for this target was disabled in the current build.
+ #[error("Support for this target is disabled")]
+ SupportDisabled,
+
+ /// Support for this target has not yet been implemented.
+ #[error("Support for this target has not been implemented yet")]
+ Unsupported,
+}
+
+/// Builder for a `TargetIsa`.
+/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
+#[derive(Clone)]
+pub struct Builder {
+ triple: Triple,
+ setup: settings::Builder,
+ constructor: fn(Triple, settings::Flags, settings::Builder) -> Box<dyn TargetIsa>,
+}
+
+impl Builder {
+ /// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
+ /// fully configured `TargetIsa` trait object.
+ pub fn finish(self, shared_flags: settings::Flags) -> Box<dyn TargetIsa> {
+ (self.constructor)(self.triple, shared_flags, self.setup)
+ }
+}
+
+impl settings::Configurable for Builder {
+ fn set(&mut self, name: &str, value: &str) -> SetResult<()> {
+ self.setup.set(name, value)
+ }
+
+ fn enable(&mut self, name: &str) -> SetResult<()> {
+ self.setup.enable(name)
+ }
+}
+
+/// After determining that an instruction doesn't have an encoding, how should we proceed to
+/// legalize it?
+///
+/// The `Encodings` iterator returns a legalization function to call.
+pub type Legalize =
+ fn(ir::Inst, &mut ir::Function, &mut flowgraph::ControlFlowGraph, &dyn TargetIsa) -> bool;
+
+/// This struct provides information that a frontend may need to know about a target to
+/// produce Cranelift IR for the target.
+#[derive(Clone, Copy, Hash)]
+pub struct TargetFrontendConfig {
+ /// The default calling convention of the target.
+ pub default_call_conv: CallConv,
+
+ /// The pointer width of the target.
+ pub pointer_width: PointerWidth,
+}
+
+impl TargetFrontendConfig {
+ /// Get the pointer type of this target.
+ pub fn pointer_type(self) -> ir::Type {
+ ir::Type::int(u16::from(self.pointer_bits())).unwrap()
+ }
+
+ /// Get the width of pointers on this target, in units of bits.
+ pub fn pointer_bits(self) -> u8 {
+ self.pointer_width.bits()
+ }
+
+ /// Get the width of pointers on this target, in units of bytes.
+ pub fn pointer_bytes(self) -> u8 {
+ self.pointer_width.bytes()
+ }
+}
+
+/// Methods that are specialized to a target ISA. Implies a Display trait that shows the
+/// shared flags, as well as any isa-specific flags.
+pub trait TargetIsa: fmt::Display + Send + Sync {
+ /// Get the name of this ISA.
+ fn name(&self) -> &'static str;
+
+ /// Get the target triple that was used to make this trait object.
+ fn triple(&self) -> &Triple;
+
+ /// Get the ISA-independent flags that were used to make this trait object.
+ fn flags(&self) -> &settings::Flags;
+
+ /// Get the default calling convention of this target.
+ fn default_call_conv(&self) -> CallConv {
+ CallConv::triple_default(self.triple())
+ }
+
+ /// Get the pointer type of this ISA.
+ fn pointer_type(&self) -> ir::Type {
+ ir::Type::int(u16::from(self.pointer_bits())).unwrap()
+ }
+
+ /// Get the width of pointers on this ISA.
+ fn pointer_width(&self) -> PointerWidth {
+ self.triple().pointer_width().unwrap()
+ }
+
+ /// Get the width of pointers on this ISA, in units of bits.
+ fn pointer_bits(&self) -> u8 {
+ self.pointer_width().bits()
+ }
+
+ /// Get the width of pointers on this ISA, in units of bytes.
+ fn pointer_bytes(&self) -> u8 {
+ self.pointer_width().bytes()
+ }
+
+ /// Get the information needed by frontends producing Cranelift IR.
+ fn frontend_config(&self) -> TargetFrontendConfig {
+ TargetFrontendConfig {
+ default_call_conv: self.default_call_conv(),
+ pointer_width: self.pointer_width(),
+ }
+ }
+
+ /// Does the CPU implement scalar comparisons using a CPU flags register?
+ fn uses_cpu_flags(&self) -> bool {
+ false
+ }
+
+ /// Does the CPU implement multi-register addressing?
+ fn uses_complex_addresses(&self) -> bool {
+ false
+ }
+
+ /// Get a data structure describing the registers in this ISA.
+ fn register_info(&self) -> RegInfo;
+
+ #[cfg(feature = "unwind")]
+ /// Map a Cranelift register to its corresponding DWARF register.
+ fn map_dwarf_register(&self, _: RegUnit) -> Result<u16, RegisterMappingError> {
+ Err(RegisterMappingError::UnsupportedArchitecture)
+ }
+
+ /// Returns an iterator over legal encodings for the instruction.
+ fn legal_encodings<'a>(
+ &'a self,
+ func: &'a ir::Function,
+ inst: &'a ir::InstructionData,
+ ctrl_typevar: ir::Type,
+ ) -> Encodings<'a>;
+
+ /// Encode an instruction after determining it is legal.
+ ///
+ /// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
+ /// Otherwise, return a `Legalize` action.
+ ///
+ /// This is also the main entry point for determining if an instruction is legal.
+ fn encode(
+ &self,
+ func: &ir::Function,
+ inst: &ir::InstructionData,
+ ctrl_typevar: ir::Type,
+ ) -> Result<Encoding, Legalize> {
+ let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
+ iter.next().ok_or_else(|| iter.legalize())
+ }
+
+ /// Get a data structure describing the instruction encodings in this ISA.
+ fn encoding_info(&self) -> EncInfo;
+
+ /// Legalize a function signature.
+ ///
+ /// This is used to legalize both the signature of the function being compiled and any called
+ /// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
+ /// arguments and return values.
+ ///
+ /// Arguments with types that are not supported by the ABI can be expanded into multiple
+ /// arguments:
+ ///
+ /// - Integer types that are too large to fit in a register can be broken into multiple
+ /// arguments of a smaller integer type.
+ /// - Floating point types can be bit-cast to an integer type of the same size, and possibly
+ /// broken into smaller integer types.
+ /// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
+ ///
+ /// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
+ ///
+ /// When this function is called to legalize the signature of the function currently being
+ /// compiled, `current` is true. The legalized signature can then also contain special purpose
+ /// arguments and return values such as:
+ ///
+ /// - A `link` argument representing the link registers on RISC architectures that don't push
+ /// the return address on the stack.
+ /// - A `link` return value which will receive the value that was passed to the `link`
+ /// argument.
+ /// - An `sret` argument can be added if one wasn't present already. This is necessary if the
+ /// signature returns more values than registers are available for returning values.
+ /// - An `sret` return value can be added if the ABI requires a function to return its `sret`
+ /// argument in a register.
+ ///
+ /// Arguments and return values for the caller's frame pointer and other callee-saved registers
+ /// should not be added by this function. These arguments are not added until after register
+ /// allocation.
+ fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool);
+
+ /// Get the register class that should be used to represent an ABI argument or return value of
+ /// type `ty`. This should be the top-level register class that contains the argument
+ /// registers.
+ ///
+ /// This function can assume that it will only be asked to provide register classes for types
+ /// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
+ fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;
+
+ /// Get the set of allocatable registers that can be used when compiling `func`.
+ ///
+ /// This set excludes reserved registers like the stack pointer and other special-purpose
+ /// registers.
+ fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
+
+ /// Compute the stack layout and insert prologue and epilogue code into `func`.
+ ///
+ /// Return an error if the stack frame is too large.
+ fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
+ let _tt = timing::prologue_epilogue();
+ // This default implementation is unlikely to be good enough.
+ use crate::ir::stackslot::{StackOffset, StackSize};
+ use crate::stack_layout::layout_stack;
+
+ let word_size = StackSize::from(self.pointer_bytes());
+
+ // Account for the SpiderMonkey standard prologue pushes.
+ if func.signature.call_conv.extends_baldrdash() {
+ let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size;
+ let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
+ ss.offset = Some(-(bytes as StackOffset));
+ func.stack_slots.push(ss);
+ }
+
+ let is_leaf = func.is_leaf();
+ layout_stack(&mut func.stack_slots, is_leaf, word_size)?;
+ Ok(())
+ }
+
+ /// Emit binary machine code for a single instruction into the `sink` trait object.
+ ///
+ /// Note that this will call `put*` methods on the `sink` trait object via its vtable which
+ /// is not the fastest way of emitting code.
+ ///
+ /// This function is under the "testing_hooks" feature, and is only suitable for use by
+ /// test harnesses. It increases code size, and is inefficient.
+ #[cfg(feature = "testing_hooks")]
+ fn emit_inst(
+ &self,
+ func: &ir::Function,
+ inst: ir::Inst,
+ divert: &mut regalloc::RegDiversions,
+ sink: &mut dyn binemit::CodeSink,
+ );
+
+ /// Emit a whole function into memory.
+ fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
+
+ /// IntCC condition for Unsigned Addition Overflow (Carry).
+ fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC;
+
+ /// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry).
+ fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC;
+
+ /// Creates unwind information for the function.
+ ///
+ /// Returns `None` if there is no unwind information for the function.
+ #[cfg(feature = "unwind")]
+ fn create_unwind_info(
+ &self,
+ _func: &ir::Function,
+ ) -> CodegenResult<Option<unwind::UnwindInfo>> {
+ // By default, an ISA has no unwind information
+ Ok(None)
+ }
+
+ /// Creates a new System V Common Information Entry for the ISA.
+ ///
+ /// Returns `None` if the ISA does not support System V unwind information.
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ // By default, an ISA cannot create a System V CIE
+ None
+ }
+
+ /// Get the new-style MachBackend, if this is an adapter around one.
+ fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
+ None
+ }
+
+ /// Return an [Any] reference for downcasting to the ISA-specific implementation of this trait
+ /// with `isa.as_any().downcast_ref::<isa::foo::Isa>()`.
+ fn as_any(&self) -> &dyn Any;
+}
+
+impl Debug for &dyn TargetIsa {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "TargetIsa {{ triple: {:?}, pointer_width: {:?}}}",
+ self.triple(),
+ self.pointer_width()
+ )
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/registers.rs b/third_party/rust/cranelift-codegen/src/isa/registers.rs
new file mode 100644
index 0000000000..e67ae13453
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/registers.rs
@@ -0,0 +1,360 @@
+//! Data structures describing the registers in an ISA.
+
+use crate::entity::EntityRef;
+use core::fmt;
+
+/// Register units are the smallest units of register allocation.
+///
+/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA
+/// has aliasing registers, the aliasing can be modeled with registers that cover multiple
+/// register units.
+///
+/// The register allocator will enforce that each register unit only gets used for one thing.
+pub type RegUnit = u16;
+
+/// A bit mask indexed by register classes.
+///
+/// The size of this type is determined by the ISA with the most register classes.
+pub type RegClassMask = u32;
+
+/// A bit mask indexed by register units.
+///
+/// The size of this type is determined by the target ISA that has the most register units defined.
+/// Currently that is arm32, which has 64+16 units.
+pub type RegUnitMask = [RegClassMask; 3];
+
+/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a
+/// contiguous range of register units.
+///
+/// The `RegBank` struct provides a static description of a register bank.
+pub struct RegBank {
+ /// The name of this register bank as defined in the ISA's DSL definition.
+ pub name: &'static str,
+
+ /// The first register unit in this bank.
+ pub first_unit: RegUnit,
+
+ /// The total number of register units in this bank.
+ pub units: RegUnit,
+
+ /// Array of specially named register units. This array can be shorter than the number of units
+ /// in the bank.
+ pub names: &'static [&'static str],
+
+ /// Name prefix to use for those register units in the bank not covered by the `names` array.
+ /// The remaining register units will be named this prefix followed by their decimal offset in
+ /// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ...
+ pub prefix: &'static str,
+
+ /// Index of the first top-level register class in this bank.
+ pub first_toprc: usize,
+
+ /// Number of top-level register classes in this bank.
+ ///
+ /// The top-level register classes in a bank are guaranteed to be numbered sequentially from
+ /// `first_toprc`, and all top-level register classes across banks come before any sub-classes.
+ pub num_toprcs: usize,
+
+ /// Is register pressure tracking enabled for this bank?
+ pub pressure_tracking: bool,
+}
+
+impl RegBank {
+ /// Does this bank contain `regunit`?
+ fn contains(&self, regunit: RegUnit) -> bool {
+ regunit >= self.first_unit && regunit - self.first_unit < self.units
+ }
+
+ /// Try to parse a regunit name. The name is not expected to begin with `%`.
+ fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
+ match self.names.iter().position(|&x| x == name) {
+ Some(offset) => {
+ // This is one of the special-cased names.
+ Some(offset as RegUnit)
+ }
+ None => {
+ // Try a regular prefixed name.
+ if name.starts_with(self.prefix) {
+ name[self.prefix.len()..].parse().ok()
+ } else {
+ None
+ }
+ }
+ }
+ .and_then(|offset| {
+ if offset < self.units {
+ Some(offset + self.first_unit)
+ } else {
+ None
+ }
+ })
+ }
+
+ /// Write `regunit` to `w`, assuming that it belongs to this bank.
+ /// All regunits are written with a `%` prefix.
+ fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result {
+ let offset = regunit - self.first_unit;
+ assert!(offset < self.units);
+ if (offset as usize) < self.names.len() {
+ write!(f, "%{}", self.names[offset as usize])
+ } else {
+ write!(f, "%{}{}", self.prefix, offset)
+ }
+ }
+}
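+
+// Illustrative sketch (bank shape assumed, not taken from the generated tables): the prefixed-name
+// fallback used by `parse_regunit` above, for a bank with prefix "r" and 16 units.
+#[test]
+fn regunit_prefix_parse_sketch() {
+    fn parse(name: &str) -> Option<u16> {
+        name.strip_prefix('r')?.parse().ok().filter(|&n| n < 16)
+    }
+    assert_eq!(parse("r5"), Some(5));
+    assert_eq!(parse("r16"), None); // out of range for this bank
+    assert_eq!(parse("x5"), None); // wrong prefix
+}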
+
+/// A register class reference.
+///
+/// All register classes are statically defined in tables generated from the meta descriptions.
+pub type RegClass = &'static RegClassData;
+
+/// Data about a register class.
+///
+/// A register class represents a subset of the registers in a bank. It describes the set of
+/// permitted registers for a register operand in a given encoding of an instruction.
+///
+/// A register class can be a subset of another register class. The top-level register classes are
+/// disjoint.
+pub struct RegClassData {
+ /// The name of the register class.
+ pub name: &'static str,
+
+ /// The index of this class in the ISA's RegInfo description.
+ pub index: u8,
+
+ /// How many register units to allocate per register.
+ pub width: u8,
+
+ /// Index of the register bank this class belongs to.
+ pub bank: u8,
+
+    /// Index of the top-level register class containing this one.
+ pub toprc: u8,
+
+ /// The first register unit in this class.
+ pub first: RegUnit,
+
+ /// Bit-mask of sub-classes of this register class, including itself.
+ ///
+ /// Bits correspond to RC indexes.
+ pub subclasses: RegClassMask,
+
+ /// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the
+ /// first register unit in each allocatable register.
+ pub mask: RegUnitMask,
+
+ /// The global `RegInfo` instance containing this register class.
+ pub info: &'static RegInfo,
+
+ /// The "pinned" register of the associated register bank.
+ ///
+ /// This register must be non-volatile (callee-preserved) and must not be the fixed
+ /// output register of any instruction.
+ pub pinned_reg: Option<RegUnit>,
+}
+
+impl RegClassData {
+ /// Get the register class index corresponding to the intersection of `self` and `other`.
+ ///
+ /// This register class is guaranteed to exist if the register classes overlap. If the register
+ /// classes don't overlap, returns `None`.
+ pub fn intersect_index(&self, other: RegClass) -> Option<RegClassIndex> {
+ // Compute the set of common subclasses.
+ let mask = self.subclasses & other.subclasses;
+
+ if mask == 0 {
+ // No overlap.
+ None
+ } else {
+ // Register class indexes are topologically ordered, so the largest common subclass has
+ // the smallest index.
+ Some(RegClassIndex(mask.trailing_zeros() as u8))
+ }
+ }
+
+ /// Get the intersection of `self` and `other`.
+ pub fn intersect(&self, other: RegClass) -> Option<RegClass> {
+ self.intersect_index(other).map(|rci| self.info.rc(rci))
+ }
+
+ /// Returns true if `other` is a subclass of this register class.
+ /// A register class is considered to be a subclass of itself.
+ pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
+ self.subclasses & (1 << other.into().0) as u32 != 0
+ }
+
+ /// Get the top-level register class containing this class.
+ pub fn toprc(&self) -> RegClass {
+ self.info.rc(RegClassIndex(self.toprc))
+ }
+
+ /// Get a specific register unit in this class.
+ pub fn unit(&self, offset: usize) -> RegUnit {
+ let uoffset = offset * usize::from(self.width);
+ self.first + uoffset as RegUnit
+ }
+
+ /// Does this register class contain `regunit`?
+ pub fn contains(&self, regunit: RegUnit) -> bool {
+ self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32) as u32) != 0
+ }
+
+ /// If the pinned register is used, is the given regunit the pinned register of this class?
+ #[inline]
+ pub fn is_pinned_reg(&self, enabled: bool, regunit: RegUnit) -> bool {
+ enabled
+ && self
+ .pinned_reg
+ .map_or(false, |pinned_reg| pinned_reg == regunit)
+ }
+
+ /// Calculate the index of the register inside the class.
+ pub fn index_of(&self, regunit: RegUnit) -> u16 {
+ assert!(
+ self.contains(regunit),
+ "the {} register class does not contain {}",
+ self.name,
+ regunit
+ );
+ regunit - self.first
+ }
+}
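+
+// Illustrative sketch with hand-picked masks (not generated ISA data): the mask arithmetic behind
+// `intersect_index`. Class indexes are topologically ordered, so the lowest set bit of the
+// intersection names the largest common subclass.
+#[test]
+fn subclass_mask_intersection_sketch() {
+    let a: RegClassMask = 0b1101; // subclasses {0, 2, 3}
+    let b: RegClassMask = 0b1010; // subclasses {1, 3}
+    let common = a & b;
+    assert_eq!(common, 0b1000);
+    assert_eq!(common.trailing_zeros(), 3); // the common subclass has index 3
+    assert_eq!(0b0001 & 0b0100, 0); // disjoint classes share no subclass
+}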
+
+impl fmt::Display for RegClassData {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str(self.name)
+ }
+}
+
+impl fmt::Debug for RegClassData {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str(self.name)
+ }
+}
+
+/// Within an ISA, register classes are uniquely identified by their index.
+impl PartialEq for RegClassData {
+ fn eq(&self, other: &Self) -> bool {
+ self.index == other.index
+ }
+}
+
+/// A small reference to a register class.
+///
+/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method
+/// can be used to get the real register class reference back.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct RegClassIndex(u8);
+
+impl EntityRef for RegClassIndex {
+ fn new(idx: usize) -> Self {
+ Self(idx as u8)
+ }
+
+ fn index(self) -> usize {
+ usize::from(self.0)
+ }
+}
+
+impl From<RegClass> for RegClassIndex {
+ fn from(rc: RegClass) -> Self {
+ Self(rc.index)
+ }
+}
+
+impl fmt::Display for RegClassIndex {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "rci{}", self.0)
+ }
+}
+
+/// Test whether two registers overlap.
+///
+/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to
+/// determine the width (in regunits) of the register.
+pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool {
+ let end1 = reg1 + RegUnit::from(rc1.width);
+ let end2 = reg2 + RegUnit::from(rc2.width);
+ !(end1 <= reg2 || end2 <= reg1)
+}
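+
+// Standalone sketch of the interval test above: a register is the half-open unit range
+// `[unit, unit + width)`, so two registers overlap exactly when the ranges intersect.
+#[test]
+fn regs_overlap_sketch() {
+    fn overlap(unit1: RegUnit, width1: RegUnit, unit2: RegUnit, width2: RegUnit) -> bool {
+        !(unit1 + width1 <= unit2 || unit2 + width2 <= unit1)
+    }
+    assert!(overlap(0, 2, 1, 1)); // a double-width register covers unit 1
+    assert!(!overlap(0, 1, 1, 1)); // adjacent single-unit registers do not overlap
+}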
+
+/// Information about the registers in an ISA.
+///
+/// The `RegInfo` data structure collects all relevant static information about the registers in an
+/// ISA.
+#[derive(Clone)]
+pub struct RegInfo {
+ /// All register banks, ordered by their `first_unit`. The register banks are disjoint, but
+ /// there may be holes of unused register unit numbers between banks due to alignment.
+ pub banks: &'static [RegBank],
+
+ /// All register classes ordered topologically so a sub-class always follows its parent.
+ pub classes: &'static [RegClass],
+}
+
+impl RegInfo {
+ /// Get the register bank holding `regunit`.
+ pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> {
+ // We could do a binary search, but most ISAs have only two register banks...
+ self.banks.iter().find(|b| b.contains(regunit))
+ }
+
+ /// Try to parse a regunit name. The name is not expected to begin with `%`.
+ pub fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
+ self.banks
+ .iter()
+ .filter_map(|b| b.parse_regunit(name))
+ .next()
+ }
+
+ /// Make a temporary object that can display a register unit.
+ pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit {
+ DisplayRegUnit {
+ regunit,
+ reginfo: self,
+ }
+ }
+
+ /// Get the register class corresponding to `idx`.
+ pub fn rc(&self, idx: RegClassIndex) -> RegClass {
+ self.classes[idx.index()]
+ }
+
+ /// Get the top-level register class containing the `idx` class.
+ pub fn toprc(&self, idx: RegClassIndex) -> RegClass {
+ self.classes[self.rc(idx).toprc as usize]
+ }
+}
+
+/// Temporary object that holds enough information to print a register unit.
+pub struct DisplayRegUnit<'a> {
+ regunit: RegUnit,
+ reginfo: &'a RegInfo,
+}
+
+impl<'a> fmt::Display for DisplayRegUnit<'a> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self.reginfo.bank_containing_regunit(self.regunit) {
+ Some(b) => b.write_regunit(f, self.regunit),
+ None => write!(f, "%INVALID{}", self.regunit),
+ }
+ }
+}
+
+#[test]
+fn assert_sizes() {
+ use cranelift_codegen_shared::constants;
+ use std::mem::size_of;
+
+ // In these tests, size_of returns number of bytes: we actually want the number of bits, so
+ // multiply these by 8.
+ assert!(
+ (size_of::<RegClassMask>() * 8) <= constants::MAX_NUM_REG_CLASSES,
+ "need to bump MAX_NUM_REG_CLASSES or change RegClassMask type"
+ );
+
+ assert!(
+ constants::MAX_NUM_REG_CLASSES < (1 << (size_of::<RegClassIndex>() * 8)),
+ "need to change RegClassIndex's type to a wider type"
+ );
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/riscv/abi.rs b/third_party/rust/cranelift-codegen/src/isa/riscv/abi.rs
new file mode 100644
index 0000000000..44c5f36afe
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/riscv/abi.rs
@@ -0,0 +1,149 @@
+//! RISC-V ABI implementation.
+//!
+//! This module implements the RISC-V calling convention through the primary `legalize_signature()`
+//! entry point.
+//!
+//! This doesn't support the soft-float ABI at the moment.
+
+use super::registers::{FPR, GPR};
+use super::settings;
+use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
+use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
+use crate::isa::RegClass;
+use crate::regalloc::RegisterSet;
+use alloc::borrow::Cow;
+use core::i32;
+use target_lexicon::Triple;
+
+struct Args {
+ pointer_bits: u8,
+ pointer_bytes: u8,
+ pointer_type: Type,
+ regs: u32,
+ reg_limit: u32,
+ offset: u32,
+}
+
+impl Args {
+ fn new(bits: u8, enable_e: bool) -> Self {
+ Self {
+ pointer_bits: bits,
+ pointer_bytes: bits / 8,
+ pointer_type: Type::int(u16::from(bits)).unwrap(),
+ regs: 0,
+ reg_limit: if enable_e { 6 } else { 8 },
+ offset: 0,
+ }
+ }
+}
+
+impl ArgAssigner for Args {
+ fn assign(&mut self, arg: &AbiParam) -> ArgAction {
+ fn align(value: u32, to: u32) -> u32 {
+ (value + to - 1) & !(to - 1)
+ }
+
+ let ty = arg.value_type;
+
+ // Check for a legal type.
+ // RISC-V doesn't have SIMD at all, so break all vectors down.
+ if ty.is_vector() {
+ return ValueConversion::VectorSplit.into();
+ }
+
+ // Large integers and booleans are broken down to fit in a register.
+ if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
+ // Align registers and stack to a multiple of two pointers.
+ self.regs = align(self.regs, 2);
+ self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes));
+ return ValueConversion::IntSplit.into();
+ }
+
+ // Small integers are extended to the size of a pointer register.
+ if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
+ match arg.extension {
+ ArgumentExtension::None => {}
+ ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
+ ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
+ }
+ }
+
+ if self.regs < self.reg_limit {
+ // Assign to a register.
+ let reg = if ty.is_float() {
+ FPR.unit(10 + self.regs as usize)
+ } else {
+ GPR.unit(10 + self.regs as usize)
+ };
+ self.regs += 1;
+ ArgumentLoc::Reg(reg).into()
+ } else {
+ // Assign a stack location.
+ let loc = ArgumentLoc::Stack(self.offset as i32);
+ self.offset += u32::from(self.pointer_bytes);
+ debug_assert!(self.offset <= i32::MAX as u32);
+ loc.into()
+ }
+ }
+}
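+
+// Minimal sketch of the round-up helper used in `assign` above: `(value + to - 1) & !(to - 1)`
+// rounds `value` up to the next multiple of the power of two `to`.
+#[test]
+fn align_round_up_sketch() {
+    fn align(value: u32, to: u32) -> u32 {
+        (value + to - 1) & !(to - 1)
+    }
+    assert_eq!(align(0, 8), 0);
+    assert_eq!(align(1, 8), 8);
+    assert_eq!(align(8, 8), 8);
+    assert_eq!(align(9, 8), 16);
+}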
+
+/// Legalize `sig` for RISC-V.
+pub fn legalize_signature(
+ sig: &mut Cow<ir::Signature>,
+ triple: &Triple,
+ isa_flags: &settings::Flags,
+ current: bool,
+) {
+ let bits = triple.pointer_width().unwrap().bits();
+
+ let mut args = Args::new(bits, isa_flags.enable_e());
+ if let Some(new_params) = legalize_args(&sig.params, &mut args) {
+ sig.to_mut().params = new_params;
+ }
+
+ let mut rets = Args::new(bits, isa_flags.enable_e());
+ if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
+ sig.to_mut().returns = new_returns;
+ }
+
+ if current {
+ let ptr = Type::int(u16::from(bits)).unwrap();
+
+ // Add the link register as an argument and return value.
+ //
+ // The `jalr` instruction implementing a return can technically accept the return address
+ // in any register, but a micro-architecture with a return address predictor will only
+ // recognize it as a return if the address is in `x1`.
+ let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1));
+ sig.to_mut().params.push(link);
+ sig.to_mut().returns.push(link);
+ }
+}
+
+/// Get register class for a type appearing in a legalized signature.
+pub fn regclass_for_abi_type(ty: Type) -> RegClass {
+ if ty.is_float() {
+ FPR
+ } else {
+ GPR
+ }
+}
+
+pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
+ let mut regs = RegisterSet::new();
+ regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
+    // %x1 is the link register, which is available for allocation.
+ regs.take(GPR, GPR.unit(2)); // Stack pointer.
+ regs.take(GPR, GPR.unit(3)); // Global pointer.
+ regs.take(GPR, GPR.unit(4)); // Thread pointer.
+ // TODO: %x8 is the frame pointer. Reserve it?
+
+ // Remove %x16 and up for RV32E.
+ if isa_flags.enable_e() {
+ for u in 16..32 {
+ regs.take(GPR, GPR.unit(u));
+ }
+ }
+
+ regs
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/riscv/binemit.rs b/third_party/rust/cranelift-codegen/src/isa/riscv/binemit.rs
new file mode 100644
index 0000000000..a1d2b82e12
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/riscv/binemit.rs
@@ -0,0 +1,182 @@
+//! Emitting binary RISC-V machine code.
+
+use crate::binemit::{bad_encoding, CodeSink, Reloc};
+use crate::ir::{Function, Inst, InstructionData};
+use crate::isa::{RegUnit, StackBaseMask, StackRef, TargetIsa};
+use crate::predicates::is_signed_int;
+use crate::regalloc::RegDiversions;
+use core::u32;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs"));
+
+/// R-type instructions.
+///
+/// 31 24 19 14 11 6
+/// funct7 rs2 rs1 funct3 rd opcode
+/// 25 20 15 12 7 0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
+fn put_r<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) {
+ let bits = u32::from(bits);
+ let opcode5 = bits & 0x1f;
+ let funct3 = (bits >> 5) & 0x7;
+ let funct7 = (bits >> 8) & 0x7f;
+ let rs1 = u32::from(rs1) & 0x1f;
+ let rs2 = u32::from(rs2) & 0x1f;
+ let rd = u32::from(rd) & 0x1f;
+
+ // 0-6: opcode
+ let mut i = 0x3;
+ i |= opcode5 << 2;
+ i |= rd << 7;
+ i |= funct3 << 12;
+ i |= rs1 << 15;
+ i |= rs2 << 20;
+ i |= funct7 << 25;
+
+ sink.put4(i);
+}
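+
+// Worked example of the field packing above (values hand-assembled, not read from the generated
+// encoding tables): `add x3, x1, x2` has funct7=0, funct3=0 and opcode 0b0110011.
+#[test]
+fn rtype_packing_sketch() {
+    let (funct7, rs2, rs1, funct3, rd, opcode) = (0u32, 2u32, 1u32, 0u32, 3u32, 0b011_0011u32);
+    let word = (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | opcode;
+    assert_eq!(word, 0x0020_81b3);
+}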
+
+/// R-type instructions with a shift amount instead of rs2.
+///
+/// 31 25 19 14 11 6
+/// funct7 shamt rs1 funct3 rd opcode
+/// 25 20 15 12 7 0
+///
+/// The funct7 and shamt fields overlap at bit 25: in RV64, shamt is 6 bits wide and uses bit 25
+/// for shift amounts greater than 31.
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
+fn put_rshamt<CS: CodeSink + ?Sized>(
+ bits: u16,
+ rs1: RegUnit,
+ shamt: i64,
+ rd: RegUnit,
+ sink: &mut CS,
+) {
+ let bits = u32::from(bits);
+ let opcode5 = bits & 0x1f;
+ let funct3 = (bits >> 5) & 0x7;
+ let funct7 = (bits >> 8) & 0x7f;
+ let rs1 = u32::from(rs1) & 0x1f;
+ let shamt = shamt as u32 & 0x3f;
+ let rd = u32::from(rd) & 0x1f;
+
+ // 0-6: opcode
+ let mut i = 0x3;
+ i |= opcode5 << 2;
+ i |= rd << 7;
+ i |= funct3 << 12;
+ i |= rs1 << 15;
+ i |= shamt << 20;
+ i |= funct7 << 25;
+
+ sink.put4(i);
+}
+
+/// I-type instructions.
+///
+/// 31 19 14 11 6
+/// imm rs1 funct3 rd opcode
+/// 20 15 12 7 0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
+fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) {
+ let bits = u32::from(bits);
+ let opcode5 = bits & 0x1f;
+ let funct3 = (bits >> 5) & 0x7;
+ let rs1 = u32::from(rs1) & 0x1f;
+ let rd = u32::from(rd) & 0x1f;
+
+ // 0-6: opcode
+ let mut i = 0x3;
+ i |= opcode5 << 2;
+ i |= rd << 7;
+ i |= funct3 << 12;
+ i |= rs1 << 15;
+ i |= (imm << 20) as u32;
+
+ sink.put4(i);
+}
+
+/// U-type instructions.
+///
+/// 31 11 6
+/// imm rd opcode
+/// 12 7 0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
+fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
+ let bits = u32::from(bits);
+ let opcode5 = bits & 0x1f;
+ let rd = u32::from(rd) & 0x1f;
+
+ // 0-6: opcode
+ let mut i = 0x3;
+ i |= opcode5 << 2;
+ i |= rd << 7;
+ i |= imm as u32 & 0xfffff000;
+
+ sink.put4(i);
+}
+
+/// SB-type branch instructions.
+///
+/// 31 24 19 14 11 6
+/// imm rs2 rs1 funct3 imm opcode
+/// 25 20 15 12 7 0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
+fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) {
+ let bits = u32::from(bits);
+ let opcode5 = bits & 0x1f;
+ let funct3 = (bits >> 5) & 0x7;
+ let rs1 = u32::from(rs1) & 0x1f;
+ let rs2 = u32::from(rs2) & 0x1f;
+
+ debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
+ let imm = imm as u32;
+
+ // 0-6: opcode
+ let mut i = 0x3;
+ i |= opcode5 << 2;
+ i |= funct3 << 12;
+ i |= rs1 << 15;
+ i |= rs2 << 20;
+
+    // The displacement bits are scattered across the instruction word.
+ i |= ((imm >> 11) & 0x1) << 7;
+ i |= ((imm >> 1) & 0xf) << 8;
+ i |= ((imm >> 5) & 0x3f) << 25;
+ i |= ((imm >> 12) & 0x1) << 31;
+
+ sink.put4(i);
+}
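+
+// Worked example of the displacement scatter above (hand-assembled): `beq x1, x2, 8` places
+// imm[4:1] in bits 8..=11 and imm[11], imm[10:5], imm[12] in bits 7, 25..=30 and 31.
+#[test]
+fn sb_displacement_scatter_sketch() {
+    let (imm, rs1, rs2, funct3, opcode) = (8u32, 1u32, 2u32, 0u32, 0b110_0011u32);
+    let word = opcode
+        | (((imm >> 11) & 0x1) << 7)
+        | (((imm >> 1) & 0xf) << 8)
+        | (funct3 << 12)
+        | (rs1 << 15)
+        | (rs2 << 20)
+        | (((imm >> 5) & 0x3f) << 25)
+        | (((imm >> 12) & 0x1) << 31);
+    assert_eq!(word, 0x0020_8463);
+}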
+
+/// UJ-type jump instructions.
+///
+/// 31 11 6
+/// imm rd opcode
+/// 12 7 0
+///
+/// Encoding bits: `opcode[6:2]`
+fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
+ let bits = u32::from(bits);
+ let opcode5 = bits & 0x1f;
+ let rd = u32::from(rd) & 0x1f;
+
+ debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
+ let imm = imm as u32;
+
+ // 0-6: opcode
+ let mut i = 0x3;
+ i |= opcode5 << 2;
+ i |= rd << 7;
+
+    // The displacement bits are scattered across the instruction word.
+ i |= imm & 0xff000;
+ i |= ((imm >> 11) & 0x1) << 20;
+ i |= ((imm >> 1) & 0x3ff) << 21;
+ i |= ((imm >> 20) & 0x1) << 31;
+
+ sink.put4(i);
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/riscv/enc_tables.rs b/third_party/rust/cranelift-codegen/src/isa/riscv/enc_tables.rs
new file mode 100644
index 0000000000..76184ad727
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/riscv/enc_tables.rs
@@ -0,0 +1,18 @@
+//! Encoding tables for RISC-V.
+
+use super::registers::*;
+use crate::ir;
+use crate::isa;
+use crate::isa::constraints::*;
+use crate::isa::enc_tables::*;
+use crate::isa::encoding::{base_size, RecipeSizing};
+use crate::predicates;
+
+// Include the generated encoding tables:
+// - `LEVEL1_RV32`
+// - `LEVEL1_RV64`
+// - `LEVEL2`
+// - `ENCLIST`
+// - `INFO`
+include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs"));
diff --git a/third_party/rust/cranelift-codegen/src/isa/riscv/mod.rs b/third_party/rust/cranelift-codegen/src/isa/riscv/mod.rs
new file mode 100644
index 0000000000..e69a3a0e12
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/riscv/mod.rs
@@ -0,0 +1,295 @@
+//! RISC-V Instruction Set Architecture.
+
+mod abi;
+mod binemit;
+mod enc_tables;
+mod registers;
+pub mod settings;
+
+use super::super::settings as shared_settings;
+#[cfg(feature = "testing_hooks")]
+use crate::binemit::CodeSink;
+use crate::binemit::{emit_function, MemoryCodeSink};
+use crate::ir;
+use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
+use crate::isa::Builder as IsaBuilder;
+use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
+use crate::regalloc;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::any::Any;
+use core::fmt;
+use target_lexicon::{PointerWidth, Triple};
+
+#[allow(dead_code)]
+struct Isa {
+ triple: Triple,
+ shared_flags: shared_settings::Flags,
+ isa_flags: settings::Flags,
+ cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
+}
+
+/// Get an ISA builder for creating RISC-V targets.
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+ IsaBuilder {
+ triple,
+ setup: settings::builder(),
+ constructor: isa_constructor,
+ }
+}
+
+fn isa_constructor(
+ triple: Triple,
+ shared_flags: shared_settings::Flags,
+ builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+ let level1 = match triple.pointer_width().unwrap() {
+        PointerWidth::U16 => panic!("16-bit RISC-V is not supported"),
+ PointerWidth::U32 => &enc_tables::LEVEL1_RV32[..],
+ PointerWidth::U64 => &enc_tables::LEVEL1_RV64[..],
+ };
+ Box::new(Isa {
+ triple,
+ isa_flags: settings::Flags::new(&shared_flags, builder),
+ shared_flags,
+ cpumode: level1,
+ })
+}
+
+impl TargetIsa for Isa {
+ fn name(&self) -> &'static str {
+ "riscv"
+ }
+
+ fn triple(&self) -> &Triple {
+ &self.triple
+ }
+
+ fn flags(&self) -> &shared_settings::Flags {
+ &self.shared_flags
+ }
+
+ fn register_info(&self) -> RegInfo {
+ registers::INFO.clone()
+ }
+
+ fn encoding_info(&self) -> EncInfo {
+ enc_tables::INFO.clone()
+ }
+
+ fn legal_encodings<'a>(
+ &'a self,
+ func: &'a ir::Function,
+ inst: &'a ir::InstructionData,
+ ctrl_typevar: ir::Type,
+ ) -> Encodings<'a> {
+ lookup_enclist(
+ ctrl_typevar,
+ inst,
+ func,
+ self.cpumode,
+ &enc_tables::LEVEL2[..],
+ &enc_tables::ENCLISTS[..],
+ &enc_tables::LEGALIZE_ACTIONS[..],
+ &enc_tables::RECIPE_PREDICATES[..],
+ &enc_tables::INST_PREDICATES[..],
+ self.isa_flags.predicate_view(),
+ )
+ }
+
+ fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
+ abi::legalize_signature(sig, &self.triple, &self.isa_flags, current)
+ }
+
+ fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
+ abi::regclass_for_abi_type(ty)
+ }
+
+ fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
+ abi::allocatable_registers(func, &self.isa_flags)
+ }
+
+ #[cfg(feature = "testing_hooks")]
+ fn emit_inst(
+ &self,
+ func: &ir::Function,
+ inst: ir::Inst,
+ divert: &mut regalloc::RegDiversions,
+ sink: &mut dyn CodeSink,
+ ) {
+ binemit::emit_inst(func, inst, divert, sink, self)
+ }
+
+ fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
+ emit_function(func, binemit::emit_inst, sink, self)
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
+ unimplemented!()
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
+ unimplemented!()
+ }
+
+ fn as_any(&self) -> &dyn Any {
+ self as &dyn Any
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::ir::{immediates, types};
+ use crate::ir::{Function, InstructionData, Opcode};
+ use crate::isa;
+ use crate::settings::{self, Configurable};
+ use alloc::string::{String, ToString};
+ use core::str::FromStr;
+ use target_lexicon::triple;
+
+ fn encstr(isa: &dyn isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {
+ match enc {
+ Ok(e) => isa.encoding_info().display(e).to_string(),
+ Err(_) => "no encoding".to_string(),
+ }
+ }
+
+ #[test]
+ fn test_64bitenc() {
+ let shared_builder = settings::builder();
+ let shared_flags = settings::Flags::new(shared_builder);
+ let isa = isa::lookup(triple!("riscv64"))
+ .unwrap()
+ .finish(shared_flags);
+
+ let mut func = Function::new();
+ let block = func.dfg.make_block();
+ let arg64 = func.dfg.append_block_param(block, types::I64);
+ let arg32 = func.dfg.append_block_param(block, types::I32);
+
+ // Try to encode iadd_imm.i64 v1, -10.
+ let inst64 = InstructionData::BinaryImm64 {
+ opcode: Opcode::IaddImm,
+ arg: arg64,
+ imm: immediates::Imm64::new(-10),
+ };
+
+ // ADDI is I/0b00100
+ assert_eq!(
+ encstr(&*isa, isa.encode(&func, &inst64, types::I64)),
+ "Ii#04"
+ );
+
+ // Try to encode iadd_imm.i64 v1, -10000.
+ let inst64_large = InstructionData::BinaryImm64 {
+ opcode: Opcode::IaddImm,
+ arg: arg64,
+ imm: immediates::Imm64::new(-10000),
+ };
+
+ // Immediate is out of range for ADDI.
+ assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
+
+ // Create an iadd_imm.i32 which is encodable in RV64.
+ let inst32 = InstructionData::BinaryImm64 {
+ opcode: Opcode::IaddImm,
+ arg: arg32,
+ imm: immediates::Imm64::new(10),
+ };
+
+ // ADDIW is I/0b00110
+ assert_eq!(
+ encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
+ "Ii#06"
+ );
+ }
+
+ // Same as above, but for RV32.
+ #[test]
+ fn test_32bitenc() {
+ let shared_builder = settings::builder();
+ let shared_flags = settings::Flags::new(shared_builder);
+ let isa = isa::lookup(triple!("riscv32"))
+ .unwrap()
+ .finish(shared_flags);
+
+ let mut func = Function::new();
+ let block = func.dfg.make_block();
+ let arg64 = func.dfg.append_block_param(block, types::I64);
+ let arg32 = func.dfg.append_block_param(block, types::I32);
+
+ // Try to encode iadd_imm.i64 v1, -10.
+ let inst64 = InstructionData::BinaryImm64 {
+ opcode: Opcode::IaddImm,
+ arg: arg64,
+ imm: immediates::Imm64::new(-10),
+ };
+
+        // In 32-bit mode, an i64 add should be narrowed first.
+ assert!(isa.encode(&func, &inst64, types::I64).is_err());
+
+ // Try to encode iadd_imm.i64 v1, -10000.
+ let inst64_large = InstructionData::BinaryImm64 {
+ opcode: Opcode::IaddImm,
+ arg: arg64,
+ imm: immediates::Imm64::new(-10000),
+ };
+
+        // In 32-bit mode, an i64 add should be narrowed first.
+ assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
+
+ // Create an iadd_imm.i32 which is encodable in RV32.
+ let inst32 = InstructionData::BinaryImm64 {
+ opcode: Opcode::IaddImm,
+ arg: arg32,
+ imm: immediates::Imm64::new(10),
+ };
+
+ // ADDI is I/0b00100
+ assert_eq!(
+ encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
+ "Ii#04"
+ );
+
+ // Create an imul.i32 which is encodable in RV32, but only when use_m is true.
+ let mul32 = InstructionData::Binary {
+ opcode: Opcode::Imul,
+ args: [arg32, arg32],
+ };
+
+ assert!(isa.encode(&func, &mul32, types::I32).is_err());
+ }
+
+ #[test]
+ fn test_rv32m() {
+ let shared_builder = settings::builder();
+ let shared_flags = settings::Flags::new(shared_builder);
+
+        // Set the supports_m setting, which in turn enables the use_m predicate that unlocks
+        // encodings for imul.
+ let mut isa_builder = isa::lookup(triple!("riscv32")).unwrap();
+ isa_builder.enable("supports_m").unwrap();
+
+ let isa = isa_builder.finish(shared_flags);
+
+ let mut func = Function::new();
+ let block = func.dfg.make_block();
+ let arg32 = func.dfg.append_block_param(block, types::I32);
+
+ // Create an imul.i32 which is encodable in RV32M.
+ let mul32 = InstructionData::Binary {
+ opcode: Opcode::Imul,
+ args: [arg32, arg32],
+ };
+ assert_eq!(
+ encstr(&*isa, isa.encode(&func, &mul32, types::I32)),
+ "R#10c"
+ );
+ }
+}
+
+impl fmt::Display for Isa {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/riscv/registers.rs b/third_party/rust/cranelift-codegen/src/isa/riscv/registers.rs
new file mode 100644
index 0000000000..9043b7f65f
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/riscv/registers.rs
@@ -0,0 +1,50 @@
+//! RISC-V register descriptions.
+
+use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
+
+include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
+
+#[cfg(test)]
+mod tests {
+ use super::{FPR, GPR, INFO};
+ use crate::isa::RegUnit;
+ use alloc::string::{String, ToString};
+
+ #[test]
+ fn unit_encodings() {
+ assert_eq!(INFO.parse_regunit("x0"), Some(0));
+ assert_eq!(INFO.parse_regunit("x31"), Some(31));
+ assert_eq!(INFO.parse_regunit("f0"), Some(32));
+ assert_eq!(INFO.parse_regunit("f31"), Some(63));
+
+ assert_eq!(INFO.parse_regunit("x32"), None);
+ assert_eq!(INFO.parse_regunit("f32"), None);
+ }
+
+ #[test]
+ fn unit_names() {
+ fn uname(ru: RegUnit) -> String {
+ INFO.display_regunit(ru).to_string()
+ }
+
+ assert_eq!(uname(0), "%x0");
+ assert_eq!(uname(1), "%x1");
+ assert_eq!(uname(31), "%x31");
+ assert_eq!(uname(32), "%f0");
+ assert_eq!(uname(33), "%f1");
+ assert_eq!(uname(63), "%f31");
+ assert_eq!(uname(64), "%INVALID64");
+ }
+
+ #[test]
+ fn classes() {
+ assert!(GPR.contains(GPR.unit(0)));
+ assert!(GPR.contains(GPR.unit(31)));
+ assert!(!FPR.contains(GPR.unit(0)));
+ assert!(!FPR.contains(GPR.unit(31)));
+ assert!(!GPR.contains(FPR.unit(0)));
+ assert!(!GPR.contains(FPR.unit(31)));
+ assert!(FPR.contains(FPR.unit(0)));
+ assert!(FPR.contains(FPR.unit(31)));
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/riscv/settings.rs b/third_party/rust/cranelift-codegen/src/isa/riscv/settings.rs
new file mode 100644
index 0000000000..40aa3bed2b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/riscv/settings.rs
@@ -0,0 +1,56 @@
+//! RISC-V Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/riscv/mod.rs`.
+include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
+
+#[cfg(test)]
+mod tests {
+ use super::{builder, Flags};
+ use crate::settings::{self, Configurable};
+ use alloc::string::ToString;
+
+ #[test]
+ fn display_default() {
+ let shared = settings::Flags::new(settings::builder());
+ let b = builder();
+ let f = Flags::new(&shared, b);
+ assert_eq!(
+ f.to_string(),
+ "[riscv]\n\
+ supports_m = false\n\
+ supports_a = false\n\
+ supports_f = false\n\
+ supports_d = false\n\
+ enable_m = true\n\
+ enable_e = false\n"
+ );
+ // Predicates are not part of the Display output.
+ assert_eq!(f.full_float(), false);
+ }
+
+ #[test]
+ fn predicates() {
+ let mut sb = settings::builder();
+ sb.set("enable_simd", "true").unwrap();
+ let shared = settings::Flags::new(sb);
+ let mut b = builder();
+ b.enable("supports_f").unwrap();
+ b.enable("supports_d").unwrap();
+ let f = Flags::new(&shared, b);
+ assert_eq!(f.full_float(), true);
+
+ let mut sb = settings::builder();
+ sb.set("enable_simd", "false").unwrap();
+ let shared = settings::Flags::new(sb);
+ let mut b = builder();
+ b.enable("supports_f").unwrap();
+ b.enable("supports_d").unwrap();
+ let f = Flags::new(&shared, b);
+ assert_eq!(f.full_float(), false);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/stack.rs b/third_party/rust/cranelift-codegen/src/isa/stack.rs
new file mode 100644
index 0000000000..ae093bed28
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/stack.rs
@@ -0,0 +1,95 @@
+//! Low-level details of stack accesses.
+//!
+//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type
+//! defined in this module expresses the low-level details of accessing a stack slot from an
+//! encoded instruction.
+
+use crate::ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
+use crate::ir::StackSlot;
+
+/// A method for referencing a stack slot in the current stack frame.
+///
+/// Stack slots are addressed with a constant offset from a base register. The base can be the
+/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone
+/// of a large stack frame.
+#[derive(Clone, Copy, Debug)]
+pub struct StackRef {
+ /// The base register to use for addressing.
+ pub base: StackBase,
+
+ /// Immediate offset from the base register to the first byte of the stack slot.
+ pub offset: StackOffset,
+}
+
+impl StackRef {
+ /// Get a reference to the stack slot `ss` using one of the base pointers in `mask`.
+ pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option<Self> {
+ // Try an SP-relative reference.
+ if mask.contains(StackBase::SP) {
+ return Some(Self::sp(ss, frame));
+ }
+
+ // No reference possible with this mask.
+ None
+ }
+
+ /// Get a reference to `ss` using the stack pointer as a base.
+ pub fn sp(ss: StackSlot, frame: &StackSlots) -> Self {
+ let size = frame
+ .layout_info
+ .expect("Stack layout must be computed before referencing stack slots")
+ .frame_size;
+ let slot = &frame[ss];
+ let offset = if slot.kind == StackSlotKind::OutgoingArg {
+ // Outgoing argument slots have offsets relative to our stack pointer.
+ slot.offset.unwrap()
+ } else {
+ // All other slots have offsets relative to our caller's stack frame.
+ // Offset where SP is pointing. (All ISAs have stacks growing downwards.)
+ let sp_offset = -(size as StackOffset);
+ slot.offset.unwrap() - sp_offset
+ };
+ Self {
+ base: StackBase::SP,
+ offset,
+ }
+ }
+}
+
+/// Generic base register for referencing stack slots.
+///
+/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for
+/// those two base pointers.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum StackBase {
+ /// Use the stack pointer.
+ SP = 0,
+
+ /// Use the frame pointer (if one is present).
+ FP = 1,
+
+ /// Use an explicit zone pointer in a general-purpose register.
+ ///
+ /// This feature is not yet implemented.
+ Zone = 2,
+}
+
+/// Bit mask of supported stack bases.
+///
+/// Many instruction encodings can use different base registers while others only work with the
+/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given
+/// instruction encoding.
+///
+/// This behaves like a set of `StackBase` variants.
+///
+/// The internal representation as a `u8` is public because stack base masks are used in constant
+/// tables generated from the meta-language encoding definitions.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct StackBaseMask(pub u8);
+
+impl StackBaseMask {
+ /// Check if this mask contains the `base` variant.
+ pub fn contains(self, base: StackBase) -> bool {
+ self.0 & (1 << base as usize) != 0
+ }
+}
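+
+// Minimal usage sketch: a mask with bits 0 and 1 set permits SP- and FP-relative addressing but
+// not the (unimplemented) zone base.
+#[test]
+fn stack_base_mask_sketch() {
+    let mask = StackBaseMask(0b011);
+    assert!(mask.contains(StackBase::SP));
+    assert!(mask.contains(StackBase::FP));
+    assert!(!mask.contains(StackBase::Zone));
+}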
diff --git a/third_party/rust/cranelift-codegen/src/isa/test_utils.rs b/third_party/rust/cranelift-codegen/src/isa/test_utils.rs
new file mode 100644
index 0000000000..01c500d6ca
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/test_utils.rs
@@ -0,0 +1,86 @@
+// This is unused when no platforms with the new backend are enabled.
+#![allow(dead_code)]
+
+use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc};
+use crate::ir::Value;
+use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode};
+use crate::isa::TargetIsa;
+
+use alloc::vec::Vec;
+use std::string::String;
+
+pub struct TestCodeSink {
+ bytes: Vec<u8>,
+}
+
+impl TestCodeSink {
+ /// Create a new TestCodeSink.
+ pub fn new() -> TestCodeSink {
+ TestCodeSink { bytes: vec![] }
+ }
+
+ /// Return the code emitted to this sink as a hex string.
+ pub fn stringify(&self) -> String {
+        // Not the fastest way to build a hex string, but it is fine for tests.
+ use std::fmt::Write;
+ let mut s = String::with_capacity(self.bytes.len() * 2);
+ for b in &self.bytes {
+ write!(&mut s, "{:02X}", b).unwrap();
+ }
+ s
+ }
+}
+
+impl CodeSink for TestCodeSink {
+ fn offset(&self) -> CodeOffset {
+ self.bytes.len() as CodeOffset
+ }
+
+ fn put1(&mut self, x: u8) {
+ self.bytes.push(x);
+ }
+
+ fn put2(&mut self, x: u16) {
+ self.bytes.push((x >> 0) as u8);
+ self.bytes.push((x >> 8) as u8);
+ }
+
+ fn put4(&mut self, mut x: u32) {
+ for _ in 0..4 {
+ self.bytes.push(x as u8);
+ x >>= 8;
+ }
+ }
+
+ fn put8(&mut self, mut x: u64) {
+ for _ in 0..8 {
+ self.bytes.push(x as u8);
+ x >>= 8;
+ }
+ }
+
+ fn reloc_external(
+ &mut self,
+ _srcloc: SourceLoc,
+ _rel: Reloc,
+ _name: &ExternalName,
+ _addend: Addend,
+ ) {
+ }
+
+ fn reloc_constant(&mut self, _rel: Reloc, _constant_offset: ConstantOffset) {}
+
+ fn reloc_jt(&mut self, _rel: Reloc, _jt: JumpTable) {}
+
+ fn trap(&mut self, _code: TrapCode, _srcloc: SourceLoc) {}
+
+ fn begin_jumptables(&mut self) {}
+
+ fn begin_rodata(&mut self) {}
+
+ fn end_codegen(&mut self) {}
+
+ fn add_stack_map(&mut self, _val_list: &[Value], _func: &Function, _isa: &dyn TargetIsa) {}
+
+ fn add_call_site(&mut self, _opcode: Opcode, _srcloc: SourceLoc) {}
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/unwind.rs
new file mode 100644
index 0000000000..a4c5f0b6b7
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/unwind.rs
@@ -0,0 +1,88 @@
+//! Represents information relating to function unwinding.
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+#[cfg(feature = "unwind")]
+pub mod systemv;
+
+#[cfg(feature = "unwind")]
+pub mod winx64;
+
+/// Represents unwind information for a single function.
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+#[non_exhaustive]
+pub enum UnwindInfo {
+ /// Windows x64 ABI unwind information.
+ #[cfg(feature = "unwind")]
+ WindowsX64(winx64::UnwindInfo),
+ /// System V ABI unwind information.
+ #[cfg(feature = "unwind")]
+ SystemV(systemv::UnwindInfo),
+}
+
+/// Intermediate representation for the unwind information
+/// generated by a backend.
+pub mod input {
+ use crate::binemit::CodeOffset;
+ use alloc::vec::Vec;
+ #[cfg(feature = "enable-serde")]
+ use serde::{Deserialize, Serialize};
+
+ /// Elementary operation in the unwind operations.
+ #[derive(Clone, Debug, PartialEq, Eq)]
+ #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+ pub enum UnwindCode<Reg> {
+ /// Defines that a register is saved at the specified offset.
+ SaveRegister {
+ /// The saved register.
+ reg: Reg,
+ /// The specified offset relative to the stack pointer.
+ stack_offset: u32,
+ },
+        /// Defines that a register holds the same value it had before the call.
+ RestoreRegister {
+ /// The restored register.
+ reg: Reg,
+ },
+ /// The stack pointer was adjusted to allocate the stack.
+ StackAlloc {
+ /// Size to allocate.
+ size: u32,
+ },
+ /// The stack pointer was adjusted to free the stack.
+ StackDealloc {
+ /// Size to deallocate.
+ size: u32,
+ },
+        /// An alternative register was assigned as the frame pointer base.
+ SetFramePointer {
+ /// The specified register.
+ reg: Reg,
+ },
+        /// Restores the frame pointer base to the default register.
+ RestoreFramePointer,
+ /// Saves the state.
+ RememberState,
+ /// Restores the state.
+ RestoreState,
+ }
+
+ /// Unwind information as generated by a backend.
+ #[derive(Clone, Debug, PartialEq, Eq)]
+ #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+ pub struct UnwindInfo<Reg> {
+ /// Size of the prologue.
+ pub prologue_size: CodeOffset,
+ /// Unwind codes for prologue.
+ pub prologue_unwind_codes: Vec<(CodeOffset, UnwindCode<Reg>)>,
+ /// Unwind codes for epilogues.
+ pub epilogues_unwind_codes: Vec<Vec<(CodeOffset, UnwindCode<Reg>)>>,
+ /// Entire function size.
+ pub function_size: CodeOffset,
+ /// Platform word size in bytes.
+ pub word_size: u8,
+ /// Initial stack pointer offset.
+ pub initial_sp_offset: u8,
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/unwind/systemv.rs
new file mode 100644
index 0000000000..dfb2ef5936
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/unwind/systemv.rs
@@ -0,0 +1,313 @@
+//! System V ABI unwind information.
+
+use crate::isa::unwind::input;
+use crate::result::{CodegenError, CodegenResult};
+use alloc::vec::Vec;
+use gimli::write::{Address, FrameDescriptionEntry};
+use thiserror::Error;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+type Register = u16;
+
+/// Enumerate the errors possible in mapping Cranelift registers to their DWARF equivalent.
+#[allow(missing_docs)]
+#[derive(Error, Debug, PartialEq, Eq)]
+pub enum RegisterMappingError {
+ #[error("unable to find bank for register info")]
+ MissingBank,
+ #[error("register mapping is currently only implemented for x86_64")]
+ UnsupportedArchitecture,
+ #[error("unsupported register bank: {0}")]
+ UnsupportedRegisterBank(&'static str),
+}
+
+// This mirrors gimli's CallFrameInstruction, but is serializable
+// This excludes CfaExpression, Expression, ValExpression due to
+// https://github.com/gimli-rs/gimli/issues/513.
+// TODO: if gimli ever adds serialization support, remove this type
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub(crate) enum CallFrameInstruction {
+ Cfa(Register, i32),
+ CfaRegister(Register),
+ CfaOffset(i32),
+ Restore(Register),
+ Undefined(Register),
+ SameValue(Register),
+ Offset(Register, i32),
+ ValOffset(Register, i32),
+ Register(Register, Register),
+ RememberState,
+ RestoreState,
+ ArgsSize(u32),
+}
+
+impl From<gimli::write::CallFrameInstruction> for CallFrameInstruction {
+ fn from(cfi: gimli::write::CallFrameInstruction) -> Self {
+ use gimli::write::CallFrameInstruction;
+
+ match cfi {
+ CallFrameInstruction::Cfa(reg, offset) => Self::Cfa(reg.0, offset),
+ CallFrameInstruction::CfaRegister(reg) => Self::CfaRegister(reg.0),
+ CallFrameInstruction::CfaOffset(offset) => Self::CfaOffset(offset),
+ CallFrameInstruction::Restore(reg) => Self::Restore(reg.0),
+ CallFrameInstruction::Undefined(reg) => Self::Undefined(reg.0),
+ CallFrameInstruction::SameValue(reg) => Self::SameValue(reg.0),
+ CallFrameInstruction::Offset(reg, offset) => Self::Offset(reg.0, offset),
+ CallFrameInstruction::ValOffset(reg, offset) => Self::ValOffset(reg.0, offset),
+ CallFrameInstruction::Register(reg1, reg2) => Self::Register(reg1.0, reg2.0),
+ CallFrameInstruction::RememberState => Self::RememberState,
+ CallFrameInstruction::RestoreState => Self::RestoreState,
+ CallFrameInstruction::ArgsSize(size) => Self::ArgsSize(size),
+ _ => {
+ // Cranelift's unwind support does not generate `CallFrameInstruction`s with
+ // Expression at this moment, and it is not trivial to
+ // serialize such instructions.
+ panic!("CallFrameInstruction with Expression not supported");
+ }
+ }
+ }
+}
+
+impl Into<gimli::write::CallFrameInstruction> for CallFrameInstruction {
+ fn into(self) -> gimli::write::CallFrameInstruction {
+ use gimli::{write::CallFrameInstruction, Register};
+
+ match self {
+ Self::Cfa(reg, offset) => CallFrameInstruction::Cfa(Register(reg), offset),
+ Self::CfaRegister(reg) => CallFrameInstruction::CfaRegister(Register(reg)),
+ Self::CfaOffset(offset) => CallFrameInstruction::CfaOffset(offset),
+ Self::Restore(reg) => CallFrameInstruction::Restore(Register(reg)),
+ Self::Undefined(reg) => CallFrameInstruction::Undefined(Register(reg)),
+ Self::SameValue(reg) => CallFrameInstruction::SameValue(Register(reg)),
+ Self::Offset(reg, offset) => CallFrameInstruction::Offset(Register(reg), offset),
+ Self::ValOffset(reg, offset) => CallFrameInstruction::ValOffset(Register(reg), offset),
+ Self::Register(reg1, reg2) => {
+ CallFrameInstruction::Register(Register(reg1), Register(reg2))
+ }
+ Self::RememberState => CallFrameInstruction::RememberState,
+ Self::RestoreState => CallFrameInstruction::RestoreState,
+ Self::ArgsSize(size) => CallFrameInstruction::ArgsSize(size),
+ }
+ }
+}
+
+/// Maps UnwindInfo register to gimli's index space.
+pub(crate) trait RegisterMapper<Reg> {
+ /// Maps Reg.
+ fn map(&self, reg: Reg) -> Result<Register, RegisterMappingError>;
+ /// Gets stack pointer register.
+ fn sp(&self) -> Register;
+}
+
+/// Represents unwind information for a single System V ABI function.
+///
+/// This representation is not ISA specific.
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct UnwindInfo {
+ instructions: Vec<(u32, CallFrameInstruction)>,
+ len: u32,
+}
+
+impl UnwindInfo {
+ pub(crate) fn build<'b, Reg: PartialEq + Copy>(
+ unwind: input::UnwindInfo<Reg>,
+ map_reg: &'b dyn RegisterMapper<Reg>,
+ ) -> CodegenResult<Self> {
+ use input::UnwindCode;
+ let mut builder = InstructionBuilder::new(unwind.initial_sp_offset, map_reg);
+
+ for (offset, c) in unwind.prologue_unwind_codes.iter().chain(
+ unwind
+ .epilogues_unwind_codes
+ .iter()
+ .map(|c| c.iter())
+ .flatten(),
+ ) {
+ match c {
+ UnwindCode::SaveRegister { reg, stack_offset } => {
+ builder
+ .save_reg(*offset, *reg, *stack_offset)
+ .map_err(CodegenError::RegisterMappingError)?;
+ }
+ UnwindCode::StackAlloc { size } => {
+ builder.adjust_sp_down_imm(*offset, *size as i64);
+ }
+ UnwindCode::StackDealloc { size } => {
+ builder.adjust_sp_up_imm(*offset, *size as i64);
+ }
+ UnwindCode::RestoreRegister { reg } => {
+ builder
+ .restore_reg(*offset, *reg)
+ .map_err(CodegenError::RegisterMappingError)?;
+ }
+ UnwindCode::SetFramePointer { reg } => {
+ builder
+ .set_cfa_reg(*offset, *reg)
+ .map_err(CodegenError::RegisterMappingError)?;
+ }
+ UnwindCode::RestoreFramePointer => {
+ builder.restore_cfa(*offset);
+ }
+ UnwindCode::RememberState => {
+ builder.remember_state(*offset);
+ }
+ UnwindCode::RestoreState => {
+ builder.restore_state(*offset);
+ }
+ }
+ }
+
+ let instructions = builder.instructions;
+ let len = unwind.function_size;
+
+ Ok(Self { instructions, len })
+ }
+
+ /// Converts the unwind information into a `FrameDescriptionEntry`.
+ pub fn to_fde(&self, address: Address) -> gimli::write::FrameDescriptionEntry {
+ let mut fde = FrameDescriptionEntry::new(address, self.len);
+
+ for (offset, inst) in &self.instructions {
+ fde.add_instruction(*offset, inst.clone().into());
+ }
+
+ fde
+ }
+}
+
+struct InstructionBuilder<'a, Reg: PartialEq + Copy> {
+ sp_offset: i32,
+ frame_register: Option<Reg>,
+ saved_state: Option<(i32, Option<Reg>)>,
+ map_reg: &'a dyn RegisterMapper<Reg>,
+ instructions: Vec<(u32, CallFrameInstruction)>,
+}
+
+impl<'a, Reg: PartialEq + Copy> InstructionBuilder<'a, Reg> {
+ fn new(sp_offset: u8, map_reg: &'a (dyn RegisterMapper<Reg> + 'a)) -> Self {
+ Self {
+ sp_offset: sp_offset as i32, // CFA offset starts at the specified offset to account for the return address on stack
+ saved_state: None,
+ frame_register: None,
+ map_reg,
+ instructions: Vec::new(),
+ }
+ }
+
+ fn save_reg(
+ &mut self,
+ offset: u32,
+ reg: Reg,
+ stack_offset: u32,
+ ) -> Result<(), RegisterMappingError> {
+ // Pushes in the prologue are register saves, so record an offset of the save
+ self.instructions.push((
+ offset,
+ CallFrameInstruction::Offset(
+ self.map_reg.map(reg)?,
+ stack_offset as i32 - self.sp_offset,
+ ),
+ ));
+
+ Ok(())
+ }
+
+ fn adjust_sp_down_imm(&mut self, offset: u32, imm: i64) {
+ assert!(imm <= core::u32::MAX as i64);
+
+ self.sp_offset += imm as i32;
+
+ // Don't adjust the CFA if we're using a frame pointer
+ if self.frame_register.is_some() {
+ return;
+ }
+
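+        // For example, assuming the initial offset of 8 passed to `new` (a pushed return
+        // address), a 32-byte allocation at code offset 4 records `(4, CfaOffset(40))`.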
+ self.instructions
+ .push((offset, CallFrameInstruction::CfaOffset(self.sp_offset)));
+ }
+
+ fn adjust_sp_up_imm(&mut self, offset: u32, imm: i64) {
+ assert!(imm <= core::u32::MAX as i64);
+
+ self.sp_offset -= imm as i32;
+
+ // Don't adjust the CFA if we're using a frame pointer
+ if self.frame_register.is_some() {
+ return;
+ }
+
+ let cfa_inst_ofs = {
+            // Scan backwards for a CFA instruction at the same code offset to merge with.
+ let mut it = self.instructions.iter_mut();
+ loop {
+ match it.next_back() {
+ Some((i_offset, i)) if *i_offset == offset => {
+ if let CallFrameInstruction::Cfa(_, o) = i {
+ break Some(o);
+ }
+ }
+ _ => {
+ break None;
+ }
+ }
+ }
+ };
+
+ if let Some(o) = cfa_inst_ofs {
+ // Update previous CFA instruction.
+ *o = self.sp_offset;
+ } else {
+            // Otherwise, add a new CFA offset instruction.
+ self.instructions
+ .push((offset, CallFrameInstruction::CfaOffset(self.sp_offset)));
+ }
+ }
+
+ fn set_cfa_reg(&mut self, offset: u32, reg: Reg) -> Result<(), RegisterMappingError> {
+ self.instructions.push((
+ offset,
+ CallFrameInstruction::CfaRegister(self.map_reg.map(reg)?),
+ ));
+ self.frame_register = Some(reg);
+ Ok(())
+ }
+
+ fn restore_cfa(&mut self, offset: u32) {
+ // Restore SP and its offset.
+ self.instructions.push((
+ offset,
+ CallFrameInstruction::Cfa(self.map_reg.sp(), self.sp_offset),
+ ));
+ self.frame_register = None;
+ }
+
+ fn restore_reg(&mut self, offset: u32, reg: Reg) -> Result<(), RegisterMappingError> {
+ // Pops in the epilogue are register restores, so record a "same value" for the register
+ self.instructions.push((
+ offset,
+ CallFrameInstruction::SameValue(self.map_reg.map(reg)?),
+ ));
+
+ Ok(())
+ }
+
+ fn remember_state(&mut self, offset: u32) {
+ self.saved_state = Some((self.sp_offset, self.frame_register));
+
+ self.instructions
+ .push((offset, CallFrameInstruction::RememberState));
+ }
+
+ fn restore_state(&mut self, offset: u32) {
+ let (sp_offset, frame_register) = self.saved_state.take().unwrap();
+ self.sp_offset = sp_offset;
+ self.frame_register = frame_register;
+
+ self.instructions
+ .push((offset, CallFrameInstruction::RestoreState));
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/unwind/winx64.rs b/third_party/rust/cranelift-codegen/src/isa/unwind/winx64.rs
new file mode 100644
index 0000000000..b3c21fc473
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/unwind/winx64.rs
@@ -0,0 +1,294 @@
+//! Windows x64 ABI unwind information.
+
+use crate::isa::{unwind::input, RegUnit};
+use crate::result::{CodegenError, CodegenResult};
+use alloc::vec::Vec;
+use byteorder::{ByteOrder, LittleEndian};
+use log::warn;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// Maximum (inclusive) size of a "small" stack allocation
+const SMALL_ALLOC_MAX_SIZE: u32 = 128;
+/// Maximum (inclusive) size of a "large" stack allocation that can represented in 16-bits
+const LARGE_ALLOC_16BIT_MAX_SIZE: u32 = 524280;
+
+struct Writer<'a> {
+ buf: &'a mut [u8],
+ offset: usize,
+}
+
+impl<'a> Writer<'a> {
+ pub fn new(buf: &'a mut [u8]) -> Self {
+ Self { buf, offset: 0 }
+ }
+
+ fn write_u8(&mut self, v: u8) {
+ self.buf[self.offset] = v;
+ self.offset += 1;
+ }
+
+ fn write_u16<T: ByteOrder>(&mut self, v: u16) {
+ T::write_u16(&mut self.buf[self.offset..(self.offset + 2)], v);
+ self.offset += 2;
+ }
+
+ fn write_u32<T: ByteOrder>(&mut self, v: u32) {
+ T::write_u32(&mut self.buf[self.offset..(self.offset + 4)], v);
+ self.offset += 4;
+ }
+}
+
+/// The supported unwind codes for the x64 Windows ABI.
+///
+/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
+/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA is represented here.
+/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub(crate) enum UnwindCode {
+ PushRegister {
+ offset: u8,
+ reg: u8,
+ },
+ SaveXmm {
+ offset: u8,
+ reg: u8,
+ stack_offset: u32,
+ },
+ StackAlloc {
+ offset: u8,
+ size: u32,
+ },
+}
+
+impl UnwindCode {
+ fn emit(&self, writer: &mut Writer) {
+ enum UnwindOperation {
+ PushNonvolatileRegister = 0,
+ LargeStackAlloc = 1,
+ SmallStackAlloc = 2,
+ SaveXmm128 = 8,
+ SaveXmm128Far = 9,
+ }
+
+ match self {
+ Self::PushRegister { offset, reg } => {
+ writer.write_u8(*offset);
+ writer.write_u8((*reg << 4) | (UnwindOperation::PushNonvolatileRegister as u8));
+ }
+ Self::SaveXmm {
+ offset,
+ reg,
+ stack_offset,
+ } => {
+ writer.write_u8(*offset);
+ let scaled_stack_offset = stack_offset / 16;
+ if scaled_stack_offset <= core::u16::MAX as u32 {
+ writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128 as u8));
+ writer.write_u16::<LittleEndian>(scaled_stack_offset as u16);
+ } else {
+ writer.write_u8((*reg << 4) | (UnwindOperation::SaveXmm128Far as u8));
+ writer.write_u16::<LittleEndian>(*stack_offset as u16);
+ writer.write_u16::<LittleEndian>((stack_offset >> 16) as u16);
+ }
+ }
+ Self::StackAlloc { offset, size } => {
+ // Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
+ assert!(*size >= 8);
+ assert!((*size % 8) == 0);
+
+ writer.write_u8(*offset);
+ if *size <= SMALL_ALLOC_MAX_SIZE {
+ writer.write_u8(
+ ((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
+ );
+ } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
+ writer.write_u8(UnwindOperation::LargeStackAlloc as u8);
+ writer.write_u16::<LittleEndian>((*size / 8) as u16);
+ } else {
+ writer.write_u8((1 << 4) | (UnwindOperation::LargeStackAlloc as u8));
+ writer.write_u32::<LittleEndian>(*size);
+ }
+ }
+ };
+ }
+
+ fn node_count(&self) -> usize {
+ match self {
+ Self::StackAlloc { size, .. } => {
+ if *size <= SMALL_ALLOC_MAX_SIZE {
+ 1
+ } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
+ 2
+ } else {
+ 3
+ }
+ }
+ Self::SaveXmm { stack_offset, .. } => {
+ if *stack_offset <= core::u16::MAX as u32 {
+ 2
+ } else {
+ 3
+ }
+ }
+ _ => 1,
+ }
+ }
+}
+
+pub(crate) enum MappedRegister {
+ Int(u8),
+ Xmm(u8),
+}
+
+/// Maps UnwindInfo register to Windows x64 unwind data.
+pub(crate) trait RegisterMapper {
+ /// Maps RegUnit.
+ fn map(reg: RegUnit) -> MappedRegister;
+}
+
+/// Represents Windows x64 unwind information.
+///
+/// For information about Windows x64 unwind info, see:
+/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct UnwindInfo {
+ pub(crate) flags: u8,
+ pub(crate) prologue_size: u8,
+ pub(crate) frame_register: Option<u8>,
+ pub(crate) frame_register_offset: u8,
+ pub(crate) unwind_codes: Vec<UnwindCode>,
+}
+
+impl UnwindInfo {
+ /// Gets the emit size of the unwind information, in bytes.
+ pub fn emit_size(&self) -> usize {
+ let node_count = self.node_count();
+
+        // This size calculation assumes there is no SEH handler and no chained unwind info
+ assert!(self.flags == 0);
+
+ // Size of fixed part of UNWIND_INFO is 4 bytes
+ // Then comes the UNWIND_CODE nodes (2 bytes each)
+ // Then comes 2 bytes of padding for the unwind codes if necessary
+ // Next would come the SEH data, but we assert above that the function doesn't have SEH data
+
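+        // For example (illustrative), a prologue with one register push (1 node) and one
+        // small stack allocation (1 node) has node_count = 2, giving 4 + 2 * 2 + 0 = 8 bytes.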
+ 4 + (node_count * 2) + if (node_count & 1) == 1 { 2 } else { 0 }
+ }
+
+ /// Emits the unwind information into the given mutable byte slice.
+ ///
+ /// This function will panic if the slice is not at least `emit_size` in length.
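+    ///
+    /// A minimal usage sketch (illustrative; `info` stands for an already-built `UnwindInfo`):
+    ///
+    /// ```ignore
+    /// let mut buf = vec![0u8; info.emit_size()];
+    /// info.emit(&mut buf);
+    /// ```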
+ pub fn emit(&self, buf: &mut [u8]) {
+ const UNWIND_INFO_VERSION: u8 = 1;
+
+ let node_count = self.node_count();
+        // The node count is emitted below as a single byte.
+        assert!(node_count <= 255);
+
+ let mut writer = Writer::new(buf);
+
+ writer.write_u8((self.flags << 3) | UNWIND_INFO_VERSION);
+ writer.write_u8(self.prologue_size);
+ writer.write_u8(node_count as u8);
+
+ if let Some(reg) = self.frame_register {
+ writer.write_u8((self.frame_register_offset << 4) | reg);
+ } else {
+ writer.write_u8(0);
+ }
+
+ // Unwind codes are written in reverse order (prologue offset descending)
+ for code in self.unwind_codes.iter().rev() {
+ code.emit(&mut writer);
+ }
+
+ // To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
+ if (node_count & 1) == 1 {
+ writer.write_u16::<LittleEndian>(0);
+ }
+
+ // Ensure the correct number of bytes was emitted
+ assert_eq!(writer.offset, self.emit_size());
+ }
+
+ fn node_count(&self) -> usize {
+ self.unwind_codes
+ .iter()
+ .fold(0, |nodes, c| nodes + c.node_count())
+ }
+
+ pub(crate) fn build<MR: RegisterMapper>(
+ unwind: input::UnwindInfo<RegUnit>,
+ ) -> CodegenResult<Self> {
+ use crate::isa::unwind::input::UnwindCode as InputUnwindCode;
+
+ let word_size: u32 = unwind.word_size.into();
+ let mut unwind_codes = Vec::new();
+ for (offset, c) in unwind.prologue_unwind_codes.iter() {
+ match c {
+ InputUnwindCode::SaveRegister { reg, stack_offset } => {
+ let reg = MR::map(*reg);
+ let offset = ensure_unwind_offset(*offset)?;
+ match reg {
+ MappedRegister::Int(reg) => {
+ // Attempt to convert sequence of the `InputUnwindCode`:
+ // `StackAlloc { size = word_size }`, `SaveRegister { stack_offset: 0 }`
+ // to the shorter `UnwindCode::PushRegister`.
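+                            //
+                            // For example, a prologue register push (e.g. `push rbp`) typically
+                            // arrives here as `StackAlloc { size: 8 }` followed by
+                            // `SaveRegister { stack_offset: 0 }` at the same offset, and is
+                            // folded into a single `PushRegister` node.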
+ let push_reg_sequence = if let Some(UnwindCode::StackAlloc {
+ offset: alloc_offset,
+ size,
+ }) = unwind_codes.last()
+ {
+ *size == word_size && offset == *alloc_offset && *stack_offset == 0
+ } else {
+ false
+ };
+ if push_reg_sequence {
+ *unwind_codes.last_mut().unwrap() =
+ UnwindCode::PushRegister { offset, reg };
+ } else {
+ // TODO add `UnwindCode::SaveRegister` to handle multiple register
+ // pushes with single `UnwindCode::StackAlloc`.
+ return Err(CodegenError::Unsupported(
+ "Unsupported UnwindCode::PushRegister sequence".into(),
+ ));
+ }
+ }
+ MappedRegister::Xmm(reg) => {
+ unwind_codes.push(UnwindCode::SaveXmm {
+ offset,
+ reg,
+ stack_offset: *stack_offset,
+ });
+ }
+ }
+ }
+ InputUnwindCode::StackAlloc { size } => {
+ unwind_codes.push(UnwindCode::StackAlloc {
+ offset: ensure_unwind_offset(*offset)?,
+ size: *size,
+ });
+ }
+ _ => {}
+ }
+ }
+
+ Ok(Self {
+ flags: 0, // this assumes cranelift functions have no SEH handlers
+ prologue_size: ensure_unwind_offset(unwind.prologue_size)?,
+ frame_register: None,
+ frame_register_offset: 0,
+ unwind_codes,
+ })
+ }
+}
+
+fn ensure_unwind_offset(offset: u32) -> CodegenResult<u8> {
+ if offset > 255 {
+ warn!("function prologues cannot exceed 255 bytes in size for Windows x64");
+ return Err(CodegenError::CodeTooLarge);
+ }
+ Ok(offset as u8)
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/abi.rs b/third_party/rust/cranelift-codegen/src/isa/x64/abi.rs
new file mode 100644
index 0000000000..f4c7624f36
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/abi.rs
@@ -0,0 +1,794 @@
+//! Implementation of the standard x64 ABI.
+
+use crate::ir::types::*;
+use crate::ir::{self, types, MemFlags, TrapCode, Type};
+use crate::isa;
+use crate::isa::{x64::inst::*, CallConv};
+use crate::machinst::abi_impl::*;
+use crate::machinst::*;
+use crate::settings;
+use crate::{CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use args::*;
+use regalloc::{RealReg, Reg, RegClass, Set, Writable};
+use smallvec::{smallvec, SmallVec};
+use std::convert::TryFrom;
+
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+
+/// Offset in stack-arg area to callee-TLS slot in Baldrdash-2020 calling convention.
+static BALDRDASH_CALLEE_TLS_OFFSET: i64 = 0;
+/// Offset in stack-arg area to caller-TLS slot in Baldrdash-2020 calling convention.
+static BALDRDASH_CALLER_TLS_OFFSET: i64 = 8;
+
+/// Try to fill a Baldrdash register, returning it if it was found.
+fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
+ if call_conv.extends_baldrdash() {
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext => {
+ // This is SpiderMonkey's `WasmTlsReg`.
+ Some(ABIArg::Reg(
+ regs::r14().to_real_reg(),
+ types::I64,
+ param.extension,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::SignatureId => {
+ // This is SpiderMonkey's `WasmTableCallSigReg`.
+ Some(ABIArg::Reg(
+ regs::r10().to_real_reg(),
+ types::I64,
+ param.extension,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::CalleeTLS => {
+ // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
+ assert!(call_conv == isa::CallConv::Baldrdash2020);
+ Some(ABIArg::Stack(
+ BALDRDASH_CALLEE_TLS_OFFSET,
+ ir::types::I64,
+ ir::ArgumentExtension::None,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::CallerTLS => {
+ // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
+ assert!(call_conv == isa::CallConv::Baldrdash2020);
+ Some(ABIArg::Stack(
+ BALDRDASH_CALLER_TLS_OFFSET,
+ ir::types::I64,
+ ir::ArgumentExtension::None,
+ param.purpose,
+ ))
+ }
+ _ => None,
+ }
+ } else {
+ None
+ }
+}
+
+/// Support for the x64 ABI from the callee side (within a function body).
+pub(crate) type X64ABICallee = ABICalleeImpl<X64ABIMachineSpec>;
+
+/// Support for the x64 ABI from the caller side (at a callsite).
+pub(crate) type X64ABICaller = ABICallerImpl<X64ABIMachineSpec>;
+
+/// Implementation of ABI primitives for x64.
+pub(crate) struct X64ABIMachineSpec;
+
+impl ABIMachineSpec for X64ABIMachineSpec {
+ type I = Inst;
+
+ fn word_bits() -> u32 {
+ 64
+ }
+
+ /// Return required stack alignment in bytes.
+ fn stack_align(_call_conv: isa::CallConv) -> u32 {
+ 16
+ }
+
+ fn compute_arg_locs(
+ call_conv: isa::CallConv,
+ params: &[ir::AbiParam],
+ args_or_rets: ArgsOrRets,
+ add_ret_area_ptr: bool,
+ ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+ let is_baldrdash = call_conv.extends_baldrdash();
+ let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
+
+ let mut next_gpr = 0;
+ let mut next_vreg = 0;
+ let mut next_stack: u64 = 0;
+ let mut ret = vec![];
+
+ if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
+ // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
+ // caller TLS-register values, respectively.
+ next_stack = 16;
+ }
+
+ for i in 0..params.len() {
+ // Process returns backward, according to the SpiderMonkey ABI (which we
+ // adopt internally if `is_baldrdash` is set).
+ let param = match (args_or_rets, is_baldrdash) {
+ (ArgsOrRets::Args, _) => &params[i],
+ (ArgsOrRets::Rets, false) => &params[i],
+ (ArgsOrRets::Rets, true) => &params[params.len() - 1 - i],
+ };
+
+ // Validate "purpose".
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext
+ | &ir::ArgumentPurpose::Normal
+ | &ir::ArgumentPurpose::StackLimit
+ | &ir::ArgumentPurpose::SignatureId
+ | &ir::ArgumentPurpose::CalleeTLS
+ | &ir::ArgumentPurpose::CallerTLS => {}
+ _ => panic!(
+ "Unsupported argument purpose {:?} in signature: {:?}",
+ param.purpose, params
+ ),
+ }
+
+ let intreg = in_int_reg(param.value_type);
+ let vecreg = in_vec_reg(param.value_type);
+ debug_assert!(intreg || vecreg);
+ debug_assert!(!(intreg && vecreg));
+
+ let (next_reg, candidate) = if intreg {
+ let candidate = match args_or_rets {
+ ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
+ ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i),
+ };
+ debug_assert!(candidate
+ .map(|r| r.get_class() == RegClass::I64)
+ .unwrap_or(true));
+ (&mut next_gpr, candidate)
+ } else {
+ let candidate = match args_or_rets {
+ ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
+ ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i),
+ };
+ debug_assert!(candidate
+ .map(|r| r.get_class() == RegClass::V128)
+ .unwrap_or(true));
+ (&mut next_vreg, candidate)
+ };
+
+ if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
+ assert!(intreg);
+ ret.push(param);
+ } else if let Some(reg) = candidate {
+ ret.push(ABIArg::Reg(
+ reg.to_real_reg(),
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ *next_reg += 1;
+ } else {
+ // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
+ // stack alignment happens separately after all args.)
+ let size = (param.value_type.bits() / 8) as u64;
+ let size = std::cmp::max(size, 8);
+ // Align.
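+                // (For example, next_stack = 20 with an 8-byte slot rounds up to 24,
+                // since (20 + 7) & !7 == 24.)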
+ debug_assert!(size.is_power_of_two());
+ next_stack = (next_stack + size - 1) & !(size - 1);
+ ret.push(ABIArg::Stack(
+ next_stack as i64,
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ next_stack += size;
+ }
+ }
+
+ if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
+ ret.reverse();
+ }
+
+ let extra_arg = if add_ret_area_ptr {
+ debug_assert!(args_or_rets == ArgsOrRets::Args);
+ if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
+ ret.push(ABIArg::Reg(
+ reg.to_real_reg(),
+ types::I64,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ } else {
+ ret.push(ABIArg::Stack(
+ next_stack as i64,
+ types::I64,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ next_stack += 8;
+ }
+ Some(ret.len() - 1)
+ } else {
+ None
+ };
+
+ next_stack = (next_stack + 15) & !15;
+
+ // To avoid overflow issues, limit the arg/return size to something reasonable.
+ if next_stack > STACK_ARG_RET_SIZE_LIMIT {
+ return Err(CodegenError::ImplLimitExceeded);
+ }
+
+ Ok((ret, next_stack as i64, extra_arg))
+ }
+
+ fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 {
+ if call_conv.extends_baldrdash() {
+ let num_words = flags.baldrdash_prologue_words() as i64;
+ debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
+ num_words * 8
+ } else {
+ 16 // frame pointer + return address.
+ }
+ }
+
+ fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
+ let ext_kind = match ty {
+ types::B1
+ | types::B8
+ | types::I8
+ | types::B16
+ | types::I16
+ | types::B32
+ | types::I32 => ExtKind::SignExtend,
+ types::B64 | types::I64 | types::R64 | types::F32 | types::F64 => ExtKind::None,
+ _ if ty.bytes() == 16 => ExtKind::None,
+ _ => panic!("load_stack({})", ty),
+ };
+ Inst::load(ty, mem, into_reg, ext_kind)
+ }
+
+ fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
+ Inst::store(ty, from_reg, mem)
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
+ Inst::gen_move(to_reg, from_reg, ty)
+ }
+
+ /// Generate an integer-extend operation.
+ fn gen_extend(
+ to_reg: Writable<Reg>,
+ from_reg: Reg,
+ is_signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ ) -> Self::I {
+ let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
+ .expect(&format!("invalid extension: {} -> {}", from_bits, to_bits));
+ if is_signed {
+ Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
+ } else {
+ Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
+ }
+ }
+
+ fn gen_ret() -> Self::I {
+ Inst::ret()
+ }
+
+ fn gen_epilogue_placeholder() -> Self::I {
+ Inst::epilogue_placeholder()
+ }
+
+ fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]> {
+ let mut ret = SmallVec::new();
+ if from_reg != into_reg.to_reg() {
+ ret.push(Inst::gen_move(into_reg, from_reg, I64));
+ }
+ ret.push(Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(imm),
+ into_reg,
+ ));
+ ret
+ }
+
+ fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> {
+ smallvec![
+ Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg),
+ Inst::TrapIf {
+ // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
+ cc: CC::NBE,
+ trap_code: TrapCode::StackOverflow,
+ },
+ ]
+ }
+
+ fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Self::I {
+ let mem: SyntheticAmode = mem.into();
+ Inst::lea(mem, into_reg)
+ }
+
+ fn get_stacklimit_reg() -> Reg {
+ debug_assert!(
+ !is_callee_save_systemv(regs::r10().to_real_reg())
+ && !is_callee_save_baldrdash(regs::r10().to_real_reg())
+ );
+
+ // As per comment on trait definition, we must return a caller-save
+ // register here.
+ regs::r10()
+ }
+
+ fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
+ // Only ever used for I64s; if that changes, see if the ExtKind below needs to be changed.
+ assert_eq!(ty, I64);
+ let simm32 = offset as u32;
+ let mem = Amode::imm_reg(simm32, base);
+ Inst::load(ty, mem, into_reg, ExtKind::None)
+ }
+
+ fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
+ let simm32 = offset as u32;
+ let mem = Amode::imm_reg(simm32, base);
+ Inst::store(ty, from_reg, mem)
+ }
+
+ fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]> {
+ let (alu_op, amount) = if amount >= 0 {
+ (AluRmiROpcode::Add, amount)
+ } else {
+ (AluRmiROpcode::Sub, -amount)
+ };
+
+ let amount = amount as u32;
+
+ smallvec![Inst::alu_rmi_r(
+ true,
+ alu_op,
+ RegMemImm::imm(amount),
+ Writable::from_reg(regs::rsp()),
+ )]
+ }
+
+ fn gen_nominal_sp_adj(offset: i32) -> Self::I {
+ Inst::VirtualSPOffsetAdj {
+ offset: offset as i64,
+ }
+ }
+
+ fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> {
+ let r_rsp = regs::rsp();
+ let r_rbp = regs::rbp();
+ let w_rbp = Writable::from_reg(r_rbp);
+ let mut insts = SmallVec::new();
+ // RSP before the call will be 0 % 16. So here, it is 8 % 16.
+ insts.push(Inst::push64(RegMemImm::reg(r_rbp)));
+ // RSP is now 0 % 16
+ insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
+ insts
+ }
+
+ fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> {
+ let mut insts = SmallVec::new();
+ insts.push(Inst::mov_r_r(
+ true,
+ regs::rbp(),
+ Writable::from_reg(regs::rsp()),
+ ));
+ insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
+ insts
+ }
+
+ fn gen_clobber_save(
+ call_conv: isa::CallConv,
+ _: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> (u64, SmallVec<[Self::I; 16]>) {
+ let mut insts = SmallVec::new();
+ // Find all clobbered registers that are callee-save. These are only I64
+ // registers (all XMM registers are caller-save) so we can compute the
+ // total size of the needed stack space easily.
+ let clobbered = get_callee_saves(&call_conv, clobbers);
+ let clobbered_size = 8 * clobbered.len() as u32;
+ let stack_size = clobbered_size + fixed_frame_storage_size;
+ // Align to 16 bytes.
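+        // (For example, three saved GPRs with no fixed-frame storage occupy 24 bytes,
+        // which rounds up to 32.)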
+ let stack_size = (stack_size + 15) & !15;
+ // Adjust the stack pointer downward with one `sub rsp, IMM`
+ // instruction.
+ if stack_size > 0 {
+ insts.push(Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Sub,
+ RegMemImm::imm(stack_size),
+ Writable::from_reg(regs::rsp()),
+ ));
+ }
+ // Store each clobbered register in order at offsets from RSP.
+ let mut cur_offset = 0;
+ for reg in &clobbered {
+ let r_reg = reg.to_reg();
+ match r_reg.get_class() {
+ RegClass::I64 => {
+ insts.push(Inst::mov_r_m(
+ /* bytes = */ 8,
+ r_reg.to_reg(),
+ Amode::imm_reg(cur_offset, regs::rsp()),
+ ));
+ cur_offset += 8;
+ }
+ // No XMM regs are callee-save, so we do not need to implement
+ // this.
+ _ => unimplemented!(),
+ }
+ }
+
+ (clobbered_size as u64, insts)
+ }
+
+ fn gen_clobber_restore(
+ call_conv: isa::CallConv,
+ flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ _fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> SmallVec<[Self::I; 16]> {
+ let mut insts = SmallVec::new();
+
+ let clobbered = get_callee_saves(&call_conv, clobbers);
+ let stack_size = 8 * clobbered.len() as u32;
+ let stack_size = (stack_size + 15) & !15;
+
+ // Restore regs by loading from offsets of RSP.
+ let mut cur_offset = 0;
+ for reg in &clobbered {
+ let rreg = reg.to_reg();
+ match rreg.get_class() {
+ RegClass::I64 => {
+ insts.push(Inst::mov64_m_r(
+ Amode::imm_reg(cur_offset, regs::rsp()),
+ Writable::from_reg(rreg.to_reg()),
+ ));
+ cur_offset += 8;
+ }
+ _ => unimplemented!(),
+ }
+ }
+ // Adjust RSP back upward.
+ if stack_size > 0 {
+ insts.push(Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(stack_size),
+ Writable::from_reg(regs::rsp()),
+ ));
+ }
+
+ // If this is Baldrdash-2020, restore the callee (i.e., our) TLS
+ // register. We may have allocated it for something else and clobbered
+ // it, but the ABI expects us to leave the TLS register unchanged.
+ if call_conv == isa::CallConv::Baldrdash2020 {
+ let off = BALDRDASH_CALLEE_TLS_OFFSET + Self::fp_to_arg_offset(call_conv, flags);
+ insts.push(Inst::mov64_m_r(
+ Amode::imm_reg(off as u32, regs::rbp()),
+ Writable::from_reg(regs::r14()),
+ ));
+ }
+
+ insts
+ }
+
+ /// Generate a call instruction/sequence.
+ fn gen_call(
+ dest: &CallDest,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: ir::Opcode,
+ tmp: Writable<Reg>,
+ _callee_conv: isa::CallConv,
+ _caller_conv: isa::CallConv,
+ ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> {
+ let mut insts = SmallVec::new();
+ match dest {
+ &CallDest::ExtName(ref name, RelocDistance::Near) => {
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::call_known(name.clone(), uses, defs, opcode),
+ ));
+ }
+ &CallDest::ExtName(ref name, RelocDistance::Far) => {
+ insts.push((
+ InstIsSafepoint::No,
+ Inst::LoadExtName {
+ dst: tmp,
+ name: Box::new(name.clone()),
+ offset: 0,
+ },
+ ));
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, opcode),
+ ));
+ }
+ &CallDest::Reg(reg) => {
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode),
+ ));
+ }
+ }
+ insts
+ }
+
+ fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
+ // We allocate in terms of 8-byte slots.
+ match (rc, ty) {
+ (RegClass::I64, _) => 1,
+ (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1,
+ (RegClass::V128, _) => 2,
+ _ => panic!("Unexpected register class!"),
+ }
+ }
+
+ fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64 {
+ s.virtual_sp_offset
+ }
+
+ fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64 {
+ s.nominal_sp_to_fp
+ }
+
+ fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
+ let mut caller_saved = vec![
+ // Systemv calling convention:
+ // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
+ Writable::from_reg(regs::rsi()),
+ Writable::from_reg(regs::rdi()),
+ Writable::from_reg(regs::rax()),
+ Writable::from_reg(regs::rcx()),
+ Writable::from_reg(regs::rdx()),
+ Writable::from_reg(regs::r8()),
+ Writable::from_reg(regs::r9()),
+ Writable::from_reg(regs::r10()),
+ Writable::from_reg(regs::r11()),
+ // - XMM: all the registers!
+ Writable::from_reg(regs::xmm0()),
+ Writable::from_reg(regs::xmm1()),
+ Writable::from_reg(regs::xmm2()),
+ Writable::from_reg(regs::xmm3()),
+ Writable::from_reg(regs::xmm4()),
+ Writable::from_reg(regs::xmm5()),
+ Writable::from_reg(regs::xmm6()),
+ Writable::from_reg(regs::xmm7()),
+ Writable::from_reg(regs::xmm8()),
+ Writable::from_reg(regs::xmm9()),
+ Writable::from_reg(regs::xmm10()),
+ Writable::from_reg(regs::xmm11()),
+ Writable::from_reg(regs::xmm12()),
+ Writable::from_reg(regs::xmm13()),
+ Writable::from_reg(regs::xmm14()),
+ Writable::from_reg(regs::xmm15()),
+ ];
+
+ if call_conv_of_callee.extends_baldrdash() {
+ caller_saved.push(Writable::from_reg(regs::r12()));
+ caller_saved.push(Writable::from_reg(regs::r13()));
+ // Not r14; implicitly preserved in the entry.
+ caller_saved.push(Writable::from_reg(regs::r15()));
+ caller_saved.push(Writable::from_reg(regs::rbx()));
+ }
+
+ caller_saved
+ }
+}
+
+impl From<StackAMode> for SyntheticAmode {
+ fn from(amode: StackAMode) -> Self {
+ // We enforce a 128 MB stack-frame size limit above, so these
+ // `expect()`s should never fail.
+ match amode {
+ StackAMode::FPOffset(off, _ty) => {
+ let off = i32::try_from(off)
+ .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first");
+ let simm32 = off as u32;
+ SyntheticAmode::Real(Amode::ImmReg {
+ simm32,
+ base: regs::rbp(),
+ flags: MemFlags::trusted(),
+ })
+ }
+ StackAMode::NominalSPOffset(off, _ty) => {
+ let off = i32::try_from(off).expect(
+ "Offset in NominalSPOffset is greater than 2GB; should hit impl limit first",
+ );
+ let simm32 = off as u32;
+ SyntheticAmode::nominal_sp_offset(simm32)
+ }
+ StackAMode::SPOffset(off, _ty) => {
+ let off = i32::try_from(off)
+ .expect("Offset in SPOffset is greater than 2GB; should hit impl limit first");
+ let simm32 = off as u32;
+ SyntheticAmode::Real(Amode::ImmReg {
+ simm32,
+ base: regs::rsp(),
+ flags: MemFlags::trusted(),
+ })
+ }
+ }
+ }
+}
+
+fn in_int_reg(ty: types::Type) -> bool {
+ match ty {
+ types::I8
+ | types::I16
+ | types::I32
+ | types::I64
+ | types::B1
+ | types::B8
+ | types::B16
+ | types::B32
+ | types::B64
+ | types::R64 => true,
+        types::R32 => panic!("unexpected 32-bit refs on x64!"),
+ _ => false,
+ }
+}
+
+fn in_vec_reg(ty: types::Type) -> bool {
+ match ty {
+ types::F32 | types::F64 => true,
+ _ if ty.is_vector() => true,
+ _ => false,
+ }
+}
+
+fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast
+ | CallConv::Cold
+ | CallConv::SystemV
+ | CallConv::BaldrdashSystemV
+ | CallConv::Baldrdash2020 => {}
+ _ => panic!("int args only supported for SysV calling convention"),
+ };
+ match idx {
+ 0 => Some(regs::rdi()),
+ 1 => Some(regs::rsi()),
+ 2 => Some(regs::rdx()),
+ 3 => Some(regs::rcx()),
+ 4 => Some(regs::r8()),
+ 5 => Some(regs::r9()),
+ _ => None,
+ }
+}
+
+fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast
+ | CallConv::Cold
+ | CallConv::SystemV
+ | CallConv::BaldrdashSystemV
+ | CallConv::Baldrdash2020 => {}
+ _ => panic!("float args only supported for SysV calling convention"),
+ };
+ match idx {
+ 0 => Some(regs::xmm0()),
+ 1 => Some(regs::xmm1()),
+ 2 => Some(regs::xmm2()),
+ 3 => Some(regs::xmm3()),
+ 4 => Some(regs::xmm4()),
+ 5 => Some(regs::xmm5()),
+ 6 => Some(regs::xmm6()),
+ 7 => Some(regs::xmm7()),
+ _ => None,
+ }
+}
+
+fn get_intreg_for_retval_systemv(
+ call_conv: &CallConv,
+ intreg_idx: usize,
+ retval_idx: usize,
+) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
+ 0 => Some(regs::rax()),
+ 1 => Some(regs::rdx()),
+ _ => None,
+ },
+ CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
+ if intreg_idx == 0 && retval_idx == 0 {
+ Some(regs::rax())
+ } else {
+ None
+ }
+ }
+ CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
+ }
+}
+
+fn get_fltreg_for_retval_systemv(
+ call_conv: &CallConv,
+ fltreg_idx: usize,
+ retval_idx: usize,
+) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
+ 0 => Some(regs::xmm0()),
+ 1 => Some(regs::xmm1()),
+ _ => None,
+ },
+ CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
+ if fltreg_idx == 0 && retval_idx == 0 {
+ Some(regs::xmm0())
+ } else {
+ None
+ }
+ }
+ CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
+ }
+}
+
+fn is_callee_save_systemv(r: RealReg) -> bool {
+ use regs::*;
+ match r.get_class() {
+ RegClass::I64 => match r.get_hw_encoding() as u8 {
+ ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
+ _ => false,
+ },
+ RegClass::V128 => false,
+ _ => unimplemented!(),
+ }
+}
+
+fn is_callee_save_baldrdash(r: RealReg) -> bool {
+ use regs::*;
+ match r.get_class() {
+ RegClass::I64 => {
+ if r.get_hw_encoding() as u8 == ENC_R14 {
+ // r14 is the WasmTlsReg and is preserved implicitly.
+ false
+ } else {
+ // Defer to native for the other ones.
+ is_callee_save_systemv(r)
+ }
+ }
+ RegClass::V128 => false,
+ _ => unimplemented!(),
+ }
+}
+
+fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
+ let mut regs: Vec<Writable<RealReg>> = match call_conv {
+ CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
+ .iter()
+ .cloned()
+ .filter(|r| is_callee_save_baldrdash(r.to_reg()))
+ .collect(),
+ CallConv::BaldrdashWindows => {
+ todo!("baldrdash windows");
+ }
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
+ .iter()
+ .cloned()
+ .filter(|r| is_callee_save_systemv(r.to_reg()))
+ .collect(),
+ CallConv::WindowsFastcall => todo!("windows fastcall"),
+ CallConv::Probestack => todo!("probestack?"),
+ };
+ // Sort registers for deterministic code output. We can do an unstable sort because the
+ // registers will be unique (there are no dups).
+ regs.sort_unstable_by_key(|r| r.to_reg().get_index());
+ regs
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs
new file mode 100644
index 0000000000..6a8f65feb3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs
@@ -0,0 +1,1215 @@
+//! Instruction operand sub-components (aka "parts"): definitions and printing.
+
+use super::regs::{self, show_ireg_sized};
+use super::EmitState;
+use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::MemFlags;
+use crate::machinst::*;
+use regalloc::{
+ PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
+ RegUsageMapper, Writable,
+};
+use std::fmt;
+use std::string::String;
+
+/// A possible addressing mode (amode) that can be used in instructions.
+/// These denote a 64-bit value only.
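+///
+/// For example (illustrative; `base` and `index` stand for arbitrary 64-bit registers),
+/// `Amode::imm_reg_reg_shift(8, base, index, 3)` denotes the address
+/// `8 + base + (index << 3)`, printed as `8(base,index,8)`.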
+#[derive(Clone, Debug)]
+pub enum Amode {
+    /// sign-extend-32-to-64(Immediate) + Register
+ ImmReg {
+ simm32: u32,
+ base: Reg,
+ flags: MemFlags,
+ },
+
+ /// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
+ ImmRegRegShift {
+ simm32: u32,
+ base: Reg,
+ index: Reg,
+ shift: u8, /* 0 .. 3 only */
+ flags: MemFlags,
+ },
+
+ /// sign-extend-32-to-64(Immediate) + RIP (instruction pointer).
+    /// To wit: not supported in 32-bit mode.
+ RipRelative { target: MachLabel },
+}
+
+impl Amode {
+ pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
+ debug_assert!(base.get_class() == RegClass::I64);
+ Self::ImmReg {
+ simm32,
+ base,
+ flags: MemFlags::trusted(),
+ }
+ }
+
+ pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
+ debug_assert!(base.get_class() == RegClass::I64);
+ debug_assert!(index.get_class() == RegClass::I64);
+ debug_assert!(shift <= 3);
+ Self::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ flags: MemFlags::trusted(),
+ }
+ }
+
+ pub(crate) fn rip_relative(target: MachLabel) -> Self {
+ Self::RipRelative { target }
+ }
+
+ pub(crate) fn with_flags(&self, flags: MemFlags) -> Self {
+ match self {
+ &Self::ImmReg { simm32, base, .. } => Self::ImmReg {
+ simm32,
+ base,
+ flags,
+ },
+ &Self::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ ..
+ } => Self::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ flags,
+ },
+ _ => panic!("Amode {:?} cannot take memflags", self),
+ }
+ }
+
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ Amode::ImmReg { base, .. } => {
+ collector.add_use(*base);
+ }
+ Amode::ImmRegRegShift { base, index, .. } => {
+ collector.add_use(*base);
+ collector.add_use(*index);
+ }
+ Amode::RipRelative { .. } => {
+ // RIP isn't involved in regalloc.
+ }
+ }
+ }
+
+ pub(crate) fn get_flags(&self) -> MemFlags {
+ match self {
+ Amode::ImmReg { flags, .. } => *flags,
+ Amode::ImmRegRegShift { flags, .. } => *flags,
+ Amode::RipRelative { .. } => MemFlags::trusted(),
+ }
+ }
+
+ pub(crate) fn can_trap(&self) -> bool {
+ !self.get_flags().notrap()
+ }
+}
+
+impl PrettyPrint for Amode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ Amode::ImmReg { simm32, base, .. } => {
+ format!("{}({})", *simm32 as i32, base.show_rru(mb_rru))
+ }
+ Amode::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ ..
+ } => format!(
+ "{}({},{},{})",
+ *simm32 as i32,
+ base.show_rru(mb_rru),
+ index.show_rru(mb_rru),
+ 1 << shift
+ ),
+ Amode::RipRelative { ref target } => format!("label{}(%rip)", target.get()),
+ }
+ }
+}
+
+/// A Memory Address. These denote a 64-bit value only.
+/// Used for ordinary addressing modes as well as for addressing modes used during compilation,
+/// when the final SP offset is not yet known.
+#[derive(Clone)]
+pub enum SyntheticAmode {
+ /// A real amode.
+ Real(Amode),
+
+ /// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop
+ /// within the function.
+ NominalSPOffset { simm32: u32 },
+}
+
+impl SyntheticAmode {
+ pub(crate) fn nominal_sp_offset(simm32: u32) -> Self {
+ SyntheticAmode::NominalSPOffset { simm32 }
+ }
+
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector),
+ SyntheticAmode::NominalSPOffset { .. } => {
+ // Nothing to do; the base is SP and isn't involved in regalloc.
+ }
+ }
+ }
+
+ pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ SyntheticAmode::Real(addr) => addr.map_uses(map),
+ SyntheticAmode::NominalSPOffset { .. } => {
+ // Nothing to do.
+ }
+ }
+ }
+
+ pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode {
+ match self {
+ SyntheticAmode::Real(addr) => addr.clone(),
+ SyntheticAmode::NominalSPOffset { simm32 } => {
+ let off = *simm32 as i64 + state.virtual_sp_offset;
+ // TODO will require a sequence of add etc.
+ assert!(
+ off <= u32::max_value() as i64,
+ "amode finalize: add sequence NYI"
+ );
+ Amode::imm_reg(off as u32, regs::rsp())
+ }
+ }
+ }
+}
+
+impl Into<SyntheticAmode> for Amode {
+ fn into(self) -> SyntheticAmode {
+ SyntheticAmode::Real(self)
+ }
+}
+
+impl PrettyPrint for SyntheticAmode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ SyntheticAmode::Real(addr) => addr.show_rru(mb_rru),
+ SyntheticAmode::NominalSPOffset { simm32 } => {
+ format!("rsp({} + virtual offset)", *simm32 as i32)
+ }
+ }
+ }
+}
+
+/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
+/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
+/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
+/// `simm32` is its sign-extension out to 64 bits.
+#[derive(Clone)]
+pub enum RegMemImm {
+ Reg { reg: Reg },
+ Mem { addr: SyntheticAmode },
+ Imm { simm32: u32 },
+}
+
+impl RegMemImm {
+ pub(crate) fn reg(reg: Reg) -> Self {
+ debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
+ Self::Reg { reg }
+ }
+ pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
+ Self::Mem { addr: addr.into() }
+ }
+ pub(crate) fn imm(simm32: u32) -> Self {
+ Self::Imm { simm32 }
+ }
+
+ /// Asserts that in register mode, the reg class is the one that's expected.
+ pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) {
+ if let Self::Reg { reg } = self {
+ debug_assert_eq!(reg.get_class(), expected_reg_class);
+ }
+ }
+
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ Self::Reg { reg } => collector.add_use(*reg),
+ Self::Mem { addr } => addr.get_regs_as_uses(collector),
+ Self::Imm { .. } => {}
+ }
+ }
+
+ pub(crate) fn to_reg(&self) -> Option<Reg> {
+ match self {
+ Self::Reg { reg } => Some(*reg),
+ _ => None,
+ }
+ }
+}
+
+impl PrettyPrint for RegMemImm {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.show_rru_sized(mb_rru, 8)
+ }
+}
+
+impl PrettyPrintSized for RegMemImm {
+ fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
+ match self {
+ Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
+ Self::Mem { addr } => addr.show_rru(mb_rru),
+ Self::Imm { simm32 } => format!("${}", *simm32 as i32),
+ }
+ }
+}
+
+/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
+/// 32, 64, or 128 bit value.
+#[derive(Clone)]
+pub enum RegMem {
+ Reg { reg: Reg },
+ Mem { addr: SyntheticAmode },
+}
+
+impl RegMem {
+ pub(crate) fn reg(reg: Reg) -> Self {
+ debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
+ Self::Reg { reg }
+ }
+ pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
+ Self::Mem { addr: addr.into() }
+ }
+ /// Asserts that in register mode, the reg class is the one that's expected.
+ pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) {
+ if let Self::Reg { reg } = self {
+ debug_assert_eq!(reg.get_class(), expected_reg_class);
+ }
+ }
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ RegMem::Reg { reg } => collector.add_use(*reg),
+ RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
+ }
+ }
+ pub(crate) fn to_reg(&self) -> Option<Reg> {
+ match self {
+ RegMem::Reg { reg } => Some(*reg),
+ _ => None,
+ }
+ }
+}
+
+impl From<Writable<Reg>> for RegMem {
+ fn from(r: Writable<Reg>) -> Self {
+ RegMem::reg(r.to_reg())
+ }
+}
+
+impl PrettyPrint for RegMem {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.show_rru_sized(mb_rru, 8)
+ }
+}
+
+impl PrettyPrintSized for RegMem {
+ fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
+ match self {
+ RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
+ RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
+ }
+ }
+}
+
+/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
+#[derive(Copy, Clone, PartialEq)]
+pub enum AluRmiROpcode {
+ Add,
+ Sub,
+ And,
+ Or,
+ Xor,
+ /// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
+ Mul,
+}
+
+impl fmt::Debug for AluRmiROpcode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ AluRmiROpcode::Add => "add",
+ AluRmiROpcode::Sub => "sub",
+ AluRmiROpcode::And => "and",
+ AluRmiROpcode::Or => "or",
+ AluRmiROpcode::Xor => "xor",
+ AluRmiROpcode::Mul => "imul",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for AluRmiROpcode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+#[derive(Clone, PartialEq)]
+pub enum UnaryRmROpcode {
+ /// Bit-scan reverse.
+ Bsr,
+ /// Bit-scan forward.
+ Bsf,
+}
+
+impl fmt::Debug for UnaryRmROpcode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
+ UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
+ }
+ }
+}
+
+impl fmt::Display for UnaryRmROpcode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+pub(crate) enum InstructionSet {
+ SSE,
+ SSE2,
+ SSSE3,
+ SSE41,
+ SSE42,
+}
+
+/// Some SSE operations requiring 2 operands r/m and r.
+#[derive(Clone, Copy, PartialEq)]
+pub enum SseOpcode {
+ Addps,
+ Addpd,
+ Addss,
+ Addsd,
+ Andps,
+ Andpd,
+ Andnps,
+ Andnpd,
+ Comiss,
+ Comisd,
+ Cmpps,
+ Cmppd,
+ Cmpss,
+ Cmpsd,
+ Cvtdq2ps,
+ Cvtsd2ss,
+ Cvtsd2si,
+ Cvtsi2ss,
+ Cvtsi2sd,
+ Cvtss2si,
+ Cvtss2sd,
+ Cvttps2dq,
+ Cvttss2si,
+ Cvttsd2si,
+ Divps,
+ Divpd,
+ Divss,
+ Divsd,
+ Insertps,
+ Maxps,
+ Maxpd,
+ Maxss,
+ Maxsd,
+ Minps,
+ Minpd,
+ Minss,
+ Minsd,
+ Movaps,
+ Movapd,
+ Movd,
+ Movdqa,
+ Movdqu,
+ Movlhps,
+ Movmskps,
+ Movmskpd,
+ Movq,
+ Movss,
+ Movsd,
+ Movups,
+ Movupd,
+ Mulps,
+ Mulpd,
+ Mulss,
+ Mulsd,
+ Orps,
+ Orpd,
+ Pabsb,
+ Pabsw,
+ Pabsd,
+ Packsswb,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Paddsb,
+ Paddsw,
+ Paddusb,
+ Paddusw,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pcmpeqb,
+ Pcmpeqw,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpgtb,
+ Pcmpgtw,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pextrb,
+ Pextrw,
+ Pextrd,
+ Pinsrb,
+ Pinsrw,
+ Pinsrd,
+ Pmaxsb,
+ Pmaxsw,
+ Pmaxsd,
+ Pmaxub,
+ Pmaxuw,
+ Pmaxud,
+ Pminsb,
+ Pminsw,
+ Pminsd,
+ Pminub,
+ Pminuw,
+ Pminud,
+ Pmovmskb,
+ Pmulld,
+ Pmullw,
+ Pmuludq,
+ Por,
+ Pshufb,
+ Pshufd,
+ Psllw,
+ Pslld,
+ Psllq,
+ Psraw,
+ Psrad,
+ Psrlw,
+ Psrld,
+ Psrlq,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Psubsb,
+ Psubsw,
+ Psubusb,
+ Psubusw,
+ Ptest,
+ Pxor,
+ Rcpss,
+ Roundss,
+ Roundsd,
+ Rsqrtss,
+ Sqrtps,
+ Sqrtpd,
+ Sqrtss,
+ Sqrtsd,
+ Subps,
+ Subpd,
+ Subss,
+ Subsd,
+ Ucomiss,
+ Ucomisd,
+ Xorps,
+ Xorpd,
+}
+
+impl SseOpcode {
+ /// Which `InstructionSet` is the first supporting this opcode?
+ pub(crate) fn available_from(&self) -> InstructionSet {
+ use InstructionSet::*;
+ match self {
+ SseOpcode::Addps
+ | SseOpcode::Addss
+ | SseOpcode::Andps
+ | SseOpcode::Andnps
+ | SseOpcode::Comiss
+ | SseOpcode::Cmpps
+ | SseOpcode::Cmpss
+ | SseOpcode::Cvtsi2ss
+ | SseOpcode::Cvtss2si
+ | SseOpcode::Cvttss2si
+ | SseOpcode::Divps
+ | SseOpcode::Divss
+ | SseOpcode::Maxps
+ | SseOpcode::Maxss
+ | SseOpcode::Minps
+ | SseOpcode::Minss
+ | SseOpcode::Movaps
+ | SseOpcode::Movlhps
+ | SseOpcode::Movmskps
+ | SseOpcode::Movss
+ | SseOpcode::Movups
+ | SseOpcode::Mulps
+ | SseOpcode::Mulss
+ | SseOpcode::Orps
+ | SseOpcode::Rcpss
+ | SseOpcode::Rsqrtss
+ | SseOpcode::Sqrtps
+ | SseOpcode::Sqrtss
+ | SseOpcode::Subps
+ | SseOpcode::Subss
+ | SseOpcode::Ucomiss
+ | SseOpcode::Xorps => SSE,
+
+ SseOpcode::Addpd
+ | SseOpcode::Addsd
+ | SseOpcode::Andpd
+ | SseOpcode::Andnpd
+ | SseOpcode::Cmppd
+ | SseOpcode::Cmpsd
+ | SseOpcode::Comisd
+ | SseOpcode::Cvtdq2ps
+ | SseOpcode::Cvtsd2ss
+ | SseOpcode::Cvtsd2si
+ | SseOpcode::Cvtsi2sd
+ | SseOpcode::Cvtss2sd
+ | SseOpcode::Cvttps2dq
+ | SseOpcode::Cvttsd2si
+ | SseOpcode::Divpd
+ | SseOpcode::Divsd
+ | SseOpcode::Maxpd
+ | SseOpcode::Maxsd
+ | SseOpcode::Minpd
+ | SseOpcode::Minsd
+ | SseOpcode::Movapd
+ | SseOpcode::Movd
+ | SseOpcode::Movmskpd
+ | SseOpcode::Movq
+ | SseOpcode::Movsd
+ | SseOpcode::Movupd
+ | SseOpcode::Movdqa
+ | SseOpcode::Movdqu
+ | SseOpcode::Mulpd
+ | SseOpcode::Mulsd
+ | SseOpcode::Orpd
+ | SseOpcode::Packsswb
+ | SseOpcode::Paddb
+ | SseOpcode::Paddd
+ | SseOpcode::Paddq
+ | SseOpcode::Paddw
+ | SseOpcode::Paddsb
+ | SseOpcode::Paddsw
+ | SseOpcode::Paddusb
+ | SseOpcode::Paddusw
+ | SseOpcode::Pand
+ | SseOpcode::Pandn
+ | SseOpcode::Pavgb
+ | SseOpcode::Pavgw
+ | SseOpcode::Pcmpeqb
+ | SseOpcode::Pcmpeqw
+ | SseOpcode::Pcmpeqd
+ | SseOpcode::Pcmpgtb
+ | SseOpcode::Pcmpgtw
+ | SseOpcode::Pcmpgtd
+ | SseOpcode::Pextrw
+ | SseOpcode::Pinsrw
+ | SseOpcode::Pmaxsw
+ | SseOpcode::Pmaxub
+ | SseOpcode::Pminsw
+ | SseOpcode::Pminub
+ | SseOpcode::Pmovmskb
+ | SseOpcode::Pmullw
+ | SseOpcode::Pmuludq
+ | SseOpcode::Por
+ | SseOpcode::Pshufd
+ | SseOpcode::Psllw
+ | SseOpcode::Pslld
+ | SseOpcode::Psllq
+ | SseOpcode::Psraw
+ | SseOpcode::Psrad
+ | SseOpcode::Psrlw
+ | SseOpcode::Psrld
+ | SseOpcode::Psrlq
+ | SseOpcode::Psubb
+ | SseOpcode::Psubd
+ | SseOpcode::Psubq
+ | SseOpcode::Psubw
+ | SseOpcode::Psubsb
+ | SseOpcode::Psubsw
+ | SseOpcode::Psubusb
+ | SseOpcode::Psubusw
+ | SseOpcode::Pxor
+ | SseOpcode::Sqrtpd
+ | SseOpcode::Sqrtsd
+ | SseOpcode::Subpd
+ | SseOpcode::Subsd
+ | SseOpcode::Ucomisd
+ | SseOpcode::Xorpd => SSE2,
+
+ SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
+
+ SseOpcode::Insertps
+ | SseOpcode::Pcmpeqq
+ | SseOpcode::Pextrb
+ | SseOpcode::Pextrd
+ | SseOpcode::Pinsrb
+ | SseOpcode::Pinsrd
+ | SseOpcode::Pmaxsb
+ | SseOpcode::Pmaxsd
+ | SseOpcode::Pmaxuw
+ | SseOpcode::Pmaxud
+ | SseOpcode::Pminsb
+ | SseOpcode::Pminsd
+ | SseOpcode::Pminuw
+ | SseOpcode::Pminud
+ | SseOpcode::Pmulld
+ | SseOpcode::Ptest
+ | SseOpcode::Roundss
+ | SseOpcode::Roundsd => SSE41,
+
+ SseOpcode::Pcmpgtq => SSE42,
+ }
+ }
+
+ /// Returns the src operand size for an instruction.
+ pub(crate) fn src_size(&self) -> u8 {
+ match self {
+ SseOpcode::Movd => 4,
+ _ => 8,
+ }
+ }
+}
+
+impl fmt::Debug for SseOpcode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ SseOpcode::Addps => "addps",
+ SseOpcode::Addpd => "addpd",
+ SseOpcode::Addss => "addss",
+ SseOpcode::Addsd => "addsd",
+ SseOpcode::Andpd => "andpd",
+ SseOpcode::Andps => "andps",
+ SseOpcode::Andnps => "andnps",
+ SseOpcode::Andnpd => "andnpd",
+ SseOpcode::Cmpps => "cmpps",
+ SseOpcode::Cmppd => "cmppd",
+ SseOpcode::Cmpss => "cmpss",
+ SseOpcode::Cmpsd => "cmpsd",
+ SseOpcode::Comiss => "comiss",
+ SseOpcode::Comisd => "comisd",
+ SseOpcode::Cvtdq2ps => "cvtdq2ps",
+ SseOpcode::Cvtsd2ss => "cvtsd2ss",
+ SseOpcode::Cvtsd2si => "cvtsd2si",
+ SseOpcode::Cvtsi2ss => "cvtsi2ss",
+ SseOpcode::Cvtsi2sd => "cvtsi2sd",
+ SseOpcode::Cvtss2si => "cvtss2si",
+ SseOpcode::Cvtss2sd => "cvtss2sd",
+ SseOpcode::Cvttps2dq => "cvttps2dq",
+ SseOpcode::Cvttss2si => "cvttss2si",
+ SseOpcode::Cvttsd2si => "cvttsd2si",
+ SseOpcode::Divps => "divps",
+ SseOpcode::Divpd => "divpd",
+ SseOpcode::Divss => "divss",
+ SseOpcode::Divsd => "divsd",
+ SseOpcode::Insertps => "insertps",
+ SseOpcode::Maxps => "maxps",
+ SseOpcode::Maxpd => "maxpd",
+ SseOpcode::Maxss => "maxss",
+ SseOpcode::Maxsd => "maxsd",
+ SseOpcode::Minps => "minps",
+ SseOpcode::Minpd => "minpd",
+ SseOpcode::Minss => "minss",
+ SseOpcode::Minsd => "minsd",
+ SseOpcode::Movaps => "movaps",
+ SseOpcode::Movapd => "movapd",
+ SseOpcode::Movd => "movd",
+ SseOpcode::Movdqa => "movdqa",
+ SseOpcode::Movdqu => "movdqu",
+ SseOpcode::Movlhps => "movlhps",
+ SseOpcode::Movmskps => "movmskps",
+ SseOpcode::Movmskpd => "movmskpd",
+ SseOpcode::Movq => "movq",
+ SseOpcode::Movss => "movss",
+ SseOpcode::Movsd => "movsd",
+ SseOpcode::Movups => "movups",
+ SseOpcode::Movupd => "movupd",
+ SseOpcode::Mulps => "mulps",
+ SseOpcode::Mulpd => "mulpd",
+ SseOpcode::Mulss => "mulss",
+ SseOpcode::Mulsd => "mulsd",
+ SseOpcode::Orpd => "orpd",
+ SseOpcode::Orps => "orps",
+ SseOpcode::Pabsb => "pabsb",
+ SseOpcode::Pabsw => "pabsw",
+ SseOpcode::Pabsd => "pabsd",
+ SseOpcode::Packsswb => "packsswb",
+ SseOpcode::Paddb => "paddb",
+ SseOpcode::Paddd => "paddd",
+ SseOpcode::Paddq => "paddq",
+ SseOpcode::Paddw => "paddw",
+ SseOpcode::Paddsb => "paddsb",
+ SseOpcode::Paddsw => "paddsw",
+ SseOpcode::Paddusb => "paddusb",
+ SseOpcode::Paddusw => "paddusw",
+ SseOpcode::Pand => "pand",
+ SseOpcode::Pandn => "pandn",
+ SseOpcode::Pavgb => "pavgb",
+ SseOpcode::Pavgw => "pavgw",
+ SseOpcode::Pcmpeqb => "pcmpeqb",
+ SseOpcode::Pcmpeqw => "pcmpeqw",
+ SseOpcode::Pcmpeqd => "pcmpeqd",
+ SseOpcode::Pcmpeqq => "pcmpeqq",
+ SseOpcode::Pcmpgtb => "pcmpgtb",
+ SseOpcode::Pcmpgtw => "pcmpgtw",
+ SseOpcode::Pcmpgtd => "pcmpgtd",
+ SseOpcode::Pcmpgtq => "pcmpgtq",
+ SseOpcode::Pextrb => "pextrb",
+ SseOpcode::Pextrw => "pextrw",
+ SseOpcode::Pextrd => "pextrd",
+ SseOpcode::Pinsrb => "pinsrb",
+ SseOpcode::Pinsrw => "pinsrw",
+ SseOpcode::Pinsrd => "pinsrd",
+ SseOpcode::Pmaxsb => "pmaxsb",
+ SseOpcode::Pmaxsw => "pmaxsw",
+ SseOpcode::Pmaxsd => "pmaxsd",
+ SseOpcode::Pmaxub => "pmaxub",
+ SseOpcode::Pmaxuw => "pmaxuw",
+ SseOpcode::Pmaxud => "pmaxud",
+ SseOpcode::Pminsb => "pminsb",
+ SseOpcode::Pminsw => "pminsw",
+ SseOpcode::Pminsd => "pminsd",
+ SseOpcode::Pminub => "pminub",
+ SseOpcode::Pminuw => "pminuw",
+ SseOpcode::Pminud => "pminud",
+ SseOpcode::Pmovmskb => "pmovmskb",
+ SseOpcode::Pmulld => "pmulld",
+ SseOpcode::Pmullw => "pmullw",
+ SseOpcode::Pmuludq => "pmuludq",
+ SseOpcode::Por => "por",
+ SseOpcode::Pshufb => "pshufb",
+ SseOpcode::Pshufd => "pshufd",
+ SseOpcode::Psllw => "psllw",
+ SseOpcode::Pslld => "pslld",
+ SseOpcode::Psllq => "psllq",
+ SseOpcode::Psraw => "psraw",
+ SseOpcode::Psrad => "psrad",
+ SseOpcode::Psrlw => "psrlw",
+ SseOpcode::Psrld => "psrld",
+ SseOpcode::Psrlq => "psrlq",
+ SseOpcode::Psubb => "psubb",
+ SseOpcode::Psubd => "psubd",
+ SseOpcode::Psubq => "psubq",
+ SseOpcode::Psubw => "psubw",
+ SseOpcode::Psubsb => "psubsb",
+ SseOpcode::Psubsw => "psubsw",
+ SseOpcode::Psubusb => "psubusb",
+ SseOpcode::Psubusw => "psubusw",
+ SseOpcode::Ptest => "ptest",
+ SseOpcode::Pxor => "pxor",
+ SseOpcode::Rcpss => "rcpss",
+ SseOpcode::Roundss => "roundss",
+ SseOpcode::Roundsd => "roundsd",
+ SseOpcode::Rsqrtss => "rsqrtss",
+ SseOpcode::Sqrtps => "sqrtps",
+ SseOpcode::Sqrtpd => "sqrtpd",
+ SseOpcode::Sqrtss => "sqrtss",
+ SseOpcode::Sqrtsd => "sqrtsd",
+ SseOpcode::Subps => "subps",
+ SseOpcode::Subpd => "subpd",
+ SseOpcode::Subss => "subss",
+ SseOpcode::Subsd => "subsd",
+ SseOpcode::Ucomiss => "ucomiss",
+ SseOpcode::Ucomisd => "ucomisd",
+ SseOpcode::Xorps => "xorps",
+ SseOpcode::Xorpd => "xorpd",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for SseOpcode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
+/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
+/// values can be extended.
+#[derive(Clone, PartialEq)]
+pub enum ExtKind {
+ None,
+ SignExtend,
+ ZeroExtend,
+}
+
+/// These indicate ways of extending (widening) a value, using the Intel
+/// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
+#[derive(Clone, PartialEq)]
+pub enum ExtMode {
+ /// Byte -> Longword.
+ BL,
+ /// Byte -> Quadword.
+ BQ,
+ /// Word -> Longword.
+ WL,
+ /// Word -> Quadword.
+ WQ,
+ /// Longword -> Quadword.
+ LQ,
+}
+
+impl ExtMode {
+ /// Calculate the `ExtMode` from passed bit lengths of the from/to types.
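+    ///
+    /// For example, `ExtMode::new(8, 64)` is `Some(ExtMode::BQ)`, while `ExtMode::new(32, 32)`
+    /// is `None` because no extension is needed.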
+ pub(crate) fn new(from_bits: u16, to_bits: u16) -> Option<ExtMode> {
+ match (from_bits, to_bits) {
+ (1, 8) | (1, 16) | (1, 32) | (8, 16) | (8, 32) => Some(ExtMode::BL),
+ (1, 64) | (8, 64) => Some(ExtMode::BQ),
+ (16, 32) => Some(ExtMode::WL),
+ (16, 64) => Some(ExtMode::WQ),
+ (32, 64) => Some(ExtMode::LQ),
+ _ => None,
+ }
+ }
+
+ /// Return the source register size in bytes.
+ pub(crate) fn src_size(&self) -> u8 {
+ match self {
+ ExtMode::BL | ExtMode::BQ => 1,
+ ExtMode::WL | ExtMode::WQ => 2,
+ ExtMode::LQ => 4,
+ }
+ }
+
+ /// Return the destination register size in bytes.
+ pub(crate) fn dst_size(&self) -> u8 {
+ match self {
+ ExtMode::BL | ExtMode::WL => 4,
+ ExtMode::BQ | ExtMode::WQ | ExtMode::LQ => 8,
+ }
+ }
+}
+
+impl fmt::Debug for ExtMode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ ExtMode::BL => "bl",
+ ExtMode::BQ => "bq",
+ ExtMode::WL => "wl",
+ ExtMode::WQ => "wq",
+ ExtMode::LQ => "lq",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for ExtMode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// These indicate the form of a scalar shift or rotate: left shift, logical (unsigned) right
+/// shift, arithmetic (signed) right shift, or a rotate in either direction.
+#[derive(Clone)]
+pub enum ShiftKind {
+ ShiftLeft,
+ /// Inserts zeros in the most significant bits.
+ ShiftRightLogical,
+ /// Replicates the sign bit in the most significant bits.
+ ShiftRightArithmetic,
+ RotateLeft,
+ RotateRight,
+}
+
+impl fmt::Debug for ShiftKind {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ ShiftKind::ShiftLeft => "shl",
+ ShiftKind::ShiftRightLogical => "shr",
+ ShiftKind::ShiftRightArithmetic => "sar",
+ ShiftKind::RotateLeft => "rol",
+ ShiftKind::RotateRight => "ror",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for ShiftKind {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// Indicates the kind of division or remainder instruction: signed or unsigned, and whether the
+/// quotient or the remainder is produced.
+#[derive(Clone)]
+pub enum DivOrRemKind {
+ SignedDiv,
+ UnsignedDiv,
+ SignedRem,
+ UnsignedRem,
+}
+
+impl DivOrRemKind {
+ pub(crate) fn is_signed(&self) -> bool {
+ match self {
+ DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true,
+ _ => false,
+ }
+ }
+
+ pub(crate) fn is_div(&self) -> bool {
+ match self {
+ DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true,
+ _ => false,
+ }
+ }
+}
+
+/// These indicate condition code tests. Not all are represented since not all are useful in
+/// compiler-generated code.
+#[derive(Copy, Clone)]
+#[repr(u8)]
+pub enum CC {
+ /// overflow
+ O = 0,
+ /// no overflow
+ NO = 1,
+
+ /// < unsigned
+ B = 2,
+ /// >= unsigned
+ NB = 3,
+
+ /// zero
+ Z = 4,
+ /// not-zero
+ NZ = 5,
+
+ /// <= unsigned
+ BE = 6,
+ /// > unsigned
+ NBE = 7,
+
+ /// negative
+ S = 8,
+ /// not-negative
+ NS = 9,
+
+ /// < signed
+ L = 12,
+ /// >= signed
+ NL = 13,
+
+ /// <= signed
+ LE = 14,
+ /// > signed
+ NLE = 15,
+
+ /// parity
+ P = 10,
+
+ /// not parity
+ NP = 11,
+}
+
+impl CC {
+ pub(crate) fn from_intcc(intcc: IntCC) -> Self {
+ match intcc {
+ IntCC::Equal => CC::Z,
+ IntCC::NotEqual => CC::NZ,
+ IntCC::SignedGreaterThanOrEqual => CC::NL,
+ IntCC::SignedGreaterThan => CC::NLE,
+ IntCC::SignedLessThanOrEqual => CC::LE,
+ IntCC::SignedLessThan => CC::L,
+ IntCC::UnsignedGreaterThanOrEqual => CC::NB,
+ IntCC::UnsignedGreaterThan => CC::NBE,
+ IntCC::UnsignedLessThanOrEqual => CC::BE,
+ IntCC::UnsignedLessThan => CC::B,
+ IntCC::Overflow => CC::O,
+ IntCC::NotOverflow => CC::NO,
+ }
+ }
+
+ pub(crate) fn invert(&self) -> Self {
+ match self {
+ CC::O => CC::NO,
+ CC::NO => CC::O,
+
+ CC::B => CC::NB,
+ CC::NB => CC::B,
+
+ CC::Z => CC::NZ,
+ CC::NZ => CC::Z,
+
+ CC::BE => CC::NBE,
+ CC::NBE => CC::BE,
+
+ CC::S => CC::NS,
+ CC::NS => CC::S,
+
+ CC::L => CC::NL,
+ CC::NL => CC::L,
+
+ CC::LE => CC::NLE,
+ CC::NLE => CC::LE,
+
+ CC::P => CC::NP,
+ CC::NP => CC::P,
+ }
+ }
+
+ pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self {
+ match floatcc {
+ FloatCC::Ordered => CC::NP,
+ FloatCC::Unordered => CC::P,
+ // Alias for NE
+ FloatCC::OrderedNotEqual => CC::NZ,
+ // Alias for E
+ FloatCC::UnorderedOrEqual => CC::Z,
+ // Alias for A
+ FloatCC::GreaterThan => CC::NBE,
+ // Alias for AE
+ FloatCC::GreaterThanOrEqual => CC::NB,
+ FloatCC::UnorderedOrLessThan => CC::B,
+ FloatCC::UnorderedOrLessThanOrEqual => CC::BE,
+ FloatCC::Equal
+ | FloatCC::NotEqual
+ | FloatCC::LessThan
+ | FloatCC::LessThanOrEqual
+ | FloatCC::UnorderedOrGreaterThan
+ | FloatCC::UnorderedOrGreaterThanOrEqual => panic!(
+ "{:?} can't be lowered to a CC code; treat as special case.",
+ floatcc
+ ),
+ }
+ }
+
+ pub(crate) fn get_enc(self) -> u8 {
+ self as u8
+ }
+}
+
+impl fmt::Debug for CC {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ CC::O => "o",
+ CC::NO => "no",
+ CC::B => "b",
+ CC::NB => "nb",
+ CC::Z => "z",
+ CC::NZ => "nz",
+ CC::BE => "be",
+ CC::NBE => "nbe",
+ CC::S => "s",
+ CC::NS => "ns",
+ CC::L => "l",
+ CC::NL => "nl",
+ CC::LE => "le",
+ CC::NLE => "nle",
+ CC::P => "p",
+ CC::NP => "np",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for CC {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// Encodes the ways that floats can be compared. This is used as an immediate in float comparisons
+/// such as `cmpps`; it is distinguished from other float comparisons (e.g. `ucomiss`), which set
+/// EFLAGS rather than taking an [FcmpImm] immediate.
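+///
+/// For example, `FcmpImm::from(FloatCC::LessThan).encode()` yields `0x01`, the immediate that
+/// requests a less-than comparison.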
+pub(crate) enum FcmpImm {
+ Equal = 0x00,
+ LessThan = 0x01,
+ LessThanOrEqual = 0x02,
+ Unordered = 0x03,
+ NotEqual = 0x04,
+ UnorderedOrGreaterThanOrEqual = 0x05,
+ UnorderedOrGreaterThan = 0x06,
+ Ordered = 0x07,
+}
+
+impl FcmpImm {
+ pub(crate) fn encode(self) -> u8 {
+ self as u8
+ }
+}
+
+impl From<FloatCC> for FcmpImm {
+ fn from(cond: FloatCC) -> Self {
+ match cond {
+ FloatCC::Equal => FcmpImm::Equal,
+ FloatCC::LessThan => FcmpImm::LessThan,
+ FloatCC::LessThanOrEqual => FcmpImm::LessThanOrEqual,
+ FloatCC::Unordered => FcmpImm::Unordered,
+ FloatCC::NotEqual => FcmpImm::NotEqual,
+ FloatCC::UnorderedOrGreaterThanOrEqual => FcmpImm::UnorderedOrGreaterThanOrEqual,
+ FloatCC::UnorderedOrGreaterThan => FcmpImm::UnorderedOrGreaterThan,
+ FloatCC::Ordered => FcmpImm::Ordered,
+ _ => panic!("unable to create comparison predicate for {}", cond),
+ }
+ }
+}
+
+/// An operand's size in bits.
+#[derive(Clone, Copy, PartialEq)]
+pub enum OperandSize {
+ Size32,
+ Size64,
+}
+
+impl OperandSize {
+ pub(crate) fn from_bytes(num_bytes: u32) -> Self {
+ match num_bytes {
+ 1 | 2 | 4 => OperandSize::Size32,
+ 8 => OperandSize::Size64,
+ _ => unreachable!(),
+ }
+ }
+
+ pub(crate) fn to_bytes(&self) -> u8 {
+ match self {
+ Self::Size32 => 4,
+ Self::Size64 => 8,
+ }
+ }
+
+ pub(crate) fn to_bits(&self) -> u8 {
+ match self {
+ Self::Size32 => 32,
+ Self::Size64 => 64,
+ }
+ }
+}
+
+/// An x64 memory fence kind.
+#[derive(Clone)]
+#[allow(dead_code)]
+pub enum FenceKind {
+ /// `mfence` instruction ("Memory Fence")
+ MFence,
+ /// `lfence` instruction ("Load Fence")
+ LFence,
+ /// `sfence` instruction ("Store Fence")
+ SFence,
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs
new file mode 100644
index 0000000000..dd4125a2da
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs
@@ -0,0 +1,2819 @@
+use crate::binemit::{Addend, Reloc};
+use crate::ir::immediates::{Ieee32, Ieee64};
+use crate::ir::TrapCode;
+use crate::isa::x64::inst::args::*;
+use crate::isa::x64::inst::*;
+use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
+use core::convert::TryInto;
+use log::debug;
+use regalloc::{Reg, RegClass, Writable};
+
+fn low8_will_sign_extend_to_64(x: u32) -> bool {
+ let xs = (x as i32) as i64;
+ xs == ((xs << 56) >> 56)
+}
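+
+// For example, low8_will_sign_extend_to_64(0x7f) and low8_will_sign_extend_to_64(0xffff_ff80)
+// are true (the 8-bit values 127 and -128), while low8_will_sign_extend_to_64(0x80) is false.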
+
+fn low8_will_sign_extend_to_32(x: u32) -> bool {
+ let xs = x as i32;
+ xs == ((xs << 24) >> 24)
+}
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+// For all of the routines that take both a memory-or-reg operand (sometimes
+// called "E" in the Intel documentation) and a reg-only operand ("G" in
+// Intelese), the order is always G first, then E.
+//
+// "enc" in the following means "hardware register encoding number".
+
+#[inline(always)]
+fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
+ debug_assert!(m0d < 4);
+ debug_assert!(enc_reg_g < 8);
+ debug_assert!(rm_e < 8);
+ ((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7)
+}
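+
+// For example, encode_modrm(0b11, 2, 5) is 0b11_010_101 == 0xd5: register-direct mode with
+// reg field 2 and r/m field 5.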
+
+#[inline(always)]
+fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
+ debug_assert!(shift < 4);
+ debug_assert!(enc_index < 8);
+ debug_assert!(enc_base < 8);
+ ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
+}
+
+/// Get the encoding number of a GPR.
+#[inline(always)]
+fn int_reg_enc(reg: Reg) -> u8 {
+ debug_assert!(reg.is_real());
+ debug_assert_eq!(reg.get_class(), RegClass::I64);
+ reg.get_hw_encoding()
+}
+
+/// Get the encoding number of any register.
+#[inline(always)]
+fn reg_enc(reg: Reg) -> u8 {
+ debug_assert!(reg.is_real());
+ reg.get_hw_encoding()
+}
+
+/// A small bit field to record a REX prefix specification:
+/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
+/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
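+///
+/// For example, an 8-bit operation that names one of SIL/DIL/SPL/BPL must use `clear_w()`
+/// together with `always_emit()`, since those byte registers are only addressable when a REX
+/// prefix (even an otherwise-redundant 0x40) is present.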
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+struct RexFlags(u8);
+
+impl RexFlags {
+ /// By default, set the W field, and don't always emit.
+ #[inline(always)]
+ fn set_w() -> Self {
+ Self(0)
+ }
+    /// Creates a new `RexFlags` value for which the REX.W bit will be cleared.
+ #[inline(always)]
+ fn clear_w() -> Self {
+ Self(1)
+ }
+
+ #[inline(always)]
+ fn always_emit(&mut self) -> &mut Self {
+ self.0 = self.0 | 2;
+ self
+ }
+
+ #[inline(always)]
+ fn must_clear_w(&self) -> bool {
+ (self.0 & 1) != 0
+ }
+ #[inline(always)]
+ fn must_always_emit(&self) -> bool {
+ (self.0 & 2) != 0
+ }
+
+ #[inline(always)]
+ fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
+ let w = if self.must_clear_w() { 0 } else { 1 };
+ let r = (enc_g >> 3) & 1;
+ let x = 0;
+ let b = (enc_e >> 3) & 1;
+ let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
+ if rex != 0x40 || self.must_always_emit() {
+ sink.put1(rex);
+ }
+ }
+
+ #[inline(always)]
+ fn emit_three_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_index: u8, enc_base: u8) {
+ let w = if self.must_clear_w() { 0 } else { 1 };
+ let r = (enc_g >> 3) & 1;
+ let x = (enc_index >> 3) & 1;
+ let b = (enc_base >> 3) & 1;
+ let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
+ if rex != 0x40 || self.must_always_emit() {
+ sink.put1(rex);
+ }
+ }
+}
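+// As an illustration of RexFlags: set_w() with enc_g = 0 (rax) and enc_e = 1 (rcx) emits 0x48
+// (REX.W only); clear_w() with enc_e = 9 (r9) emits 0x41 (REX.B only); and clear_w() with both
+// encodings below 8 computes the redundant 0x40 and emits nothing unless always_emit() was
+// requested.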
+
+/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
+/// covers only the small set of possibilities that we actually need.
+enum LegacyPrefixes {
+ /// No prefix bytes
+ None,
+ /// Operand Size Override -- here, denoting "16-bit operation"
+ _66,
+ /// The Lock prefix
+ _F0,
+ /// Operand size override and Lock
+ _66F0,
+    /// REPNE, but no specific meaning here -- it's just an opcode extension.
+    _F2,
+    /// REP/REPE, but no specific meaning here -- it's just an opcode extension.
+    _F3,
+}
+
+impl LegacyPrefixes {
+ #[inline(always)]
+ fn emit(&self, sink: &mut MachBuffer<Inst>) {
+ match self {
+ LegacyPrefixes::_66 => sink.put1(0x66),
+ LegacyPrefixes::_F0 => sink.put1(0xF0),
+ LegacyPrefixes::_66F0 => {
+ // I don't think the order matters, but in any case, this is the same order that
+ // the GNU assembler uses.
+ sink.put1(0x66);
+ sink.put1(0xF0);
+ }
+ LegacyPrefixes::_F2 => sink.put1(0xF2),
+ LegacyPrefixes::_F3 => sink.put1(0xF3),
+ LegacyPrefixes::None => (),
+ }
+ }
+}
+
+/// This is the core 'emit' function for instructions that reference memory.
+///
+/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
+/// create and emit:
+/// - first the legacy prefixes, if any
+/// - then the REX prefix, if needed
+/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
+/// - then the MOD/RM byte,
+/// - then optionally, a SIB byte,
+/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
+///
+/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
+/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
+/// instructions will require their own emitter functions.
+///
+/// This will also work for 32-bit x86 instructions, assuming no REX prefix is provided.
+///
+/// The opcodes are written big-endian for the convenience of callers. For example, if the opcode
+/// bytes to be emitted are, in this order, F3 0F 27, then the caller should pass `opcodes` ==
+/// 0xF3_0F_27 and `num_opcodes` == 3.
+///
+/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
+/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
+/// indicate a 64-bit operation and will be omitted if it is redundant (0x40). Note that for a
+/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
+/// indicate a 64-bit operation.
+fn emit_std_enc_mem(
+ sink: &mut MachBuffer<Inst>,
+ state: &EmitState,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ mut num_opcodes: usize,
+ enc_g: u8,
+ mem_e: &Amode,
+ rex: RexFlags,
+) {
+ // General comment for this function: the registers in `mem_e` must be
+ // 64-bit integer registers, because they are part of an address
+ // expression. But `enc_g` can be derived from a register of any class.
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && mem_e.can_trap() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ prefixes.emit(sink);
+
+ match mem_e {
+ Amode::ImmReg { simm32, base, .. } => {
+ // First, the REX byte.
+ let enc_e = int_reg_enc(*base);
+ rex.emit_two_op(sink, enc_g, enc_e);
+
+ // Now the opcode(s). These include any other prefixes the caller
+ // hands to us.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // Now the mod/rm and associated immediates. This is
+ // significantly complicated due to the multiple special cases.
+ if *simm32 == 0
+ && enc_e != regs::ENC_RSP
+ && enc_e != regs::ENC_RBP
+ && enc_e != regs::ENC_R12
+ && enc_e != regs::ENC_R13
+ {
+ // FIXME JRS 2020Feb11: those four tests can surely be
+ // replaced by a single mask-and-compare check. We should do
+ // that because this routine is likely to be hot.
+ sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
+ } else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
+ sink.put1(encode_modrm(0, enc_g & 7, 4));
+ sink.put1(0x24);
+ } else if low8_will_sign_extend_to_32(*simm32)
+ && enc_e != regs::ENC_RSP
+ && enc_e != regs::ENC_R12
+ {
+ sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
+ sink.put1((simm32 & 0xFF) as u8);
+ } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
+ sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
+ sink.put4(*simm32);
+ } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
+ && low8_will_sign_extend_to_32(*simm32)
+ {
+ // REX.B distinguishes RSP from R12
+ sink.put1(encode_modrm(1, enc_g & 7, 4));
+ sink.put1(0x24);
+ sink.put1((simm32 & 0xFF) as u8);
+ } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
+                // .. awaiting a test case that exercises the RSP variant of this path.
+ // REX.B distinguishes RSP from R12
+ sink.put1(encode_modrm(2, enc_g & 7, 4));
+ sink.put1(0x24);
+ sink.put4(*simm32);
+ } else {
+ unreachable!("ImmReg");
+ }
+ }
+
+ Amode::ImmRegRegShift {
+ simm32,
+ base: reg_base,
+ index: reg_index,
+ shift,
+ ..
+ } => {
+ let enc_base = int_reg_enc(*reg_base);
+ let enc_index = int_reg_enc(*reg_index);
+
+ // The rex byte.
+ rex.emit_three_op(sink, enc_g, enc_index, enc_base);
+
+ // All other prefixes and opcodes.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // modrm, SIB, immediates.
+ if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
+ sink.put1(encode_modrm(1, enc_g & 7, 4));
+ sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
+ sink.put1(*simm32 as u8);
+ } else if enc_index != regs::ENC_RSP {
+ sink.put1(encode_modrm(2, enc_g & 7, 4));
+ sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
+ sink.put4(*simm32);
+ } else {
+ panic!("ImmRegRegShift");
+ }
+ }
+
+ Amode::RipRelative { ref target } => {
+ // First, the REX byte, with REX.B = 0.
+ rex.emit_two_op(sink, enc_g, 0);
+
+ // Now the opcode(s). These include any other prefixes the caller
+ // hands to us.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // RIP-relative is mod=00, rm=101.
+ sink.put1(encode_modrm(0, enc_g & 7, 0b101));
+
+ let offset = sink.cur_offset();
+ sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
+ sink.put4(0);
+ }
+ }
+}
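+// As a worked example of emit_std_enc_mem: for `mov rax, [rbx + 0x10]` -- REX.W set, opcodes ==
+// 0x8B, enc_g == 0 (rax), and mem_e == Amode::ImmReg { simm32: 0x10, base: rbx } -- it emits
+// 48 8B 43 10: the REX.W prefix, the opcode, ModRM with mod=01/reg=000/rm=011, and the 8-bit
+// displacement (chosen because 0x10 sign-extends from 8 bits).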
+
+/// This is the core 'emit' function for instructions that do not reference memory.
+///
+/// This is conceptually the same as emit_std_enc_mem above, except it is for the case where the E
+/// operand is a register rather than memory. Hence it is much simpler.
+fn emit_std_enc_enc(
+ sink: &mut MachBuffer<Inst>,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ mut num_opcodes: usize,
+ enc_g: u8,
+ enc_e: u8,
+ rex: RexFlags,
+) {
+ // EncG and EncE can be derived from registers of any class, and they
+ // don't even have to be from the same class. For example, for an
+ // integer-to-FP conversion insn, one might be RegClass::I64 and the other
+ // RegClass::V128.
+
+ // The legacy prefixes.
+ prefixes.emit(sink);
+
+ // The rex byte.
+ rex.emit_two_op(sink, enc_g, enc_e);
+
+ // All other prefixes and opcodes.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // Now the mod/rm byte. The instruction we're generating doesn't access
+ // memory, so there is no SIB byte or immediate -- we're done.
+ sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
+}
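+// As a worked example of emit_std_enc_enc: `imul rax, rcx` -- opcodes == 0x0FAF, num_opcodes == 2,
+// enc_g == 0 (rax), enc_e == 1 (rcx), REX.W set -- emits 48 0F AF C1, where 0xC1 is ModRM with
+// mod=11/reg=000/rm=001.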
+
+// These are merely wrappers for the above two functions that facilitate passing
+// actual `Reg`s rather than their encodings.
+
+fn emit_std_reg_mem(
+ sink: &mut MachBuffer<Inst>,
+ state: &EmitState,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ num_opcodes: usize,
+ reg_g: Reg,
+ mem_e: &Amode,
+ rex: RexFlags,
+) {
+ let enc_g = reg_enc(reg_g);
+ emit_std_enc_mem(
+ sink,
+ state,
+ prefixes,
+ opcodes,
+ num_opcodes,
+ enc_g,
+ mem_e,
+ rex,
+ );
+}
+
+fn emit_std_reg_reg(
+ sink: &mut MachBuffer<Inst>,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ num_opcodes: usize,
+ reg_g: Reg,
+ reg_e: Reg,
+ rex: RexFlags,
+) {
+ let enc_g = reg_enc(reg_g);
+ let enc_e = reg_enc(reg_e);
+ emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
+}
+
+/// Write a suitable number of bits from an imm64 to the sink. Note that a `size` of 8 still
+/// emits only a 32-bit immediate, which the processor sign-extends to 64 bits.
+fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
+ match size {
+ 8 | 4 => sink.put4(simm32),
+ 2 => sink.put2(simm32 as u16),
+ 1 => sink.put1(simm32 as u8),
+ _ => unreachable!(),
+ }
+}
+
+/// A small helper to generate a signed conversion instruction.
+fn emit_signed_cvt(
+ sink: &mut MachBuffer<Inst>,
+ info: &EmitInfo,
+ state: &mut EmitState,
+ src: Reg,
+ dst: Writable<Reg>,
+ to_f64: bool,
+) {
+ // Handle an unsigned int, which is the "easy" case: a signed conversion will do the
+ // right thing.
+ let op = if to_f64 {
+ SseOpcode::Cvtsi2sd
+ } else {
+ SseOpcode::Cvtsi2ss
+ };
+ let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst);
+ inst.emit(sink, info, state);
+}
+
+/// Emits a one-way conditional jump if CC is set (true).
+fn one_way_jmp(sink: &mut MachBuffer<Inst>, cc: CC, label: MachLabel) {
+ let cond_start = sink.cur_offset();
+ let cond_disp_off = cond_start + 2;
+ sink.use_label_at_offset(cond_disp_off, label, LabelUse::JmpRel32);
+ sink.put1(0x0F);
+ sink.put1(0x80 + cc.get_enc());
+ sink.put4(0x0);
+}
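+// For instance, one_way_jmp with cc == CC::Z (hardware encoding 4) emits 0F 84 followed by a
+// zeroed 32-bit displacement, which the MachBuffer fixes up once the label's offset is known.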
+
+/// Emits a relocation, attaching the current source location as well.
+fn emit_reloc(
+ sink: &mut MachBuffer<Inst>,
+ state: &EmitState,
+ kind: Reloc,
+ name: &ExternalName,
+ addend: Addend,
+) {
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, kind, name, addend);
+}
+
+/// The top-level emit function.
+///
+/// Important! Do not add improved (shortened) encoding cases to existing
+/// instructions without also adding tests for those improved encodings. That
+/// is a dangerous game that leads to hard-to-track-down errors in the emitted
+/// code.
+///
+/// For all instructions, make sure to have test coverage for all of the
+/// following situations. Do this by creating the cross product resulting from
+/// applying the following rules to each operand:
+///
+/// (1) for any insn that mentions a register: one test using a register from
+/// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
+/// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
+/// This helps detect incorrect REX prefix construction.
+///
+/// (2) for any insn that mentions a byte register: one test for each of the
+/// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
+/// [r8b .. r11b] and [r12b .. r15b]. This checks that
+/// apparently-redundant REX prefixes are retained when required.
+///
+/// (3) for any insn that contains an immediate field, check the following
+/// cases: field is zero, field is in simm8 range (-128 .. 127), field is
+/// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
+/// instructions that require a 32-bit immediate have a short-form encoding
+/// when the imm is in simm8 range.
+///
+/// Rules (1), (2) and (3) don't apply for registers within address expressions
+/// (`Addr`s). Those are already pretty well tested, and the registers in them
+/// don't have any effect on the containing instruction (apart from possibly
+/// requiring REX prefix bits).
+///
+/// When choosing registers for a test, avoid using registers with the same
+/// offset within a given group. For example, don't use rax and r8, since they
+/// both have the lowest 3 bits as 000, and so the test won't detect errors
+/// where those 3-bit register sub-fields are confused by the emitter. Instead
+/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl
+/// and bpl since they have the same offset in their group; use instead (eg) cl
+/// and sil.
+///
+/// For all instructions, also add a test that uses only low-half registers
+/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX
+/// prefixes are correctly omitted. This low-half restriction must apply to
+/// _all_ registers in the insn, even those in address expressions.
+///
+/// Following these rules creates large numbers of test cases, but it's the
+/// only way to make the emitter reliable.
+///
+/// Known possible improvements:
+///
+/// * there's a shorter encoding for shl/shr/sar by a 1-bit immediate. (Do we
+/// care?)
+pub(crate) fn emit(
+ inst: &Inst,
+ sink: &mut MachBuffer<Inst>,
+ info: &EmitInfo,
+ state: &mut EmitState,
+) {
+ if let Some(iset_requirement) = inst.isa_requirement() {
+ match iset_requirement {
+ // Cranelift assumes SSE2 at least.
+ InstructionSet::SSE | InstructionSet::SSE2 => {}
+ InstructionSet::SSSE3 => assert!(info.isa_flags.has_ssse3()),
+ InstructionSet::SSE41 => assert!(info.isa_flags.has_sse41()),
+ InstructionSet::SSE42 => assert!(info.isa_flags.has_sse42()),
+ }
+ }
+
+ match inst {
+ Inst::AluRmiR {
+ is_64,
+ op,
+ src,
+ dst: reg_g,
+ } => {
+ let rex = if *is_64 {
+ RexFlags::set_w()
+ } else {
+ RexFlags::clear_w()
+ };
+
+ if *op == AluRmiROpcode::Mul {
+ // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
+ // we have to special-case it.
+ match src {
+ RegMemImm::Reg { reg: reg_e } => {
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ 0x0FAF,
+ 2,
+ reg_g.to_reg(),
+ *reg_e,
+ rex,
+ );
+ }
+
+ RegMemImm::Mem { addr } => {
+ let amode = addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x0FAF,
+ 2,
+ reg_g.to_reg(),
+ &amode,
+ rex,
+ );
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+ let opcode = if use_imm8 { 0x6B } else { 0x69 };
+ // Yes, really, reg_g twice.
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcode,
+ 1,
+ reg_g.to_reg(),
+ reg_g.to_reg(),
+ rex,
+ );
+ emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32);
+ }
+ }
+ } else {
+ let (opcode_r, opcode_m, subopcode_i) = match op {
+ AluRmiROpcode::Add => (0x01, 0x03, 0),
+ AluRmiROpcode::Sub => (0x29, 0x2B, 5),
+ AluRmiROpcode::And => (0x21, 0x23, 4),
+ AluRmiROpcode::Or => (0x09, 0x0B, 1),
+ AluRmiROpcode::Xor => (0x31, 0x33, 6),
+ AluRmiROpcode::Mul => panic!("unreachable"),
+ };
+
+ match src {
+ RegMemImm::Reg { reg: reg_e } => {
+ // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
+ // duality). Do this too, so as to be able to compare generated machine
+ // code easily.
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcode_r,
+ 1,
+ *reg_e,
+ reg_g.to_reg(),
+ rex,
+ );
+ // NB: if this is ever extended to handle byte size ops, be sure to retain
+ // redundant REX prefixes.
+ }
+
+ RegMemImm::Mem { addr } => {
+ // Here we revert to the "normal" G-E ordering.
+ let amode = addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ opcode_m,
+ 1,
+ reg_g.to_reg(),
+ &amode,
+ rex,
+ );
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+ let opcode = if use_imm8 { 0x83 } else { 0x81 };
+ // And also here we use the "normal" G-E ordering.
+ let enc_g = int_reg_enc(reg_g.to_reg());
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ opcode,
+ 1,
+ subopcode_i,
+ enc_g,
+ rex,
+ );
+ emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32);
+ }
+ }
+ }
+ }
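+        // Example for the AluRmiR case above: a 64-bit `add` of rcx into rax takes the reg-reg
+        // path with the swapped encoding and emits 48 01 C8 (REX.W, opcode 0x01, ModRM with
+        // mod=11/reg=rcx/rm=rax).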
+
+ Inst::UnaryRmR { size, op, src, dst } => {
+ let (prefix, rex_flags) = match size {
+ 2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!(),
+ };
+
+ let (opcode, num_opcodes) = match op {
+ UnaryRmROpcode::Bsr => (0x0fbd, 2),
+ UnaryRmROpcode::Bsf => (0x0fbc, 2),
+ };
+
+ match src {
+ RegMem::Reg { reg: src } => emit_std_reg_reg(
+ sink,
+ prefix,
+ opcode,
+ num_opcodes,
+ dst.to_reg(),
+ *src,
+ rex_flags,
+ ),
+ RegMem::Mem { addr: src } => {
+ let amode = src.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ num_opcodes,
+ dst.to_reg(),
+ &amode,
+ rex_flags,
+ );
+ }
+ }
+ }
+
+ Inst::Not { size, src } => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ let subopcode = 2;
+ let src = int_reg_enc(src.to_reg());
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
+ }
+
+ Inst::Neg { size, src } => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ let subopcode = 3;
+ let src = int_reg_enc(src.to_reg());
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
+ }
+
+ Inst::Div {
+ size,
+ signed,
+ divisor,
+ } => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ let loc = state.cur_srcloc();
+ sink.add_trap(loc, TrapCode::IntegerDivisionByZero);
+
+ let subopcode = if *signed { 7 } else { 6 };
+ match divisor {
+ RegMem::Reg { reg } => {
+ let src = int_reg_enc(*reg);
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
+ }
+ RegMem::Mem { addr: src } => {
+ let amode = src.finalize(state);
+ emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags);
+ }
+ }
+ }
+
+ Inst::MulHi { size, signed, rhs } => {
+ let (prefix, rex_flags) = match size {
+ 2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!(),
+ };
+
+ let subopcode = if *signed { 5 } else { 4 };
+ match rhs {
+ RegMem::Reg { reg } => {
+ let src = int_reg_enc(*reg);
+ emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
+ }
+ RegMem::Mem { addr: src } => {
+ let amode = src.finalize(state);
+ emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags);
+ }
+ }
+ }
+
+        Inst::SignExtendData { size } => match size {
+            1 => {
+                // cbw: sign-extend %al into %ax.
+                sink.put1(0x66);
+                sink.put1(0x98);
+            }
+            2 => {
+                // cwd: sign-extend %ax into %dx:%ax.
+                sink.put1(0x66);
+                sink.put1(0x99);
+            }
+            // cdq: sign-extend %eax into %edx:%eax.
+            4 => sink.put1(0x99),
+            8 => {
+                // cqo: sign-extend %rax into %rdx:%rax.
+                sink.put1(0x48);
+                sink.put1(0x99);
+            }
+            _ => unreachable!(),
+        },
+
+ Inst::CheckedDivOrRemSeq {
+ kind,
+ size,
+ divisor,
+ tmp,
+ } => {
+ // Generates the following code sequence:
+ //
+ // ;; check divide by zero:
+ // cmp 0 %divisor
+ // jnz $after_trap
+ // ud2
+ // $after_trap:
+ //
+ // ;; for signed modulo/div:
+ // cmp -1 %divisor
+ // jnz $do_op
+ // ;; for signed modulo, result is 0
+ // mov #0, %rdx
+ // j $done
+ // ;; for signed div, check for integer overflow against INT_MIN of the right size
+ // cmp INT_MIN, %rax
+ // jnz $do_op
+ // ud2
+ //
+ // $do_op:
+ // ;; if signed
+ // cdq ;; sign-extend from rax into rdx
+ // ;; else
+ // mov #0, %rdx
+ // idiv %divisor
+ //
+ // $done:
+ debug_assert!(info.flags().avoid_div_traps());
+
+ // Check if the divisor is zero, first.
+ let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg());
+ inst.emit(sink, info, state);
+
+ let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero);
+ inst.emit(sink, info, state);
+
+ let (do_op, done_label) = if kind.is_signed() {
+ // Now check if the divisor is -1.
+ let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor.to_reg());
+ inst.emit(sink, info, state);
+
+ let do_op = sink.get_label();
+
+ // If not equal, jump to do-op.
+ one_way_jmp(sink, CC::NZ, do_op);
+
+ // Here, divisor == -1.
+ if !kind.is_div() {
+ // x % -1 = 0; put the result into the destination, $rdx.
+ let done_label = sink.get_label();
+
+ let inst = Inst::imm(
+ OperandSize::from_bytes(*size as u32),
+ 0,
+ Writable::from_reg(regs::rdx()),
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done_label);
+ inst.emit(sink, info, state);
+
+ (Some(do_op), Some(done_label))
+ } else {
+ // Check for integer overflow.
+ if *size == 8 {
+ let tmp = tmp.expect("temporary for i64 sdiv");
+
+ let inst = Inst::imm(OperandSize::Size64, 0x8000000000000000, tmp);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
+ inst.emit(sink, info, state);
+ } else {
+ let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
+ inst.emit(sink, info, state);
+ }
+
+ // If not equal, jump over the trap.
+ let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+
+ (Some(do_op), None)
+ }
+ } else {
+ (None, None)
+ };
+
+ if let Some(do_op) = do_op {
+ sink.bind_label(do_op);
+ }
+
+ assert!(
+ *size > 1,
+ "CheckedDivOrRemSeq for i8 is not yet implemented"
+ );
+
+ // Fill in the high parts:
+ if kind.is_signed() {
+ // sign-extend the sign-bit of rax into rdx, for signed opcodes.
+ let inst = Inst::sign_extend_data(*size);
+ inst.emit(sink, info, state);
+ } else {
+ // zero for unsigned opcodes.
+ let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
+ inst.emit(sink, info, state);
+ }
+
+ let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor.to_reg()));
+ inst.emit(sink, info, state);
+
+ // Lowering takes care of moving the result back into the right register, see comment
+ // there.
+
+ if let Some(done) = done_label {
+ sink.bind_label(done);
+ }
+ }
+
+ Inst::Imm {
+ dst_is_64,
+ simm64,
+ dst,
+ } => {
+ let enc_dst = int_reg_enc(dst.to_reg());
+ if *dst_is_64 {
+ if low32_will_sign_extend_to_64(*simm64) {
+ // Sign-extended move imm32.
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ 0xC7,
+ 1,
+ /* subopcode */ 0,
+ enc_dst,
+ RexFlags::set_w(),
+ );
+ sink.put4(*simm64 as u32);
+ } else {
+ sink.put1(0x48 | ((enc_dst >> 3) & 1));
+ sink.put1(0xB8 | (enc_dst & 7));
+ sink.put8(*simm64);
+ }
+ } else {
+ if ((enc_dst >> 3) & 1) == 1 {
+ sink.put1(0x41);
+ }
+ sink.put1(0xB8 | (enc_dst & 7));
+ sink.put4(*simm64 as u32);
+ }
+ }
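+        // Example for the Imm case above: a 64-bit move of the constant 1 into rax takes the
+        // sign-extended imm32 form and emits 48 C7 C0 01 00 00 00, whereas a constant that does
+        // not fit in a sign-extended 32 bits falls back to the movabs form, 48 B8 followed by the
+        // full 8-byte immediate.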
+
+ Inst::MovRR { is_64, src, dst } => {
+ let rex = if *is_64 {
+ RexFlags::set_w()
+ } else {
+ RexFlags::clear_w()
+ };
+ emit_std_reg_reg(sink, LegacyPrefixes::None, 0x89, 1, *src, dst.to_reg(), rex);
+ }
+
+ Inst::MovzxRmR { ext_mode, src, dst } => {
+ let (opcodes, num_opcodes, mut rex_flags) = match ext_mode {
+ ExtMode::BL => {
+ // MOVZBL is (REX.W==0) 0F B6 /r
+ (0x0FB6, 2, RexFlags::clear_w())
+ }
+ ExtMode::BQ => {
+ // MOVZBQ is (REX.W==1) 0F B6 /r
+ // I'm not sure why the Intel manual offers different
+ // encodings for MOVZBQ than for MOVZBL. AIUI they should
+ // achieve the same, since MOVZBL is just going to zero out
+ // the upper half of the destination anyway.
+ (0x0FB6, 2, RexFlags::set_w())
+ }
+ ExtMode::WL => {
+ // MOVZWL is (REX.W==0) 0F B7 /r
+ (0x0FB7, 2, RexFlags::clear_w())
+ }
+ ExtMode::WQ => {
+ // MOVZWQ is (REX.W==1) 0F B7 /r
+ (0x0FB7, 2, RexFlags::set_w())
+ }
+ ExtMode::LQ => {
+ // This is just a standard 32 bit load, and we rely on the
+ // default zero-extension rule to perform the extension.
+ // Note that in reg/reg mode, gcc seems to use the swapped form R/RM, which we
+ // don't do here, since it's the same encoding size.
+ // MOV r/m32, r32 is (REX.W==0) 8B /r
+ (0x8B, 1, RexFlags::clear_w())
+ }
+ };
+
+ match src {
+ RegMem::Reg { reg: src } => {
+ match ext_mode {
+ ExtMode::BL | ExtMode::BQ => {
+ // A redundant REX prefix must be emitted for certain register inputs.
+ let enc_src = int_reg_enc(*src);
+ if enc_src >= 4 && enc_src <= 7 {
+ rex_flags.always_emit();
+ };
+ }
+ _ => {}
+ }
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ *src,
+ rex_flags,
+ )
+ }
+
+ RegMem::Mem { addr: src } => {
+ let src = &src.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ src,
+ rex_flags,
+ )
+ }
+ }
+ }
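+        // Example for the MovzxRmR case above: `movzbl %sil, %eax` needs the apparently redundant
+        // REX byte forced by always_emit(), giving 40 0F B6 C6; without that prefix the same
+        // bytes 0F B6 C6 would instead encode `movzbl %dh, %eax`.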
+
+ Inst::Mov64MR { src, dst } => {
+ let src = &src.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x8B,
+ 1,
+ dst.to_reg(),
+ src,
+ RexFlags::set_w(),
+ )
+ }
+
+ Inst::LoadEffectiveAddress { addr, dst } => {
+ let amode = addr.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x8D,
+ 1,
+ dst.to_reg(),
+ &amode,
+ RexFlags::set_w(),
+ );
+ }
+
+ Inst::MovsxRmR { ext_mode, src, dst } => {
+ let (opcodes, num_opcodes, mut rex_flags) = match ext_mode {
+ ExtMode::BL => {
+ // MOVSBL is (REX.W==0) 0F BE /r
+ (0x0FBE, 2, RexFlags::clear_w())
+ }
+ ExtMode::BQ => {
+ // MOVSBQ is (REX.W==1) 0F BE /r
+ (0x0FBE, 2, RexFlags::set_w())
+ }
+ ExtMode::WL => {
+ // MOVSWL is (REX.W==0) 0F BF /r
+ (0x0FBF, 2, RexFlags::clear_w())
+ }
+ ExtMode::WQ => {
+ // MOVSWQ is (REX.W==1) 0F BF /r
+ (0x0FBF, 2, RexFlags::set_w())
+ }
+ ExtMode::LQ => {
+ // MOVSLQ is (REX.W==1) 63 /r
+ (0x63, 1, RexFlags::set_w())
+ }
+ };
+
+ match src {
+ RegMem::Reg { reg: src } => {
+ match ext_mode {
+ ExtMode::BL | ExtMode::BQ => {
+ // A redundant REX prefix must be emitted for certain register inputs.
+ let enc_src = int_reg_enc(*src);
+ if enc_src >= 4 && enc_src <= 7 {
+ rex_flags.always_emit();
+ };
+ }
+ _ => {}
+ }
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ *src,
+ rex_flags,
+ )
+ }
+
+ RegMem::Mem { addr: src } => {
+ let src = &src.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ src,
+ rex_flags,
+ )
+ }
+ }
+ }
+
+ Inst::MovRM { size, src, dst } => {
+ let dst = &dst.finalize(state);
+
+ match size {
+ 1 => {
+ // This is one of the few places where the presence of a
+ // redundant REX prefix changes the meaning of the
+ // instruction.
+ let mut rex = RexFlags::clear_w();
+
+ let enc_src = int_reg_enc(*src);
+ if enc_src >= 4 && enc_src <= 7 {
+ rex.always_emit();
+ };
+
+ // MOV r8, r/m8 is (REX.W==0) 88 /r
+ emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
+ }
+
+ 2 => {
+ // MOV r16, r/m16 is 66 (REX.W==0) 89 /r
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::_66,
+ 0x89,
+ 1,
+ *src,
+ dst,
+ RexFlags::clear_w(),
+ )
+ }
+
+ 4 => {
+ // MOV r32, r/m32 is (REX.W==0) 89 /r
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x89,
+ 1,
+ *src,
+ dst,
+ RexFlags::clear_w(),
+ )
+ }
+
+ 8 => {
+ // MOV r64, r/m64 is (REX.W==1) 89 /r
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x89,
+ 1,
+ *src,
+ dst,
+ RexFlags::set_w(),
+ )
+ }
+
+ _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
+ }
+ }
+
+ Inst::ShiftR {
+ size,
+ kind,
+ num_bits,
+ dst,
+ } => {
+ let enc_dst = int_reg_enc(dst.to_reg());
+ let subopcode = match kind {
+ ShiftKind::RotateLeft => 0,
+ ShiftKind::RotateRight => 1,
+ ShiftKind::ShiftLeft => 4,
+ ShiftKind::ShiftRightLogical => 5,
+ ShiftKind::ShiftRightArithmetic => 7,
+ };
+
+ match num_bits {
+ None => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xD2, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ // SHL/SHR/SAR %cl, reg8 is (REX.W==0) D2 /subopcode
+ // SHL/SHR/SAR %cl, reg16 is 66 (REX.W==0) D3 /subopcode
+ // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
+ // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
+ }
+
+ Some(num_bits) => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xC0, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ // SHL/SHR/SAR $ib, reg8 is (REX.W==0) C0 /subopcode
+ // SHL/SHR/SAR $ib, reg16 is 66 (REX.W==0) C1 /subopcode
+ // SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
+ // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
+ // When the shift amount is 1, there's an even shorter encoding, but we don't
+ // bother with that nicety here.
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
+ sink.put1(*num_bits);
+ }
+ }
+ }
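+        // Example for the ShiftR case above: `shl rax, 3` uses the immediate form and emits
+        // 48 C1 E0 03 (REX.W, opcode 0xC1, ModRM with mod=11/reg=4 (shl)/rm=rax, then the 8-bit
+        // shift amount), while the variable-count form (shift by %cl) uses 0xD3 and has no
+        // trailing immediate.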
+
+ Inst::XmmRmiReg { opcode, src, dst } => {
+ let rex = RexFlags::clear_w();
+ let prefix = LegacyPrefixes::_66;
+ if let RegMemImm::Imm { simm32 } = src {
+ let (opcode_bytes, reg_digit) = match opcode {
+ SseOpcode::Psllw => (0x0F71, 6),
+ SseOpcode::Pslld => (0x0F72, 6),
+ SseOpcode::Psllq => (0x0F73, 6),
+ SseOpcode::Psraw => (0x0F71, 4),
+ SseOpcode::Psrad => (0x0F72, 4),
+ SseOpcode::Psrlw => (0x0F71, 2),
+ SseOpcode::Psrld => (0x0F72, 2),
+ SseOpcode::Psrlq => (0x0F73, 2),
+ _ => panic!("invalid opcode: {}", opcode),
+ };
+ let dst_enc = reg_enc(dst.to_reg());
+ emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex);
+ let imm = (*simm32)
+ .try_into()
+ .expect("the immediate must be convertible to a u8");
+ sink.put1(imm);
+ } else {
+ let opcode_bytes = match opcode {
+ SseOpcode::Psllw => 0x0FF1,
+ SseOpcode::Pslld => 0x0FF2,
+ SseOpcode::Psllq => 0x0FF3,
+ SseOpcode::Psraw => 0x0FE1,
+ SseOpcode::Psrad => 0x0FE2,
+ SseOpcode::Psrlw => 0x0FD1,
+ SseOpcode::Psrld => 0x0FD2,
+ SseOpcode::Psrlq => 0x0FD3,
+ _ => panic!("invalid opcode: {}", opcode),
+ };
+
+ match src {
+ RegMemImm::Reg { reg } => {
+ emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
+ }
+ RegMemImm::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode_bytes,
+ 2,
+ dst.to_reg(),
+ addr,
+ rex,
+ );
+ }
+ RegMemImm::Imm { .. } => unreachable!(),
+ }
+ };
+ }
+
+ Inst::CmpRmiR {
+ size,
+ src: src_e,
+ dst: reg_g,
+ } => {
+ let mut prefix = LegacyPrefixes::None;
+ if *size == 2 {
+ prefix = LegacyPrefixes::_66;
+ }
+
+ let mut rex = match size {
+ 8 => RexFlags::set_w(),
+ 4 | 2 => RexFlags::clear_w(),
+ 1 => {
+ let mut rex = RexFlags::clear_w();
+ // Here, a redundant REX prefix changes the meaning of the instruction.
+ let enc_g = int_reg_enc(*reg_g);
+ if enc_g >= 4 && enc_g <= 7 {
+ rex.always_emit();
+ }
+ rex
+ }
+ _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
+ };
+
+ match src_e {
+ RegMemImm::Reg { reg: reg_e } => {
+ if *size == 1 {
+ // Check whether the E register forces the use of a redundant REX.
+ let enc_e = int_reg_enc(*reg_e);
+ if enc_e >= 4 && enc_e <= 7 {
+ rex.always_emit();
+ }
+ }
+
+ // Use the swapped operands encoding, to stay consistent with the output of
+ // gcc/llvm.
+ let opcode = if *size == 1 { 0x38 } else { 0x39 };
+ emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex);
+ }
+
+ RegMemImm::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ // Whereas here we revert to the "normal" G-E ordering.
+ let opcode = if *size == 1 { 0x3A } else { 0x3B };
+ emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex);
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ // FIXME JRS 2020Feb11: there are shorter encodings for
+ // cmp $imm, rax/eax/ax/al.
+ let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+
+ // And also here we use the "normal" G-E ordering.
+ let opcode = if *size == 1 {
+ 0x80
+ } else if use_imm8 {
+ 0x83
+ } else {
+ 0x81
+ };
+
+ let enc_g = int_reg_enc(*reg_g);
+ emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex);
+ emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32);
+ }
+ }
+ }
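+        // Example for the CmpRmiR case above: a 64-bit compare of rax against rcx in the reg-reg
+        // form uses the swapped encoding and emits 48 39 C8 (REX.W, opcode 0x39, ModRM with
+        // mod=11/reg=rcx/rm=rax), matching gcc/llvm output for `cmp %rcx, %rax`.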
+
+ Inst::Setcc { cc, dst } => {
+ let opcode = 0x0f90 + cc.get_enc() as u32;
+ let mut rex_flags = RexFlags::clear_w();
+ rex_flags.always_emit();
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ opcode,
+ 2,
+ 0,
+ reg_enc(dst.to_reg()),
+ rex_flags,
+ );
+ }
+
+ Inst::Cmove {
+ size,
+ cc,
+ src,
+ dst: reg_g,
+ } => {
+ let (prefix, rex_flags) = match size {
+ 2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("invalid size spec for cmove"),
+ };
+ let opcode = 0x0F40 + cc.get_enc() as u32;
+ match src {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ 2,
+ reg_g.to_reg(),
+ addr,
+ rex_flags,
+ );
+ }
+ }
+ }
+
+ Inst::XmmCmove {
+ is_64,
+ cc,
+ src,
+ dst,
+ } => {
+            // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
+            // this doesn't clobber flags. Make sure not to clobber them here.
+ let next = sink.get_label();
+
+ // Jump if cc is *not* set.
+ one_way_jmp(sink, cc.invert(), next);
+
+ let op = if *is_64 {
+ SseOpcode::Movsd
+ } else {
+ SseOpcode::Movss
+ };
+ let inst = Inst::xmm_unary_rm_r(op, src.clone(), *dst);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(next);
+ }
+
+ Inst::Push64 { src } => {
+ match src {
+ RegMemImm::Reg { reg } => {
+ let enc_reg = int_reg_enc(*reg);
+ let rex = 0x40 | ((enc_reg >> 3) & 1);
+ if rex != 0x40 {
+ sink.put1(rex);
+ }
+ sink.put1(0x50 | (enc_reg & 7));
+ }
+
+ RegMemImm::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_enc_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 6, /*subopcode*/
+ addr,
+ RexFlags::clear_w(),
+ );
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ if low8_will_sign_extend_to_64(*simm32) {
+ sink.put1(0x6A);
+ sink.put1(*simm32 as u8);
+ } else {
+ sink.put1(0x68);
+ sink.put4(*simm32);
+ }
+ }
+ }
+ }
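+        // Example for the Push64 case above: `push %rbx` is the single byte 53, `push %r12`
+        // needs the REX.B prefix and is 41 54, a small immediate such as 5 is 6A 05, and a larger
+        // immediate such as 0x12345 is 68 45 23 01 00.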
+
+ Inst::Pop64 { dst } => {
+ let enc_dst = int_reg_enc(dst.to_reg());
+ if enc_dst >= 8 {
+ // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant here.
+ sink.put1(0x41);
+ }
+ sink.put1(0x58 + (enc_dst & 7));
+ }
+
+ Inst::CallKnown { dest, opcode, .. } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
+ }
+ sink.put1(0xE8);
+ // The addend adjusts for the difference between the end of the instruction and the
+ // beginning of the immediate field.
+ emit_reloc(sink, state, Reloc::X86CallPCRel4, &dest, -4);
+ sink.put4(0);
+ if opcode.is_call() {
+ let loc = state.cur_srcloc();
+ sink.add_call_site(loc, *opcode);
+ }
+ }
+
+ Inst::CallUnknown { dest, opcode, .. } => {
+ let start_offset = sink.cur_offset();
+ match dest {
+ RegMem::Reg { reg } => {
+ let reg_enc = int_reg_enc(*reg);
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 2, /*subopcode*/
+ reg_enc,
+ RexFlags::clear_w(),
+ );
+ }
+
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_enc_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 2, /*subopcode*/
+ addr,
+ RexFlags::clear_w(),
+ );
+ }
+ }
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
+ }
+ if opcode.is_call() {
+ let loc = state.cur_srcloc();
+ sink.add_call_site(loc, *opcode);
+ }
+ }
+
+ Inst::Ret {} => sink.put1(0xC3),
+
+ Inst::JmpKnown { dst } => {
+ let br_start = sink.cur_offset();
+ let br_disp_off = br_start + 1;
+ let br_end = br_start + 5;
+
+ sink.use_label_at_offset(br_disp_off, *dst, LabelUse::JmpRel32);
+ sink.add_uncond_branch(br_start, br_end, *dst);
+
+ sink.put1(0xE9);
+ // Placeholder for the label value.
+ sink.put4(0x0);
+ }
+
+ Inst::JmpIf { cc, taken } => {
+ let cond_start = sink.cur_offset();
+ let cond_disp_off = cond_start + 2;
+
+ sink.use_label_at_offset(cond_disp_off, *taken, LabelUse::JmpRel32);
+ // Since this is not a terminator, don't enroll in the branch inversion mechanism.
+
+ sink.put1(0x0F);
+ sink.put1(0x80 + cc.get_enc());
+ // Placeholder for the label value.
+ sink.put4(0x0);
+ }
+
+ Inst::JmpCond {
+ cc,
+ taken,
+ not_taken,
+ } => {
+ // If taken.
+ let cond_start = sink.cur_offset();
+ let cond_disp_off = cond_start + 2;
+ let cond_end = cond_start + 6;
+
+ sink.use_label_at_offset(cond_disp_off, *taken, LabelUse::JmpRel32);
+ let inverted: [u8; 6] = [0x0F, 0x80 + (cc.invert().get_enc()), 0x00, 0x00, 0x00, 0x00];
+ sink.add_cond_branch(cond_start, cond_end, *taken, &inverted[..]);
+
+ sink.put1(0x0F);
+ sink.put1(0x80 + cc.get_enc());
+ // Placeholder for the label value.
+ sink.put4(0x0);
+
+ // If not taken.
+ let uncond_start = sink.cur_offset();
+ let uncond_disp_off = uncond_start + 1;
+ let uncond_end = uncond_start + 5;
+
+ sink.use_label_at_offset(uncond_disp_off, *not_taken, LabelUse::JmpRel32);
+ sink.add_uncond_branch(uncond_start, uncond_end, *not_taken);
+
+ sink.put1(0xE9);
+ // Placeholder for the label value.
+ sink.put4(0x0);
+ }
+
+ Inst::JmpUnknown { target } => {
+ match target {
+ RegMem::Reg { reg } => {
+ let reg_enc = int_reg_enc(*reg);
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 4, /*subopcode*/
+ reg_enc,
+ RexFlags::clear_w(),
+ );
+ }
+
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_enc_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 4, /*subopcode*/
+ addr,
+ RexFlags::clear_w(),
+ );
+ }
+ }
+ }
+
+ Inst::JmpTableSeq {
+ idx,
+ tmp1,
+ tmp2,
+ ref targets,
+ default_target,
+ ..
+ } => {
+ // This sequence is *one* instruction in the vcode, and is expanded only here at
+ // emission time, because we cannot allow the regalloc to insert spills/reloads in
+ // the middle; we depend on hardcoded PC-rel addressing below.
+ //
+ // We don't have to worry about emitting islands, because the only label-use type has a
+ // maximum range of 2 GB. If we later consider using shorter-range label references,
+ // this will need to be revisited.
+
+            // Save the index in a tmp (the live range of idx only goes to the start of this
+            // sequence; tmp1 or tmp2 may overwrite it).
+
+ // We generate the following sequence:
+ // ;; generated by lowering: cmp #jmp_table_size, %idx
+ // jnb $default_target
+ // movl %idx, %tmp2
+ // lea start_of_jump_table_offset(%rip), %tmp1
+ // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4
+ // addq %tmp2, %tmp1
+ // j *%tmp1
+ // $start_of_jump_table:
+ // -- jump table entries
+ one_way_jmp(sink, CC::NB, *default_target); // idx unsigned >= jmp table size
+
+ // Copy the index (and make sure to clear the high 32-bits lane of tmp2).
+ let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2);
+ inst.emit(sink, info, state);
+
+ // Load base address of jump table.
+ let start_of_jumptable = sink.get_label();
+ let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), *tmp1);
+ inst.emit(sink, info, state);
+
+ // Load value out of the jump table. It's a relative offset to the target block, so it
+ // might be negative; use a sign-extension.
+ let inst = Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
+ *tmp2,
+ );
+ inst.emit(sink, info, state);
+
+ // Add base of jump table to jump-table-sourced block offset.
+ let inst = Inst::alu_rmi_r(
+ true, /* is_64 */
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp2.to_reg()),
+ *tmp1,
+ );
+ inst.emit(sink, info, state);
+
+ // Branch to computed address.
+ let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg()));
+ inst.emit(sink, info, state);
+
+ // Emit jump table (table of 32-bit offsets).
+ sink.bind_label(start_of_jumptable);
+ let jt_off = sink.cur_offset();
+ for &target in targets.iter() {
+ let word_off = sink.cur_offset();
+ // off_into_table is an addend here embedded in the label to be later patched at
+ // the end of codegen. The offset is initially relative to this jump table entry;
+ // with the extra addend, it'll be relative to the jump table's start, after
+ // patching.
+ let off_into_table = word_off - jt_off;
+ sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
+ sink.put4(off_into_table);
+ }
+ }
+
+ Inst::TrapIf { cc, trap_code } => {
+ let else_label = sink.get_label();
+
+ // Jump over if the invert of CC is set (i.e. CC is not set).
+ one_way_jmp(sink, cc.invert(), else_label);
+
+ // Trap!
+ let inst = Inst::trap(*trap_code);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(else_label);
+ }
+
+ Inst::XmmUnaryRmR {
+ op,
+ src: src_e,
+ dst: reg_g,
+ } => {
+ let rex = RexFlags::clear_w();
+
+ let (prefix, opcode, num_opcodes) = match op {
+ SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
+ SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
+ SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
+ SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2),
+ SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2),
+ SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2),
+ SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
+ SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
+ SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2),
+ SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2),
+ SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
+ SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
+ SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
+ SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
+ SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
+ SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
+ SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+
+ match src_e {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(
+ sink,
+ prefix,
+ opcode,
+ num_opcodes,
+ reg_g.to_reg(),
+ *reg_e,
+ rex,
+ );
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ num_opcodes,
+ reg_g.to_reg(),
+ addr,
+ rex,
+ );
+ }
+ };
+ }
+
+ Inst::XmmRmR {
+ op,
+ src: src_e,
+ dst: reg_g,
+ } => {
+ let rex = RexFlags::clear_w();
+ let (prefix, opcode, length) = match op {
+ SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
+ SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
+ SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
+ SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
+ SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
+ SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
+ SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
+ SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
+ SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
+ SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
+ SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
+ SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
+ SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
+ SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2),
+ SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2),
+ SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2),
+ SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2),
+ SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2),
+ SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2),
+ SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2),
+ SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2),
+ SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2),
+ SseOpcode::Movlhps => (LegacyPrefixes::None, 0x0F16, 2),
+ SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
+ SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2),
+ SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2),
+ SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2),
+ SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2),
+ SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2),
+ SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2),
+ SseOpcode::Packsswb => (LegacyPrefixes::_66, 0x0F63, 2),
+ SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2),
+ SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2),
+ SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2),
+ SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
+ SseOpcode::Paddsb => (LegacyPrefixes::_66, 0x0FEC, 2),
+ SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
+ SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
+ SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
+ SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
+ SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
+ SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
+ SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
+ SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
+ SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
+ SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
+ SseOpcode::Pcmpeqq => (LegacyPrefixes::_66, 0x0F3829, 3),
+ SseOpcode::Pcmpgtb => (LegacyPrefixes::_66, 0x0F64, 2),
+ SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
+ SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
+ SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
+ SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
+ SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
+ SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
+ SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2),
+ SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3),
+ SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3),
+ SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3),
+ SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2),
+ SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3),
+ SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
+ SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
+ SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
+ SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
+ SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
+ SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
+ SseOpcode::Por => (LegacyPrefixes::_66, 0x0FEB, 2),
+ SseOpcode::Pshufb => (LegacyPrefixes::_66, 0x0F3800, 3),
+ SseOpcode::Psubb => (LegacyPrefixes::_66, 0x0FF8, 2),
+ SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
+ SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
+ SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
+ SseOpcode::Psubsb => (LegacyPrefixes::_66, 0x0FE8, 2),
+ SseOpcode::Psubsw => (LegacyPrefixes::_66, 0x0FE9, 2),
+ SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
+ SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
+ SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
+ SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
+ SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
+ SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
+ SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
+ SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
+ SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+
+ match src_e {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ length,
+ reg_g.to_reg(),
+ addr,
+ rex,
+ );
+ }
+ }
+ }
+
+ Inst::XmmMinMaxSeq {
+ size,
+ is_min,
+ lhs,
+ rhs_dst,
+ } => {
+ // Generates the following sequence:
+ // cmpss/cmpsd %lhs, %rhs_dst
+ // jnz do_min_max
+ // jp propagate_nan
+ //
+ // ;; ordered and equal: propagate the sign bit (for -0 vs 0):
+ // {and,or}{ss,sd} %lhs, %rhs_dst
+ // j done
+ //
+ // ;; to get the desired NaN behavior (signalling NaN transformed into a quiet NaN, the
+ // ;; NaN value is returned), we add both inputs.
+ // propagate_nan:
+ // add{ss,sd} %lhs, %rhs_dst
+ // j done
+ //
+ // do_min_max:
+ // {min,max}{ss,sd} %lhs, %rhs_dst
+ //
+ // done:
+ let done = sink.get_label();
+ let propagate_nan = sink.get_label();
+ let do_min_max = sink.get_label();
+
+ let (add_op, cmp_op, and_op, or_op, min_max_op) = match size {
+ OperandSize::Size32 => (
+ SseOpcode::Addss,
+ SseOpcode::Ucomiss,
+ SseOpcode::Andps,
+ SseOpcode::Orps,
+ if *is_min {
+ SseOpcode::Minss
+ } else {
+ SseOpcode::Maxss
+ },
+ ),
+ OperandSize::Size64 => (
+ SseOpcode::Addsd,
+ SseOpcode::Ucomisd,
+ SseOpcode::Andpd,
+ SseOpcode::Orpd,
+ if *is_min {
+ SseOpcode::Minsd
+ } else {
+ SseOpcode::Maxsd
+ },
+ ),
+ };
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(*lhs), rhs_dst.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NZ, do_min_max);
+ one_way_jmp(sink, CC::P, propagate_nan);
+
+ // Ordered and equal. The operands are bit-identical unless they are zero
+ // and negative zero. These instructions merge the sign bits in that
+ // case, and are no-ops otherwise.
+ let op = if *is_min { or_op } else { and_op };
+ let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+
+ // x86's min/max are not symmetric; if either operand is a NaN, they return the
+ // read-only operand: perform an addition between the two operands, which has the
+ // desired NaN propagation effects.
+ sink.bind_label(propagate_nan);
+ let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst);
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::P, done);
+
+ sink.bind_label(do_min_max);
+ let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(done);
+ }
+
+ Inst::XmmRmRImm {
+ op,
+ src,
+ dst,
+ imm,
+ is64,
+ } => {
+ let (prefix, opcode, len) = match op {
+ SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
+ SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
+ SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2),
+ SseOpcode::Cmpsd => (LegacyPrefixes::_F2, 0x0FC2, 2),
+ SseOpcode::Insertps => (LegacyPrefixes::_66, 0x0F3A21, 3),
+ SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
+ SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
+ SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
+ SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3),
+ SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
+ SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
+ SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+ let rex = if *is64 {
+ RexFlags::set_w()
+ } else {
+ RexFlags::clear_w()
+ };
+ let regs_swapped = match *op {
+ // These opcodes (and not the SSE2 version of PEXTRW) flip the operand
+ // encoding: `dst` in ModRM's r/m, `src` in ModRM's reg field.
+ SseOpcode::Pextrb | SseOpcode::Pextrd => true,
+ // The rest of the opcodes have the customary encoding: `dst` in ModRM's reg,
+ // `src` in ModRM's r/m field.
+ _ => false,
+ };
+ match src {
+ RegMem::Reg { reg } => {
+ if regs_swapped {
+ emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
+ } else {
+ emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), *reg, rex);
+ }
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ assert!(
+ !regs_swapped,
+ "No existing way to encode a mem argument in the ModRM r/m field."
+ );
+ emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex);
+ }
+ }
+ sink.put1(*imm);
+ }
+
+ Inst::XmmLoadConst { src, dst, ty } => {
+ let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src));
+ let load = Inst::load(*ty, load_offset, *dst, ExtKind::None);
+ load.emit(sink, info, state);
+ }
+
+ Inst::XmmUninitializedValue { .. } => {
+ // This instruction format only exists to declare a register as a `def`; no code is
+ // emitted.
+ }
+
+ Inst::XmmMovRM { op, src, dst } => {
+ let (prefix, opcode) = match op {
+ SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
+ SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
+ SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F),
+ SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
+ SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
+ SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
+ SseOpcode::Movups => (LegacyPrefixes::None, 0x0F11),
+ SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+ let dst = &dst.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ 2,
+ *src,
+ dst,
+ RexFlags::clear_w(),
+ );
+ }
+
+ Inst::XmmToGpr {
+ op,
+ src,
+ dst,
+ dst_size,
+ } => {
+ let (prefix, opcode, dst_first) = match op {
+ SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true),
+ SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true),
+ // Movd and movq use the same opcode; the presence of the REX prefix (set below)
+ // actually determines which is used.
+ SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false),
+ SseOpcode::Movmskps => (LegacyPrefixes::None, 0x0F50, true),
+ SseOpcode::Movmskpd => (LegacyPrefixes::_66, 0x0F50, true),
+ SseOpcode::Pmovmskb => (LegacyPrefixes::_66, 0x0FD7, true),
+ _ => panic!("unexpected opcode {:?}", op),
+ };
+ let rex = match dst_size {
+ OperandSize::Size32 => RexFlags::clear_w(),
+ OperandSize::Size64 => RexFlags::set_w(),
+ };
+
+ let (src, dst) = if dst_first {
+ (dst.to_reg(), *src)
+ } else {
+ (*src, dst.to_reg())
+ };
+
+ emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
+ }
+
+ Inst::GprToXmm {
+ op,
+ src: src_e,
+ dst: reg_g,
+ src_size,
+ } => {
+ let (prefix, opcode) = match op {
+ // Movd and movq use the same opcode; the presence of the REX prefix (set below)
+ // actually determines which is used.
+ SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E),
+ SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A),
+ SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A),
+ _ => panic!("unexpected opcode {:?}", op),
+ };
+ let rex = match *src_size {
+ OperandSize::Size32 => RexFlags::clear_w(),
+ OperandSize::Size64 => RexFlags::set_w(),
+ };
+ match src_e {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
+ }
+ }
+ }
+
+ Inst::XmmCmpRmR { op, src, dst } => {
+ let rex = RexFlags::clear_w();
+ let (prefix, opcode, len) = match op {
+ SseOpcode::Ptest => (LegacyPrefixes::_66, 0x0F3817, 3),
+ SseOpcode::Ucomisd => (LegacyPrefixes::_66, 0x0F2E, 2),
+ SseOpcode::Ucomiss => (LegacyPrefixes::None, 0x0F2E, 2),
+ _ => unimplemented!("Emit xmm cmp rm r"),
+ };
+
+ match src {
+ RegMem::Reg { reg } => {
+ emit_std_reg_reg(sink, prefix, opcode, len, *dst, *reg, rex);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex);
+ }
+ }
+ }
+
+ Inst::CvtUint64ToFloatSeq {
+ to_f64,
+ src,
+ dst,
+ tmp_gpr1,
+ tmp_gpr2,
+ } => {
+ // Note: this sequence is specific to 64-bit mode; a 32-bit mode would require a
+ // different sequence.
+ //
+ // Emit the following sequence:
+ //
+ // cmp 0, %src
+ // jl handle_negative
+ //
+ // ;; handle positive, which can't overflow
+ // cvtsi2sd/cvtsi2ss %src, %dst
+ // j done
+ //
+ // ;; handle negative: see below for an explanation of what it's doing.
+ // handle_negative:
+ // mov %src, %tmp_gpr1
+ // shr $1, %tmp_gpr1
+ // mov %src, %tmp_gpr2
+ // and $1, %tmp_gpr2
+ // or %tmp_gpr1, %tmp_gpr2
+ // cvtsi2sd/cvtsi2ss %tmp_gpr2, %dst
+ // addsd/addss %dst, %dst
+ //
+ // done:
+
+ assert_ne!(src, tmp_gpr1);
+ assert_ne!(src, tmp_gpr2);
+ assert_ne!(tmp_gpr1, tmp_gpr2);
+
+ let handle_negative = sink.get_label();
+ let done = sink.get_label();
+
+ // If x seen as a signed int64 is not negative, a signed-conversion will do the right
+ // thing.
+            // TODO: use `test src, src` here.
+ let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), src.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::L, handle_negative);
+
+ // Handle a positive int64, which is the "easy" case: a signed conversion will do the
+ // right thing.
+ emit_signed_cvt(sink, info, state, src.to_reg(), *dst, *to_f64);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(handle_negative);
+
+ // Divide x by two to get it in range for the signed conversion, keep the LSB, and
+ // scale it back up on the FP side.
+ let inst = Inst::gen_move(*tmp_gpr1, src.to_reg(), types::I64);
+ inst.emit(sink, info, state);
+
+ // tmp_gpr1 := src >> 1
+ let inst = Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(1), *tmp_gpr1);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::alu_rmi_r(
+ true, /* 64bits */
+ AluRmiROpcode::And,
+ RegMemImm::imm(1),
+ *tmp_gpr2,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::alu_rmi_r(
+ true, /* 64bits */
+ AluRmiROpcode::Or,
+ RegMemImm::reg(tmp_gpr1.to_reg()),
+ *tmp_gpr2,
+ );
+ inst.emit(sink, info, state);
+
+ emit_signed_cvt(sink, info, state, tmp_gpr2.to_reg(), *dst, *to_f64);
+
+ let add_op = if *to_f64 {
+ SseOpcode::Addsd
+ } else {
+ SseOpcode::Addss
+ };
+ let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(done);
+ }
+
+ Inst::CvtFloatToSintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ } => {
+ // Emits the following common sequence:
+ //
+ // cvttss2si/cvttsd2si %src, %dst
+            //   cmp 1, %dst
+ // jno done
+ //
+ // Then, for saturating conversions:
+ //
+            //   ;; check for NaN
+            //   ucomiss/ucomisd %src, %src
+            //   jnp not_nan
+            //   xor %dst, %dst
+            //   j done
+            //
+            //   ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is
+            //   ;; already in %dst.
+            //   not_nan:
+            //   xorpd %tmp_xmm, %tmp_xmm
+            //   ucomiss/ucomisd %src, %tmp_xmm
+            //   jnb done
+            //   mov $INT_MAX, %dst
+            //
+            //   done:
+ //
+ // Then, for non-saturating conversions:
+ //
+ // ;; check for NaN
+            //   ucomiss/ucomisd %src, %src
+ // jnp not_nan
+ // ud2 trap BadConversionToInteger
+ //
+ // ;; check if INT_MIN was the correct result, against a magic constant:
+ // not_nan:
+            //   mov $magic, %tmp_gpr
+ // movq/movd %tmp_gpr, %tmp_xmm
+            //   ucomiss/ucomisd %tmp_xmm, %src
+            //   jnb/jnbe check_positive
+ // ud2 trap IntegerOverflow
+ //
+ // ;; if positive, it was a real overflow
+ // check_positive:
+ // xorpd %tmp_xmm, %tmp_xmm
+            //   ucomiss/ucomisd %src, %tmp_xmm
+ // jnb done
+ // ud2 trap IntegerOverflow
+ //
+ // done:
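+            //
+            // The initial `cmp 1, %dst` / `jno` pair works because cvttss2si/cvttsd2si write
+            // INT_MIN (the x86 "integer indefinite" value for the destination width) whenever
+            // the conversion is invalid (overflow or NaN), and subtracting 1 from %dst
+            // overflows the signed range only when %dst is INT_MIN; so OF clear means the
+            // truncation definitely succeeded.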
+
+ let src = src.to_reg();
+
+ let (cast_op, cmp_op, trunc_op) = match src_size {
+ OperandSize::Size64 => (SseOpcode::Movq, SseOpcode::Ucomisd, SseOpcode::Cvttsd2si),
+ OperandSize::Size32 => (SseOpcode::Movd, SseOpcode::Ucomiss, SseOpcode::Cvttss2si),
+ };
+
+ let done = sink.get_label();
+ let not_nan = sink.get_label();
+
+ // The truncation.
+ let inst = Inst::xmm_to_gpr(trunc_op, src, *dst, *dst_size);
+ inst.emit(sink, info, state);
+
+            // Compare against 1: in case of overflow, the dst operand holds INT_MIN.
+ let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(1), dst.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NO, done); // no overflow => done
+
+ // Check for NaN.
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src);
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NP, not_nan); // go to not_nan if not a NaN
+
+ if *is_saturating {
+ // For NaN, emit 0.
+ let inst = Inst::alu_rmi_r(
+ *dst_size == OperandSize::Size64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(dst.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(not_nan);
+
+ // If the input was positive, saturate to INT_MAX.
+
+ // Zero out tmp_xmm.
+ let inst =
+ Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
+ inst.emit(sink, info, state);
+
+ // Jump if >= to done.
+ one_way_jmp(sink, CC::NB, done);
+
+ // Otherwise, put INT_MAX.
+ if *dst_size == OperandSize::Size64 {
+ let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
+ inst.emit(sink, info, state);
+ } else {
+ let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
+ inst.emit(sink, info, state);
+ }
+ } else {
+ let check_positive = sink.get_label();
+
+ let inst = Inst::trap(TrapCode::BadConversionToInteger);
+ inst.emit(sink, info, state);
+
+ // Check if INT_MIN was the correct result: determine the smallest floating point
+ // number that would convert to INT_MIN, put it in a temporary register, and compare
+ // against the src register.
+                // If the src register is less than (or, in some cases, less than or equal to)
+                // the threshold, trap!
+
+ sink.bind_label(not_nan);
+
+ let mut no_overflow_cc = CC::NB; // >=
+ let output_bits = dst_size.to_bits();
+ match *src_size {
+ OperandSize::Size32 => {
+ let cst = Ieee32::pow2(output_bits - 1).neg().bits();
+ let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
+ inst.emit(sink, info, state);
+ }
+ OperandSize::Size64 => {
+ // An f64 can represent `i32::min_value() - 1` exactly with precision to spare,
+ // so there are values less than -2^(N-1) that convert correctly to INT_MIN.
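+                        // For example, with a 32-bit destination, an f64 input of
+                        // -2147483648.5 truncates to i32::MIN and must not trap, even though
+                        // it is below -2^31; hence the strict (>) comparison against the
+                        // threshold in that case.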
+ let cst = if output_bits < 64 {
+ no_overflow_cc = CC::NBE; // >
+ Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+ } else {
+ Ieee64::pow2(output_bits - 1).neg()
+ };
+ let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
+ inst.emit(sink, info, state);
+ }
+ }
+
+ let inst =
+ Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src);
+ inst.emit(sink, info, state);
+
+ // jump over trap if src >= or > threshold
+ one_way_jmp(sink, no_overflow_cc, check_positive);
+
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+
+ // If positive, it was a real overflow.
+
+ sink.bind_label(check_positive);
+
+ // Zero out the tmp_xmm register.
+ let inst =
+ Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src
+
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(done);
+ }
+
+ Inst::CvtFloatToUintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ } => {
+ // The only difference in behavior between saturating and non-saturating is how we
+ // handle errors. Emits the following sequence:
+ //
+            // mov 2**(int_width - 1), %tmp_gpr
+ // movq/movd %tmp_gpr, %tmp_xmm
+            // ucomiss/ucomisd %tmp_xmm, %src
+ // jnb is_large
+ //
+ // ;; check for NaN inputs
+ // jnp not_nan
+ // -- non-saturating: ud2 trap BadConversionToInteger
+ // -- saturating: xor %dst, %dst; j done
+ //
+ // not_nan:
+ // cvttss2si/cvttsd2si %src, %dst
+ // cmp 0, %dst
+ // jnl done
+ // -- non-saturating: ud2 trap IntegerOverflow
+ // -- saturating: xor %dst, %dst; j done
+ //
+ // is_large:
+ // subss/subsd %tmp_xmm, %src ; <-- we clobber %src here
+            // cvttss2si/cvttsd2si %src, %dst
+ // cmp 0, %dst
+ // jnl next_is_large
+ // -- non-saturating: ud2 trap IntegerOverflow
+            // -- saturating: mov $UINT_MAX, %dst; j done
+ //
+ // next_is_large:
+            // add 2**(int_width - 1), %dst ;; 2 instructions for 64-bit integers
+ //
+ // done:
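+            //
+            // The is_large path relies on the fact that, for inputs in [2**(w-1), 2**w), the
+            // floating-point subtraction of 2**(w-1) is exact, so the signed truncation of the
+            // reduced value plus 2**(w-1) recovers the unsigned result. A scalar sketch, for
+            // illustration only (w = 64, f64 input x >= 2**63):
+            //
+            //     let dst = ((x - 9223372036854775808.0) as i64 as u64).wrapping_add(1 << 63);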
+
+ assert_ne!(tmp_xmm, src, "tmp_xmm clobbers src!");
+
+ let (sub_op, cast_op, cmp_op, trunc_op) = if *src_size == OperandSize::Size64 {
+ (
+ SseOpcode::Subsd,
+ SseOpcode::Movq,
+ SseOpcode::Ucomisd,
+ SseOpcode::Cvttsd2si,
+ )
+ } else {
+ (
+ SseOpcode::Subss,
+ SseOpcode::Movd,
+ SseOpcode::Ucomiss,
+ SseOpcode::Cvttss2si,
+ )
+ };
+
+ let done = sink.get_label();
+
+ let cst = if *src_size == OperandSize::Size64 {
+ Ieee64::pow2(dst_size.to_bits() - 1).bits()
+ } else {
+ Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64
+ };
+
+ let inst = Inst::imm(*src_size, cst, *tmp_gpr);
+ inst.emit(sink, info, state);
+
+ let inst =
+ Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src.to_reg());
+ inst.emit(sink, info, state);
+
+ let handle_large = sink.get_label();
+ one_way_jmp(sink, CC::NB, handle_large); // jump to handle_large if src >= large_threshold
+
+ let not_nan = sink.get_label();
+ one_way_jmp(sink, CC::NP, not_nan); // jump over trap if not NaN
+
+ if *is_saturating {
+ // Emit 0.
+ let inst = Inst::alu_rmi_r(
+ *dst_size == OperandSize::Size64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(dst.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+ } else {
+ // Trap.
+ let inst = Inst::trap(TrapCode::BadConversionToInteger);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(not_nan);
+
+            // Actual truncation for small inputs: if the result is negative, the truncation
+            // overflowed.
+
+ let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(0), dst.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done
+
+ if *is_saturating {
+                // The input was "small" (< 2**(width - 1)), so the only way to get an integer
+                // overflow is that the input was negative: saturate to the min value, i.e. 0.
+ let inst = Inst::alu_rmi_r(
+ *dst_size == OperandSize::Size64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(dst.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+ } else {
+ // Trap.
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+ }
+
+ // Now handle large inputs.
+
+ sink.bind_label(handle_large);
+
+ let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(0), dst.to_reg());
+ inst.emit(sink, info, state);
+
+ let next_is_large = sink.get_label();
+ one_way_jmp(sink, CC::NL, next_is_large); // if dst >= 0, jump to next_is_large
+
+ if *is_saturating {
+                // The input was "large" (>= 2**(width - 1)), so the only way to get an integer
+                // overflow is that the input was too large: saturate to the max value.
+ let inst = Inst::imm(
+ OperandSize::Size64,
+ if *dst_size == OperandSize::Size64 {
+ u64::max_value()
+ } else {
+ u32::max_value() as u64
+ },
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+ } else {
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(next_is_large);
+
+ if *dst_size == OperandSize::Size64 {
+ let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp_gpr.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+ } else {
+ let inst =
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(1 << 31), *dst);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(done);
+ }
+
+ Inst::LoadExtName { dst, name, offset } => {
+            // The full address can be encoded in the instruction as a 64-bit immediate, with a relocation.
+ // Generates: movabsq $name, %dst
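+            // Encoding: a REX.W byte (0x48, with REX.B folded in for r8..r15), then 0xB8+rd,
+            // then an 8-byte immediate that the Abs8 relocation below fills in with the
+            // symbol's address when the relocation is resolved.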
+ let enc_dst = int_reg_enc(dst.to_reg());
+ sink.put1(0x48 | ((enc_dst >> 3) & 1));
+ sink.put1(0xB8 | (enc_dst & 7));
+ emit_reloc(sink, state, Reloc::Abs8, name, *offset);
+ if info.flags().emit_all_ones_funcaddrs() {
+ sink.put8(u64::max_value());
+ } else {
+ sink.put8(0);
+ }
+ }
+
+ Inst::LockCmpxchg { ty, src, dst } => {
+ // lock cmpxchg{b,w,l,q} %src, (dst)
+ // Note that 0xF0 is the Lock prefix.
+ let (prefix, rex, opcodes) = match *ty {
+ types::I8 => {
+ let mut rex_flags = RexFlags::clear_w();
+ let enc_src = int_reg_enc(*src);
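+                    // Byte registers with encodings 4..=7 (spl, bpl, sil, dil) are only
+                    // addressable with a REX prefix; without one, those encodings select
+                    // ah/ch/dh/bh instead.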
+ if enc_src >= 4 && enc_src <= 7 {
+ rex_flags.always_emit();
+ };
+ (LegacyPrefixes::_F0, rex_flags, 0x0FB0)
+ }
+ types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),
+ types::I32 => (LegacyPrefixes::_F0, RexFlags::clear_w(), 0x0FB1),
+ types::I64 => (LegacyPrefixes::_F0, RexFlags::set_w(), 0x0FB1),
+ _ => unreachable!(),
+ };
+ let amode = dst.finalize(state);
+ emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex);
+ }
+
+ Inst::AtomicRmwSeq { ty, op } => {
+ // Emit this:
+ //
+ // mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
+ // again:
+ // movq %rax, %r11 // rax = old value, r11 = old value
+ // `op`q %r10, %r11 // rax = old value, r11 = new value
+ // lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value
+ // jnz again // If this is taken, rax will have a "revised" old value
+ //
+ // Operand conventions:
+ // IN: %r9 (addr), %r10 (2nd arg for `op`)
+ // OUT: %rax (old value), %r11 (trashed), %rflags (trashed)
+ //
+ // In the case where the operation is 'xchg', the "`op`q" instruction is instead
+ // movq %r10, %r11
+            // so that we simply write the "2nd arg for `op`" into the destination.
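+            //
+            // Roughly the equivalent of the following compare-and-swap loop (names are
+            // illustrative only):
+            //
+            //     let mut old = *addr;
+            //     loop {
+            //         let new = op(old, arg);
+            //         match cmpxchg(addr, old, new) {
+            //             Ok(_) => break,        // stored `new`
+            //             Err(cur) => old = cur, // cmpxchg already left `cur` in %rax
+            //         }
+            //     }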
+ let rax = regs::rax();
+ let r9 = regs::r9();
+ let r10 = regs::r10();
+ let r11 = regs::r11();
+ let rax_w = Writable::from_reg(rax);
+ let r11_w = Writable::from_reg(r11);
+ let amode = Amode::imm_reg(0, r9);
+ let again_label = sink.get_label();
+
+ // mov{zbq,zwq,zlq,q} (%r9), %rax
+ // No need to call `add_trap` here, since the `i1` emit will do that.
+ let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend);
+ i1.emit(sink, info, state);
+
+ // again:
+ sink.bind_label(again_label);
+
+ // movq %rax, %r11
+ let i2 = Inst::mov_r_r(true, rax, r11_w);
+ i2.emit(sink, info, state);
+
+ // opq %r10, %r11
+ let r10_rmi = RegMemImm::reg(r10);
+ let i3 = if *op == inst_common::AtomicRmwOp::Xchg {
+ Inst::mov_r_r(true, r10, r11_w)
+ } else {
+ let alu_op = match op {
+ inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add,
+ inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub,
+ inst_common::AtomicRmwOp::And => AluRmiROpcode::And,
+ inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or,
+ inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor,
+ inst_common::AtomicRmwOp::Xchg => unreachable!(),
+ };
+ Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w)
+ };
+ i3.emit(sink, info, state);
+
+ // lock cmpxchg{b,w,l,q} %r11, (%r9)
+ // No need to call `add_trap` here, since the `i4` emit will do that.
+ let i4 = Inst::LockCmpxchg {
+ ty: *ty,
+ src: r11,
+ dst: amode.into(),
+ };
+ i4.emit(sink, info, state);
+
+ // jnz again
+ one_way_jmp(sink, CC::NZ, again_label);
+ }
+
+ Inst::Fence { kind } => {
+ sink.put1(0x0F);
+ sink.put1(0xAE);
+ match kind {
+ FenceKind::MFence => sink.put1(0xF0), // mfence = 0F AE F0
+ FenceKind::LFence => sink.put1(0xE8), // lfence = 0F AE E8
+ FenceKind::SFence => sink.put1(0xF8), // sfence = 0F AE F8
+ }
+ }
+
+ Inst::Hlt => {
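+            // 0xCC is the single-byte int3 (breakpoint) encoding.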
+ sink.put1(0xcc);
+ }
+
+ Inst::Ud2 { trap_code } => {
+ let cur_srcloc = state.cur_srcloc();
+ sink.add_trap(cur_srcloc, *trap_code);
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
+ }
+ sink.put1(0x0f);
+ sink.put1(0x0b);
+ }
+
+ Inst::VirtualSPOffsetAdj { offset } => {
+ debug!(
+ "virtual sp offset adjusted by {} -> {}",
+ offset,
+ state.virtual_sp_offset + offset
+ );
+ state.virtual_sp_offset += offset;
+ }
+
+ Inst::Nop { len } => {
+ // These encodings can all be found in Intel's architecture manual, at the NOP
+ // instruction description.
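+            // Lengths above 9 bytes are emitted as a series of NOPs of at most 9 bytes each;
+            // the multi-byte forms below follow the manual's recommended multi-byte NOP
+            // sequences, which are generally cheaper to decode than a run of single 0x90s.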
+ let mut len = *len;
+ while len != 0 {
+ let emitted = u8::min(len, 9);
+ match emitted {
+ 0 => {}
+ 1 => sink.put1(0x90), // NOP
+ 2 => {
+ // 66 NOP
+ sink.put1(0x66);
+ sink.put1(0x90);
+ }
+ 3 => {
+ // NOP [EAX]
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x00);
+ }
+ 4 => {
+ // NOP 0(EAX), with 0 a 1-byte immediate.
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x40);
+ sink.put1(0x00);
+ }
+ 5 => {
+ // NOP [EAX, EAX, 1]
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x44);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 6 => {
+ // 66 NOP [EAX, EAX, 1]
+ sink.put1(0x66);
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x44);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 7 => {
+                        // NOP 0[EAX], but 0 is a 4-byte immediate.
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x80);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 8 => {
+                        // NOP 0[EAX, EAX, 1], with 0 a 4-byte immediate.
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x84);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 9 => {
+                        // 66 NOP 0[EAX, EAX, 1], with 0 a 4-byte immediate.
+ sink.put1(0x66);
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x84);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ _ => unreachable!(),
+ }
+ len -= emitted;
+ }
+ }
+
+ Inst::EpiloguePlaceholder => {
+ // Generate no code.
+ }
+ }
+
+ state.clear_post_insn();
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs
new file mode 100644
index 0000000000..06092d498a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs
@@ -0,0 +1,3593 @@
+//! Tests for the emitter
+//!
+//! See comments at the top of `fn x64_emit` for advice on how to create reliable test cases.
+//!
+//! to see stdout: cargo test -- --nocapture
+//!
+//! for this specific case, as of 24 Aug 2020:
+//!
+//! cd to the top of your wasmtime tree, then:
+//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
+//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
+//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
+//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit
+
+use super::*;
+use crate::isa::test_utils;
+use crate::isa::x64;
+use alloc::vec::Vec;
+
+#[test]
+fn test_x64_emit() {
+ let rax = regs::rax();
+ let rbx = regs::rbx();
+ let rcx = regs::rcx();
+ let rdx = regs::rdx();
+ let rsi = regs::rsi();
+ let rdi = regs::rdi();
+ let rsp = regs::rsp();
+ let rbp = regs::rbp();
+ let r8 = regs::r8();
+ let r9 = regs::r9();
+ let r10 = regs::r10();
+ let r11 = regs::r11();
+ let r12 = regs::r12();
+ let r13 = regs::r13();
+ let r14 = regs::r14();
+ let r15 = regs::r15();
+
+ let xmm0 = regs::xmm0();
+ let xmm1 = regs::xmm1();
+ let xmm2 = regs::xmm2();
+ let xmm3 = regs::xmm3();
+ let xmm4 = regs::xmm4();
+ let xmm5 = regs::xmm5();
+ let xmm6 = regs::xmm6();
+ let xmm7 = regs::xmm7();
+ let xmm8 = regs::xmm8();
+ let xmm9 = regs::xmm9();
+ let xmm10 = regs::xmm10();
+ let xmm11 = regs::xmm11();
+ let xmm12 = regs::xmm12();
+ let xmm13 = regs::xmm13();
+ let xmm14 = regs::xmm14();
+ let xmm15 = regs::xmm15();
+
+ // And Writable<> versions of the same:
+ let w_rax = Writable::<Reg>::from_reg(rax);
+ let w_rbx = Writable::<Reg>::from_reg(rbx);
+ let w_rcx = Writable::<Reg>::from_reg(rcx);
+ let w_rdx = Writable::<Reg>::from_reg(rdx);
+ let w_rsi = Writable::<Reg>::from_reg(rsi);
+ let w_rdi = Writable::<Reg>::from_reg(rdi);
+ let _w_rsp = Writable::<Reg>::from_reg(rsp);
+ let _w_rbp = Writable::<Reg>::from_reg(rbp);
+ let w_r8 = Writable::<Reg>::from_reg(r8);
+ let w_r9 = Writable::<Reg>::from_reg(r9);
+ let _w_r10 = Writable::<Reg>::from_reg(r10);
+ let w_r11 = Writable::<Reg>::from_reg(r11);
+ let w_r12 = Writable::<Reg>::from_reg(r12);
+ let w_r13 = Writable::<Reg>::from_reg(r13);
+ let w_r14 = Writable::<Reg>::from_reg(r14);
+ let w_r15 = Writable::<Reg>::from_reg(r15);
+
+ let w_xmm0 = Writable::<Reg>::from_reg(xmm0);
+ let w_xmm1 = Writable::<Reg>::from_reg(xmm1);
+ let w_xmm2 = Writable::<Reg>::from_reg(xmm2);
+ let w_xmm3 = Writable::<Reg>::from_reg(xmm3);
+ let w_xmm4 = Writable::<Reg>::from_reg(xmm4);
+ let w_xmm5 = Writable::<Reg>::from_reg(xmm5);
+ let w_xmm6 = Writable::<Reg>::from_reg(xmm6);
+ let w_xmm7 = Writable::<Reg>::from_reg(xmm7);
+ let w_xmm8 = Writable::<Reg>::from_reg(xmm8);
+ let w_xmm9 = Writable::<Reg>::from_reg(xmm9);
+ let w_xmm10 = Writable::<Reg>::from_reg(xmm10);
+ let w_xmm11 = Writable::<Reg>::from_reg(xmm11);
+ let w_xmm12 = Writable::<Reg>::from_reg(xmm12);
+ let w_xmm13 = Writable::<Reg>::from_reg(xmm13);
+ let w_xmm14 = Writable::<Reg>::from_reg(xmm14);
+ let w_xmm15 = Writable::<Reg>::from_reg(xmm15);
+
+ let mut insns = Vec::<(Inst, &str, &str)>::new();
+
+ // ========================================================
+ // Cases aimed at checking Addr-esses: IR (Imm + Reg)
+ //
+ // These are just a bunch of loads with all supported (by the emitter)
+ // permutations of address formats.
+ //
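+    // Note how %rbp/%r13 and %rsp/%r12 bases are encoded specially: a base of rbp or r13
+    // with "no displacement" still needs an explicit zero disp8 (mod=00, r/m=101 would
+    // otherwise mean RIP-relative addressing), and a base of rsp or r12 always needs a SIB
+    // byte (r/m=100 selects SIB). That is why their expected byte strings below are longer.
+    //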
+ // Addr_IR, offset zero
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rax), w_rdi),
+ "488B38",
+ "movq 0(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rbx), w_rdi),
+ "488B3B",
+ "movq 0(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rcx), w_rdi),
+ "488B39",
+ "movq 0(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rdx), w_rdi),
+ "488B3A",
+ "movq 0(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rbp), w_rdi),
+ "488B7D00",
+ "movq 0(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rsp), w_rdi),
+ "488B3C24",
+ "movq 0(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rsi), w_rdi),
+ "488B3E",
+ "movq 0(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rdi), w_rdi),
+ "488B3F",
+ "movq 0(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r8), w_rdi),
+ "498B38",
+ "movq 0(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r9), w_rdi),
+ "498B39",
+ "movq 0(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r10), w_rdi),
+ "498B3A",
+ "movq 0(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r11), w_rdi),
+ "498B3B",
+ "movq 0(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r12), w_rdi),
+ "498B3C24",
+ "movq 0(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r13), w_rdi),
+ "498B7D00",
+ "movq 0(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r14), w_rdi),
+ "498B3E",
+ "movq 0(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r15), w_rdi),
+ "498B3F",
+ "movq 0(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset max simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rax), w_rdi),
+ "488B787F",
+ "movq 127(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rbx), w_rdi),
+ "488B7B7F",
+ "movq 127(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rcx), w_rdi),
+ "488B797F",
+ "movq 127(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rdx), w_rdi),
+ "488B7A7F",
+ "movq 127(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rbp), w_rdi),
+ "488B7D7F",
+ "movq 127(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rsp), w_rdi),
+ "488B7C247F",
+ "movq 127(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rsi), w_rdi),
+ "488B7E7F",
+ "movq 127(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rdi), w_rdi),
+ "488B7F7F",
+ "movq 127(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r8), w_rdi),
+ "498B787F",
+ "movq 127(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r9), w_rdi),
+ "498B797F",
+ "movq 127(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r10), w_rdi),
+ "498B7A7F",
+ "movq 127(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r11), w_rdi),
+ "498B7B7F",
+ "movq 127(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r12), w_rdi),
+ "498B7C247F",
+ "movq 127(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r13), w_rdi),
+ "498B7D7F",
+ "movq 127(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r14), w_rdi),
+ "498B7E7F",
+ "movq 127(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r15), w_rdi),
+ "498B7F7F",
+ "movq 127(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset min simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rax), w_rdi),
+ "488B7880",
+ "movq -128(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbx), w_rdi),
+ "488B7B80",
+ "movq -128(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rcx), w_rdi),
+ "488B7980",
+ "movq -128(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdx), w_rdi),
+ "488B7A80",
+ "movq -128(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbp), w_rdi),
+ "488B7D80",
+ "movq -128(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsp), w_rdi),
+ "488B7C2480",
+ "movq -128(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsi), w_rdi),
+ "488B7E80",
+ "movq -128(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdi), w_rdi),
+ "488B7F80",
+ "movq -128(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r8), w_rdi),
+ "498B7880",
+ "movq -128(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r9), w_rdi),
+ "498B7980",
+ "movq -128(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r10), w_rdi),
+ "498B7A80",
+ "movq -128(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r11), w_rdi),
+ "498B7B80",
+ "movq -128(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r12), w_rdi),
+ "498B7C2480",
+ "movq -128(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r13), w_rdi),
+ "498B7D80",
+ "movq -128(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r14), w_rdi),
+ "498B7E80",
+ "movq -128(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r15), w_rdi),
+ "498B7F80",
+ "movq -128(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset smallest positive simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rax), w_rdi),
+ "488BB880000000",
+ "movq 128(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rbx), w_rdi),
+ "488BBB80000000",
+ "movq 128(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rcx), w_rdi),
+ "488BB980000000",
+ "movq 128(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rdx), w_rdi),
+ "488BBA80000000",
+ "movq 128(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rbp), w_rdi),
+ "488BBD80000000",
+ "movq 128(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rsp), w_rdi),
+ "488BBC2480000000",
+ "movq 128(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rsi), w_rdi),
+ "488BBE80000000",
+ "movq 128(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rdi), w_rdi),
+ "488BBF80000000",
+ "movq 128(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r8), w_rdi),
+ "498BB880000000",
+ "movq 128(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r9), w_rdi),
+ "498BB980000000",
+ "movq 128(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r10), w_rdi),
+ "498BBA80000000",
+ "movq 128(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r11), w_rdi),
+ "498BBB80000000",
+ "movq 128(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r12), w_rdi),
+ "498BBC2480000000",
+ "movq 128(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r13), w_rdi),
+ "498BBD80000000",
+ "movq 128(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r14), w_rdi),
+ "498BBE80000000",
+ "movq 128(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r15), w_rdi),
+ "498BBF80000000",
+ "movq 128(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset smallest negative simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rax), w_rdi),
+ "488BB87FFFFFFF",
+ "movq -129(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbx), w_rdi),
+ "488BBB7FFFFFFF",
+ "movq -129(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rcx), w_rdi),
+ "488BB97FFFFFFF",
+ "movq -129(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdx), w_rdi),
+ "488BBA7FFFFFFF",
+ "movq -129(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbp), w_rdi),
+ "488BBD7FFFFFFF",
+ "movq -129(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsp), w_rdi),
+ "488BBC247FFFFFFF",
+ "movq -129(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsi), w_rdi),
+ "488BBE7FFFFFFF",
+ "movq -129(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdi), w_rdi),
+ "488BBF7FFFFFFF",
+ "movq -129(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r8), w_rdi),
+ "498BB87FFFFFFF",
+ "movq -129(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r9), w_rdi),
+ "498BB97FFFFFFF",
+ "movq -129(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r10), w_rdi),
+ "498BBA7FFFFFFF",
+ "movq -129(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r11), w_rdi),
+ "498BBB7FFFFFFF",
+ "movq -129(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r12), w_rdi),
+ "498BBC247FFFFFFF",
+ "movq -129(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r13), w_rdi),
+ "498BBD7FFFFFFF",
+ "movq -129(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r14), w_rdi),
+ "498BBE7FFFFFFF",
+ "movq -129(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r15), w_rdi),
+ "498BBF7FFFFFFF",
+ "movq -129(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset large positive simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rax), w_rdi),
+ "488BB877207317",
+ "movq 393420919(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbx), w_rdi),
+ "488BBB77207317",
+ "movq 393420919(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rcx), w_rdi),
+ "488BB977207317",
+ "movq 393420919(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdx), w_rdi),
+ "488BBA77207317",
+ "movq 393420919(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbp), w_rdi),
+ "488BBD77207317",
+ "movq 393420919(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsp), w_rdi),
+ "488BBC2477207317",
+ "movq 393420919(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsi), w_rdi),
+ "488BBE77207317",
+ "movq 393420919(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdi), w_rdi),
+ "488BBF77207317",
+ "movq 393420919(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r8), w_rdi),
+ "498BB877207317",
+ "movq 393420919(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r9), w_rdi),
+ "498BB977207317",
+ "movq 393420919(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r10), w_rdi),
+ "498BBA77207317",
+ "movq 393420919(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r11), w_rdi),
+ "498BBB77207317",
+ "movq 393420919(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r12), w_rdi),
+ "498BBC2477207317",
+ "movq 393420919(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r13), w_rdi),
+ "498BBD77207317",
+ "movq 393420919(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r14), w_rdi),
+ "498BBE77207317",
+ "movq 393420919(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r15), w_rdi),
+ "498BBF77207317",
+ "movq 393420919(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset large negative simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rax), w_rdi),
+ "488BB8D9A6BECE",
+ "movq -826366247(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbx), w_rdi),
+ "488BBBD9A6BECE",
+ "movq -826366247(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rcx), w_rdi),
+ "488BB9D9A6BECE",
+ "movq -826366247(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdx), w_rdi),
+ "488BBAD9A6BECE",
+ "movq -826366247(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbp), w_rdi),
+ "488BBDD9A6BECE",
+ "movq -826366247(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsp), w_rdi),
+ "488BBC24D9A6BECE",
+ "movq -826366247(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsi), w_rdi),
+ "488BBED9A6BECE",
+ "movq -826366247(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdi), w_rdi),
+ "488BBFD9A6BECE",
+ "movq -826366247(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r8), w_rdi),
+ "498BB8D9A6BECE",
+ "movq -826366247(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r9), w_rdi),
+ "498BB9D9A6BECE",
+ "movq -826366247(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r10), w_rdi),
+ "498BBAD9A6BECE",
+ "movq -826366247(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r11), w_rdi),
+ "498BBBD9A6BECE",
+ "movq -826366247(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r12), w_rdi),
+ "498BBC24D9A6BECE",
+ "movq -826366247(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r13), w_rdi),
+ "498BBDD9A6BECE",
+ "movq -826366247(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r14), w_rdi),
+ "498BBED9A6BECE",
+ "movq -826366247(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r15), w_rdi),
+ "498BBFD9A6BECE",
+ "movq -826366247(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Cases aimed at checking Addr-esses: IRRS (Imm + Reg + (Reg << Shift))
+ // Note these don't check the case where the index reg is RSP, since we
+ // don't encode any of those.
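+    // (%rsp can never be an index register: in a SIB byte, index=0b100 means "no index".)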
+ //
+ // Addr_IRRS, offset max simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11),
+ "4C8B5C007F",
+ "movq 127(%rax,%rax,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11),
+ "4C8B5C477F",
+ "movq 127(%rdi,%rax,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11),
+ "4D8B5C807F",
+ "movq 127(%r8,%rax,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11),
+ "4D8B5CC77F",
+ "movq 127(%r15,%rax,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11),
+ "4C8B5CF87F",
+ "movq 127(%rax,%rdi,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11),
+ "4C8B5CBF7F",
+ "movq 127(%rdi,%rdi,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11),
+ "4D8B5C787F",
+ "movq 127(%r8,%rdi,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11),
+ "4D8B5C3F7F",
+ "movq 127(%r15,%rdi,1), %r11",
+ ));
+
+ // ========================================================
+ // Addr_IRRS, offset min simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11),
+ "4E8B5C8080",
+ "movq -128(%rax,%r8,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11),
+ "4E8B5CC780",
+ "movq -128(%rdi,%r8,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11),
+ "4F8B5C0080",
+ "movq -128(%r8,%r8,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11),
+ "4F8B5C4780",
+ "movq -128(%r15,%r8,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11),
+ "4E8B5C7880",
+ "movq -128(%rax,%r15,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11),
+ "4E8B5C3F80",
+ "movq -128(%rdi,%r15,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11),
+ "4F8B5CF880",
+ "movq -128(%r8,%r15,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11),
+ "4F8B5CBF80",
+ "movq -128(%r15,%r15,4), %r11",
+ ));
+
+ // ========================================================
+ // Addr_IRRS, offset large positive simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11),
+ "4C8B9C00BE25664F",
+ "movq 1332094398(%rax,%rax,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11),
+ "4C8B9C47BE25664F",
+ "movq 1332094398(%rdi,%rax,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11),
+ "4D8B9C80BE25664F",
+ "movq 1332094398(%r8,%rax,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11),
+ "4D8B9CC7BE25664F",
+ "movq 1332094398(%r15,%rax,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11),
+ "4C8B9CF8BE25664F",
+ "movq 1332094398(%rax,%rdi,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11),
+ "4C8B9CBFBE25664F",
+ "movq 1332094398(%rdi,%rdi,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11),
+ "4D8B9C78BE25664F",
+ "movq 1332094398(%r8,%rdi,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11),
+ "4D8B9C3FBE25664F",
+ "movq 1332094398(%r15,%rdi,1), %r11",
+ ));
+
+ // ========================================================
+ // Addr_IRRS, offset large negative simm32
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2),
+ w_r11,
+ ),
+ "4E8B9C8070E9B2D9",
+ "movq -642586256(%rax,%r8,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3),
+ w_r11,
+ ),
+ "4E8B9CC770E9B2D9",
+ "movq -642586256(%rdi,%r8,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0),
+ w_r11,
+ ),
+ "4F8B9C0070E9B2D9",
+ "movq -642586256(%r8,%r8,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1),
+ w_r11,
+ ),
+ "4F8B9C4770E9B2D9",
+ "movq -642586256(%r15,%r8,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1),
+ w_r11,
+ ),
+ "4E8B9C7870E9B2D9",
+ "movq -642586256(%rax,%r15,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0),
+ w_r11,
+ ),
+ "4E8B9C3F70E9B2D9",
+ "movq -642586256(%rdi,%r15,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3),
+ w_r11,
+ ),
+ "4F8B9CF870E9B2D9",
+ "movq -642586256(%r8,%r15,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2),
+ w_r11,
+ ),
+ "4F8B9CBF70E9B2D9",
+ "movq -642586256(%r15,%r15,4), %r11",
+ ));
+
+ // End of test cases for Addr
+ // ========================================================
+
+ // ========================================================
+ // General tests for each insn. Don't forget to follow the
+ // guidelines commented just prior to `fn x64_emit`.
+ //
+ // Alu_RMI_R
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Add, RegMemImm::reg(r15), w_rdx),
+ "4C01FA",
+ "addq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::reg(rcx), w_r8),
+ "4101C8",
+ "addl %ecx, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::reg(rcx), w_rsi),
+ "01CE",
+ "addl %ecx, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rdx,
+ ),
+ "48035763",
+ "addq 99(%rdi), %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_r8,
+ ),
+ "44034763",
+ "addl 99(%rdi), %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rsi,
+ ),
+ "037763",
+ "addl 99(%rdi), %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-127i32 as u32),
+ w_rdx,
+ ),
+ "4883C281",
+ "addq $-127, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-129i32 as u32),
+ w_rdx,
+ ),
+ "4881C27FFFFFFF",
+ "addq $-129, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Add, RegMemImm::imm(76543210), w_rdx),
+ "4881C2EAF48F04",
+ "addq $76543210, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-127i32 as u32),
+ w_r8,
+ ),
+ "4183C081",
+ "addl $-127, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-129i32 as u32),
+ w_r8,
+ ),
+ "4181C07FFFFFFF",
+ "addl $-129, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-76543210i32 as u32),
+ w_r8,
+ ),
+ "4181C0160B70FB",
+ "addl $-76543210, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-127i32 as u32),
+ w_rsi,
+ ),
+ "83C681",
+ "addl $-127, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-129i32 as u32),
+ w_rsi,
+ ),
+ "81C67FFFFFFF",
+ "addl $-129, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(76543210), w_rsi),
+ "81C6EAF48F04",
+ "addl $76543210, %esi",
+ ));
+    // Only one case each for Sub/And/Or/Xor; this is pretty feeble coverage.
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Sub, RegMemImm::reg(r15), w_rdx),
+ "4C29FA",
+ "subq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::And, RegMemImm::reg(r15), w_rdx),
+ "4C21FA",
+ "andq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Or, RegMemImm::reg(r15), w_rdx),
+ "4C09FA",
+ "orq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Xor, RegMemImm::reg(r15), w_rdx),
+ "4C31FA",
+ "xorq %r15, %rdx",
+ ));
+ // Test all mul cases, though
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Mul, RegMemImm::reg(r15), w_rdx),
+ "490FAFD7",
+ "imulq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::reg(rcx), w_r8),
+ "440FAFC1",
+ "imull %ecx, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::reg(rcx), w_rsi),
+ "0FAFF1",
+ "imull %ecx, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Mul,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rdx,
+ ),
+ "480FAF5763",
+ "imulq 99(%rdi), %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_r8,
+ ),
+ "440FAF4763",
+ "imull 99(%rdi), %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rsi,
+ ),
+ "0FAF7763",
+ "imull 99(%rdi), %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-127i32 as u32),
+ w_rdx,
+ ),
+ "486BD281",
+ "imulq $-127, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-129i32 as u32),
+ w_rdx,
+ ),
+ "4869D27FFFFFFF",
+ "imulq $-129, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Mul, RegMemImm::imm(76543210), w_rdx),
+ "4869D2EAF48F04",
+ "imulq $76543210, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-127i32 as u32),
+ w_r8,
+ ),
+ "456BC081",
+ "imull $-127, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-129i32 as u32),
+ w_r8,
+ ),
+ "4569C07FFFFFFF",
+ "imull $-129, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-76543210i32 as u32),
+ w_r8,
+ ),
+ "4569C0160B70FB",
+ "imull $-76543210, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-127i32 as u32),
+ w_rsi,
+ ),
+ "6BF681",
+ "imull $-127, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-129i32 as u32),
+ w_rsi,
+ ),
+ "69F67FFFFFFF",
+ "imull $-129, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::imm(76543210), w_rsi),
+ "69F6EAF48F04",
+ "imull $76543210, %esi",
+ ));
+
+ // ========================================================
+ // UnaryRmR
+
+ insns.push((
+ Inst::unary_rm_r(4, UnaryRmROpcode::Bsr, RegMem::reg(rsi), w_rdi),
+ "0FBDFE",
+ "bsrl %esi, %edi",
+ ));
+ insns.push((
+ Inst::unary_rm_r(8, UnaryRmROpcode::Bsr, RegMem::reg(r15), w_rax),
+ "490FBDC7",
+ "bsrq %r15, %rax",
+ ));
+
+ // ========================================================
+ // Not
+ insns.push((
+ Inst::not(4, Writable::from_reg(regs::rsi())),
+ "F7D6",
+ "notl %esi",
+ ));
+ insns.push((
+ Inst::not(8, Writable::from_reg(regs::r15())),
+ "49F7D7",
+ "notq %r15",
+ ));
+ insns.push((
+ Inst::not(4, Writable::from_reg(regs::r14())),
+ "41F7D6",
+ "notl %r14d",
+ ));
+ insns.push((
+ Inst::not(2, Writable::from_reg(regs::rdi())),
+ "66F7D7",
+ "notw %di",
+ ));
+
+ // ========================================================
+ // Neg
+ insns.push((
+ Inst::neg(4, Writable::from_reg(regs::rsi())),
+ "F7DE",
+ "negl %esi",
+ ));
+ insns.push((
+ Inst::neg(8, Writable::from_reg(regs::r15())),
+ "49F7DF",
+ "negq %r15",
+ ));
+ insns.push((
+ Inst::neg(4, Writable::from_reg(regs::r14())),
+ "41F7DE",
+ "negl %r14d",
+ ));
+ insns.push((
+ Inst::neg(2, Writable::from_reg(regs::rdi())),
+ "66F7DF",
+ "negw %di",
+ ));
+
+ // ========================================================
+ // Div
+ insns.push((
+ Inst::div(4, true /*signed*/, RegMem::reg(regs::rsi())),
+ "F7FE",
+ "idiv %esi",
+ ));
+ insns.push((
+ Inst::div(8, true /*signed*/, RegMem::reg(regs::r15())),
+ "49F7FF",
+ "idiv %r15",
+ ));
+ insns.push((
+ Inst::div(4, false /*signed*/, RegMem::reg(regs::r14())),
+ "41F7F6",
+ "div %r14d",
+ ));
+ insns.push((
+ Inst::div(8, false /*signed*/, RegMem::reg(regs::rdi())),
+ "48F7F7",
+ "div %rdi",
+ ));
+
+ // ========================================================
+ // MulHi
+ insns.push((
+ Inst::mul_hi(4, true /*signed*/, RegMem::reg(regs::rsi())),
+ "F7EE",
+ "imul %esi",
+ ));
+ insns.push((
+ Inst::mul_hi(8, true /*signed*/, RegMem::reg(regs::r15())),
+ "49F7EF",
+ "imul %r15",
+ ));
+ insns.push((
+ Inst::mul_hi(4, false /*signed*/, RegMem::reg(regs::r14())),
+ "41F7E6",
+ "mul %r14d",
+ ));
+ insns.push((
+ Inst::mul_hi(8, false /*signed*/, RegMem::reg(regs::rdi())),
+ "48F7E7",
+ "mul %rdi",
+ ));
+
+ // ========================================================
+ // cbw
+ insns.push((Inst::sign_extend_data(1), "6698", "cbw"));
+
+ // ========================================================
+ // cdq family: SignExtendRaxRdx
+ insns.push((Inst::sign_extend_data(2), "6699", "cwd"));
+ insns.push((Inst::sign_extend_data(4), "99", "cdq"));
+ insns.push((Inst::sign_extend_data(8), "4899", "cqo"));
+
+ // ========================================================
+ // Imm_R
+ //
+ insns.push((
+ Inst::imm(OperandSize::Size32, 1234567, w_r14),
+ "41BE87D61200",
+ "movl $1234567, %r14d",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size32, -126i64 as u64, w_r14),
+ "41BE82FFFFFF",
+ "movl $-126, %r14d",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, 1234567898765, w_r14),
+ "49BE8D26FB711F010000",
+ "movabsq $1234567898765, %r14",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, -126i64 as u64, w_r14),
+ "49C7C682FFFFFF",
+ "movabsq $-126, %r14",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size32, 1234567, w_rcx),
+ "B987D61200",
+ "movl $1234567, %ecx",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size32, -126i64 as u64, w_rcx),
+ "B982FFFFFF",
+ "movl $-126, %ecx",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, 1234567898765, w_rsi),
+ "48BE8D26FB711F010000",
+ "movabsq $1234567898765, %rsi",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, -126i64 as u64, w_rbx),
+ "48C7C382FFFFFF",
+ "movabsq $-126, %rbx",
+ ));
+
+ // ========================================================
+ // Mov_R_R
+ insns.push((
+ Inst::mov_r_r(false, rbx, w_rsi),
+ "89DE",
+ "movl %ebx, %esi",
+ ));
+ insns.push((
+ Inst::mov_r_r(false, rbx, w_r9),
+ "4189D9",
+ "movl %ebx, %r9d",
+ ));
+ insns.push((
+ Inst::mov_r_r(false, r11, w_rsi),
+ "4489DE",
+ "movl %r11d, %esi",
+ ));
+ insns.push((
+ Inst::mov_r_r(false, r12, w_r9),
+ "4589E1",
+ "movl %r12d, %r9d",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, rbx, w_rsi),
+ "4889DE",
+ "movq %rbx, %rsi",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, rbx, w_r9),
+ "4989D9",
+ "movq %rbx, %r9",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, r11, w_rsi),
+ "4C89DE",
+ "movq %r11, %rsi",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, r12, w_r9),
+ "4D89E1",
+ "movq %r12, %r9",
+ ));
+
+ // ========================================================
+ // MovZX_RM_R
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rdi), w_rdi),
+ "400FB6FF",
+ "movzbl %dil, %edi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rax), w_rsi),
+ "0FB6F0",
+ "movzbl %al, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(r15), w_rsi),
+ "410FB6F7",
+ "movzbl %r15b, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FB671F9",
+ "movzbl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FB658F9",
+ "movzbl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FB64AF9",
+ "movzbl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FB653F9",
+ "movzbl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(rax), w_rsi),
+ "480FB6F0",
+ "movzbq %al, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(r10), w_rsi),
+ "490FB6F2",
+ "movzbq %r10b, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FB671F9",
+ "movzbq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FB658F9",
+ "movzbq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FB64AF9",
+ "movzbq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FB653F9",
+ "movzbq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi),
+ "0FB7F1",
+ "movzwl %cx, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(r10), w_rsi),
+ "410FB7F2",
+ "movzwl %r10w, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FB771F9",
+ "movzwl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FB758F9",
+ "movzwl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FB74AF9",
+ "movzwl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FB753F9",
+ "movzwl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi),
+ "480FB7F1",
+ "movzwq %cx, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(r11), w_rsi),
+ "490FB7F3",
+ "movzwq %r11w, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FB771F9",
+ "movzwq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FB758F9",
+ "movzwq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FB74AF9",
+ "movzwq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FB753F9",
+ "movzwq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi),
+ "8BF1",
+ "movl %ecx, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "8B71F9",
+ "movl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "418B58F9",
+ "movl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "458B4AF9",
+ "movl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "418B53F9",
+ "movl -7(%r11), %edx",
+ ));
+
+ // ========================================================
+ // Mov64_M_R
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx),
+ "488B8C18B3000000",
+ "movq 179(%rax,%rbx,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8),
+ "4C8B8418B3000000",
+ "movq 179(%rax,%rbx,1), %r8",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx),
+ "4A8B8C08B3000000",
+ "movq 179(%rax,%r9,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8),
+ "4E8B8408B3000000",
+ "movq 179(%rax,%r9,1), %r8",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx),
+ "498B8C1AB3000000",
+ "movq 179(%r10,%rbx,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8),
+ "4D8B841AB3000000",
+ "movq 179(%r10,%rbx,1), %r8",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx),
+ "4B8B8C0AB3000000",
+ "movq 179(%r10,%r9,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
+ "4F8B840AB3000000",
+ "movq 179(%r10,%r9,1), %r8",
+ ));
+
+ // ========================================================
+ // LoadEffectiveAddress
+ insns.push((
+ Inst::lea(Amode::imm_reg(42, r10), w_r8),
+ "4D8D422A",
+ "lea 42(%r10), %r8",
+ ));
+ insns.push((
+ Inst::lea(Amode::imm_reg(42, r10), w_r15),
+ "4D8D7A2A",
+ "lea 42(%r10), %r15",
+ ));
+ insns.push((
+ Inst::lea(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
+ "4F8D840AB3000000",
+ "lea 179(%r10,%r9,1), %r8",
+ ));
+ insns.push((
+ Inst::lea(Amode::rip_relative(MachLabel::from_block(0)), w_rdi),
+ "488D3D00000000",
+ "lea label0(%rip), %rdi",
+ ));
+
+ // ========================================================
+ // MovSX_RM_R
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rdi), w_rdi),
+ "400FBEFF",
+ "movsbl %dil, %edi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rcx), w_rsi),
+ "0FBEF1",
+ "movsbl %cl, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(r14), w_rsi),
+ "410FBEF6",
+ "movsbl %r14b, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FBE71F9",
+ "movsbl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FBE58F9",
+ "movsbl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FBE4AF9",
+ "movsbl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FBE53F9",
+ "movsbl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(rcx), w_rsi),
+ "480FBEF1",
+ "movsbq %cl, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(r15), w_rsi),
+ "490FBEF7",
+ "movsbq %r15b, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FBE71F9",
+ "movsbq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FBE58F9",
+ "movsbq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FBE4AF9",
+ "movsbq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FBE53F9",
+ "movsbq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi),
+ "0FBFF1",
+ "movswl %cx, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(r14), w_rsi),
+ "410FBFF6",
+ "movswl %r14w, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FBF71F9",
+ "movswl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FBF58F9",
+ "movswl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FBF4AF9",
+ "movswl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FBF53F9",
+ "movswl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi),
+ "480FBFF1",
+ "movswq %cx, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(r13), w_rsi),
+ "490FBFF5",
+ "movswq %r13w, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FBF71F9",
+ "movswq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FBF58F9",
+ "movswq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FBF4AF9",
+ "movswq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FBF53F9",
+ "movswq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi),
+ "4863F1",
+ "movslq %ecx, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(r15), w_rsi),
+ "4963F7",
+ "movslq %r15d, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "486371F9",
+ "movslq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "496358F9",
+ "movslq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D634AF9",
+ "movslq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "496353F9",
+ "movslq -7(%r11), %rdx",
+ ));
+
+ // ========================================================
+ // Mov_R_M. Byte stores are tricky. Check everything carefully.
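+ // (What makes them tricky: without a REX prefix, the byte-register encodings 4..7 name
+ // %ah/%ch/%dh/%bh, so stores from %spl/%bpl/%sil/%dil must keep an apparently-redundant 0x40
+ // REX prefix, while %r8b..%r15b need their usual REX extension bit.)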
+ insns.push((
+ Inst::mov_r_m(8, rax, Amode::imm_reg(99, rdi)),
+ "48894763",
+ "movq %rax, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rbx, Amode::imm_reg(99, r8)),
+ "49895863",
+ "movq %rbx, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rcx, Amode::imm_reg(99, rsi)),
+ "48894E63",
+ "movq %rcx, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rdx, Amode::imm_reg(99, r9)),
+ "49895163",
+ "movq %rdx, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rsi, Amode::imm_reg(99, rax)),
+ "48897063",
+ "movq %rsi, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rdi, Amode::imm_reg(99, r15)),
+ "49897F63",
+ "movq %rdi, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rsp, Amode::imm_reg(99, rcx)),
+ "48896163",
+ "movq %rsp, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rbp, Amode::imm_reg(99, r14)),
+ "49896E63",
+ "movq %rbp, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r8, Amode::imm_reg(99, rdi)),
+ "4C894763",
+ "movq %r8, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r9, Amode::imm_reg(99, r8)),
+ "4D894863",
+ "movq %r9, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r10, Amode::imm_reg(99, rsi)),
+ "4C895663",
+ "movq %r10, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r11, Amode::imm_reg(99, r9)),
+ "4D895963",
+ "movq %r11, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r12, Amode::imm_reg(99, rax)),
+ "4C896063",
+ "movq %r12, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r13, Amode::imm_reg(99, r15)),
+ "4D896F63",
+ "movq %r13, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r14, Amode::imm_reg(99, rcx)),
+ "4C897163",
+ "movq %r14, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r15, Amode::imm_reg(99, r14)),
+ "4D897E63",
+ "movq %r15, 99(%r14)",
+ ));
+ //
+ insns.push((
+ Inst::mov_r_m(4, rax, Amode::imm_reg(99, rdi)),
+ "894763",
+ "movl %eax, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rbx, Amode::imm_reg(99, r8)),
+ "41895863",
+ "movl %ebx, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rcx, Amode::imm_reg(99, rsi)),
+ "894E63",
+ "movl %ecx, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rdx, Amode::imm_reg(99, r9)),
+ "41895163",
+ "movl %edx, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rsi, Amode::imm_reg(99, rax)),
+ "897063",
+ "movl %esi, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rdi, Amode::imm_reg(99, r15)),
+ "41897F63",
+ "movl %edi, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rsp, Amode::imm_reg(99, rcx)),
+ "896163",
+ "movl %esp, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rbp, Amode::imm_reg(99, r14)),
+ "41896E63",
+ "movl %ebp, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r8, Amode::imm_reg(99, rdi)),
+ "44894763",
+ "movl %r8d, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r9, Amode::imm_reg(99, r8)),
+ "45894863",
+ "movl %r9d, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r10, Amode::imm_reg(99, rsi)),
+ "44895663",
+ "movl %r10d, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r11, Amode::imm_reg(99, r9)),
+ "45895963",
+ "movl %r11d, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r12, Amode::imm_reg(99, rax)),
+ "44896063",
+ "movl %r12d, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r13, Amode::imm_reg(99, r15)),
+ "45896F63",
+ "movl %r13d, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r14, Amode::imm_reg(99, rcx)),
+ "44897163",
+ "movl %r14d, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r15, Amode::imm_reg(99, r14)),
+ "45897E63",
+ "movl %r15d, 99(%r14)",
+ ));
+ //
+ insns.push((
+ Inst::mov_r_m(2, rax, Amode::imm_reg(99, rdi)),
+ "66894763",
+ "movw %ax, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rbx, Amode::imm_reg(99, r8)),
+ "6641895863",
+ "movw %bx, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rcx, Amode::imm_reg(99, rsi)),
+ "66894E63",
+ "movw %cx, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rdx, Amode::imm_reg(99, r9)),
+ "6641895163",
+ "movw %dx, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rsi, Amode::imm_reg(99, rax)),
+ "66897063",
+ "movw %si, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rdi, Amode::imm_reg(99, r15)),
+ "6641897F63",
+ "movw %di, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rsp, Amode::imm_reg(99, rcx)),
+ "66896163",
+ "movw %sp, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rbp, Amode::imm_reg(99, r14)),
+ "6641896E63",
+ "movw %bp, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r8, Amode::imm_reg(99, rdi)),
+ "6644894763",
+ "movw %r8w, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r9, Amode::imm_reg(99, r8)),
+ "6645894863",
+ "movw %r9w, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r10, Amode::imm_reg(99, rsi)),
+ "6644895663",
+ "movw %r10w, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r11, Amode::imm_reg(99, r9)),
+ "6645895963",
+ "movw %r11w, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r12, Amode::imm_reg(99, rax)),
+ "6644896063",
+ "movw %r12w, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r13, Amode::imm_reg(99, r15)),
+ "6645896F63",
+ "movw %r13w, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r14, Amode::imm_reg(99, rcx)),
+ "6644897163",
+ "movw %r14w, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r15, Amode::imm_reg(99, r14)),
+ "6645897E63",
+ "movw %r15w, 99(%r14)",
+ ));
+ //
+ insns.push((
+ Inst::mov_r_m(1, rax, Amode::imm_reg(99, rdi)),
+ "884763",
+ "movb %al, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rbx, Amode::imm_reg(99, r8)),
+ "41885863",
+ "movb %bl, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rcx, Amode::imm_reg(99, rsi)),
+ "884E63",
+ "movb %cl, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rdx, Amode::imm_reg(99, r9)),
+ "41885163",
+ "movb %dl, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rsi, Amode::imm_reg(99, rax)),
+ "40887063",
+ "movb %sil, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rdi, Amode::imm_reg(99, r15)),
+ "41887F63",
+ "movb %dil, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rsp, Amode::imm_reg(99, rcx)),
+ "40886163",
+ "movb %spl, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rbp, Amode::imm_reg(99, r14)),
+ "41886E63",
+ "movb %bpl, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r8, Amode::imm_reg(99, rdi)),
+ "44884763",
+ "movb %r8b, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r9, Amode::imm_reg(99, r8)),
+ "45884863",
+ "movb %r9b, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r10, Amode::imm_reg(99, rsi)),
+ "44885663",
+ "movb %r10b, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r11, Amode::imm_reg(99, r9)),
+ "45885963",
+ "movb %r11b, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r12, Amode::imm_reg(99, rax)),
+ "44886063",
+ "movb %r12b, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r13, Amode::imm_reg(99, r15)),
+ "45886F63",
+ "movb %r13b, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r14, Amode::imm_reg(99, rcx)),
+ "44887163",
+ "movb %r14b, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r15, Amode::imm_reg(99, r14)),
+ "45887E63",
+ "movb %r15b, 99(%r14)",
+ ));
+
+ // ========================================================
+ // Shift_R
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_rdi),
+ "D3E7",
+ "shll %cl, %edi",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_r12),
+ "41D3E4",
+ "shll %cl, %r12d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, Some(2), w_r8),
+ "41C1E002",
+ "shll $2, %r8d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, Some(31), w_r13),
+ "41C1E51F",
+ "shll $31, %r13d",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_r13),
+ "49D3E5",
+ "shlq %cl, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_rdi),
+ "48D3E7",
+ "shlq %cl, %rdi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, Some(2), w_r8),
+ "49C1E002",
+ "shlq $2, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, Some(3), w_rbx),
+ "48C1E303",
+ "shlq $3, %rbx",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, Some(63), w_r13),
+ "49C1E53F",
+ "shlq $63, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightLogical, None, w_rdi),
+ "D3EF",
+ "shrl %cl, %edi",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(2), w_r8),
+ "41C1E802",
+ "shrl $2, %r8d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(31), w_r13),
+ "41C1ED1F",
+ "shrl $31, %r13d",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightLogical, None, w_rdi),
+ "48D3EF",
+ "shrq %cl, %rdi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(2), w_r8),
+ "49C1E802",
+ "shrq $2, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(63), w_r13),
+ "49C1ED3F",
+ "shrq $63, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, None, w_rdi),
+ "D3FF",
+ "sarl %cl, %edi",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
+ "41C1F802",
+ "sarl $2, %r8d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(31), w_r13),
+ "41C1FD1F",
+ "sarl $31, %r13d",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, None, w_rdi),
+ "48D3FF",
+ "sarq %cl, %rdi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
+ "49C1F802",
+ "sarq $2, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(63), w_r13),
+ "49C1FD3F",
+ "sarq $63, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::RotateLeft, None, w_r8),
+ "49D3C0",
+ "rolq %cl, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::RotateLeft, Some(3), w_r9),
+ "41C1C103",
+ "roll $3, %r9d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::RotateRight, None, w_rsi),
+ "D3CE",
+ "rorl %cl, %esi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::RotateRight, Some(5), w_r15),
+ "49C1CF05",
+ "rorq $5, %r15",
+ ));
+ insns.push((
+ Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
+ "D2CE",
+ "rorb %cl, %sil",
+ ));
+ insns.push((
+ Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
+ "41C0CF05",
+ "rorb $5, %r15b",
+ ));
+ insns.push((
+ Inst::shift_r(2, ShiftKind::RotateRight, None, w_rsi),
+ "66D3CE",
+ "rorw %cl, %si",
+ ));
+ insns.push((
+ Inst::shift_r(2, ShiftKind::RotateRight, Some(5), w_r15),
+ "6641C1CF05",
+ "rorw $5, %r15w",
+ ));
+
+ // ========================================================
+ // CmpRMIR
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::reg(r15), rdx),
+ "4C39FA",
+ "cmpq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::reg(rcx), r8),
+ "4939C8",
+ "cmpq %rcx, %r8",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::reg(rcx), rsi),
+ "4839CE",
+ "cmpq %rcx, %rsi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "483B5763",
+ "cmpq 99(%rdi), %rdx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "4C3B4763",
+ "cmpq 99(%rdi), %r8",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "483B7763",
+ "cmpq 99(%rdi), %rsi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::imm(76543210), rdx),
+ "4881FAEAF48F04",
+ "cmpq $76543210, %rdx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::imm(-76543210i32 as u32), r8),
+ "4981F8160B70FB",
+ "cmpq $-76543210, %r8",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::imm(76543210), rsi),
+ "4881FEEAF48F04",
+ "cmpq $76543210, %rsi",
+ ));
+ //
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::reg(r15), rdx),
+ "4439FA",
+ "cmpl %r15d, %edx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::reg(rcx), r8),
+ "4139C8",
+ "cmpl %ecx, %r8d",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::reg(rcx), rsi),
+ "39CE",
+ "cmpl %ecx, %esi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "3B5763",
+ "cmpl 99(%rdi), %edx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "443B4763",
+ "cmpl 99(%rdi), %r8d",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "3B7763",
+ "cmpl 99(%rdi), %esi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::imm(76543210), rdx),
+ "81FAEAF48F04",
+ "cmpl $76543210, %edx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::imm(-76543210i32 as u32), r8),
+ "4181F8160B70FB",
+ "cmpl $-76543210, %r8d",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::imm(76543210), rsi),
+ "81FEEAF48F04",
+ "cmpl $76543210, %esi",
+ ));
+ //
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::reg(r15), rdx),
+ "664439FA",
+ "cmpw %r15w, %dx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::reg(rcx), r8),
+ "664139C8",
+ "cmpw %cx, %r8w",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::reg(rcx), rsi),
+ "6639CE",
+ "cmpw %cx, %si",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "663B5763",
+ "cmpw 99(%rdi), %dx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "66443B4763",
+ "cmpw 99(%rdi), %r8w",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "663B7763",
+ "cmpw 99(%rdi), %si",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::imm(23210), rdx),
+ "6681FAAA5A",
+ "cmpw $23210, %dx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::imm(-7654i32 as u32), r8),
+ "664181F81AE2",
+ "cmpw $-7654, %r8w",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::imm(7654), rsi),
+ "6681FEE61D",
+ "cmpw $7654, %si",
+ ));
+ //
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r15), rdx),
+ "4438FA",
+ "cmpb %r15b, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), r8),
+ "4138C8",
+ "cmpb %cl, %r8b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), rsi),
+ "4038CE",
+ "cmpb %cl, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "3A5763",
+ "cmpb 99(%rdi), %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "443A4763",
+ "cmpb 99(%rdi), %r8b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "403A7763",
+ "cmpb 99(%rdi), %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::imm(70), rdx),
+ "80FA46",
+ "cmpb $70, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::imm(-76i32 as u32), r8),
+ "4180F8B4",
+ "cmpb $-76, %r8b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::imm(76), rsi),
+ "4080FE4C",
+ "cmpb $76, %sil",
+ ));
+ // Extra byte cases (paranoia!) for cmp_rmi_r where the first operand is a register.
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rax), rbx),
+ "38C3",
+ "cmpb %al, %bl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbx), rax),
+ "38D8",
+ "cmpb %bl, %al",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), rdx),
+ "38CA",
+ "cmpb %cl, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), rsi),
+ "4038CE",
+ "cmpb %cl, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), r10),
+ "4138CA",
+ "cmpb %cl, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), r14),
+ "4138CE",
+ "cmpb %cl, %r14b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), rdx),
+ "4038EA",
+ "cmpb %bpl, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), rsi),
+ "4038EE",
+ "cmpb %bpl, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), r10),
+ "4138EA",
+ "cmpb %bpl, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), r14),
+ "4138EE",
+ "cmpb %bpl, %r14b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), rdx),
+ "4438CA",
+ "cmpb %r9b, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), rsi),
+ "4438CE",
+ "cmpb %r9b, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), r10),
+ "4538CA",
+ "cmpb %r9b, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), r14),
+ "4538CE",
+ "cmpb %r9b, %r14b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), rdx),
+ "4438EA",
+ "cmpb %r13b, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), rsi),
+ "4438EE",
+ "cmpb %r13b, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), r10),
+ "4538EA",
+ "cmpb %r13b, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), r14),
+ "4538EE",
+ "cmpb %r13b, %r14b",
+ ));
+
+ // ========================================================
+ // SetCC
+ insns.push((Inst::setcc(CC::O, w_rsi), "400F90C6", "seto %sil"));
+ insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle %sil"));
+ insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b"));
+ insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
+ insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp %r9b"));
+ insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp %r8b"));
+ // ========================================================
+ // Cmove
+ insns.push((
+ Inst::cmove(2, CC::O, RegMem::reg(rdi), w_rsi),
+ "660F40F7",
+ "cmovow %di, %si",
+ ));
+ insns.push((
+ Inst::cmove(
+ 2,
+ CC::NO,
+ RegMem::mem(Amode::imm_reg_reg_shift(37, rdi, rsi, 2)),
+ w_r15,
+ ),
+ "66440F417CB725",
+ "cmovnow 37(%rdi,%rsi,4), %r15w",
+ ));
+ insns.push((
+ Inst::cmove(4, CC::LE, RegMem::reg(rdi), w_rsi),
+ "0F4EF7",
+ "cmovlel %edi, %esi",
+ ));
+ insns.push((
+ Inst::cmove(4, CC::NLE, RegMem::mem(Amode::imm_reg(0, r15)), w_rsi),
+ "410F4F37",
+ "cmovnlel 0(%r15), %esi",
+ ));
+ insns.push((
+ Inst::cmove(8, CC::Z, RegMem::reg(rdi), w_r14),
+ "4C0F44F7",
+ "cmovzq %rdi, %r14",
+ ));
+ insns.push((
+ Inst::cmove(8, CC::NZ, RegMem::mem(Amode::imm_reg(13, rdi)), w_r14),
+ "4C0F45770D",
+ "cmovnzq 13(%rdi), %r14",
+ ));
+
+ // ========================================================
+ // Push64
+ insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi"));
+ insns.push((Inst::push64(RegMemImm::reg(r8)), "4150", "pushq %r8"));
+ insns.push((
+ Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
+ "FFB4CE41010000",
+ "pushq 321(%rsi,%rcx,8)",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, r9, rbx, 2))),
+ "41FFB49941010000",
+ "pushq 321(%r9,%rbx,4)",
+ ));
+ insns.push((Inst::push64(RegMemImm::imm(0)), "6A00", "pushq $0"));
+ insns.push((Inst::push64(RegMemImm::imm(127)), "6A7F", "pushq $127"));
+ insns.push((
+ Inst::push64(RegMemImm::imm(128)),
+ "6880000000",
+ "pushq $128",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(0x31415927)),
+ "6827594131",
+ "pushq $826366247",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(-128i32 as u32)),
+ "6A80",
+ "pushq $-128",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(-129i32 as u32)),
+ "687FFFFFFF",
+ "pushq $-129",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(-0x75c4e8a1i32 as u32)),
+ "685F173B8A",
+ "pushq $-1975838881",
+ ));
+
+ // ========================================================
+ // Pop64
+ insns.push((Inst::pop64(w_rax), "58", "popq %rax"));
+ insns.push((Inst::pop64(w_rdi), "5F", "popq %rdi"));
+ insns.push((Inst::pop64(w_r8), "4158", "popq %r8"));
+ insns.push((Inst::pop64(w_r15), "415F", "popq %r15"));
+
+ // ========================================================
+ // CallKnown
+ insns.push((
+ Inst::call_known(
+ ExternalName::User {
+ namespace: 0,
+ index: 0,
+ },
+ Vec::new(),
+ Vec::new(),
+ Opcode::Call,
+ ),
+ "E800000000",
+ "call User { namespace: 0, index: 0 }",
+ ));
+
+ // ========================================================
+ // CallUnknown
+ fn call_unknown(rm: RegMem) -> Inst {
+ Inst::call_unknown(rm, Vec::new(), Vec::new(), Opcode::CallIndirect)
+ }
+
+ insns.push((call_unknown(RegMem::reg(rbp)), "FFD5", "call *%rbp"));
+ insns.push((call_unknown(RegMem::reg(r11)), "41FFD3", "call *%r11"));
+ insns.push((
+ call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
+ "FF94CE41010000",
+ "call *321(%rsi,%rcx,8)",
+ ));
+ insns.push((
+ call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
+ "41FF949241010000",
+ "call *321(%r10,%rdx,4)",
+ ));
+
+ // ========================================================
+ // Ret
+ insns.push((Inst::ret(), "C3", "ret"));
+
+ // ========================================================
+ // JmpKnown skipped for now
+
+ // ========================================================
+ // JmpCondSymm isn't a real instruction
+
+ // ========================================================
+ // JmpCond skipped for now
+
+ // ========================================================
+ // JmpCondCompound isn't a real instruction
+
+ // ========================================================
+ // JmpUnknown
+ insns.push((Inst::jmp_unknown(RegMem::reg(rbp)), "FFE5", "jmp *%rbp"));
+ insns.push((
+ Inst::jmp_unknown(RegMem::reg(r11)),
+ "41FFE3",
+ "jmp *%r11",
+ ));
+ insns.push((
+ Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
+ "FFA4CE41010000",
+ "jmp *321(%rsi,%rcx,8)",
+ ));
+ insns.push((
+ Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
+ "41FFA49241010000",
+ "jmp *321(%r10,%rdx,4)",
+ ));
+
+ // ========================================================
+ // XMM_CMP_RM_R
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm1), xmm2),
+ "0F2ED1",
+ "ucomiss %xmm1, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm0), xmm9),
+ "440F2EC8",
+ "ucomiss %xmm0, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm13), xmm4),
+ "66410F2EE5",
+ "ucomisd %xmm13, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm11), xmm12),
+ "66450F2EE3",
+ "ucomisd %xmm11, %xmm12",
+ ));
+
+ // ========================================================
+ // XMM_RM_R: float binary ops
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0),
+ "F30F58C1",
+ "addss %xmm1, %xmm0",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
+ "F3450F58EB",
+ "addss %xmm11, %xmm13",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(
+ SseOpcode::Addss,
+ RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)),
+ w_xmm0,
+ ),
+ "F3410F5844927B",
+ "addss 123(%r10,%rdx,4), %xmm0",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4),
+ "F2410F58E7",
+ "addsd %xmm15, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1),
+ "F30F5CC8",
+ "subss %xmm0, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
+ "F3410F5CCC",
+ "subss %xmm12, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(
+ SseOpcode::Subss,
+ RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)),
+ w_xmm10,
+ ),
+ "F3450F5C94C241010000",
+ "subss 321(%r10,%rax,8), %xmm10",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14),
+ "F2440F5CF5",
+ "subsd %xmm5, %xmm14",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4),
+ "F30F59E5",
+ "mulss %xmm5, %xmm4",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4),
+ "F20F59E5",
+ "mulsd %xmm5, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7),
+ "F3410F5EF8",
+ "divss %xmm8, %xmm7",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4),
+ "F20F5EE5",
+ "divsd %xmm5, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12),
+ "440F54E3",
+ "andps %xmm3, %xmm12",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11),
+ "440F55DC",
+ "andnps %xmm4, %xmm11",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15),
+ "440F56F9",
+ "orps %xmm1, %xmm15",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4),
+ "0F56E5",
+ "orps %xmm5, %xmm4",
+ ));
+
+ // ========================================================
+ // XMM_RM_R: Integer Packed
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5),
+ "66410FFCE9",
+ "paddb %xmm9, %xmm5",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6),
+ "660FFDF7",
+ "paddw %xmm7, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13),
+ "66450FFEEC",
+ "paddd %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8),
+ "66440FD4C1",
+ "paddq %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5),
+ "66410FECE9",
+ "paddsb %xmm9, %xmm5",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6),
+ "660FEDF7",
+ "paddsw %xmm7, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13),
+ "66450FDCEC",
+ "paddusb %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8),
+ "66440FDDC1",
+ "paddusw %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5),
+ "66410FE8E9",
+ "psubsb %xmm9, %xmm5",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6),
+ "660FE9F7",
+ "psubsw %xmm7, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13),
+ "66450FD8EC",
+ "psubusb %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8),
+ "66440FD9C1",
+ "psubusw %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
+ "66450FE0EC",
+ "pavgb %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8),
+ "66440FE3C1",
+ "pavgw %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9),
+ "66440FF8CD",
+ "psubb %xmm5, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7),
+ "660FF9FE",
+ "psubw %xmm6, %xmm7",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12),
+ "66450FFAE5",
+ "psubd %xmm13, %xmm12",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1),
+ "66410FFBC8",
+ "psubq %xmm8, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6),
+ "66410F3840F7",
+ "pmulld %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1),
+ "66410FD5CE",
+ "pmullw %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
+ "66450FF4C8",
+ "pmuludq %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
+ "66410F383CF7",
+ "pmaxsb %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
+ "66410FEEF7",
+ "pmaxsw %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
+ "66410F383DF7",
+ "pmaxsd %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
+ "66410FDECE",
+ "pmaxub %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
+ "66410F383ECE",
+ "pmaxuw %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
+ "66410F383FCE",
+ "pmaxud %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
+ "66450F3838C8",
+ "pminsb %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
+ "66450FEAC8",
+ "pminsw %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
+ "66450F3839C8",
+ "pminsd %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
+ "660FDAD3",
+ "pminub %xmm3, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
+ "660F383AD3",
+ "pminuw %xmm3, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
+ "660F383BD3",
+ "pminud %xmm3, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
+ "66410FEFD3",
+ "pxor %xmm11, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2),
+ "66410F3800D3",
+ "pshufb %xmm11, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(xmm11), w_xmm2),
+ "66410F63D3",
+ "packsswb %xmm11, %xmm2",
+ ));
+
+ // ========================================================
+ // XMM_RM_R: Integer Conversion
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8),
+ "440F5BC1",
+ "cvtdq2ps %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8),
+ "F3450F5BC1",
+ "cvttps2dq %xmm9, %xmm8",
+ ));
+
+ // XMM_Mov_R_M: float stores
+ insns.push((
+ Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),
+ "F3450F11BC2480000000",
+ "movss %xmm15, 128(%r12)",
+ ));
+ insns.push((
+ Inst::xmm_mov_r_m(SseOpcode::Movsd, xmm1, Amode::imm_reg(0, rsi)),
+ "F20F110E",
+ "movsd %xmm1, 0(%rsi)",
+ ));
+
+ // XmmUnary: moves and unary float ops
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2),
+ "F3410F10D5",
+ "movss %xmm13, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movsd, RegMem::reg(xmm0), w_xmm1),
+ "F20F10C8",
+ "movsd %xmm0, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(
+ SseOpcode::Movsd,
+ RegMem::mem(Amode::imm_reg(0, rsi)),
+ w_xmm2,
+ ),
+ "F20F1016",
+ "movsd 0(%rsi), %xmm2",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3),
+ "F2410F10DE",
+ "movsd %xmm14, %xmm3",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14),
+ "440F28F5",
+ "movaps %xmm5, %xmm14",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8),
+ "F3440F51C7",
+ "sqrtss %xmm7, %xmm8",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Sqrtsd, RegMem::reg(xmm1), w_xmm2),
+ "F20F51D1",
+ "sqrtsd %xmm1, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, RegMem::reg(xmm0), w_xmm1),
+ "F30F5AC8",
+ "cvtss2sd %xmm0, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, RegMem::reg(xmm1), w_xmm0),
+ "F20F5AC1",
+ "cvtsd2ss %xmm1, %xmm0",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1),
+ "660F381CCA",
+ "pabsb %xmm2, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0),
+ "660F381DC0",
+ "pabsw %xmm0, %xmm0",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11),
+ "66450F381EDA",
+ "pabsd %xmm10, %xmm11",
+ ));
+
+ // Xmm to int conversions, and conversely.
+
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movd, xmm0, w_rsi, OperandSize::Size32),
+ "660F7EC6",
+ "movd %xmm0, %esi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movq, xmm2, w_rdi, OperandSize::Size64),
+ "66480F7ED7",
+ "movq %xmm2, %rdi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttss2si, xmm0, w_rsi, OperandSize::Size32),
+ "F30F2CF0",
+ "cvttss2si %xmm0, %esi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttss2si, xmm0, w_rdi, OperandSize::Size64),
+ "F3480F2CF8",
+ "cvttss2si %xmm0, %rdi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttsd2si, xmm0, w_rax, OperandSize::Size32),
+ "F20F2CC0",
+ "cvttsd2si %xmm0, %eax",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttsd2si, xmm0, w_r15, OperandSize::Size64),
+ "F24C0F2CF8",
+ "cvttsd2si %xmm0, %r15",
+ ));
+
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Pmovmskb, xmm10, w_rax, OperandSize::Size32),
+ "66410FD7C2",
+ "pmovmskb %xmm10, %eax",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movmskps, xmm2, w_rax, OperandSize::Size32),
+ "0F50C2",
+ "movmskps %xmm2, %eax",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movmskpd, xmm0, w_rcx, OperandSize::Size32),
+ "660F50C8",
+ "movmskpd %xmm0, %ecx",
+ ));
+
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::reg(rax),
+ OperandSize::Size32,
+ w_xmm15,
+ ),
+ "66440F6EF8",
+ "movd %eax, %xmm15",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::mem(Amode::imm_reg(2, r10)),
+ OperandSize::Size32,
+ w_xmm9,
+ ),
+ "66450F6E4A02",
+ "movd 2(%r10), %xmm9",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::reg(rsi),
+ OperandSize::Size32,
+ w_xmm1,
+ ),
+ "660F6ECE",
+ "movd %esi, %xmm1",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movq,
+ RegMem::reg(rdi),
+ OperandSize::Size64,
+ w_xmm15,
+ ),
+ "664C0F6EFF",
+ "movq %rdi, %xmm15",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Cvtsi2ss,
+ RegMem::reg(rdi),
+ OperandSize::Size32,
+ w_xmm15,
+ ),
+ "F3440F2AFF",
+ "cvtsi2ss %edi, %xmm15",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Cvtsi2sd,
+ RegMem::reg(rsi),
+ OperandSize::Size64,
+ w_xmm1,
+ ),
+ "F2480F2ACE",
+ "cvtsi2sd %rsi, %xmm1",
+ ));
+
+ // ========================================================
+ // XmmRmi
+ insns.push((
+ Inst::xmm_rmi_reg(SseOpcode::Psraw, RegMemImm::reg(xmm10), w_xmm1),
+ "66410FE1CA",
+ "psraw %xmm10, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(31), w_xmm1),
+ "660F72F11F",
+ "pslld $31, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rmi_reg(SseOpcode::Psrlq, RegMemImm::imm(1), w_xmm3),
+ "660F73D301",
+ "psrlq $1, %xmm3",
+ ));
+
+ // ========================================================
+ // XmmRmRImm
+ insns.push((
+ Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false),
+ "660FC2CD02",
+ "cmppd $2, %xmm5, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false),
+ "410FC2FF00",
+ "cmpps $0, %xmm15, %xmm7",
+ ));
+
+ // ========================================================
+ // Pertaining to atomics.
+ let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
+ // `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing
+ // for retention of the apparently-redundant rex prefix in the 8-bit case.
+ let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into();
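+ // (For reference: REX is 0100WRXB, and with %rcx as base and %rsi as index neither X nor B is
+ // set, so any REX prefix seen in the 8-bit cases below comes solely from the byte register
+ // being stored.)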
+
+ // A general 8-bit case.
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: rbx,
+ dst: am1,
+ },
+ "F0410FB09C9241010000",
+ "lock cmpxchgb %bl, 321(%r10,%rdx,4)",
+ ));
+ // Check redundant rex retention in 8-bit cases.
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: rdx,
+ dst: am2.clone(),
+ },
+ "F00FB094F1C7CFFFFF",
+ "lock cmpxchgb %dl, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "F0400FB0B4F1C7CFFFFF",
+ "lock cmpxchgb %sil, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "F0440FB094F1C7CFFFFF",
+ "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: r15,
+ dst: am2.clone(),
+ },
+ "F0440FB0BCF1C7CFFFFF",
+ "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)",
+ ));
+ // 16 bit cases
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I16,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "66F00FB1B4F1C7CFFFFF",
+ "lock cmpxchgw %si, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I16,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "66F0440FB194F1C7CFFFFF",
+ "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)",
+ ));
+ // 32 bit cases
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I32,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "F00FB1B4F1C7CFFFFF",
+ "lock cmpxchgl %esi, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I32,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "F0440FB194F1C7CFFFFF",
+ "lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)",
+ ));
+ // 64 bit cases
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I64,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "F0480FB1B4F1C7CFFFFF",
+ "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I64,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "F04C0FB194F1C7CFFFFF",
+ "lock cmpxchgq %r10, -12345(%rcx,%rsi,8)",
+ ));
+
+ // AtomicRmwSeq
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I8, op: inst_common::AtomicRmwOp::Or, },
+ "490FB6014989C34D09D3F0450FB0190F85EFFFFFFF",
+ "atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, },
+ "490FB7014989C34D21D366F0450FB1190F85EEFFFFFF",
+ "atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, },
+ "418B014989C34D89D3F0450FB1190F85EFFFFFFF",
+ "atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, },
+ "498B014989C34D01D3F04D0FB1190F85EFFFFFFF",
+ "atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+
+ // Fence
+ insns.push((
+ Inst::Fence {
+ kind: FenceKind::MFence,
+ },
+ "0FAEF0",
+ "mfence",
+ ));
+ insns.push((
+ Inst::Fence {
+ kind: FenceKind::LFence,
+ },
+ "0FAEE8",
+ "lfence",
+ ));
+ insns.push((
+ Inst::Fence {
+ kind: FenceKind::SFence,
+ },
+ "0FAEF8",
+ "sfence",
+ ));
+
+ // ========================================================
+ // Misc instructions.
+
+ insns.push((Inst::Hlt, "CC", "hlt"));
+
+ let trap_code = TrapCode::UnreachableCodeReached;
+ insns.push((Inst::Ud2 { trap_code }, "0F0B", "ud2 unreachable"));
+
+ // ========================================================
+ // Actually run the tests!
+ let flags = settings::Flags::new(settings::builder());
+
+ use crate::settings::Configurable;
+ let mut isa_flag_builder = x64::settings::builder();
+ isa_flag_builder.enable("has_ssse3").unwrap();
+ isa_flag_builder.enable("has_sse41").unwrap();
+ let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
+
+ let rru = regs::create_reg_universe_systemv(&flags);
+ let emit_info = EmitInfo::new(flags, isa_flags);
+ for (insn, expected_encoding, expected_printing) in insns {
+ // Check the printed text is as expected.
+ let actual_printing = insn.show_rru(Some(&rru));
+ assert_eq!(expected_printing, actual_printing);
+ let mut sink = test_utils::TestCodeSink::new();
+ let mut buffer = MachBuffer::new();
+
+ insn.emit(&mut buffer, &emit_info, &mut Default::default());
+
+ // Allow one label just after the instruction (so the offset is 0).
+ let label = buffer.get_label();
+ buffer.bind_label(label);
+
+ let buffer = buffer.finish();
+ buffer.emit(&mut sink);
+ let actual_encoding = &sink.stringify();
+ assert_eq!(expected_encoding, actual_encoding, "{}", expected_printing);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs
new file mode 100644
index 0000000000..1172b22eff
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs
@@ -0,0 +1,2733 @@
+//! This module defines x86_64-specific machine instruction types.
+
+use crate::binemit::{CodeOffset, StackMap};
+use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type};
+use crate::isa::x64::settings as x64_settings;
+use crate::machinst::*;
+use crate::{settings, settings::Flags, CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use regalloc::{
+ PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
+ RegUsageMapper, SpillSlot, VirtualReg, Writable,
+};
+use smallvec::SmallVec;
+use std::fmt;
+use std::string::{String, ToString};
+
+pub mod args;
+mod emit;
+#[cfg(test)]
+mod emit_tests;
+pub mod regs;
+pub mod unwind;
+
+use args::*;
+use regs::{create_reg_universe_systemv, show_ireg_sized};
+
+//=============================================================================
+// Instructions (top level): definition
+
+// Don't build these directly. Instead use the Inst:: functions to create them.
+
+/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
+#[derive(Clone)]
+pub enum Inst {
+ /// Nops of various sizes, including zero.
+ Nop { len: u8 },
+
+ // =====================================
+ // Integer instructions.
+ /// Integer arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
+ AluRmiR {
+ is_64: bool,
+ op: AluRmiROpcode,
+ src: RegMemImm,
+ dst: Writable<Reg>,
+ },
+
+ /// Instructions on GPRs that only read src and define dst (dst is not modified): bsr, etc.
+ UnaryRmR {
+ size: u8, // 2, 4 or 8
+ op: UnaryRmROpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// Bitwise not
+ Not {
+ size: u8, // 1, 2, 4 or 8
+ src: Writable<Reg>,
+ },
+
+ /// Integer negation
+ Neg {
+ size: u8, // 1, 2, 4 or 8
+ src: Writable<Reg>,
+ },
+
+ /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
+ Div {
+ size: u8, // 1, 2, 4 or 8
+ signed: bool,
+ divisor: RegMem,
+ },
+
+ /// The high bits (RDX) of an (un)signed multiply: RDX:RAX := RAX * rhs.
+ MulHi { size: u8, signed: bool, rhs: RegMem },
+
+ /// A synthetic sequence to implement the right inline checks for remainder and division,
+ /// assuming the dividend is in %rax.
+ /// Puts the result back into %rax for a division, or into %rdx for a remainder, mimicking
+ /// what the div instruction does.
+ /// The generated code sequence is described in the emit's function match arm for this
+ /// instruction.
+ ///
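+ /// As a rough sketch only (the emit code is authoritative): trap if the divisor is zero; in
+ /// the signed case, special-case a divisor of -1 (the INT_MIN / -1 quotient traps, and the
+ /// corresponding remainder is 0); otherwise sign- or zero-extend the dividend into %rdx and
+ /// issue the native idiv/div.
+ ///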
+ /// Note: %rdx is marked as modified by this instruction, to avoid an early clobber problem
+ /// with the temporary and divisor registers. Make sure to zero %rdx right before this
+ /// instruction, or you might run into regalloc failures where %rdx is live before its first
+ /// def!
+ CheckedDivOrRemSeq {
+ kind: DivOrRemKind,
+ size: u8,
+ /// The divisor operand. Note it's marked as modified so that it gets assigned a register
+ /// different from the temporary.
+ divisor: Writable<Reg>,
+ tmp: Option<Writable<Reg>>,
+ },
+
+ /// Sign-extends the value in rax into rdx (cwd cdq cqo), or the value in al into ah (cbw),
+ /// based on the sign of the source.
+ SignExtendData {
+ size: u8, // 1, 2, 4 or 8
+ },
+
+ /// Constant materialization: (imm32 imm64) reg.
+ /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64.
+ Imm {
+ dst_is_64: bool,
+ simm64: u64,
+ dst: Writable<Reg>,
+ },
+
+ /// GPR to GPR move: mov (64 32) reg reg.
+ MovRR {
+ is_64: bool,
+ src: Reg,
+ dst: Writable<Reg>,
+ },
+
+ /// Zero-extended loads, except for 64 bits: movz (bl bq wl wq lq) addr reg.
+ /// Note that the lq variant doesn't really exist since the default zero-extend rule makes it
+ /// unnecessary. For that case we emit the equivalent "movl AM, reg32".
+ MovzxRmR {
+ ext_mode: ExtMode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// A plain 64-bit integer load, since MovzxRmR can't represent that.
+ Mov64MR {
+ src: SyntheticAmode,
+ dst: Writable<Reg>,
+ },
+
+ /// Loads the memory address of addr into dst.
+ LoadEffectiveAddress {
+ addr: SyntheticAmode,
+ dst: Writable<Reg>,
+ },
+
+ /// Sign-extended loads and moves: movs (bl bq wl wq lq) addr reg.
+ MovsxRmR {
+ ext_mode: ExtMode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// Integer stores: mov (b w l q) reg addr.
+ MovRM {
+ size: u8, // 1, 2, 4 or 8.
+ src: Reg,
+ dst: SyntheticAmode,
+ },
+
+ /// Shifts and rotates: (shl shr sar rol ror) (b w l q) imm reg.
+ ShiftR {
+ size: u8, // 1, 2, 4 or 8
+ kind: ShiftKind,
+ /// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
+ num_bits: Option<u8>,
+ dst: Writable<Reg>,
+ },
+
+ /// XMM (vector) shifts by an immediate or an XMM register: psll*, psrl*, psra*.
+ XmmRmiReg {
+ opcode: SseOpcode,
+ src: RegMemImm,
+ dst: Writable<Reg>,
+ },
+
+ /// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg.
+ CmpRmiR {
+ size: u8, // 1, 2, 4 or 8
+ src: RegMemImm,
+ dst: Reg,
+ },
+
+ /// Materializes the requested condition code in the destination reg.
+ Setcc { cc: CC, dst: Writable<Reg> },
+
+ /// Integer conditional move.
+ /// Overwrites the destination register.
+ Cmove {
+ /// Possible values are 2, 4 or 8. Checked in the related factory.
+ size: u8,
+ cc: CC,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ // =====================================
+ // Stack manipulation.
+ /// pushq (reg addr imm)
+ Push64 { src: RegMemImm },
+
+ /// popq reg
+ Pop64 { dst: Writable<Reg> },
+
+ // =====================================
+ // Floating-point operations.
+ /// XMM (scalar or vector) binary op: (add sub and or xor mul adc? sbb?) (32 64) (reg addr) reg
+ XmmRmR {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt,
+ /// etc.
+ ///
+ /// This differs from XmmRmR in that the dst register of XmmUnaryRmR is not used in the
+ /// computation of the instruction's result and so does not have to hold a previously valid
+ /// value. This is characteristic of mov instructions.
+ XmmUnaryRmR {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq
+ XmmMovRM {
+ op: SseOpcode,
+ src: Reg,
+ dst: SyntheticAmode,
+ },
+
+ /// XMM (vector) unary op (to move a constant value into an xmm register): movups
+ XmmLoadConst {
+ src: VCodeConstant,
+ dst: Writable<Reg>,
+ ty: Type,
+ },
+
+ /// XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si
+ XmmToGpr {
+ op: SseOpcode,
+ src: Reg,
+ dst: Writable<Reg>,
+ dst_size: OperandSize,
+ },
+
+ /// XMM (scalar) unary op (from integer to float reg): movd, movq, cvtsi2s{s,d}
+ GprToXmm {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ src_size: OperandSize,
+ },
+
+ /// Converts an unsigned int64 to a float32/float64.
+ CvtUint64ToFloatSeq {
+ /// Is the target a 64-bit or a 32-bit register?
+ to_f64: bool,
+ /// A copy of the source register, fed by lowering. It is marked as modified during
+ /// register allocation to make sure that the temporary registers differ from the src
+ /// register, since both registers are live at the same time in the generated code
+ /// sequence.
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr1: Writable<Reg>,
+ tmp_gpr2: Writable<Reg>,
+ },
+
+ /// Converts a scalar xmm to a signed int32/int64.
+ CvtFloatToSintSeq {
+ dst_size: OperandSize,
+ src_size: OperandSize,
+ is_saturating: bool,
+ /// A copy of the source register, fed by lowering. It is marked as modified during
+ /// register allocation to make sure that the temporary xmm register differs from the src
+ /// register, since both registers are live at the same time in the generated code
+ /// sequence.
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ },
+
+ /// Converts a scalar xmm to an unsigned int32/int64.
+ CvtFloatToUintSeq {
+ src_size: OperandSize,
+ dst_size: OperandSize,
+ is_saturating: bool,
+ /// A copy of the source register, fed by lowering, reused as a temporary. It is marked as
+ /// modified during register allocation to make sure that the temporary xmm register
+ /// differs from the src register, since both registers are live at the same time in the
+ /// generated code sequence.
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ },
+
+ /// A sequence to compute min/max with the proper NaN semantics for xmm registers.
+ XmmMinMaxSeq {
+ size: OperandSize,
+ is_min: bool,
+ lhs: Reg,
+ rhs_dst: Writable<Reg>,
+ },
+
+ /// XMM (scalar) conditional move.
+ /// Overwrites the destination register if cc is set.
+ XmmCmove {
+ /// Whether the cmove moves 32 or 64 bits of data.
+ is_64: bool,
+ cc: CC,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// Float comparisons/tests: ucomiss/ucomisd (reg addr) reg.
+ XmmCmpRmR {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Reg,
+ },
+
+ /// A binary XMM instruction with an 8-bit immediate: e.g. cmp (ps pd) imm (reg addr) reg
+ XmmRmRImm {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ imm: u8,
+ is64: bool,
+ },
+
+ // =====================================
+ // Control flow instructions.
+ /// Direct call: call simm32.
+ CallKnown {
+ dest: ExternalName,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ },
+
+ /// Indirect call: callq (reg mem).
+ CallUnknown {
+ dest: RegMem,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ },
+
+ /// Return.
+ Ret,
+
+ /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+ /// inserted there.
+ EpiloguePlaceholder,
+
+ /// Jump to a known target: jmp simm32.
+ JmpKnown { dst: MachLabel },
+
+ /// One-way conditional branch: jcond cond target.
+ ///
+ /// This instruction is useful when we have conditional jumps depending on more than two
+ /// conditions; see, for instance, the lowering of Brz/brnz with Fcmp inputs.
+ ///
+ /// A note of caution: in contexts where the branch target is another block, this has to be the
+ /// same successor as the one specified in the terminator branch of the current block.
+ /// Otherwise, this might confuse register allocation by creating new invisible edges.
+ JmpIf { cc: CC, taken: MachLabel },
+
+ /// Two-way conditional branch: jcond cond target target.
+ /// Emitted as a compound sequence; the MachBuffer will shrink it as appropriate.
+ JmpCond {
+ cc: CC,
+ taken: MachLabel,
+ not_taken: MachLabel,
+ },
+
+ /// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale).
+ /// The generated code sequence is described in the emit's function match arm for this
+ /// instruction.
+ /// See the comment in lowering about the temporaries' signedness.
+ JmpTableSeq {
+ idx: Reg,
+ tmp1: Writable<Reg>,
+ tmp2: Writable<Reg>,
+ default_target: MachLabel,
+ targets: Vec<MachLabel>,
+ targets_for_term: Vec<MachLabel>,
+ },
+
+ /// Indirect jump: jmpq (reg mem).
+ JmpUnknown { target: RegMem },
+
+ /// Traps if the condition code is set.
+ TrapIf { cc: CC, trap_code: TrapCode },
+
+ /// A debug trap.
+ Hlt,
+
+ /// An instruction that will always trigger the illegal instruction exception.
+ Ud2 { trap_code: TrapCode },
+
+ /// Loads an external symbol in a register, with a relocation: movabsq $name, dst
+ LoadExtName {
+ dst: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i64,
+ },
+
+ // =====================================
+ // Instructions pertaining to atomic memory accesses.
+ /// A standard (native) `lock cmpxchg src, (amode)`, with register conventions:
+ ///
+ /// `dst` (read) address
+ /// `src` (read) replacement value
+ /// %rax (modified) in: expected value, out: value that was actually at `dst`
+ /// %rflags is written. Do not assume anything about it after the instruction.
+ ///
+ /// The instruction "succeeded" iff the lowest `ty` bits of %rax afterwards are the same as
+ /// they were before.
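+ ///
+ /// For instance (an illustrative sketch only; `expected`, `%rbx` and `(%rdi)` stand in for the
+ /// caller's operands), a 64-bit compare-and-swap looks like:
+ ///
+ /// ```text
+ ///     movq $expected, %rax
+ ///     lock cmpxchgq %rbx, (%rdi)   # ZF=1 and memory updated iff %rax matched;
+ ///                                  # otherwise %rax now holds the value that was observed
+ /// ```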
+ LockCmpxchg {
+ ty: Type, // I8, I16, I32 or I64
+ src: Reg,
+ dst: SyntheticAmode,
+ },
+
+ /// A synthetic instruction, based on a loop around a native `lock cmpxchg` instruction.
+ /// This atomically modifies a value in memory and returns the old value. The sequence
+ /// consists of an initial "normal" load from `dst`, followed by a loop which computes the
+ /// new value and tries to compare-and-swap ("CAS") it into `dst`, using the native
+    /// instruction `lock cmpxchg{b,w,l,q}`. The loop iterates until the CAS is successful.
+ /// If there is no contention, there will be only one pass through the loop body. The
+ /// sequence does *not* perform any explicit memory fence instructions
+ /// (mfence/sfence/lfence).
+ ///
+ /// Note that the transaction is atomic in the sense that, as observed by some other thread,
+ /// `dst` either has the initial or final value, but no other. It isn't atomic in the sense
+ /// of guaranteeing that no other thread writes to `dst` in between the initial load and the
+ /// CAS -- but that would cause the CAS to fail unless the other thread's last write before
+ /// the CAS wrote the same value that was already there. In other words, this
+ /// implementation suffers (unavoidably) from the A-B-A problem.
+ ///
+ /// This instruction sequence has fixed register uses as follows:
+ ///
+ /// %r9 (read) address
+ /// %r10 (read) second operand for `op`
+ /// %r11 (written) scratch reg; value afterwards has no meaning
+ /// %rax (written) the old value at %r9
+ /// %rflags is written. Do not assume anything about it after the instruction.
+ AtomicRmwSeq {
+ ty: Type, // I8, I16, I32 or I64
+ op: inst_common::AtomicRmwOp,
+ },
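+    // An illustrative sketch of the generated sequence (the authoritative version lives in the
+    // emit function's match arm for this instruction):
+    //
+    //      mov{zbq,zwq,zlq,q}    (%r9), %rax     ; load the current value
+    //    again:
+    //      mov                   %rax, %r11
+    //      <op>                  %r10, %r11      ; compute the candidate new value
+    //      lock cmpxchg{b,w,l,q} %r11, (%r9)     ; try to install it; %rax = expected value
+    //      jnz again                             ; another thread got there first; retry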
+
+ /// A memory fence (mfence, lfence or sfence).
+ Fence { kind: FenceKind },
+
+ // =====================================
+ // Meta-instructions generating no code.
+    /// Marker, no-op in generated code: the SP "virtual offset" is adjusted. This
+    /// controls how SyntheticAmode::NominalSPOffset args are lowered.
+ VirtualSPOffsetAdj { offset: i64 },
+
+ /// Provides a way to tell the register allocator that the upcoming sequence of instructions
+ /// will overwrite `dst` so it should be considered as a `def`; use this with care.
+ ///
+ /// This is useful when we have a sequence of instructions whose register usages are nominally
+ /// `mod`s, but such that the combination of operations creates a result that is independent of
+ /// the initial register value. It's thus semantically a `def`, not a `mod`, when all the
+ /// instructions are taken together, so we want to ensure the register is defined (its
+ /// live-range starts) prior to the sequence to keep analyses happy.
+ ///
+ /// One alternative would be a compound instruction that somehow encapsulates the others and
+ /// reports its own `def`s/`use`s/`mod`s; this adds complexity (the instruction list is no
+ /// longer flat) and requires knowledge about semantics and initial-value independence anyway.
+ XmmUninitializedValue { dst: Writable<Reg> },
+}
+
+pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
+ let xs = x as i64;
+ xs == ((xs << 32) >> 32)
+}
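+
+// A small illustrative check of the helper above (the values are chosen purely as examples): a
+// u64 round-trips through a 32-bit immediate exactly when sign-extending its low 32 bits
+// reproduces it, i.e. when bits 63..31 are all zeroes or all ones.
+#[cfg(test)]
+#[test]
+fn low32_sign_extension_examples() {
+    assert!(low32_will_sign_extend_to_64(0x7fff_ffff));
+    assert!(!low32_will_sign_extend_to_64(0x8000_0000));
+    assert!(low32_will_sign_extend_to_64(0xffff_ffff_8000_0000));
+    assert!(!low32_will_sign_extend_to_64(0x1_0000_0000));
+}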
+
+impl Inst {
+ fn isa_requirement(&self) -> Option<InstructionSet> {
+ match self {
+            // These instructions are part of the baseline x86-64 / SSE2 instruction set, which
+            // Cranelift requires as a minimum, so they don't have to be checked.
+ Inst::AluRmiR { .. }
+ | Inst::AtomicRmwSeq { .. }
+ | Inst::CallKnown { .. }
+ | Inst::CallUnknown { .. }
+ | Inst::CheckedDivOrRemSeq { .. }
+ | Inst::Cmove { .. }
+ | Inst::CmpRmiR { .. }
+ | Inst::CvtFloatToSintSeq { .. }
+ | Inst::CvtFloatToUintSeq { .. }
+ | Inst::CvtUint64ToFloatSeq { .. }
+ | Inst::Div { .. }
+ | Inst::EpiloguePlaceholder
+ | Inst::Fence { .. }
+ | Inst::Hlt
+ | Inst::Imm { .. }
+ | Inst::JmpCond { .. }
+ | Inst::JmpIf { .. }
+ | Inst::JmpKnown { .. }
+ | Inst::JmpTableSeq { .. }
+ | Inst::JmpUnknown { .. }
+ | Inst::LoadEffectiveAddress { .. }
+ | Inst::LoadExtName { .. }
+ | Inst::LockCmpxchg { .. }
+ | Inst::Mov64MR { .. }
+ | Inst::MovRM { .. }
+ | Inst::MovRR { .. }
+ | Inst::MovsxRmR { .. }
+ | Inst::MovzxRmR { .. }
+ | Inst::MulHi { .. }
+ | Inst::Neg { .. }
+ | Inst::Not { .. }
+ | Inst::Nop { .. }
+ | Inst::Pop64 { .. }
+ | Inst::Push64 { .. }
+ | Inst::Ret
+ | Inst::Setcc { .. }
+ | Inst::ShiftR { .. }
+ | Inst::SignExtendData { .. }
+ | Inst::TrapIf { .. }
+ | Inst::Ud2 { .. }
+ | Inst::UnaryRmR { .. }
+ | Inst::VirtualSPOffsetAdj { .. }
+ | Inst::XmmCmove { .. }
+ | Inst::XmmCmpRmR { .. }
+ | Inst::XmmLoadConst { .. }
+ | Inst::XmmMinMaxSeq { .. }
+ | Inst::XmmUninitializedValue { .. } => None,
+
+ // These use dynamic SSE opcodes.
+ Inst::GprToXmm { op, .. }
+ | Inst::XmmMovRM { op, .. }
+ | Inst::XmmRmiReg { opcode: op, .. }
+ | Inst::XmmRmR { op, .. }
+ | Inst::XmmRmRImm { op, .. }
+ | Inst::XmmToGpr { op, .. }
+ | Inst::XmmUnaryRmR { op, .. } => Some(op.available_from()),
+ }
+ }
+}
+
+// Handy constructors for Insts.
+
+impl Inst {
+ pub(crate) fn nop(len: u8) -> Self {
+ debug_assert!(len <= 16);
+ Self::Nop { len }
+ }
+
+ pub(crate) fn alu_rmi_r(
+ is_64: bool,
+ op: AluRmiROpcode,
+ src: RegMemImm,
+ dst: Writable<Reg>,
+ ) -> Self {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Self::AluRmiR {
+ is_64,
+ op,
+ src,
+ dst,
+ }
+ }
+
+ pub(crate) fn unary_rm_r(
+ size: u8,
+ op: UnaryRmROpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ ) -> Self {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2);
+ Self::UnaryRmR { size, op, src, dst }
+ }
+
+ pub(crate) fn not(size: u8, src: Writable<Reg>) -> Inst {
+ debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::Not { size, src }
+ }
+
+ pub(crate) fn neg(size: u8, src: Writable<Reg>) -> Inst {
+ debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::Neg { size, src }
+ }
+
+ pub(crate) fn div(size: u8, signed: bool, divisor: RegMem) -> Inst {
+ divisor.assert_regclass_is(RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::Div {
+ size,
+ signed,
+ divisor,
+ }
+ }
+
+ pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst {
+ rhs.assert_regclass_is(RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::MulHi { size, signed, rhs }
+ }
+
+ pub(crate) fn checked_div_or_rem_seq(
+ kind: DivOrRemKind,
+ size: u8,
+ divisor: Writable<Reg>,
+ tmp: Option<Writable<Reg>>,
+ ) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(divisor.to_reg().get_class() == RegClass::I64);
+ debug_assert!(tmp
+ .map(|tmp| tmp.to_reg().get_class() == RegClass::I64)
+ .unwrap_or(true));
+ Inst::CheckedDivOrRemSeq {
+ kind,
+ size,
+ divisor,
+ tmp,
+ }
+ }
+
+ pub(crate) fn sign_extend_data(size: u8) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::SignExtendData { size }
+ }
+
+ pub(crate) fn imm(size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+        // Try to generate a 32-bit immediate when the upper 32 bits are zero: a 32-bit `movl`
+        // implicitly zero-extends into the upper half, so it produces the same 64-bit value.
+ let dst_is_64 = size == OperandSize::Size64 && simm64 > u32::max_value() as u64;
+ Inst::Imm {
+ dst_is_64,
+ simm64,
+ dst,
+ }
+ }
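+
+    // For instance (illustrative values): `Inst::imm(OperandSize::Size64, 0xffff_ffff, dst)` can
+    // use the 32-bit `movl` form, since zero-extension already yields the full 64-bit value,
+    // whereas `Inst::imm(OperandSize::Size64, 0x1_0000_0000, dst)` requires `movabsq`.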
+
+ pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
+ debug_assert!(src.get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::MovRR { is_64, src, dst }
+ }
+
+ // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level)
+ pub(crate) fn xmm_mov(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmUnaryRmR { op, src, dst }
+ }
+
+ pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable<Reg>, ty: Type) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ debug_assert!(ty.is_vector() && ty.bits() == 128);
+ Inst::XmmLoadConst { src, dst, ty }
+ }
+
+    /// Convenience helper for unary float operations.
+ pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmUnaryRmR { op, src, dst }
+ }
+
+ pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmRmR { op, src, dst }
+ }
+
+ pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self {
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmUninitializedValue { dst }
+ }
+
+ pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
+ debug_assert!(src.get_class() == RegClass::V128);
+ Inst::XmmMovRM {
+ op,
+ src,
+ dst: dst.into(),
+ }
+ }
+
+ pub(crate) fn xmm_to_gpr(
+ op: SseOpcode,
+ src: Reg,
+ dst: Writable<Reg>,
+ dst_size: OperandSize,
+ ) -> Inst {
+ debug_assert!(src.get_class() == RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::XmmToGpr {
+ op,
+ src,
+ dst,
+ dst_size,
+ }
+ }
+
+ pub(crate) fn gpr_to_xmm(
+ op: SseOpcode,
+ src: RegMem,
+ src_size: OperandSize,
+ dst: Writable<Reg>,
+ ) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::GprToXmm {
+ op,
+ src,
+ dst,
+ src_size,
+ }
+ }
+
+ pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.get_class() == RegClass::V128);
+ Inst::XmmCmpRmR { op, src, dst }
+ }
+
+ pub(crate) fn cvt_u64_to_float_seq(
+ to_f64: bool,
+ src: Writable<Reg>,
+ tmp_gpr1: Writable<Reg>,
+ tmp_gpr2: Writable<Reg>,
+ dst: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(src.to_reg().get_class() == RegClass::I64);
+ debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64);
+ debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::CvtUint64ToFloatSeq {
+ src,
+ dst,
+ tmp_gpr1,
+ tmp_gpr2,
+ to_f64,
+ }
+ }
+
+ pub(crate) fn cvt_float_to_sint_seq(
+ src_size: OperandSize,
+ dst_size: OperandSize,
+ is_saturating: bool,
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(src.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::CvtFloatToSintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ }
+ }
+
+ pub(crate) fn cvt_float_to_uint_seq(
+ src_size: OperandSize,
+ dst_size: OperandSize,
+ is_saturating: bool,
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(src.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::CvtFloatToUintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ }
+ }
+
+ pub(crate) fn xmm_min_max_seq(
+ size: OperandSize,
+ is_min: bool,
+ lhs: Reg,
+ rhs_dst: Writable<Reg>,
+ ) -> Inst {
+ debug_assert_eq!(lhs.get_class(), RegClass::V128);
+ debug_assert_eq!(rhs_dst.to_reg().get_class(), RegClass::V128);
+ Inst::XmmMinMaxSeq {
+ size,
+ is_min,
+ lhs,
+ rhs_dst,
+ }
+ }
+
+ pub(crate) fn xmm_rm_r_imm(
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ imm: u8,
+ is64: bool,
+ ) -> Inst {
+ Inst::XmmRmRImm {
+ op,
+ src,
+ dst,
+ imm,
+ is64,
+ }
+ }
+
+ pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::MovzxRmR { ext_mode, src, dst }
+ }
+
+ pub(crate) fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmRmiReg { opcode, src, dst }
+ }
+
+ pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::MovsxRmR { ext_mode, src, dst }
+ }
+
+ pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Mov64MR {
+ src: src.into(),
+ dst,
+ }
+ }
+
+    /// A convenience function for using a `RegMem` as the source of a move.
+ pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ match src {
+ RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst),
+ RegMem::Mem { addr } => Self::mov64_m_r(addr, dst),
+ }
+ }
+
+ pub(crate) fn mov_r_m(
+ size: u8, // 1, 2, 4 or 8
+ src: Reg,
+ dst: impl Into<SyntheticAmode>,
+ ) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(src.get_class() == RegClass::I64);
+ Inst::MovRM {
+ size,
+ src,
+ dst: dst.into(),
+ }
+ }
+
+ pub(crate) fn lea(addr: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::LoadEffectiveAddress {
+ addr: addr.into(),
+ dst,
+ }
+ }
+
+ pub(crate) fn shift_r(
+ size: u8,
+ kind: ShiftKind,
+ num_bits: Option<u8>,
+ dst: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(if let Some(num_bits) = num_bits {
+ num_bits < size * 8
+ } else {
+ true
+ });
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::ShiftR {
+ size,
+ kind,
+ num_bits,
+ dst,
+ }
+ }
+
+ /// Does a comparison of dst - src for operands of size `size`, as stated by the machine
+ /// instruction semantics. Be careful with the order of parameters!
+ pub(crate) fn cmp_rmi_r(
+ size: u8, // 1, 2, 4 or 8
+ src: RegMemImm,
+ dst: Reg,
+ ) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(dst.get_class() == RegClass::I64);
+ Inst::CmpRmiR { size, src, dst }
+ }
+
+ pub(crate) fn trap(trap_code: TrapCode) -> Inst {
+        Inst::Ud2 { trap_code }
+ }
+
+ pub(crate) fn setcc(cc: CC, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Setcc { cc, dst }
+ }
+
+ pub(crate) fn cmove(size: u8, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Cmove { size, cc, src, dst }
+ }
+
+ pub(crate) fn xmm_cmove(is_64: bool, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmCmove {
+ is_64,
+ cc,
+ src,
+ dst,
+ }
+ }
+
+ pub(crate) fn push64(src: RegMemImm) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ Inst::Push64 { src }
+ }
+
+ pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Pop64 { dst }
+ }
+
+ pub(crate) fn call_known(
+ dest: ExternalName,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ ) -> Inst {
+ Inst::CallKnown {
+ dest,
+ uses,
+ defs,
+ opcode,
+ }
+ }
+
+ pub(crate) fn call_unknown(
+ dest: RegMem,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ ) -> Inst {
+ dest.assert_regclass_is(RegClass::I64);
+ Inst::CallUnknown {
+ dest,
+ uses,
+ defs,
+ opcode,
+ }
+ }
+
+ pub(crate) fn ret() -> Inst {
+ Inst::Ret
+ }
+
+ pub(crate) fn epilogue_placeholder() -> Inst {
+ Inst::EpiloguePlaceholder
+ }
+
+ pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
+ Inst::JmpKnown { dst }
+ }
+
+ pub(crate) fn jmp_if(cc: CC, taken: MachLabel) -> Inst {
+ Inst::JmpIf { cc, taken }
+ }
+
+ pub(crate) fn jmp_cond(cc: CC, taken: MachLabel, not_taken: MachLabel) -> Inst {
+ Inst::JmpCond {
+ cc,
+ taken,
+ not_taken,
+ }
+ }
+
+ pub(crate) fn jmp_unknown(target: RegMem) -> Inst {
+ target.assert_regclass_is(RegClass::I64);
+ Inst::JmpUnknown { target }
+ }
+
+ pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
+ Inst::TrapIf { cc, trap_code }
+ }
+
+ /// Choose which instruction to use for loading a register value from memory. For loads smaller
+ /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
+ /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
+ pub(crate) fn load(
+ ty: Type,
+ from_addr: impl Into<SyntheticAmode>,
+ to_reg: Writable<Reg>,
+ ext_kind: ExtKind,
+ ) -> Inst {
+ let rc = to_reg.to_reg().get_class();
+ match rc {
+ RegClass::I64 => {
+ let ext_mode = match ty.bytes() {
+ 1 => Some(ExtMode::BQ),
+ 2 => Some(ExtMode::WQ),
+ 4 => Some(ExtMode::LQ),
+ 8 => None,
+ _ => unreachable!("the type should never use a scalar load: {}", ty),
+ };
+ if let Some(ext_mode) = ext_mode {
+ // Values smaller than 64 bits must be extended in some way.
+ match ext_kind {
+ ExtKind::SignExtend => {
+ Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
+ }
+ ExtKind::ZeroExtend => {
+ Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
+ }
+ ExtKind::None => panic!(
+ "expected an extension kind for extension mode: {:?}",
+ ext_mode
+ ),
+ }
+ } else {
+ // 64-bit values can be moved directly.
+ Inst::mov64_m_r(from_addr, to_reg)
+ }
+ }
+ RegClass::V128 => {
+ let opcode = match ty {
+ types::F32 => SseOpcode::Movss,
+ types::F64 => SseOpcode::Movsd,
+ types::F32X4 => SseOpcode::Movups,
+ types::F64X2 => SseOpcode::Movupd,
+ _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
+ _ => unimplemented!("unable to load type: {}", ty),
+ };
+ Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg)
+ }
+ _ => panic!("unable to generate load for register class: {:?}", rc),
+ }
+ }
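+
+    // As an example of the selection above (illustrative only): an I32 load with
+    // `ExtKind::ZeroExtend` becomes a `movzx`-style load with `ExtMode::LQ`, an I64 load becomes
+    // a plain 64-bit `movq`, and a 128-bit vector load such as F32X4 becomes `movups`.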
+
+ /// Choose which instruction to use for storing a register value to memory.
+ pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
+ let rc = from_reg.get_class();
+ match rc {
+ RegClass::I64 => {
+ // Always store the full register, to ensure that the high bits are properly set
+ // when doing a full reload.
+ Inst::mov_r_m(8 /* bytes */, from_reg, to_addr)
+ }
+ RegClass::V128 => {
+ let opcode = match ty {
+ types::F32 => SseOpcode::Movss,
+ types::F64 => SseOpcode::Movsd,
+ types::F32X4 => SseOpcode::Movups,
+ types::F64X2 => SseOpcode::Movupd,
+ _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
+ _ => unimplemented!("unable to store type: {}", ty),
+ };
+ Inst::xmm_mov_r_m(opcode, from_reg, to_addr)
+ }
+ _ => panic!("unable to generate store for register class: {:?}", rc),
+ }
+ }
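+
+    // For example (illustrative only): an I8 or I32 store still emits a full 8-byte integer
+    // store, an F64 store becomes `movsd`, and a 128-bit vector store such as I32X4 becomes
+    // `movdqu`.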
+}
+
+// Inst helpers.
+
+impl Inst {
+ /// In certain cases, instructions of this format can act as a definition of an XMM register,
+ /// producing a value that is independent of its initial value.
+ ///
+ /// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
+ /// itself will generate all ones as a result, regardless of its value. From the register
+ /// allocator's point of view, we should (i) record the first register, which is normally a
+ /// mod, as a def instead; and (ii) not record the second register as a use, because it is the
+ /// same as the first register (already handled).
+ fn produces_const(&self) -> bool {
+ match self {
+ Self::AluRmiR { op, src, dst, .. } => {
+ src.to_reg() == Some(dst.to_reg())
+ && (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
+ }
+
+ Self::XmmRmR { op, src, dst, .. } => {
+ src.to_reg() == Some(dst.to_reg())
+ && (*op == SseOpcode::Xorps
+ || *op == SseOpcode::Xorpd
+ || *op == SseOpcode::Pxor
+ || *op == SseOpcode::Pcmpeqb
+ || *op == SseOpcode::Pcmpeqw
+ || *op == SseOpcode::Pcmpeqd
+ || *op == SseOpcode::Pcmpeqq)
+ }
+
+ Self::XmmRmRImm {
+ op, src, dst, imm, ..
+ } => {
+ src.to_reg() == Some(dst.to_reg())
+ && (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
+ && *imm == FcmpImm::Equal.encode()
+ }
+
+ _ => false,
+ }
+ }
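+
+    // Concretely (an illustrative reading of the cases above): `xorq %rax, %rax` always produces
+    // zero, and `pcmpeqd %xmm0, %xmm0` (or `cmpps`/`cmppd` with the `Equal` immediate against the
+    // same register) always produces all-ones, whatever the register held beforehand; in both
+    // cases the destination can be treated as a pure def.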
+
+ /// Choose which instruction to use for comparing two values for equality.
+ pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to),
+ types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to),
+ types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to),
+ types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to),
+ types::F32X4 => {
+ Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false)
+ }
+ types::F64X2 => {
+ Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false)
+ }
+ _ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise AND on two values.
+ pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pand, from, to),
+ _ => unimplemented!("unimplemented type for Inst::and: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise AND NOT on two values.
+ pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pandn, from, to),
+ _ => unimplemented!("unimplemented type for Inst::and_not: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise OR on two values.
+ pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Por, from, to),
+ _ => unimplemented!("unimplemented type for Inst::or: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise XOR on two values.
+ pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pxor, from, to),
+ _ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: printing
+
+impl PrettyPrint for Inst {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ fn ljustify(s: String) -> String {
+ let w = 7;
+ if s.len() >= w {
+ s
+ } else {
+ let need = usize::min(w, w - s.len());
+ s + &format!("{nil: <width$}", nil = "", width = need)
+ }
+ }
+
+ fn ljustify2(s1: String, s2: String) -> String {
+ ljustify(s1 + &s2)
+ }
+
+ fn suffix_lq(is_64: bool) -> String {
+ (if is_64 { "q" } else { "l" }).to_string()
+ }
+
+ fn size_lq(is_64: bool) -> u8 {
+ if is_64 {
+ 8
+ } else {
+ 4
+ }
+ }
+
+ fn suffix_bwlq(size: u8) -> String {
+ match size {
+ 1 => "b".to_string(),
+ 2 => "w".to_string(),
+ 4 => "l".to_string(),
+ 8 => "q".to_string(),
+ _ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
+ }
+ }
+
+ match self {
+ Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
+
+ Inst::AluRmiR {
+ is_64,
+ op,
+ src,
+ dst,
+ } => format!(
+ "{} {}, {}",
+ ljustify2(op.to_string(), suffix_lq(*is_64)),
+ src.show_rru_sized(mb_rru, size_lq(*is_64)),
+ show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64)),
+ ),
+
+ Inst::UnaryRmR { src, dst, op, size } => format!(
+ "{} {}, {}",
+ ljustify2(op.to_string(), suffix_bwlq(*size)),
+ src.show_rru_sized(mb_rru, *size),
+ show_ireg_sized(dst.to_reg(), mb_rru, *size),
+ ),
+
+ Inst::Not { size, src } => format!(
+ "{} {}",
+ ljustify2("not".to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(src.to_reg(), mb_rru, *size)
+ ),
+
+ Inst::Neg { size, src } => format!(
+ "{} {}",
+ ljustify2("neg".to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(src.to_reg(), mb_rru, *size)
+ ),
+
+ Inst::Div {
+ size,
+ signed,
+ divisor,
+ ..
+ } => format!(
+ "{} {}",
+ ljustify(if *signed {
+ "idiv".to_string()
+ } else {
+ "div".into()
+ }),
+ divisor.show_rru_sized(mb_rru, *size)
+ ),
+
+ Inst::MulHi {
+ size, signed, rhs, ..
+ } => format!(
+ "{} {}",
+ ljustify(if *signed {
+ "imul".to_string()
+ } else {
+ "mul".to_string()
+ }),
+ rhs.show_rru_sized(mb_rru, *size)
+ ),
+
+ Inst::CheckedDivOrRemSeq {
+ kind,
+ size,
+ divisor,
+ ..
+ } => format!(
+ "{} $rax:$rdx, {}",
+ match kind {
+ DivOrRemKind::SignedDiv => "sdiv",
+ DivOrRemKind::UnsignedDiv => "udiv",
+ DivOrRemKind::SignedRem => "srem",
+ DivOrRemKind::UnsignedRem => "urem",
+ },
+ show_ireg_sized(divisor.to_reg(), mb_rru, *size),
+ ),
+
+ Inst::SignExtendData { size } => match size {
+ 1 => "cbw",
+ 2 => "cwd",
+ 4 => "cdq",
+ 8 => "cqo",
+ _ => unreachable!(),
+ }
+ .into(),
+
+ Inst::XmmUnaryRmR { op, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, op.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::XmmMovRM { op, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ show_ireg_sized(*src, mb_rru, 8),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::XmmRmR { op, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, 8),
+ show_ireg_sized(dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::XmmMinMaxSeq {
+ lhs,
+ rhs_dst,
+ is_min,
+ size,
+ } => format!(
+ "{} {}, {}",
+ ljustify2(
+ if *is_min {
+ "xmm min seq ".to_string()
+ } else {
+ "xmm max seq ".to_string()
+ },
+ match size {
+ OperandSize::Size32 => "f32",
+ OperandSize::Size64 => "f64",
+ }
+ .into()
+ ),
+ show_ireg_sized(*lhs, mb_rru, 8),
+ show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::XmmRmRImm { op, src, dst, imm, is64, .. } => format!(
+ "{} ${}, {}, {}",
+ ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })),
+ imm,
+ src.show_rru(mb_rru),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::XmmUninitializedValue { dst } => format!(
+ "{} {}",
+ ljustify("uninit".into()),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::XmmLoadConst { src, dst, .. } => {
+ format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),)
+ }
+
+ Inst::XmmToGpr {
+ op,
+ src,
+ dst,
+ dst_size,
+ } => {
+ let dst_size = match dst_size {
+ OperandSize::Size32 => 4,
+ OperandSize::Size64 => 8,
+ };
+ format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru(mb_rru),
+ show_ireg_sized(dst.to_reg(), mb_rru, dst_size),
+ )
+ }
+
+ Inst::GprToXmm {
+ op,
+ src,
+ src_size,
+ dst,
+ } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, src_size.to_bytes()),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::XmmCmpRmR { op, src, dst } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, 8),
+ show_ireg_sized(*dst, mb_rru, 8),
+ ),
+
+ Inst::CvtUint64ToFloatSeq {
+ src, dst, to_f64, ..
+ } => format!(
+ "{} {}, {}",
+ ljustify(format!(
+ "u64_to_{}_seq",
+ if *to_f64 { "f64" } else { "f32" }
+ )),
+ show_ireg_sized(src.to_reg(), mb_rru, 8),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::CvtFloatToSintSeq {
+ src,
+ dst,
+ src_size,
+ dst_size,
+ ..
+ } => format!(
+ "{} {}, {}",
+ ljustify(format!(
+ "cvt_float{}_to_sint{}_seq",
+ if *src_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ },
+ if *dst_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ }
+ )),
+ show_ireg_sized(src.to_reg(), mb_rru, 8),
+ show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
+ ),
+
+ Inst::CvtFloatToUintSeq {
+ src,
+ dst,
+ src_size,
+ dst_size,
+ ..
+ } => format!(
+ "{} {}, {}",
+ ljustify(format!(
+ "cvt_float{}_to_uint{}_seq",
+ if *src_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ },
+ if *dst_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ }
+ )),
+ show_ireg_sized(src.to_reg(), mb_rru, 8),
+ show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
+ ),
+
+ Inst::Imm {
+ dst_is_64,
+ simm64,
+ dst,
+ } => {
+ if *dst_is_64 {
+ format!(
+ "{} ${}, {}",
+ ljustify("movabsq".to_string()),
+ *simm64 as i64,
+ show_ireg_sized(dst.to_reg(), mb_rru, 8)
+ )
+ } else {
+ format!(
+ "{} ${}, {}",
+ ljustify("movl".to_string()),
+ (*simm64 as u32) as i32,
+ show_ireg_sized(dst.to_reg(), mb_rru, 4)
+ )
+ }
+ }
+
+ Inst::MovRR { is_64, src, dst } => format!(
+ "{} {}, {}",
+ ljustify2("mov".to_string(), suffix_lq(*is_64)),
+ show_ireg_sized(*src, mb_rru, size_lq(*is_64)),
+ show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64))
+ ),
+
+ Inst::MovzxRmR {
+ ext_mode, src, dst, ..
+ } => {
+ if *ext_mode == ExtMode::LQ {
+ format!(
+ "{} {}, {}",
+ ljustify("movl".to_string()),
+ src.show_rru_sized(mb_rru, ext_mode.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, 4)
+ )
+ } else {
+ format!(
+ "{} {}, {}",
+ ljustify2("movz".to_string(), ext_mode.to_string()),
+ src.show_rru_sized(mb_rru, ext_mode.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
+ )
+ }
+ }
+
+ Inst::Mov64MR { src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify("movq".to_string()),
+ src.show_rru(mb_rru),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::LoadEffectiveAddress { addr, dst } => format!(
+ "{} {}, {}",
+ ljustify("lea".to_string()),
+ addr.show_rru(mb_rru),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::MovsxRmR {
+ ext_mode, src, dst, ..
+ } => format!(
+ "{} {}, {}",
+ ljustify2("movs".to_string(), ext_mode.to_string()),
+ src.show_rru_sized(mb_rru, ext_mode.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
+ ),
+
+ Inst::MovRM { size, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify2("mov".to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(*src, mb_rru, *size),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::ShiftR {
+ size,
+ kind,
+ num_bits,
+ dst,
+ } => match num_bits {
+ None => format!(
+ "{} %cl, {}",
+ ljustify2(kind.to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(dst.to_reg(), mb_rru, *size)
+ ),
+
+ Some(num_bits) => format!(
+ "{} ${}, {}",
+ ljustify2(kind.to_string(), suffix_bwlq(*size)),
+ num_bits,
+ show_ireg_sized(dst.to_reg(), mb_rru, *size)
+ ),
+ },
+
+ Inst::XmmRmiReg { opcode, src, dst } => format!(
+ "{} {}, {}",
+ ljustify(opcode.to_string()),
+ src.show_rru(mb_rru),
+ dst.to_reg().show_rru(mb_rru)
+ ),
+
+ Inst::CmpRmiR { size, src, dst } => format!(
+ "{} {}, {}",
+ ljustify2("cmp".to_string(), suffix_bwlq(*size)),
+ src.show_rru_sized(mb_rru, *size),
+ show_ireg_sized(*dst, mb_rru, *size)
+ ),
+
+ Inst::Setcc { cc, dst } => format!(
+ "{} {}",
+ ljustify2("set".to_string(), cc.to_string()),
+ show_ireg_sized(dst.to_reg(), mb_rru, 1)
+ ),
+
+ Inst::Cmove { size, cc, src, dst } => format!(
+ "{} {}, {}",
+ ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size))),
+ src.show_rru_sized(mb_rru, *size),
+ show_ireg_sized(dst.to_reg(), mb_rru, *size)
+ ),
+
+ Inst::XmmCmove {
+ is_64,
+ cc,
+ src,
+ dst,
+ } => {
+ let size = if *is_64 { 8 } else { 4 };
+ format!(
+ "j{} $next; mov{} {}, {}; $next: ",
+ cc.invert().to_string(),
+ if *is_64 { "sd" } else { "ss" },
+ src.show_rru_sized(mb_rru, size),
+ show_ireg_sized(dst.to_reg(), mb_rru, size)
+ )
+ }
+
+ Inst::Push64 { src } => {
+ format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
+ }
+
+ Inst::Pop64 { dst } => {
+ format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
+ }
+
+ Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest),
+
+ Inst::CallUnknown { dest, .. } => format!(
+ "{} *{}",
+ ljustify("call".to_string()),
+ dest.show_rru(mb_rru)
+ ),
+
+ Inst::Ret => "ret".to_string(),
+
+ Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
+
+ Inst::JmpKnown { dst } => {
+ format!("{} {}", ljustify("jmp".to_string()), dst.to_string())
+ }
+
+ Inst::JmpIf { cc, taken } => format!(
+ "{} {}",
+ ljustify2("j".to_string(), cc.to_string()),
+ taken.to_string(),
+ ),
+
+ Inst::JmpCond {
+ cc,
+ taken,
+ not_taken,
+ } => format!(
+ "{} {}; j {}",
+ ljustify2("j".to_string(), cc.to_string()),
+ taken.to_string(),
+ not_taken.to_string()
+ ),
+
+ Inst::JmpTableSeq { idx, .. } => {
+ format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
+ }
+
+ Inst::JmpUnknown { target } => format!(
+ "{} *{}",
+ ljustify("jmp".to_string()),
+ target.show_rru(mb_rru)
+ ),
+
+ Inst::TrapIf { cc, trap_code, .. } => {
+ format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
+ }
+
+ Inst::LoadExtName {
+ dst, name, offset, ..
+ } => format!(
+ "{} {}+{}, {}",
+ ljustify("movaps".into()),
+ name,
+ offset,
+ show_ireg_sized(dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::LockCmpxchg { ty, src, dst, .. } => {
+ let size = ty.bytes() as u8;
+ format!("lock cmpxchg{} {}, {}",
+ suffix_bwlq(size), show_ireg_sized(*src, mb_rru, size), dst.show_rru(mb_rru))
+ }
+
+ Inst::AtomicRmwSeq { ty, op, .. } => {
+ format!(
+ "atomically {{ {}_bits_at_[%r9]) {:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}",
+ ty.bits(), op)
+ },
+
+ Inst::Fence { kind } => {
+ match kind {
+ FenceKind::MFence => "mfence".to_string(),
+ FenceKind::LFence => "lfence".to_string(),
+ FenceKind::SFence => "sfence".to_string(),
+ }
+ }
+
+ Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
+
+ Inst::Hlt => "hlt".into(),
+
+ Inst::Ud2 { trap_code } => format!("ud2 {}", trap_code),
+ }
+ }
+}
+
+// Temp hook for legacy printing machinery
+impl fmt::Debug for Inst {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ // Print the insn without a Universe :-(
+ write!(fmt, "{}", self.show_rru(None))
+ }
+}
+
+fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+ // This is a bit subtle. If some register is in the modified set, then it may not be in either
+ // the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
+    // regalloc.rs will "fix" this for us by removing the modified set from the use and def
+ // sets.
+ match inst {
+ Inst::AluRmiR { src, dst, .. } => {
+ if inst.produces_const() {
+ // No need to account for src, since src == dst.
+ collector.add_def(*dst);
+ } else {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ }
+ Inst::Not { src, .. } => {
+ collector.add_mod(*src);
+ }
+ Inst::Neg { src, .. } => {
+ collector.add_mod(*src);
+ }
+ Inst::Div { size, divisor, .. } => {
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ if *size == 1 {
+ collector.add_def(Writable::from_reg(regs::rdx()));
+ } else {
+ collector.add_mod(Writable::from_reg(regs::rdx()));
+ }
+ divisor.get_regs_as_uses(collector);
+ }
+ Inst::MulHi { rhs, .. } => {
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ collector.add_def(Writable::from_reg(regs::rdx()));
+ rhs.get_regs_as_uses(collector);
+ }
+ Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
+            // Mark both fixed registers as mods, to avoid an early-clobber problem in codegen
+            // (i.e. the temporary being allocated one of the fixed registers). This requires writing
+ // the rdx register *before* the instruction, which is not too bad.
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ collector.add_mod(Writable::from_reg(regs::rdx()));
+ collector.add_mod(*divisor);
+ if let Some(tmp) = tmp {
+ collector.add_def(*tmp);
+ }
+ }
+ Inst::SignExtendData { size } => match size {
+ 1 => collector.add_mod(Writable::from_reg(regs::rax())),
+ 2 | 4 | 8 => {
+ collector.add_use(regs::rax());
+ collector.add_def(Writable::from_reg(regs::rdx()));
+ }
+ _ => unreachable!(),
+ },
+ Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::XmmRmR { src, dst, .. } => {
+ if inst.produces_const() {
+ // No need to account for src, since src == dst.
+ collector.add_def(*dst);
+ } else {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ }
+ Inst::XmmRmRImm { op, src, dst, .. } => {
+ if inst.produces_const() {
+ // No need to account for src, since src == dst.
+ collector.add_def(*dst);
+ } else if *op == SseOpcode::Pextrb
+ || *op == SseOpcode::Pextrw
+ || *op == SseOpcode::Pextrd
+ || *op == SseOpcode::Pshufd
+ {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ } else {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ }
+ Inst::XmmUninitializedValue { dst } => collector.add_def(*dst),
+ Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst),
+ Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
+ collector.add_use(*lhs);
+ collector.add_mod(*rhs_dst);
+ }
+ Inst::XmmRmiReg { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ Inst::XmmMovRM { src, dst, .. } => {
+ collector.add_use(*src);
+ dst.get_regs_as_uses(collector);
+ }
+ Inst::XmmCmpRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_use(*dst);
+ }
+ Inst::Imm { dst, .. } => {
+ collector.add_def(*dst);
+ }
+ Inst::MovRR { src, dst, .. } | Inst::XmmToGpr { src, dst, .. } => {
+ collector.add_use(*src);
+ collector.add_def(*dst);
+ }
+ Inst::GprToXmm { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::CvtUint64ToFloatSeq {
+ src,
+ dst,
+ tmp_gpr1,
+ tmp_gpr2,
+ ..
+ } => {
+ collector.add_mod(*src);
+ collector.add_def(*dst);
+ collector.add_def(*tmp_gpr1);
+ collector.add_def(*tmp_gpr2);
+ }
+ Inst::CvtFloatToSintSeq {
+ src,
+ dst,
+ tmp_xmm,
+ tmp_gpr,
+ ..
+ }
+ | Inst::CvtFloatToUintSeq {
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ ..
+ } => {
+ collector.add_mod(*src);
+ collector.add_def(*dst);
+ collector.add_def(*tmp_gpr);
+ collector.add_def(*tmp_xmm);
+ }
+ Inst::MovzxRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::Mov64MR { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst)
+ }
+ Inst::MovsxRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::MovRM { src, dst, .. } => {
+ collector.add_use(*src);
+ dst.get_regs_as_uses(collector);
+ }
+ Inst::ShiftR { num_bits, dst, .. } => {
+ if num_bits.is_none() {
+ collector.add_use(regs::rcx());
+ }
+ collector.add_mod(*dst);
+ }
+ Inst::CmpRmiR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_use(*dst); // yes, really `add_use`
+ }
+ Inst::Setcc { dst, .. } => {
+ collector.add_def(*dst);
+ }
+ Inst::Cmove { src, dst, .. } | Inst::XmmCmove { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ Inst::Push64 { src } => {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(Writable::from_reg(regs::rsp()));
+ }
+ Inst::Pop64 { dst } => {
+ collector.add_def(*dst);
+ }
+
+ Inst::CallKnown {
+ ref uses, ref defs, ..
+ } => {
+ collector.add_uses(uses);
+ collector.add_defs(defs);
+ }
+
+ Inst::CallUnknown {
+ ref uses,
+ ref defs,
+ dest,
+ ..
+ } => {
+ collector.add_uses(uses);
+ collector.add_defs(defs);
+ dest.get_regs_as_uses(collector);
+ }
+
+ Inst::JmpTableSeq {
+ ref idx,
+ ref tmp1,
+ ref tmp2,
+ ..
+ } => {
+ collector.add_use(*idx);
+ collector.add_def(*tmp1);
+ collector.add_def(*tmp2);
+ }
+
+ Inst::JmpUnknown { target } => {
+ target.get_regs_as_uses(collector);
+ }
+
+ Inst::LoadExtName { dst, .. } => {
+ collector.add_def(*dst);
+ }
+
+ Inst::LockCmpxchg { src, dst, .. } => {
+ dst.get_regs_as_uses(collector);
+ collector.add_use(*src);
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ }
+
+ Inst::AtomicRmwSeq { .. } => {
+ collector.add_use(regs::r9());
+ collector.add_use(regs::r10());
+ collector.add_def(Writable::from_reg(regs::r11()));
+ collector.add_def(Writable::from_reg(regs::rax()));
+ }
+
+ Inst::Ret
+ | Inst::EpiloguePlaceholder
+ | Inst::JmpKnown { .. }
+ | Inst::JmpIf { .. }
+ | Inst::JmpCond { .. }
+ | Inst::Nop { .. }
+ | Inst::TrapIf { .. }
+ | Inst::VirtualSPOffsetAdj { .. }
+ | Inst::Hlt
+ | Inst::Ud2 { .. }
+ | Inst::Fence { .. } => {
+ // No registers are used.
+ }
+ }
+}
+
+//=============================================================================
+// Instructions and subcomponents: map_regs
+
+fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
+ if let Some(reg) = r.as_virtual_reg() {
+ let new = m.get_use(reg).unwrap().to_reg();
+ *r = new;
+ }
+}
+
+fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if let Some(reg) = r.to_reg().as_virtual_reg() {
+ let new = m.get_def(reg).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+}
+
+fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if let Some(reg) = r.to_reg().as_virtual_reg() {
+ let new = m.get_mod(reg).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+}
+
+impl Amode {
+ fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ Amode::ImmReg { ref mut base, .. } => map_use(map, base),
+ Amode::ImmRegRegShift {
+ ref mut base,
+ ref mut index,
+ ..
+ } => {
+ map_use(map, base);
+ map_use(map, index);
+ }
+ Amode::RipRelative { .. } => {
+ // RIP isn't involved in regalloc.
+ }
+ }
+ }
+}
+
+impl RegMemImm {
+ fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ RegMemImm::Reg { ref mut reg } => map_use(map, reg),
+ RegMemImm::Mem { ref mut addr } => addr.map_uses(map),
+ RegMemImm::Imm { .. } => {}
+ }
+ }
+
+ fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ match self {
+ Self::Reg { reg } => {
+ let mut writable_src = Writable::from_reg(*reg);
+ map_def(mapper, &mut writable_src);
+ *self = Self::reg(writable_src.to_reg());
+ }
+ _ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"),
+ }
+ }
+}
+
+impl RegMem {
+ fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ RegMem::Reg { ref mut reg } => map_use(map, reg),
+ RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
+ }
+ }
+
+ fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ match self {
+ Self::Reg { reg } => {
+ let mut writable_src = Writable::from_reg(*reg);
+ map_def(mapper, &mut writable_src);
+ *self = Self::reg(writable_src.to_reg());
+ }
+ _ => panic!("unexpected RegMem kind in map_src_reg_as_def"),
+ }
+ }
+}
+
+fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
+ // Note this must be carefully synchronized with x64_get_regs.
+ let produces_const = inst.produces_const();
+
+ match inst {
+ // ** Nop
+ Inst::AluRmiR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ if produces_const {
+ src.map_as_def(mapper);
+ map_def(mapper, dst);
+ } else {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ }
+ Inst::Not { src, .. } | Inst::Neg { src, .. } => map_mod(mapper, src),
+ Inst::Div { divisor, .. } => divisor.map_uses(mapper),
+ Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
+ Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
+ map_mod(mapper, divisor);
+ if let Some(tmp) = tmp {
+ map_def(mapper, tmp)
+ }
+ }
+ Inst::SignExtendData { .. } => {}
+ Inst::XmmUnaryRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ }
+ | Inst::UnaryRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::XmmRmRImm {
+ ref op,
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ if produces_const {
+ src.map_as_def(mapper);
+ map_def(mapper, dst);
+ } else if *op == SseOpcode::Pextrb
+ || *op == SseOpcode::Pextrw
+ || *op == SseOpcode::Pextrd
+ || *op == SseOpcode::Pshufd
+ {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ } else {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ }
+ Inst::XmmRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ if produces_const {
+ src.map_as_def(mapper);
+ map_def(mapper, dst);
+ } else {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ }
+ Inst::XmmRmiReg {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ Inst::XmmUninitializedValue { ref mut dst, .. } => {
+ map_def(mapper, dst);
+ }
+ Inst::XmmLoadConst { ref mut dst, .. } => {
+ map_def(mapper, dst);
+ }
+ Inst::XmmMinMaxSeq {
+ ref mut lhs,
+ ref mut rhs_dst,
+ ..
+ } => {
+ map_use(mapper, lhs);
+ map_mod(mapper, rhs_dst);
+ }
+ Inst::XmmMovRM {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+ Inst::XmmCmpRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_use(mapper, dst);
+ }
+ Inst::Imm { ref mut dst, .. } => map_def(mapper, dst),
+ Inst::MovRR {
+ ref mut src,
+ ref mut dst,
+ ..
+ }
+ | Inst::XmmToGpr {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ map_def(mapper, dst);
+ }
+ Inst::GprToXmm {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::CvtUint64ToFloatSeq {
+ ref mut src,
+ ref mut dst,
+ ref mut tmp_gpr1,
+ ref mut tmp_gpr2,
+ ..
+ } => {
+ map_mod(mapper, src);
+ map_def(mapper, dst);
+ map_def(mapper, tmp_gpr1);
+ map_def(mapper, tmp_gpr2);
+ }
+ Inst::CvtFloatToSintSeq {
+ ref mut src,
+ ref mut dst,
+ ref mut tmp_xmm,
+ ref mut tmp_gpr,
+ ..
+ }
+ | Inst::CvtFloatToUintSeq {
+ ref mut src,
+ ref mut dst,
+ ref mut tmp_gpr,
+ ref mut tmp_xmm,
+ ..
+ } => {
+ map_mod(mapper, src);
+ map_def(mapper, dst);
+ map_def(mapper, tmp_gpr);
+ map_def(mapper, tmp_xmm);
+ }
+ Inst::MovzxRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::Mov64MR { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::MovsxRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::MovRM {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+ Inst::ShiftR { ref mut dst, .. } => {
+ map_mod(mapper, dst);
+ }
+ Inst::CmpRmiR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_use(mapper, dst);
+ }
+ Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
+ Inst::Cmove {
+ ref mut src,
+ ref mut dst,
+ ..
+ }
+ | Inst::XmmCmove {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_mod(mapper, dst)
+ }
+ Inst::Push64 { ref mut src } => src.map_uses(mapper),
+ Inst::Pop64 { ref mut dst } => {
+ map_def(mapper, dst);
+ }
+
+ Inst::CallKnown {
+ ref mut uses,
+ ref mut defs,
+ ..
+ } => {
+ for r in uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ }
+
+ Inst::CallUnknown {
+ ref mut uses,
+ ref mut defs,
+ ref mut dest,
+ ..
+ } => {
+ for r in uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ dest.map_uses(mapper);
+ }
+
+ Inst::JmpTableSeq {
+ ref mut idx,
+ ref mut tmp1,
+ ref mut tmp2,
+ ..
+ } => {
+ map_use(mapper, idx);
+ map_def(mapper, tmp1);
+ map_def(mapper, tmp2);
+ }
+
+ Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),
+
+ Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst),
+
+ Inst::LockCmpxchg {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+
+ Inst::Ret
+ | Inst::EpiloguePlaceholder
+ | Inst::JmpKnown { .. }
+ | Inst::JmpCond { .. }
+ | Inst::JmpIf { .. }
+ | Inst::Nop { .. }
+ | Inst::TrapIf { .. }
+ | Inst::VirtualSPOffsetAdj { .. }
+ | Inst::Ud2 { .. }
+ | Inst::Hlt
+ | Inst::AtomicRmwSeq { .. }
+ | Inst::Fence { .. } => {
+ // Instruction doesn't explicitly mention any regs, so it can't have any virtual
+ // regs that we'd need to remap. Hence no action required.
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: misc functions and external interface
+
+impl MachInst for Inst {
+ fn get_regs(&self, collector: &mut RegUsageCollector) {
+ x64_get_regs(&self, collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ x64_map_regs(self, mapper);
+ }
+
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
+ match self {
+ // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
+ // out the upper 32 bits of the destination. For example, we could
+ // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
+ // %reg.
+ Self::MovRR {
+ is_64, src, dst, ..
+ } if *is_64 => Some((*dst, *src)),
+ // Note as well that MOVS[S|D] when used in the `XmmUnaryRmR` context are pure moves of
+ // scalar floating-point values (and annotate `dst` as `def`s to the register allocator)
+            // whereas the same operation in a packed context, e.g. `XmmRmR`, is used to merge a
+ // value into the lowest lane of a vector (not a move).
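+            //
+            // For example (illustrative): `movl %eax, %eax` zeroes bits 63:32 of %rax, so only
+            // the 64-bit form of `MovRR` is reported as a move here, and `movss %xmm1, %xmm0`
+            // counts as a move only in the `XmmUnaryRmR` (scalar copy) context.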
+ Self::XmmUnaryRmR { op, src, dst, .. }
+ if *op == SseOpcode::Movss
+ || *op == SseOpcode::Movsd
+ || *op == SseOpcode::Movaps
+ || *op == SseOpcode::Movapd
+ || *op == SseOpcode::Movups
+ || *op == SseOpcode::Movupd
+ || *op == SseOpcode::Movdqa
+ || *op == SseOpcode::Movdqu =>
+ {
+ if let RegMem::Reg { reg } = src {
+ Some((*dst, *reg))
+ } else {
+ None
+ }
+ }
+ _ => None,
+ }
+ }
+
+ fn is_epilogue_placeholder(&self) -> bool {
+ if let Self::EpiloguePlaceholder = self {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_term<'a>(&'a self) -> MachTerminator<'a> {
+ match self {
+ // Interesting cases.
+ &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
+ &Self::JmpKnown { dst } => MachTerminator::Uncond(dst),
+ &Self::JmpCond {
+ taken, not_taken, ..
+ } => MachTerminator::Cond(taken, not_taken),
+ &Self::JmpTableSeq {
+ ref targets_for_term,
+ ..
+ } => MachTerminator::Indirect(&targets_for_term[..]),
+ // All other cases are boring.
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
+ let rc_dst = dst_reg.to_reg().get_class();
+ let rc_src = src_reg.get_class();
+ // If this isn't true, we have gone way off the rails.
+ debug_assert!(rc_dst == rc_src);
+ match rc_dst {
+ RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
+ RegClass::V128 => {
+ // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
+ // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
+                // those, which may write more lanes than we need, but are specified to have
+ // zero-latency.
+ let opcode = match ty {
+ types::F32 | types::F64 | types::F32X4 => SseOpcode::Movaps,
+ types::F64X2 => SseOpcode::Movapd,
+ _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa,
+ _ => unimplemented!("unable to move type: {}", ty),
+ };
+ Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg)
+ }
+ _ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst),
+ }
+ }
+
+ fn gen_zero_len_nop() -> Inst {
+ Inst::Nop { len: 0 }
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ Inst::nop((preferred_size % 16) as u8)
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
+ match ty {
+ types::I8
+ | types::I16
+ | types::I32
+ | types::I64
+ | types::B1
+ | types::B8
+ | types::B16
+ | types::B32
+ | types::B64
+ | types::R32
+ | types::R64 => Ok(RegClass::I64),
+ types::F32 | types::F64 => Ok(RegClass::V128),
+ _ if ty.bits() == 128 => Ok(RegClass::V128),
+ types::IFLAGS | types::FFLAGS => Ok(RegClass::I64),
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(label: MachLabel) -> Inst {
+ Inst::jmp_known(label)
+ }
+
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Self; 4]> {
+ let mut ret = SmallVec::new();
+ if ty == types::F32 {
+ if value == 0 {
+ ret.push(Inst::xmm_rm_r(
+ SseOpcode::Xorps,
+ RegMem::reg(to_reg.to_reg()),
+ to_reg,
+ ));
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, types::I32);
+ ret.push(Inst::imm(OperandSize::Size32, value, tmp));
+
+ ret.push(Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::reg(tmp.to_reg()),
+ OperandSize::Size32,
+ to_reg,
+ ));
+ }
+ } else if ty == types::F64 {
+ if value == 0 {
+ ret.push(Inst::xmm_rm_r(
+ SseOpcode::Xorpd,
+ RegMem::reg(to_reg.to_reg()),
+ to_reg,
+ ));
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, types::I64);
+ ret.push(Inst::imm(OperandSize::Size64, value, tmp));
+
+ ret.push(Inst::gpr_to_xmm(
+ SseOpcode::Movq,
+ RegMem::reg(tmp.to_reg()),
+ OperandSize::Size64,
+ to_reg,
+ ));
+ }
+ } else {
+ // Must be an integer type.
+ debug_assert!(
+ ty == types::B1
+ || ty == types::I8
+ || ty == types::B8
+ || ty == types::I16
+ || ty == types::B16
+ || ty == types::I32
+ || ty == types::B32
+ || ty == types::I64
+ || ty == types::B64
+ || ty == types::R32
+ || ty == types::R64
+ );
+ if value == 0 {
+ ret.push(Inst::alu_rmi_r(
+ ty == types::I64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(to_reg.to_reg()),
+ to_reg,
+ ));
+ } else {
+ ret.push(Inst::imm(
+ OperandSize::from_bytes(ty.bytes()),
+ value.into(),
+ to_reg,
+ ));
+ }
+ }
+ ret
+ }
+
+ fn reg_universe(flags: &Flags) -> RealRegUniverse {
+ create_reg_universe_systemv(flags)
+ }
+
+ fn worst_case_size() -> CodeOffset {
+ 15
+ }
+
+ fn ref_type_regclass(_: &settings::Flags) -> RegClass {
+ RegClass::I64
+ }
+
+ type LabelUse = LabelUse;
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+ /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+ /// program point.
+ pub(crate) virtual_sp_offset: i64,
+ /// Offset of FP from nominal-SP.
+ pub(crate) nominal_sp_to_fp: i64,
+ /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+ stack_map: Option<StackMap>,
+ /// Current source location.
+ cur_srcloc: SourceLoc,
+}
+
+/// Constant state used during emissions of a sequence of instructions.
+pub struct EmitInfo {
+ flags: settings::Flags,
+ isa_flags: x64_settings::Flags,
+}
+
+impl EmitInfo {
+ pub(crate) fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
+ Self { flags, isa_flags }
+ }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+ fn flags(&self) -> &Flags {
+ &self.flags
+ }
+}
+
+impl MachInstEmit for Inst {
+ type State = EmitState;
+ type Info = EmitInfo;
+ type UnwindInfo = unwind::X64UnwindInfo;
+
+ fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
+ emit::emit(self, sink, info, state);
+ }
+
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
+ self.show_rru(mb_rru)
+ }
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+ fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+ EmitState {
+ virtual_sp_offset: 0,
+ nominal_sp_to_fp: abi.frame_size() as i64,
+ stack_map: None,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ fn pre_safepoint(&mut self, stack_map: StackMap) {
+ self.stack_map = Some(stack_map);
+ }
+
+ fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+}
+
+impl EmitState {
+ fn take_stack_map(&mut self) -> Option<StackMap> {
+ self.stack_map.take()
+ }
+
+ fn clear_post_insn(&mut self) {
+ self.stack_map = None;
+ }
+
+ fn cur_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+}
+
+/// A label-use (internal relocation) in generated code.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+ /// A 32-bit offset from location of relocation itself, added to the existing value at that
+ /// location. Used for control flow instructions which consider an offset from the start of the
+ /// next instruction (so the size of the payload -- 4 bytes -- is subtracted from the payload).
+ JmpRel32,
+
+ /// A 32-bit offset from location of relocation itself, added to the existing value at that
+ /// location.
+ PCRel32,
+}
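+
+// A worked example (offsets chosen purely for illustration; the existing addend is assumed to be
+// zero): if the rel32 field of a `jmp` sits at code offset 0x10 and its label is bound at offset
+// 0x40, then `pc_rel` = 0x30. For `JmpRel32` the patched field is 0x30 - 4 = 0x2c, because the
+// CPU applies the displacement relative to the end of the 4-byte field, while `PCRel32` stores
+// 0x30 unchanged.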
+
+impl MachInstLabelUse for LabelUse {
+ const ALIGN: CodeOffset = 1;
+
+ fn max_pos_range(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
+ }
+ }
+
+ fn max_neg_range(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
+ }
+ }
+
+ fn patch_size(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
+ }
+ }
+
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
+ let pc_rel = (label_offset as i64) - (use_offset as i64);
+ debug_assert!(pc_rel <= self.max_pos_range() as i64);
+ debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
+ let pc_rel = pc_rel as u32;
+ match self {
+ LabelUse::JmpRel32 => {
+ let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
+ buffer.copy_from_slice(&value.to_le_bytes()[..]);
+ }
+ LabelUse::PCRel32 => {
+ let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let value = pc_rel.wrapping_add(addend);
+ buffer.copy_from_slice(&value.to_le_bytes()[..]);
+ }
+ }
+ }
+
+ fn supports_veneer(self) -> bool {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
+ }
+ }
+
+ fn veneer_size(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
+ }
+ }
+
+ fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => {
+ panic!("Veneer not supported for JumpRel32 label-use.");
+ }
+ }
+ }
+}
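+
+// A minimal sketch (not part of the vendored file) of the arithmetic that `LabelUse::patch`
+// performs for `JmpRel32`, restated on plain integers: the stored displacement is the
+// PC-relative distance plus any pre-existing addend, minus the 4-byte payload size, because
+// the CPU treats a rel32 displacement as relative to the end of the instruction.
+#[allow(dead_code)]
+fn jmp_rel32_patched_value(use_offset: u32, label_offset: u32, addend: u32) -> u32 {
+ let pc_rel = (label_offset as i64 - use_offset as i64) as u32;
+ pc_rel.wrapping_add(addend).wrapping_sub(4)
+}
+// For example, a branch whose displacement field sits at offset 0x10 and whose target label is
+// at offset 0x40 (addend 0) stores 0x40 - 0x10 - 4 = 0x2c.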
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs
new file mode 100644
index 0000000000..04bc1f09bf
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs
@@ -0,0 +1,289 @@
+//! Registers, the Universe thereof, and printing.
+//!
+//! These are ordered by sequence number, as required in the Universe. The unusual ordering is
+//! intended to make callee-saved registers available before caller-saved ones. This is a net win
+//! provided that each function makes at least one onward call. It is a net loss for leaf
+//! functions, and in that case we should change the ordering so that caller-saved regs are
+//! available first.
+//!
+//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
+//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
+//! for each function we compile.
+
+use crate::settings;
+use alloc::vec::Vec;
+use regalloc::{
+ PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES,
+};
+use std::string::String;
+
+// Hardware encodings for a few registers.
+
+pub const ENC_RBX: u8 = 3;
+pub const ENC_RSP: u8 = 4;
+pub const ENC_RBP: u8 = 5;
+pub const ENC_R12: u8 = 12;
+pub const ENC_R13: u8 = 13;
+pub const ENC_R14: u8 = 14;
+pub const ENC_R15: u8 = 15;
+
+fn gpr(enc: u8, index: u8) -> Reg {
+ Reg::new_real(RegClass::I64, enc, index)
+}
+
+pub(crate) fn r12() -> Reg {
+ gpr(ENC_R12, 16)
+}
+pub(crate) fn r13() -> Reg {
+ gpr(ENC_R13, 17)
+}
+pub(crate) fn r14() -> Reg {
+ gpr(ENC_R14, 18)
+}
+pub(crate) fn rbx() -> Reg {
+ gpr(ENC_RBX, 19)
+}
+pub(crate) fn rsi() -> Reg {
+ gpr(6, 20)
+}
+pub(crate) fn rdi() -> Reg {
+ gpr(7, 21)
+}
+pub(crate) fn rax() -> Reg {
+ gpr(0, 22)
+}
+pub(crate) fn rcx() -> Reg {
+ gpr(1, 23)
+}
+pub(crate) fn rdx() -> Reg {
+ gpr(2, 24)
+}
+pub(crate) fn r8() -> Reg {
+ gpr(8, 25)
+}
+pub(crate) fn r9() -> Reg {
+ gpr(9, 26)
+}
+pub(crate) fn r10() -> Reg {
+ gpr(10, 27)
+}
+pub(crate) fn r11() -> Reg {
+ gpr(11, 28)
+}
+
+pub(crate) fn r15() -> Reg {
+ // r15 is put aside since this is the pinned register.
+ gpr(ENC_R15, 29)
+}
+
+/// The pinned register on this architecture.
+/// It must be the same as Spidermonkey's HeapReg, as found in this file.
+/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99
+pub(crate) fn pinned_reg() -> Reg {
+ r15()
+}
+
+fn fpr(enc: u8, index: u8) -> Reg {
+ Reg::new_real(RegClass::V128, enc, index)
+}
+
+pub(crate) fn xmm0() -> Reg {
+ fpr(0, 0)
+}
+pub(crate) fn xmm1() -> Reg {
+ fpr(1, 1)
+}
+pub(crate) fn xmm2() -> Reg {
+ fpr(2, 2)
+}
+pub(crate) fn xmm3() -> Reg {
+ fpr(3, 3)
+}
+pub(crate) fn xmm4() -> Reg {
+ fpr(4, 4)
+}
+pub(crate) fn xmm5() -> Reg {
+ fpr(5, 5)
+}
+pub(crate) fn xmm6() -> Reg {
+ fpr(6, 6)
+}
+pub(crate) fn xmm7() -> Reg {
+ fpr(7, 7)
+}
+pub(crate) fn xmm8() -> Reg {
+ fpr(8, 8)
+}
+pub(crate) fn xmm9() -> Reg {
+ fpr(9, 9)
+}
+pub(crate) fn xmm10() -> Reg {
+ fpr(10, 10)
+}
+pub(crate) fn xmm11() -> Reg {
+ fpr(11, 11)
+}
+pub(crate) fn xmm12() -> Reg {
+ fpr(12, 12)
+}
+pub(crate) fn xmm13() -> Reg {
+ fpr(13, 13)
+}
+pub(crate) fn xmm14() -> Reg {
+ fpr(14, 14)
+}
+pub(crate) fn xmm15() -> Reg {
+ fpr(15, 15)
+}
+
+pub(crate) fn rsp() -> Reg {
+ gpr(ENC_RSP, 30)
+}
+pub(crate) fn rbp() -> Reg {
+ gpr(ENC_RBP, 31)
+}
+
+/// Create the register universe for X64.
+///
+/// The ordering of registers matters, as described in the file doc comment. At the moment this
+/// assumes the SystemV calling convention.
+pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
+ let mut regs = Vec::<(RealReg, String)>::new();
+ let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+ let use_pinned_reg = flags.enable_pinned_reg();
+
+ // XMM registers
+ let first_fpr = regs.len();
+ regs.push((xmm0().to_real_reg(), "%xmm0".into()));
+ regs.push((xmm1().to_real_reg(), "%xmm1".into()));
+ regs.push((xmm2().to_real_reg(), "%xmm2".into()));
+ regs.push((xmm3().to_real_reg(), "%xmm3".into()));
+ regs.push((xmm4().to_real_reg(), "%xmm4".into()));
+ regs.push((xmm5().to_real_reg(), "%xmm5".into()));
+ regs.push((xmm6().to_real_reg(), "%xmm6".into()));
+ regs.push((xmm7().to_real_reg(), "%xmm7".into()));
+ regs.push((xmm8().to_real_reg(), "%xmm8".into()));
+ regs.push((xmm9().to_real_reg(), "%xmm9".into()));
+ regs.push((xmm10().to_real_reg(), "%xmm10".into()));
+ regs.push((xmm11().to_real_reg(), "%xmm11".into()));
+ regs.push((xmm12().to_real_reg(), "%xmm12".into()));
+ regs.push((xmm13().to_real_reg(), "%xmm13".into()));
+ regs.push((xmm14().to_real_reg(), "%xmm14".into()));
+ regs.push((xmm15().to_real_reg(), "%xmm15".into()));
+ let last_fpr = regs.len() - 1;
+
+ // Integer regs.
+ let first_gpr = regs.len();
+
+ // Callee-saved, in the SystemV x86_64 ABI.
+ regs.push((r12().to_real_reg(), "%r12".into()));
+ regs.push((r13().to_real_reg(), "%r13".into()));
+ regs.push((r14().to_real_reg(), "%r14".into()));
+
+ regs.push((rbx().to_real_reg(), "%rbx".into()));
+
+ // Caller-saved, in the SystemV x86_64 ABI.
+ regs.push((rsi().to_real_reg(), "%rsi".into()));
+ regs.push((rdi().to_real_reg(), "%rdi".into()));
+ regs.push((rax().to_real_reg(), "%rax".into()));
+ regs.push((rcx().to_real_reg(), "%rcx".into()));
+ regs.push((rdx().to_real_reg(), "%rdx".into()));
+ regs.push((r8().to_real_reg(), "%r8".into()));
+ regs.push((r9().to_real_reg(), "%r9".into()));
+ regs.push((r10().to_real_reg(), "%r10".into()));
+ regs.push((r11().to_real_reg(), "%r11".into()));
+
+ // Other regs, not available to the allocator.
+ debug_assert_eq!(r15(), pinned_reg());
+ let allocable = if use_pinned_reg {
+ // The pinned register is not allocatable in this case, so record the length before adding
+ // it.
+ let len = regs.len();
+ regs.push((r15().to_real_reg(), "%r15/pinned".into()));
+ len
+ } else {
+ regs.push((r15().to_real_reg(), "%r15".into()));
+ regs.len()
+ };
+ let last_gpr = allocable - 1;
+
+ regs.push((rsp().to_real_reg(), "%rsp".into()));
+ regs.push((rbp().to_real_reg(), "%rbp".into()));
+
+ allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+ first: first_gpr,
+ last: last_gpr,
+ suggested_scratch: Some(r12().get_index()),
+ });
+ allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+ first: first_fpr,
+ last: last_fpr,
+ suggested_scratch: Some(xmm15().get_index()),
+ });
+
+ // Sanity-check: the index passed to the Reg ctor must match the order in the register list.
+ for (i, reg) in regs.iter().enumerate() {
+ assert_eq!(i, reg.0.get_index());
+ }
+
+ RealRegUniverse {
+ regs,
+ allocable,
+ allocable_by_class,
+ }
+}
+
+/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
+/// smaller size (4, 2 or 1 bytes).
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
+ let mut s = reg.show_rru(mb_rru);
+
+ if reg.get_class() != RegClass::I64 || size == 8 {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (e.g.) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
+ // describe diplomatically as "a kludge", but it's only debug code.
+ let remapper = match s.as_str() {
+ "%rax" => Some(["%eax", "%ax", "%al"]),
+ "%rbx" => Some(["%ebx", "%bx", "%bl"]),
+ "%rcx" => Some(["%ecx", "%cx", "%cl"]),
+ "%rdx" => Some(["%edx", "%dx", "%dl"]),
+ "%rsi" => Some(["%esi", "%si", "%sil"]),
+ "%rdi" => Some(["%edi", "%di", "%dil"]),
+ "%rbp" => Some(["%ebp", "%bp", "%bpl"]),
+ "%rsp" => Some(["%esp", "%sp", "%spl"]),
+ "%r8" => Some(["%r8d", "%r8w", "%r8b"]),
+ "%r9" => Some(["%r9d", "%r9w", "%r9b"]),
+ "%r10" => Some(["%r10d", "%r10w", "%r10b"]),
+ "%r11" => Some(["%r11d", "%r11w", "%r11b"]),
+ "%r12" => Some(["%r12d", "%r12w", "%r12b"]),
+ "%r13" => Some(["%r13d", "%r13w", "%r13b"]),
+ "%r14" => Some(["%r14d", "%r14w", "%r14b"]),
+ "%r15" => Some(["%r15d", "%r15w", "%r15b"]),
+ _ => None,
+ };
+ if let Some(smaller_names) = remapper {
+ match size {
+ 4 => s = smaller_names[0].into(),
+ 2 => s = smaller_names[1].into(),
+ 1 => s = smaller_names[2].into(),
+ _ => panic!("show_ireg_sized: real"),
+ }
+ }
+ } else {
+ // Add a "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
+ let suffix = match size {
+ 4 => "l",
+ 2 => "w",
+ 1 => "b",
+ _ => panic!("show_ireg_sized: virtual"),
+ };
+ s = s + suffix;
+ }
+
+ s
+}
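+
+// For example (illustrative): a real "%rax" printed at size 4 becomes "%eax", at size 2 "%ax",
+// and at size 1 "%al"; a virtual I64-class register instead keeps its name and gains a width
+// suffix, e.g. "l" at size 4, "w" at size 2, or "b" at size 1.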
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs
new file mode 100644
index 0000000000..ffe43930f0
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs
@@ -0,0 +1,125 @@
+use crate::isa::unwind::input::UnwindInfo;
+use crate::isa::x64::inst::{
+ args::{AluRmiROpcode, Amode, RegMemImm, SyntheticAmode},
+ regs, Inst,
+};
+use crate::machinst::{UnwindInfoContext, UnwindInfoGenerator};
+use crate::result::CodegenResult;
+use alloc::vec::Vec;
+use regalloc::Reg;
+
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
+
+pub struct X64UnwindInfo;
+
+impl UnwindInfoGenerator<Inst> for X64UnwindInfo {
+ fn create_unwind_info(
+ context: UnwindInfoContext<Inst>,
+ ) -> CodegenResult<Option<UnwindInfo<Reg>>> {
+ use crate::isa::unwind::input::{self, UnwindCode};
+ let mut codes = Vec::new();
+ const WORD_SIZE: u8 = 8;
+
+ for i in context.prologue.clone() {
+ let i = i as usize;
+ let inst = &context.insts[i];
+ let offset = context.insts_layout[i];
+
+ match inst {
+ Inst::Push64 {
+ src: RegMemImm::Reg { reg },
+ } => {
+ codes.push((
+ offset,
+ UnwindCode::StackAlloc {
+ size: WORD_SIZE.into(),
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *reg,
+ stack_offset: 0,
+ },
+ ));
+ }
+ Inst::MovRR { src, dst, .. } => {
+ if *src == regs::rsp() {
+ codes.push((offset, UnwindCode::SetFramePointer { reg: dst.to_reg() }));
+ }
+ }
+ Inst::AluRmiR {
+ is_64: true,
+ op: AluRmiROpcode::Sub,
+ src: RegMemImm::Imm { simm32 },
+ dst,
+ ..
+ } if dst.to_reg() == regs::rsp() => {
+ let imm = *simm32;
+ codes.push((offset, UnwindCode::StackAlloc { size: imm }));
+ }
+ Inst::MovRM {
+ src,
+ dst: SyntheticAmode::Real(Amode::ImmReg { simm32, base, .. }),
+ ..
+ } if *base == regs::rsp() => {
+ // `mov reg, imm(rsp)`
+ let imm = *simm32;
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *src,
+ stack_offset: imm,
+ },
+ ));
+ }
+ Inst::AluRmiR {
+ is_64: true,
+ op: AluRmiROpcode::Add,
+ src: RegMemImm::Imm { simm32 },
+ dst,
+ ..
+ } if dst.to_reg() == regs::rsp() => {
+ let imm = *simm32;
+ codes.push((offset, UnwindCode::StackDealloc { size: imm }));
+ }
+ _ => {}
+ }
+ }
+
+ let last_epilogue_end = context.len;
+ let epilogues_unwind_codes = context
+ .epilogues
+ .iter()
+ .map(|epilogue| {
+ // TODO: add logic to process epilogue instructions instead of
+ // returning an empty array.
+ let end = epilogue.end as usize - 1;
+ let end_offset = context.insts_layout[end];
+ if end_offset == last_epilogue_end {
+ // Do not remember/restore for very last epilogue.
+ return vec![];
+ }
+
+ let start = epilogue.start as usize;
+ let offset = context.insts_layout[start];
+ vec![
+ (offset, UnwindCode::RememberState),
+ // TODO epilogue instructions
+ (end_offset, UnwindCode::RestoreState),
+ ]
+ })
+ .collect();
+
+ let prologue_size = context.insts_layout[context.prologue.end as usize];
+ Ok(Some(input::UnwindInfo {
+ prologue_size,
+ prologue_unwind_codes: codes,
+ epilogues_unwind_codes,
+ function_size: context.len,
+ word_size: WORD_SIZE,
+ initial_sp_offset: WORD_SIZE,
+ }))
+ }
+}
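+
+// Illustrative walkthrough (not part of the vendored file): for a conventional prologue
+//
+//     push %rbp
+//     mov  %rsp, %rbp
+//     sub  $32, %rsp
+//
+// the matcher above emits, in order, StackAlloc { size: 8 } and
+// SaveRegister { reg: rbp, stack_offset: 0 } for the push, SetFramePointer { reg: rbp } for
+// the register-to-register move, and StackAlloc { size: 32 } for the stack adjustment.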
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs
new file mode 100644
index 0000000000..68473a8afb
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs
@@ -0,0 +1,204 @@
+//! Unwind information for System V ABI (x86-64).
+
+use crate::isa::unwind::input;
+use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
+use crate::result::CodegenResult;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
+use regalloc::{Reg, RegClass};
+
+/// Creates a new x86-64 common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry {
+ use gimli::write::CallFrameInstruction;
+
+ let mut entry = CommonInformationEntry::new(
+ Encoding {
+ address_size: 8,
+ format: Format::Dwarf32,
+ version: 1,
+ },
+ 1, // Code alignment factor
+ -8, // Data alignment factor
+ X86_64::RA,
+ );
+
+ // Every frame will start with the call frame address (CFA) at RSP+8
+ // It is +8 to account for the push of the return address by the call instruction
+ entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8));
+
+ // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP)
+ entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8));
+
+ entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
+ // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
+ const X86_GP_REG_MAP: [gimli::Register; 16] = [
+ X86_64::RAX,
+ X86_64::RCX,
+ X86_64::RDX,
+ X86_64::RBX,
+ X86_64::RSP,
+ X86_64::RBP,
+ X86_64::RSI,
+ X86_64::RDI,
+ X86_64::R8,
+ X86_64::R9,
+ X86_64::R10,
+ X86_64::R11,
+ X86_64::R12,
+ X86_64::R13,
+ X86_64::R14,
+ X86_64::R15,
+ ];
+ const X86_XMM_REG_MAP: [gimli::Register; 16] = [
+ X86_64::XMM0,
+ X86_64::XMM1,
+ X86_64::XMM2,
+ X86_64::XMM3,
+ X86_64::XMM4,
+ X86_64::XMM5,
+ X86_64::XMM6,
+ X86_64::XMM7,
+ X86_64::XMM8,
+ X86_64::XMM9,
+ X86_64::XMM10,
+ X86_64::XMM11,
+ X86_64::XMM12,
+ X86_64::XMM13,
+ X86_64::XMM14,
+ X86_64::XMM15,
+ ];
+
+ match reg.get_class() {
+ RegClass::I64 => {
+ // x86 GP registers have a weird mapping to DWARF registers, so we use a
+ // lookup table.
+ Ok(X86_GP_REG_MAP[reg.get_hw_encoding() as usize])
+ }
+ RegClass::V128 => Ok(X86_XMM_REG_MAP[reg.get_hw_encoding() as usize]),
+ _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
+ }
+}
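+
+// For example, hardware encoding 1 (%rcx) maps to DWARF register 2 and hardware encoding 2
+// (%rdx) maps to DWARF register 1, so an identity mapping from hardware encodings would be
+// wrong for the GP class.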
+
+pub(crate) fn create_unwind_info(
+ unwind: input::UnwindInfo<Reg>,
+) -> CodegenResult<Option<UnwindInfo>> {
+ struct RegisterMapper;
+ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
+ fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+ Ok(map_reg(reg)?.0)
+ }
+ fn sp(&self) -> u16 {
+ X86_64::RSP.0
+ }
+ }
+ let map = RegisterMapper;
+
+ Ok(Some(UnwindInfo::build(unwind, &map)?))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
+ StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use gimli::write::Address;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(1234))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 13, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(4321))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 23, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (16, RememberState), (18, RestoreState)] }");
+ }
+
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/lower.rs b/third_party/rust/cranelift-codegen/src/isa/x64/lower.rs
new file mode 100644
index 0000000000..0862154360
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/lower.rs
@@ -0,0 +1,3771 @@
+//! Lowering rules for X64.
+
+use crate::data_value::DataValue;
+use crate::ir::{
+ condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName,
+ Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type,
+};
+use crate::isa::x64::abi::*;
+use crate::isa::x64::inst::args::*;
+use crate::isa::x64::inst::*;
+use crate::isa::{x64::X64Backend, CallConv};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::result::CodegenResult;
+use crate::settings::Flags;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use cranelift_codegen_shared::condcodes::CondCode;
+use log::trace;
+use regalloc::{Reg, RegClass, Writable};
+use smallvec::SmallVec;
+use std::convert::TryFrom;
+use target_lexicon::Triple;
+
+/// Context passed to all lowering functions.
+type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
+
+//=============================================================================
+// Helpers for instruction lowering.
+
+fn is_int_or_ref_ty(ty: Type) -> bool {
+ match ty {
+ types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
+ types::R32 => panic!("shouldn't have 32-bits refs on x64"),
+ _ => false,
+ }
+}
+
+fn is_bool_ty(ty: Type) -> bool {
+ match ty {
+ types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
+ types::R32 => panic!("shouldn't have 32-bits refs on x64"),
+ _ => false,
+ }
+}
+
+/// This is target-word-size dependent, and it excludes booleans and reftypes.
+fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
+ match ty {
+ types::I8 | types::I16 | types::I32 | types::I64 => true,
+ _ => false,
+ }
+}
+
+/// Returns the producing instruction if the specified `input` is a result produced by an
+/// instruction with opcode `op`, and `None` otherwise.
+// TODO investigate failures with checking against the result index.
+fn matches_input<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ op: Opcode,
+) -> Option<IRInst> {
+ let inputs = ctx.get_input(input.insn, input.input);
+ inputs.inst.and_then(|(src_inst, _)| {
+ let data = ctx.data(src_inst);
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ None
+ })
+}
+
+/// Returns the producing instruction if the specified `input` is a result produced by an
+/// instruction with any of the opcodes in `ops`, and `None` otherwise.
+fn matches_input_any<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ ops: &[Opcode],
+) -> Option<IRInst> {
+ let inputs = ctx.get_input(input.insn, input.input);
+ inputs.inst.and_then(|(src_inst, _)| {
+ let data = ctx.data(src_inst);
+ for &op in ops {
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ }
+ None
+ })
+}
+
+fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg {
+ ctx.use_input_reg(input);
+ input.reg
+}
+
+/// Put the given input into a register, and mark it as used (side-effect).
+fn put_input_in_reg(ctx: Ctx, spec: InsnInput) -> Reg {
+ let input = ctx.get_input(spec.insn, spec.input);
+
+ if let Some(c) = input.constant {
+ // Generate constants fresh at each use to minimize long-range register pressure.
+ let ty = ctx.input_ty(spec.insn, spec.input);
+ let from_bits = ty_bits(ty);
+ let masked = if from_bits < 64 {
+ c & ((1u64 << from_bits) - 1)
+ } else {
+ c
+ };
+
+ let cst_copy = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
+ for inst in Inst::gen_constant(cst_copy, masked, ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ })
+ .into_iter()
+ {
+ ctx.emit(inst);
+ }
+ cst_copy.to_reg()
+ } else {
+ lowerinput_to_reg(ctx, input)
+ }
+}
+
+/// An extension specification for `extend_input_to_reg`.
+#[derive(Clone, Copy)]
+enum ExtSpec {
+ ZeroExtendTo32,
+ ZeroExtendTo64,
+ SignExtendTo32,
+ SignExtendTo64,
+}
+
+/// Put the given input into a register, marking it as used, and do a zero- or signed- extension if
+/// required. (This obviously causes side-effects.)
+fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
+ let requested_size = match ext_spec {
+ ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32,
+ ExtSpec::ZeroExtendTo64 | ExtSpec::SignExtendTo64 => 64,
+ };
+ let input_size = ctx.input_ty(spec.insn, spec.input).bits();
+
+ let requested_ty = if requested_size == 32 {
+ types::I32
+ } else {
+ types::I64
+ };
+
+ let ext_mode = match (input_size, requested_size) {
+ (a, b) if a == b => return put_input_in_reg(ctx, spec),
+ (1, 8) => return put_input_in_reg(ctx, spec),
+ (a, b) => ExtMode::new(a, b).expect(&format!("invalid extension: {} -> {}", a, b)),
+ };
+
+ let src = input_to_reg_mem(ctx, spec);
+ let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
+ match ext_spec {
+ ExtSpec::ZeroExtendTo32 | ExtSpec::ZeroExtendTo64 => {
+ ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst))
+ }
+ ExtSpec::SignExtendTo32 | ExtSpec::SignExtendTo64 => {
+ ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst))
+ }
+ }
+ dst.to_reg()
+}
+
+fn lowerinput_to_reg_mem(ctx: Ctx, input: LowerInput) -> RegMem {
+ // TODO handle memory.
+ RegMem::reg(lowerinput_to_reg(ctx, input))
+}
+
+/// Put the given input into a register or a memory operand.
+/// Effectful: may mark the given input as used, when returning the register form.
+fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
+ let input = ctx.get_input(spec.insn, spec.input);
+ lowerinput_to_reg_mem(ctx, input)
+}
+
+/// Returns the given input as an immediate if it can be properly sign-extended, without any
+/// possible side-effect; returns `None` otherwise.
+fn lowerinput_to_sext_imm(input: LowerInput, input_ty: Type) -> Option<u32> {
+ input.constant.and_then(|x| {
+ // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
+ // to 64 bits. For other sizes, it doesn't matter and we can just use the plain
+ // constant.
+ if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) {
+ Some(x as u32)
+ } else {
+ None
+ }
+ })
+}
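+// A minimal sketch (not part of the vendored file, and not the actual helper) of the check that
+// `low32_will_sign_extend_to_64` stands for: a constant fits as a 32-bit immediate of a 64-bit
+// (REX.W) instruction only if sign-extending its low 32 bits reproduces the full 64-bit value.
+#[allow(dead_code)]
+fn low32_sign_extends_to(x: u64) -> bool {
+ ((x as u32) as i32 as i64 as u64) == x
+}
+// For example, 0xffff_ffff_ffff_fff6 (-10) passes, while 0x0000_0000_8000_0000 does not,
+// because its low 32 bits sign-extend to 0xffff_ffff_8000_0000.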
+
+fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
+ let input = ctx.get_input(spec.insn, spec.input);
+ let input_ty = ctx.input_ty(spec.insn, spec.input);
+ lowerinput_to_sext_imm(input, input_ty)
+}
+
+fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
+ ctx.get_input(spec.insn, spec.input).constant
+}
+
+/// Put the given input into an immediate, a register or a memory operand.
+/// Effectful: may mark the given input as used, when returning the register form.
+fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
+ let input = ctx.get_input(spec.insn, spec.input);
+ let input_ty = ctx.input_ty(spec.insn, spec.input);
+ match lowerinput_to_sext_imm(input, input_ty) {
+ Some(x) => RegMemImm::imm(x),
+ None => match lowerinput_to_reg_mem(ctx, input) {
+ RegMem::Reg { reg } => RegMemImm::reg(reg),
+ RegMem::Mem { addr } => RegMemImm::mem(addr),
+ },
+ }
+}
+
+/// Emit an instruction to insert a value `src` into a lane of `dst`.
+fn emit_insert_lane<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ src: RegMem,
+ dst: Writable<Reg>,
+ lane: u8,
+ ty: Type,
+) {
+ if !ty.is_float() {
+ let (sse_op, is64) = match ty.lane_bits() {
+ 8 => (SseOpcode::Pinsrb, false),
+ 16 => (SseOpcode::Pinsrw, false),
+ 32 => (SseOpcode::Pinsrd, false),
+ 64 => (SseOpcode::Pinsrd, true),
+ _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
+ };
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64));
+ } else if ty == types::F32 {
+ let sse_op = SseOpcode::Insertps;
+ // Insert 32 bits from the replacement (source index 00, immediate bits 7:6) into the
+ // vector lane selected by immediate bits 5:4, hence `lane << 4`.
+ let lane = 0b00_00_00_00 | lane << 4;
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false));
+ } else if ty == types::F64 {
+ let sse_op = match lane {
+ // Move the lowest quadword in replacement to vector without changing
+ // the upper bits.
+ 0 => SseOpcode::Movsd,
+ // Move the low 64 bits of replacement vector to the high 64 bits of the
+ // vector.
+ 1 => SseOpcode::Movlhps,
+ _ => unreachable!(),
+ };
+ // Here we use the `xmm_rm_r` encoding because it correctly tells the register
+ // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
+ // encoding formats like `xmm_unary_rm_r` treat it as a `def`.
+ ctx.emit(Inst::xmm_rm_r(sse_op, src, dst));
+ } else {
+ panic!("unable to emit insertlane for type: {}", ty)
+ }
+}
+
+/// Emits an int comparison instruction.
+///
+/// Note: make sure that there are no instructions modifying the flags between a call to this
+/// function and the use of the flags!
+fn emit_cmp(ctx: Ctx, insn: IRInst) {
+ let ty = ctx.input_ty(insn, 0);
+
+ let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+
+ // TODO Try to commute the operands (and invert the condition) if one is an immediate.
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
+
+ // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
+ // us dst - src at the machine instruction level, so invert operands.
+ ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
+}
+
+/// A specification for a fcmp emission.
+enum FcmpSpec {
+ /// Normal flow.
+ Normal,
+
+ /// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that
+ /// happens with `InvertedEqualOrConditions`.
+ ///
+ /// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
+ /// sequence of instructions) that check for an "AND" combination of condition codes; see for
+ /// instance lowering of Select.
+ InvertEqual,
+}
+
+/// This explains how to interpret the results of an fcmp instruction.
+enum FcmpCondResult {
+ /// The given condition code must be set.
+ Condition(CC),
+
+ /// Both condition codes must be set.
+ AndConditions(CC, CC),
+
+ /// Either of the condition codes must be set.
+ OrConditions(CC, CC),
+
+ /// The associated spec was set to `FcmpSpec::InvertEqual` and Equal has been inverted. Either
+ /// of the condition codes must be set, and the user must invert the meaning of the condition
+ /// code results. When the spec is set to `FcmpSpec::Normal`, this case can't be reached.
+ InvertedEqualOrConditions(CC, CC),
+}
+
+/// Emits a float comparison instruction.
+///
+/// Note: make sure that there are no instructions modifying the flags between a call to this
+/// function and the use of the flags!
+fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
+ let (flip_operands, inverted_equal) = match cond_code {
+ FloatCC::LessThan
+ | FloatCC::LessThanOrEqual
+ | FloatCC::UnorderedOrGreaterThan
+ | FloatCC::UnorderedOrGreaterThanOrEqual => {
+ cond_code = cond_code.reverse();
+ (true, false)
+ }
+ FloatCC::Equal => {
+ let inverted_equal = match spec {
+ FcmpSpec::Normal => false,
+ FcmpSpec::InvertEqual => {
+ cond_code = FloatCC::NotEqual; // same as .inverse()
+ true
+ }
+ };
+ (false, inverted_equal)
+ }
+ _ => (false, false),
+ };
+
+ // The only valid CC constructed with `from_floatcc` can be put in the flag
+ // register with a direct float comparison; do this here.
+ let op = match ctx.input_ty(insn, 0) {
+ types::F32 => SseOpcode::Ucomiss,
+ types::F64 => SseOpcode::Ucomisd,
+ _ => panic!("Bad input type to Fcmp"),
+ };
+
+ let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+ let (lhs_input, rhs_input) = if flip_operands {
+ (inputs[1], inputs[0])
+ } else {
+ (inputs[0], inputs[1])
+ };
+ let lhs = put_input_in_reg(ctx, lhs_input);
+ let rhs = input_to_reg_mem(ctx, rhs_input);
+ ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
+
+ let cond_result = match cond_code {
+ FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
+ FloatCC::NotEqual if inverted_equal => {
+ FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ)
+ }
+ FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ),
+ _ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)),
+ };
+
+ cond_result
+}
+
+fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature {
+ let mut sig = Signature::new(call_conv);
+ for i in 0..ctx.num_inputs(insn) {
+ sig.params.push(AbiParam::new(ctx.input_ty(insn, i)));
+ }
+ for i in 0..ctx.num_outputs(insn) {
+ sig.returns.push(AbiParam::new(ctx.output_ty(insn, i)));
+ }
+ if call_conv.extends_baldrdash() {
+ // Adds the special VMContext parameter to the signature.
+ sig.params
+ .push(AbiParam::special(ptr_ty, ArgumentPurpose::VMContext));
+ }
+ sig
+}
+
+fn emit_vm_call<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ flags: &Flags,
+ triple: &Triple,
+ libcall: LibCall,
+ insn: IRInst,
+ inputs: SmallVec<[InsnInput; 4]>,
+ outputs: SmallVec<[InsnOutput; 2]>,
+) -> CodegenResult<()> {
+ let extname = ExternalName::LibCall(libcall);
+
+ let dist = if flags.use_colocated_libcalls() {
+ RelocDistance::Near
+ } else {
+ RelocDistance::Far
+ };
+
+ // TODO avoid recreating signatures for every single Libcall function.
+ let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
+ let sig = make_libcall_sig(ctx, insn, call_conv, types::I64);
+ let caller_conv = ctx.abi().call_conv();
+
+ let mut abi = X64ABICaller::from_func(&sig, &extname, dist, caller_conv)?;
+
+ abi.emit_stack_pre_adjust(ctx);
+
+ let vm_context = if call_conv.extends_baldrdash() { 1 } else { 0 };
+ assert_eq!(inputs.len() + vm_context, abi.num_args());
+
+ for (i, input) in inputs.iter().enumerate() {
+ let arg_reg = put_input_in_reg(ctx, *input);
+ abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
+ }
+ if call_conv.extends_baldrdash() {
+ let vm_context_vreg = ctx
+ .get_vm_context()
+ .expect("should have a VMContext to pass to libcall funcs");
+ abi.emit_copy_reg_to_arg(ctx, inputs.len(), vm_context_vreg);
+ }
+
+ abi.emit_call(ctx);
+ for (i, output) in outputs.iter().enumerate() {
+ let retval_reg = get_output_reg(ctx, *output);
+ abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
+ }
+ abi.emit_stack_post_adjust(ctx);
+
+ Ok(())
+}
+
+/// If the given input is a shift by a constant amount less than or equal to 3, returns the
+/// shifted input and the shift amount; the goal is to embed the shift within an address mode.
+fn matches_small_constant_shift<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ spec: InsnInput,
+) -> Option<(InsnInput, u8)> {
+ matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
+ match input_to_imm(
+ ctx,
+ InsnInput {
+ insn: shift,
+ input: 1,
+ },
+ ) {
+ Some(shift_amt) if shift_amt <= 3 => Some((
+ InsnInput {
+ insn: shift,
+ input: 0,
+ },
+ shift_amt as u8,
+ )),
+ _ => None,
+ }
+ })
+}
+
+/// Lowers an instruction to one of the x86 addressing modes.
+///
+/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
+fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i32) -> Amode {
+ let flags = ctx
+ .memflags(spec.insn)
+ .expect("Instruction with amode should have memflags");
+
+ // At this point we either have an add that we must materialize, or some other input, plus
+ // the final offset.
+ if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
+ debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
+ let add_inputs = &[
+ InsnInput {
+ insn: add,
+ input: 0,
+ },
+ InsnInput {
+ insn: add,
+ input: 1,
+ },
+ ];
+
+ // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
+ // aren't happening in the wasm case. We could do better, given some range analysis.
+ let (base, index, shift) = if let Some((shift_input, shift_amt)) =
+ matches_small_constant_shift(ctx, add_inputs[0])
+ {
+ (
+ put_input_in_reg(ctx, add_inputs[1]),
+ put_input_in_reg(ctx, shift_input),
+ shift_amt,
+ )
+ } else if let Some((shift_input, shift_amt)) =
+ matches_small_constant_shift(ctx, add_inputs[1])
+ {
+ (
+ put_input_in_reg(ctx, add_inputs[0]),
+ put_input_in_reg(ctx, shift_input),
+ shift_amt,
+ )
+ } else {
+ for i in 0..=1 {
+ let input = ctx.get_input(add, i);
+
+ // Try to pierce through uextend.
+ if let Some(uextend) = matches_input(
+ ctx,
+ InsnInput {
+ insn: add,
+ input: i,
+ },
+ Opcode::Uextend,
+ ) {
+ if let Some(cst) = ctx.get_input(uextend, 0).constant {
+ // Zero the upper bits.
+ let input_size = ctx.input_ty(uextend, 0).bits() as u64;
+ let shift: u64 = 64 - input_size;
+ let uext_cst: u64 = (cst << shift) >> shift;
+
+ let final_offset = (offset as i64).wrapping_add(uext_cst as i64);
+ if low32_will_sign_extend_to_64(final_offset as u64) {
+ let base = put_input_in_reg(ctx, add_inputs[1 - i]);
+ return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
+ }
+ }
+ }
+
+ // If it's a constant, add it directly!
+ if let Some(cst) = input.constant {
+ let final_offset = (offset as i64).wrapping_add(cst as i64);
+ if low32_will_sign_extend_to_64(final_offset as u64) {
+ let base = put_input_in_reg(ctx, add_inputs[1 - i]);
+ return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
+ }
+ }
+ }
+
+ (
+ put_input_in_reg(ctx, add_inputs[0]),
+ put_input_in_reg(ctx, add_inputs[1]),
+ 0,
+ )
+ };
+
+ return Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags);
+ }
+
+ let input = put_input_in_reg(ctx, spec);
+ Amode::imm_reg(offset as u32, input).with_flags(flags)
+}
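+// Illustrative example (not part of the vendored file): for a load whose address is
+// `iadd x, (ishl y, 2)` plus a static offset of 16, the code above produces the single amode
+// `Amode::imm_reg_reg_shift(16, x, y, 2)`, i.e. x + (y << 2) + 16, instead of materializing
+// the add and the shift as separate instructions.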
+
+//=============================================================================
+// Top-level instruction lowering entry point, for one instruction.
+
+/// Actually codegen an instruction's results into registers.
+fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ insn: IRInst,
+ flags: &Flags,
+ triple: &Triple,
+) -> CodegenResult<()> {
+ let op = ctx.data(insn).opcode();
+
+ let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
+ .map(|i| InsnInput { insn, input: i })
+ .collect();
+ let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
+ .map(|i| InsnOutput { insn, output: i })
+ .collect();
+
+ let ty = if outputs.len() > 0 {
+ Some(ctx.output_ty(insn, 0))
+ } else {
+ None
+ };
+
+ match op {
+ Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
+ let value = ctx
+ .get_constant(insn)
+ .expect("constant value for iconst et al");
+ let dst = get_output_reg(ctx, outputs[0]);
+ for inst in Inst::gen_constant(dst, value, ty.unwrap(), |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::Iadd
+ | Opcode::IaddIfcout
+ | Opcode::SaddSat
+ | Opcode::UaddSat
+ | Opcode::Isub
+ | Opcode::SsubSat
+ | Opcode::UsubSat
+ | Opcode::Imul
+ | Opcode::AvgRound
+ | Opcode::Band
+ | Opcode::Bor
+ | Opcode::Bxor => {
+ let ty = ty.unwrap();
+ if ty.lane_count() > 1 {
+ let sse_op = match op {
+ Opcode::Iadd => match ty {
+ types::I8X16 => SseOpcode::Paddb,
+ types::I16X8 => SseOpcode::Paddw,
+ types::I32X4 => SseOpcode::Paddd,
+ types::I64X2 => SseOpcode::Paddq,
+ _ => panic!("Unsupported type for packed iadd instruction: {}", ty),
+ },
+ Opcode::SaddSat => match ty {
+ types::I8X16 => SseOpcode::Paddsb,
+ types::I16X8 => SseOpcode::Paddsw,
+ _ => panic!("Unsupported type for packed sadd_sat instruction: {}", ty),
+ },
+ Opcode::UaddSat => match ty {
+ types::I8X16 => SseOpcode::Paddusb,
+ types::I16X8 => SseOpcode::Paddusw,
+ _ => panic!("Unsupported type for packed uadd_sat instruction: {}", ty),
+ },
+ Opcode::Isub => match ty {
+ types::I8X16 => SseOpcode::Psubb,
+ types::I16X8 => SseOpcode::Psubw,
+ types::I32X4 => SseOpcode::Psubd,
+ types::I64X2 => SseOpcode::Psubq,
+ _ => panic!("Unsupported type for packed isub instruction: {}", ty),
+ },
+ Opcode::SsubSat => match ty {
+ types::I8X16 => SseOpcode::Psubsb,
+ types::I16X8 => SseOpcode::Psubsw,
+ _ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
+ },
+ Opcode::UsubSat => match ty {
+ types::I8X16 => SseOpcode::Psubusb,
+ types::I16X8 => SseOpcode::Psubusw,
+ _ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
+ },
+ Opcode::Imul => match ty {
+ types::I16X8 => SseOpcode::Pmullw,
+ types::I32X4 => SseOpcode::Pmulld,
+ types::I64X2 => {
+ // Note for I64X2 we describe a lane A as being composed of a
+ // 32-bit upper half "Ah" and a 32-bit lower half "Al".
+ // The long-hand multiplication in 32-bit halves can then be written as:
+ // Ah Al
+ // * Bh Bl
+ // -----
+ // Al * Bl
+ // + (Ah * Bl) << 32
+ // + (Al * Bh) << 32
+ //
+ // So for each lane we will compute:
+ // A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
+ //
+ // Note that the algorithm uses pmuludq, which operates directly on
+ // the lower 32 bits (Al or Bl) of a lane and writes the result
+ // to the full 64-bits of the lane of the destination. For this
+ // reason we don't need shifts to isolate the lower 32-bits, however
+ // we will need to use shifts to isolate the high 32-bits when doing
+ // calculations, i.e. Ah == A >> 32
+ //
+ // The full sequence then is as follows:
+ // A' = A
+ // A' = A' >> 32
+ // A' = Ah' * Bl
+ // B' = B
+ // B' = B' >> 32
+ // B' = Bh' * Al
+ // B' = B' + A'
+ // B' = B' << 32
+ // A' = A
+ // A' = Al' * Bl
+ // A' = A' + B'
+ // dst = A'
+
+ // Get inputs rhs=A and lhs=B and the dst register
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // A' = A
+ let rhs_1 = ctx.alloc_tmp(RegClass::V128, types::I64X2);
+ ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
+
+ // A' = A' >> 32
+ // A' = Ah' * Bl
+ ctx.emit(Inst::xmm_rmi_reg(
+ SseOpcode::Psrlq,
+ RegMemImm::imm(32),
+ rhs_1,
+ ));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pmuludq,
+ RegMem::reg(lhs.clone()),
+ rhs_1,
+ ));
+
+ // B' = B
+ let lhs_1 = ctx.alloc_tmp(RegClass::V128, types::I64X2);
+ ctx.emit(Inst::gen_move(lhs_1, lhs, ty));
+
+ // B' = B' >> 32
+ // B' = Bh' * Al
+ ctx.emit(Inst::xmm_rmi_reg(
+ SseOpcode::Psrlq,
+ RegMemImm::imm(32),
+ lhs_1,
+ ));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1));
+
+ // B' = B' + A'
+ // B' = B' << 32
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Paddq,
+ RegMem::reg(rhs_1.to_reg()),
+ lhs_1,
+ ));
+ ctx.emit(Inst::xmm_rmi_reg(
+ SseOpcode::Psllq,
+ RegMemImm::imm(32),
+ lhs_1,
+ ));
+
+ // A' = A
+ // A' = Al' * Bl
+ // A' = A' + B'
+ // dst = A'
+ ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pmuludq,
+ RegMem::reg(lhs.clone()),
+ rhs_1,
+ ));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Paddq,
+ RegMem::reg(lhs_1.to_reg()),
+ rhs_1,
+ ));
+ ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
+ return Ok(());
+ }
+ _ => panic!("Unsupported type for packed imul instruction: {}", ty),
+ },
+ Opcode::AvgRound => match ty {
+ types::I8X16 => SseOpcode::Pavgb,
+ types::I16X8 => SseOpcode::Pavgw,
+ _ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
+ },
+ Opcode::Band => match ty {
+ types::F32X4 => SseOpcode::Andps,
+ types::F64X2 => SseOpcode::Andpd,
+ _ => SseOpcode::Pand,
+ },
+ Opcode::Bor => match ty {
+ types::F32X4 => SseOpcode::Orps,
+ types::F64X2 => SseOpcode::Orpd,
+ _ => SseOpcode::Por,
+ },
+ Opcode::Bxor => match ty {
+ types::F32X4 => SseOpcode::Xorps,
+ types::F64X2 => SseOpcode::Xorpd,
+ _ => SseOpcode::Pxor,
+ },
+ _ => panic!("Unsupported packed instruction: {}", op),
+ };
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // Move the `lhs` to the same register as `dst`.
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+ ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
+ } else {
+ let is_64 = ty == types::I64;
+ let alu_op = match op {
+ Opcode::Iadd | Opcode::IaddIfcout => AluRmiROpcode::Add,
+ Opcode::Isub => AluRmiROpcode::Sub,
+ Opcode::Imul => AluRmiROpcode::Mul,
+ Opcode::Band => AluRmiROpcode::And,
+ Opcode::Bor => AluRmiROpcode::Or,
+ Opcode::Bxor => AluRmiROpcode::Xor,
+ _ => unreachable!(),
+ };
+
+ let (lhs, rhs) = match op {
+ Opcode::Iadd
+ | Opcode::IaddIfcout
+ | Opcode::Imul
+ | Opcode::Band
+ | Opcode::Bor
+ | Opcode::Bxor => {
+ // For commutative operations, try to commute operands if one is an
+ // immediate.
+ if let Some(imm) = input_to_sext_imm(ctx, inputs[0]) {
+ (put_input_in_reg(ctx, inputs[1]), RegMemImm::imm(imm))
+ } else {
+ (
+ put_input_in_reg(ctx, inputs[0]),
+ input_to_reg_mem_imm(ctx, inputs[1]),
+ )
+ }
+ }
+ Opcode::Isub => (
+ put_input_in_reg(ctx, inputs[0]),
+ input_to_reg_mem_imm(ctx, inputs[1]),
+ ),
+ _ => unreachable!(),
+ };
+
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::mov_r_r(true, lhs, dst));
+ ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst));
+ }
+ }
+
+ Opcode::BandNot => {
+ let ty = ty.unwrap();
+ debug_assert!(ty.is_vector() && ty.bytes() == 16);
+ let lhs = input_to_reg_mem(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let sse_op = match ty {
+ types::F32X4 => SseOpcode::Andnps,
+ types::F64X2 => SseOpcode::Andnpd,
+ _ => SseOpcode::Pandn,
+ };
+ // Note the flipping of operands: the `rhs` operand is used as the destination instead
+ // of the `lhs` as in the other bit operations above (e.g. `band`).
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
+ }
+
+ Opcode::Iabs => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if ty.is_vector() {
+ let opcode = match ty {
+ types::I8X16 => SseOpcode::Pabsb,
+ types::I16X8 => SseOpcode::Pabsw,
+ types::I32X4 => SseOpcode::Pabsd,
+ _ => panic!("Unsupported type for packed iabs instruction: {}", ty),
+ };
+ ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
+ } else {
+ unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
+ }
+ }
+
+ Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if ty.is_vector() {
+ let sse_op = match op {
+ Opcode::Imax => match ty {
+ types::I8X16 => SseOpcode::Pmaxsb,
+ types::I16X8 => SseOpcode::Pmaxsw,
+ types::I32X4 => SseOpcode::Pmaxsd,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ Opcode::Umax => match ty {
+ types::I8X16 => SseOpcode::Pmaxub,
+ types::I16X8 => SseOpcode::Pmaxuw,
+ types::I32X4 => SseOpcode::Pmaxud,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ Opcode::Imin => match ty {
+ types::I8X16 => SseOpcode::Pminsb,
+ types::I16X8 => SseOpcode::Pminsw,
+ types::I32X4 => SseOpcode::Pminsd,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ Opcode::Umin => match ty {
+ types::I8X16 => SseOpcode::Pminub,
+ types::I16X8 => SseOpcode::Pminuw,
+ types::I32X4 => SseOpcode::Pminud,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ _ => unreachable!("This is a bug: the external and internal `match op` should be over the same opcodes."),
+ };
+
+ // Move the `lhs` to the same register as `dst`.
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+ ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
+ } else {
+ panic!("Unsupported type for {} instruction: {}", op, ty);
+ }
+ }
+
+ Opcode::Bnot => {
+ let ty = ty.unwrap();
+ let size = ty.bytes() as u8;
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, ty));
+
+ if ty.is_vector() {
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::equals(ty, RegMem::from(tmp), tmp));
+ ctx.emit(Inst::xor(ty, RegMem::from(tmp), dst));
+ } else if ty.is_bool() {
+ unimplemented!("bool bnot")
+ } else {
+ ctx.emit(Inst::not(size, dst));
+ }
+ }
+
+ Opcode::Bitselect => {
+ let ty = ty.unwrap();
+ let condition = put_input_in_reg(ctx, inputs[0]);
+ let if_true = put_input_in_reg(ctx, inputs[1]);
+ let if_false = input_to_reg_mem(ctx, inputs[2]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ if ty.is_vector() {
+ let tmp1 = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::gen_move(tmp1, if_true, ty));
+ ctx.emit(Inst::and(ty, RegMem::reg(condition.clone()), tmp1));
+
+ let tmp2 = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::gen_move(tmp2, condition, ty));
+ ctx.emit(Inst::and_not(ty, if_false, tmp2));
+
+ ctx.emit(Inst::gen_move(dst, tmp2.to_reg(), ty));
+ ctx.emit(Inst::or(ty, RegMem::from(tmp1), dst));
+ } else {
+ unimplemented!("scalar bitselect")
+ }
+ }
+
+ Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => {
+ let dst_ty = ctx.output_ty(insn, 0);
+ debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty);
+
+ let (size, lhs) = match dst_ty {
+ types::I8 | types::I16 => match op {
+ Opcode::Ishl => (4, put_input_in_reg(ctx, inputs[0])),
+ Opcode::Ushr => (
+ 4,
+ extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo32),
+ ),
+ Opcode::Sshr => (
+ 4,
+ extend_input_to_reg(ctx, inputs[0], ExtSpec::SignExtendTo32),
+ ),
+ Opcode::Rotl | Opcode::Rotr => {
+ (dst_ty.bytes() as u8, put_input_in_reg(ctx, inputs[0]))
+ }
+ _ => unreachable!(),
+ },
+ types::I32 | types::I64 => (dst_ty.bytes() as u8, put_input_in_reg(ctx, inputs[0])),
+ _ => unreachable!("unhandled output type for shift/rotates: {}", dst_ty),
+ };
+
+ let (count, rhs) = if let Some(cst) = ctx.get_input(insn, 1).constant {
+ // Mask count, according to Cranelift's semantics.
+ let cst = (cst as u8) & (dst_ty.bits() as u8 - 1);
+ (Some(cst), None)
+ } else {
+ (None, Some(put_input_in_reg(ctx, inputs[1])))
+ };
+
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let shift_kind = match op {
+ Opcode::Ishl => ShiftKind::ShiftLeft,
+ Opcode::Ushr => ShiftKind::ShiftRightLogical,
+ Opcode::Sshr => ShiftKind::ShiftRightArithmetic,
+ Opcode::Rotl => ShiftKind::RotateLeft,
+ Opcode::Rotr => ShiftKind::RotateRight,
+ _ => unreachable!(),
+ };
+
+ let w_rcx = Writable::from_reg(regs::rcx());
+ ctx.emit(Inst::mov_r_r(true, lhs, dst));
+ if count.is_none() {
+ ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx));
+ }
+ ctx.emit(Inst::shift_r(size, shift_kind, count, dst));
+ }
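+
+ // For example (illustrative), a constant shift amount is masked to the type width per
+ // Cranelift's semantics: `ishl.i32 v0, 40` shifts by 40 & 31 = 8, and %rcx is only
+ // loaded with the amount when it is not a compile-time constant.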
+
+ Opcode::Ineg => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ if ty.is_vector() {
+ // Zeroes out a register and then does a packed subtraction
+ // of the input from the register.
+
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
+
+ let subtract_opcode = match ty {
+ types::I8X16 => SseOpcode::Psubb,
+ types::I16X8 => SseOpcode::Psubw,
+ types::I32X4 => SseOpcode::Psubd,
+ types::I64X2 => SseOpcode::Psubq,
+ _ => panic!("Unsupported type for Ineg instruction, found {}", ty),
+ };
+
+ // Note we must zero out a tmp instead of using the destination register, since
+ // the destination could be an alias for the source input register.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pxor,
+ RegMem::reg(tmp.to_reg()),
+ tmp,
+ ));
+ ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp));
+ ctx.emit(Inst::xmm_unary_rm_r(
+ SseOpcode::Movapd,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ } else {
+ let size = ty.bytes() as u8;
+ let src = put_input_in_reg(ctx, inputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ ctx.emit(Inst::neg(size, dst));
+ }
+ }
+
+ Opcode::Clz => {
+ // TODO when the x86 flags have use_lzcnt, we can use LZCNT.
+
+ // General formula using bit-scan reverse (BSR):
+ // mov -1, %dst
+ // bsr %src, %tmp
+ // cmovz %dst, %tmp
+ // mov $(size_bits - 1), %dst
+ // sub %tmp, %dst
+
+ let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
+ types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32),
+ a if a == types::I32 || a == types::I64 => (None, a),
+ _ => unreachable!(),
+ };
+
+ let src = if let Some(ext_spec) = ext_spec {
+ RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
+ } else {
+ input_to_reg_mem(ctx, inputs[0])
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let tmp = ctx.alloc_tmp(RegClass::I64, ty);
+ ctx.emit(Inst::imm(
+ OperandSize::from_bytes(ty.bytes()),
+ u64::max_value(),
+ dst,
+ ));
+
+ ctx.emit(Inst::unary_rm_r(
+ ty.bytes() as u8,
+ UnaryRmROpcode::Bsr,
+ src,
+ tmp,
+ ));
+
+ ctx.emit(Inst::cmove(
+ ty.bytes() as u8,
+ CC::Z,
+ RegMem::reg(dst.to_reg()),
+ tmp,
+ ));
+
+ ctx.emit(Inst::imm(
+ OperandSize::from_bytes(ty.bytes()),
+ ty.bits() as u64 - 1,
+ dst,
+ ));
+
+ ctx.emit(Inst::alu_rmi_r(
+ ty == types::I64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+
+ Opcode::Ctz => {
+ // TODO when the x86 flags have use_bmi1, we can use TZCNT.
+
+ // General formula using bit-scan forward (BSF):
+ // bsf %src, %dst
+ // mov $(size_bits), %tmp
+ // cmovz %tmp, %dst
+ let ty = ctx.input_ty(insn, 0);
+ let ty = if ty.bits() < 32 { types::I32 } else { ty };
+ debug_assert!(ty == types::I32 || ty == types::I64);
+
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let tmp = ctx.alloc_tmp(RegClass::I64, ty);
+ ctx.emit(Inst::imm(OperandSize::Size32, ty.bits() as u64, tmp));
+
+ ctx.emit(Inst::unary_rm_r(
+ ty.bytes() as u8,
+ UnaryRmROpcode::Bsf,
+ src,
+ dst,
+ ));
+
+ ctx.emit(Inst::cmove(
+ ty.bytes() as u8,
+ CC::Z,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+
+ Opcode::Popcnt => {
+ // TODO when the x86 flags have use_popcnt, we can use the popcnt instruction.
+
+ let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
+ types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32),
+ a if a == types::I32 || a == types::I64 => (None, a),
+ _ => unreachable!(),
+ };
+
+ let src = if let Some(ext_spec) = ext_spec {
+ RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
+ } else {
+ input_to_reg_mem(ctx, inputs[0])
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ if ty == types::I64 {
+ let is_64 = true;
+
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let cst = ctx.alloc_tmp(RegClass::I64, types::I64);
+
+ // mov src, tmp1
+ ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // mov 0x7777_7777_7777_7777, cst
+ ctx.emit(Inst::imm(OperandSize::Size64, 0x7777777777777777, cst));
+
+ // andq cst, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ tmp1,
+ ));
+
+ // mov src, tmp2
+ ctx.emit(Inst::mov64_rm_r(src, tmp2));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and cst, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and cst, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // mov tmp2, dst
+ ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst));
+
+ // shr $4, dst
+ ctx.emit(Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(4), dst));
+
+ // add tmp2, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp2.to_reg()),
+ dst,
+ ));
+
+ // mov $0x0F0F_0F0F_0F0F_0F0F, cst
+ ctx.emit(Inst::imm(OperandSize::Size64, 0x0F0F0F0F0F0F0F0F, cst));
+
+ // and cst, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ dst,
+ ));
+
+ // mov $0x0101_0101_0101_0101, cst
+ ctx.emit(Inst::imm(OperandSize::Size64, 0x0101010101010101, cst));
+
+ // mul cst, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Mul,
+ RegMemImm::reg(cst.to_reg()),
+ dst,
+ ));
+
+ // shr $56, dst
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(56),
+ dst,
+ ));
+ } else {
+ assert_eq!(ty, types::I32);
+ let is_64 = false;
+
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+
+ // mov src, tmp1
+ ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // andq $0x7777_7777, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x77777777),
+ tmp1,
+ ));
+
+ // mov src, tmp2
+ ctx.emit(Inst::mov64_rm_r(src, tmp2));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and 0x7777_7777, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x77777777),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and $0x7777_7777, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x77777777),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // mov tmp2, dst
+ ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst));
+
+ // shr $4, dst
+ ctx.emit(Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(4), dst));
+
+ // add tmp2, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp2.to_reg()),
+ dst,
+ ));
+
+ // and $0x0F0F_0F0F, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x0F0F0F0F),
+ dst,
+ ));
+
+ // mul $0x0101_0101, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(0x01010101),
+ dst,
+ ));
+
+ // shr $24, dst
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(24),
+ dst,
+ ));
+ }
+ }
+
+ Opcode::IsNull | Opcode::IsInvalid => {
+ // Null references are represented by the constant value 0; invalid references are
+ // represented by the constant value -1. See `define_reftypes()` in
+ // `meta/src/isa/x86/encodings.rs` to confirm.
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ let imm = match op {
+ Opcode::IsNull => {
+                    // TODO could use test src, src for IsNull
+ 0
+ }
+ Opcode::IsInvalid => {
+                    // We can do a 32-bit comparison even in 64-bit mode, as the constant is then
+ // sign-extended.
+ 0xffffffff
+ }
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::imm(imm), src));
+ ctx.emit(Inst::setcc(CC::Z, dst));
+ }
+
+ Opcode::Uextend
+ | Opcode::Sextend
+ | Opcode::Bint
+ | Opcode::Breduce
+ | Opcode::Bextend
+ | Opcode::Ireduce => {
+ let src_ty = ctx.input_ty(insn, 0);
+ let dst_ty = ctx.output_ty(insn, 0);
+
+ // Sextend requires a sign-extended move, but all the other opcodes are simply a move
+ // from a zero-extended source. Here is why this works, in each case:
+ //
+ // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to
+ // zero-extend here.
+ //
+ // - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so
+ // again, this is a zero-extend / no-op.
+ //
+ // - Ireduce: changing width of an integer. Smaller ints are stored with undefined
+ // high-order bits, so we can simply do a copy.
+
+ if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend {
+                // As a particular x64 extra-pattern-matching opportunity, all the 32-bit ALU
+                // opcodes already zero-extend the upper 32 bits of the destination register, so
+                // we don't even need to generate a zero-extending move in this case.
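+                // (For example, a 32-bit `addl %ebx, %eax` already clears bits 63:32 of %rax,
+                // so the result can be used as an i64 without an extra move.)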
+ // TODO add loads and shifts here.
+ if let Some(_) = matches_input_any(
+ ctx,
+ inputs[0],
+ &[
+ Opcode::Iadd,
+ Opcode::IaddIfcout,
+ Opcode::Isub,
+ Opcode::Imul,
+ Opcode::Band,
+ Opcode::Bor,
+ Opcode::Bxor,
+ ],
+ ) {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, types::I64));
+ return Ok(());
+ }
+ }
+
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let ext_mode = ExtMode::new(src_ty.bits(), dst_ty.bits());
+ assert_eq!(
+ src_ty.bits() < dst_ty.bits(),
+ ext_mode.is_some(),
+ "unexpected extension: {} -> {}",
+ src_ty,
+ dst_ty
+ );
+
+ if let Some(ext_mode) = ext_mode {
+ if op == Opcode::Sextend {
+ ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst));
+ } else {
+ ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst));
+ }
+ } else {
+ ctx.emit(Inst::mov64_rm_r(src, dst));
+ }
+ }
+
+ Opcode::Icmp => {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ if !ty.is_vector() {
+ emit_cmp(ctx, insn);
+ let cc = CC::from_intcc(condcode);
+ ctx.emit(Inst::setcc(cc, dst));
+ } else {
+ assert_eq!(ty.bits(), 128);
+ let eq = |ty| match ty {
+ types::I8X16 => SseOpcode::Pcmpeqb,
+ types::I16X8 => SseOpcode::Pcmpeqw,
+ types::I32X4 => SseOpcode::Pcmpeqd,
+ types::I64X2 => SseOpcode::Pcmpeqq,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let gt = |ty| match ty {
+ types::I8X16 => SseOpcode::Pcmpgtb,
+ types::I16X8 => SseOpcode::Pcmpgtw,
+ types::I32X4 => SseOpcode::Pcmpgtd,
+ types::I64X2 => SseOpcode::Pcmpgtq,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let maxu = |ty| match ty {
+ types::I8X16 => SseOpcode::Pmaxub,
+ types::I16X8 => SseOpcode::Pmaxuw,
+ types::I32X4 => SseOpcode::Pmaxud,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let mins = |ty| match ty {
+ types::I8X16 => SseOpcode::Pminsb,
+ types::I16X8 => SseOpcode::Pminsw,
+ types::I32X4 => SseOpcode::Pminsd,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let minu = |ty| match ty {
+ types::I8X16 => SseOpcode::Pminub,
+ types::I16X8 => SseOpcode::Pminuw,
+ types::I32X4 => SseOpcode::Pminud,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+
+ // Here we decide which operand to use as the read/write `dst` (ModRM reg field)
+ // and which to use as the read `input` (ModRM r/m field). In the normal case we
+ // use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for
+ // the less-than cases so that we can reuse the greater-than implementation.
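+                // For reference, several of the sequences below rely on these identities
+                // (sketch):
+                //
+                //   a >=s b  <=>  min_s(a, b) == b
+                //   a >=u b  <=>  min_u(a, b) == b
+                //   a >u  b  <=>  !(max_u(a, b) == b)
+                //   a !=  b  <=>  !(a == b)
+                //
+                // with the "not" realized by XORing against an all-ones register, and the
+                // less-than forms obtained by the operand swap described above.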
+ let input = match condcode {
+ IntCC::SignedLessThan
+ | IntCC::SignedLessThanOrEqual
+ | IntCC::UnsignedLessThan
+ | IntCC::UnsignedLessThanOrEqual => {
+ let lhs = input_to_reg_mem(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ lhs
+ }
+ _ => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+ rhs
+ }
+ };
+
+ match condcode {
+ IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
+ IntCC::NotEqual => {
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
+ // Emit all 1s into the `tmp` register.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
+ // Invert the result of the `PCMPEQ*`.
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
+ }
+ IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
+ ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
+ }
+ IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
+ ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
+ }
+ IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
+ ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
+ // Emit all 1s into the `tmp` register.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
+ // Invert the result of the `PCMPEQ*`.
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
+ }
+ IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
+ ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
+ }
+ _ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
+ }
+ }
+ }
+
+ Opcode::Fcmp => {
+ let cond_code = ctx.data(insn).fp_cond_code().unwrap();
+ let input_ty = ctx.input_ty(insn, 0);
+ if !input_ty.is_vector() {
+ // Unordered is returned by setting ZF, PF, CF <- 111
+ // Greater than by ZF, PF, CF <- 000
+ // Less than by ZF, PF, CF <- 001
+ // Equal by ZF, PF, CF <- 100
+ //
+ // Checking the result of comiss is somewhat annoying because you don't have setcc
+ // instructions that explicitly check simultaneously for the condition (i.e. eq, le,
+ // gt, etc) *and* orderedness.
+ //
+            // So in some cases we need more than one setcc check and then a logical "and" or
+            // "or" to combine them. However, knowing that the parity bit is set only when the
+            // result is unordered, and that an unordered result also sets both ZF and CF, we can
+            // get away with a single setcc for most condition codes.
+
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ match emit_fcmp(ctx, insn, cond_code, FcmpSpec::Normal) {
+ FcmpCondResult::Condition(cc) => {
+ ctx.emit(Inst::setcc(cc, dst));
+ }
+ FcmpCondResult::AndConditions(cc1, cc2) => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::setcc(cc1, tmp));
+ ctx.emit(Inst::setcc(cc2, dst));
+ ctx.emit(Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::And,
+ RegMemImm::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+ FcmpCondResult::OrConditions(cc1, cc2) => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::setcc(cc1, tmp));
+ ctx.emit(Inst::setcc(cc2, dst));
+ ctx.emit(Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Or,
+ RegMemImm::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+ FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+ }
+ } else {
+ let op = match input_ty {
+ types::F32X4 => SseOpcode::Cmpps,
+ types::F64X2 => SseOpcode::Cmppd,
+ _ => panic!("Bad input type to fcmp: {}", input_ty),
+ };
+
+ // Since some packed comparisons are not available, some of the condition codes
+ // must be inverted, with a corresponding `flip` of the operands.
+ let (imm, flip) = match cond_code {
+ FloatCC::GreaterThan => (FcmpImm::LessThan, true),
+ FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true),
+ FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true),
+ FloatCC::UnorderedOrLessThanOrEqual => {
+ (FcmpImm::UnorderedOrGreaterThanOrEqual, true)
+ }
+ FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => {
+ panic!("unsupported float condition code: {}", cond_code)
+ }
+ _ => (FcmpImm::from(cond_code), false),
+ };
+
+ // Determine the operands of the comparison, possibly by flipping them.
+ let (lhs, rhs) = if flip {
+ (
+ put_input_in_reg(ctx, inputs[1]),
+ input_to_reg_mem(ctx, inputs[0]),
+ )
+ } else {
+ (
+ put_input_in_reg(ctx, inputs[0]),
+ input_to_reg_mem(ctx, inputs[1]),
+ )
+ };
+
+ // Move the `lhs` to the same register as `dst`; this may not emit an actual move
+ // but ensures that the registers are the same to match x86's read-write operand
+ // encoding.
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, lhs, input_ty));
+
+ // Emit the comparison.
+ ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false));
+ }
+ }
+
+ Opcode::FallthroughReturn | Opcode::Return => {
+ for i in 0..ctx.num_inputs(insn) {
+ let src_reg = put_input_in_reg(ctx, inputs[i]);
+ let retval_reg = ctx.retval(i);
+ let ty = ctx.input_ty(insn, i);
+ ctx.emit(Inst::gen_move(retval_reg, src_reg, ty));
+ }
+ // N.B.: the Ret itself is generated by the ABI.
+ }
+
+ Opcode::Call | Opcode::CallIndirect => {
+ let caller_conv = ctx.abi().call_conv();
+ let (mut abi, inputs) = match op {
+ Opcode::Call => {
+ let (extname, dist) = ctx.call_target(insn).unwrap();
+ let sig = ctx.call_sig(insn).unwrap();
+ assert_eq!(inputs.len(), sig.params.len());
+ assert_eq!(outputs.len(), sig.returns.len());
+ (
+ X64ABICaller::from_func(sig, &extname, dist, caller_conv)?,
+ &inputs[..],
+ )
+ }
+
+ Opcode::CallIndirect => {
+ let ptr = put_input_in_reg(ctx, inputs[0]);
+ let sig = ctx.call_sig(insn).unwrap();
+ assert_eq!(inputs.len() - 1, sig.params.len());
+ assert_eq!(outputs.len(), sig.returns.len());
+ (
+ X64ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
+ &inputs[1..],
+ )
+ }
+
+ _ => unreachable!(),
+ };
+
+ abi.emit_stack_pre_adjust(ctx);
+ assert_eq!(inputs.len(), abi.num_args());
+ for (i, input) in inputs.iter().enumerate() {
+ let arg_reg = put_input_in_reg(ctx, *input);
+ abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
+ }
+ abi.emit_call(ctx);
+ for (i, output) in outputs.iter().enumerate() {
+ let retval_reg = get_output_reg(ctx, *output);
+ abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
+ }
+ abi.emit_stack_post_adjust(ctx);
+ }
+
+ Opcode::Debugtrap => {
+ ctx.emit(Inst::Hlt);
+ }
+
+ Opcode::Trap | Opcode::ResumableTrap => {
+ let trap_code = ctx.data(insn).trap_code().unwrap();
+ ctx.emit_safepoint(Inst::Ud2 { trap_code });
+ }
+
+ Opcode::Trapif | Opcode::Trapff => {
+ let trap_code = ctx.data(insn).trap_code().unwrap();
+
+ if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
+ let cond_code = ctx.data(insn).cond_code().unwrap();
+ // The flags must not have been clobbered by any other instruction between the
+ // iadd_ifcout and this instruction, as verified by the CLIF validator; so we can
+ // simply use the flags here.
+ let cc = CC::from_intcc(cond_code);
+
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+ } else if op == Opcode::Trapif {
+ let cond_code = ctx.data(insn).cond_code().unwrap();
+ let cc = CC::from_intcc(cond_code);
+
+ // Verification ensures that the input is always a single-def ifcmp.
+ let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
+ emit_cmp(ctx, ifcmp);
+
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+ } else {
+ let cond_code = ctx.data(insn).fp_cond_code().unwrap();
+
+ // Verification ensures that the input is always a single-def ffcmp.
+ let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
+
+ match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
+ FcmpCondResult::Condition(cc) => {
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc })
+ }
+ FcmpCondResult::AndConditions(cc1, cc2) => {
+ // A bit unfortunate, but materialize the flags in their own register, and
+ // check against this.
+ let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::setcc(cc1, tmp));
+ ctx.emit(Inst::setcc(cc2, tmp2));
+ ctx.emit(Inst::alu_rmi_r(
+ false, /* is_64 */
+ AluRmiROpcode::And,
+ RegMemImm::reg(tmp.to_reg()),
+ tmp2,
+ ));
+ ctx.emit_safepoint(Inst::TrapIf {
+ trap_code,
+ cc: CC::NZ,
+ });
+ }
+ FcmpCondResult::OrConditions(cc1, cc2) => {
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 });
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 });
+ }
+ FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+ };
+ };
+ }
+
+ Opcode::F64const => {
+ // TODO use cmpeqpd for all 1s.
+ let value = ctx.get_constant(insn).unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ for inst in Inst::gen_constant(dst, value, types::F64, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::F32const => {
+ // TODO use cmpeqps for all 1s.
+ let value = ctx.get_constant(insn).unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ for inst in Inst::gen_constant(dst, value, types::F32, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ // Move the `lhs` to the same register as `dst`; this may not emit an actual move
+ // but ensures that the registers are the same to match x86's read-write operand
+ // encoding.
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+
+ // Note: min and max can't be handled here, because of the way Cranelift defines them:
+ // if any operand is a NaN, they must return the NaN operand, while the x86 machine
+ // instruction will return the second operand if either operand is a NaN.
+ let sse_op = match ty {
+ types::F32 => match op {
+ Opcode::Fadd => SseOpcode::Addss,
+ Opcode::Fsub => SseOpcode::Subss,
+ Opcode::Fmul => SseOpcode::Mulss,
+ Opcode::Fdiv => SseOpcode::Divss,
+ _ => unreachable!(),
+ },
+ types::F64 => match op {
+ Opcode::Fadd => SseOpcode::Addsd,
+ Opcode::Fsub => SseOpcode::Subsd,
+ Opcode::Fmul => SseOpcode::Mulsd,
+ Opcode::Fdiv => SseOpcode::Divsd,
+ _ => unreachable!(),
+ },
+ types::F32X4 => match op {
+ Opcode::Fadd => SseOpcode::Addps,
+ Opcode::Fsub => SseOpcode::Subps,
+ Opcode::Fmul => SseOpcode::Mulps,
+ Opcode::Fdiv => SseOpcode::Divps,
+ _ => unreachable!(),
+ },
+ types::F64X2 => match op {
+ Opcode::Fadd => SseOpcode::Addpd,
+ Opcode::Fsub => SseOpcode::Subpd,
+ Opcode::Fmul => SseOpcode::Mulpd,
+ Opcode::Fdiv => SseOpcode::Divpd,
+ _ => unreachable!(),
+ },
+ _ => panic!(
+ "invalid type: expected one of [F32, F64, F32X4, F64X2], found {}",
+ ty
+ ),
+ };
+ ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
+ }
+
+ Opcode::Fmin | Opcode::Fmax => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let is_min = op == Opcode::Fmin;
+ let output_ty = ty.unwrap();
+ ctx.emit(Inst::gen_move(dst, rhs, output_ty));
+ if !output_ty.is_vector() {
+ let op_size = match output_ty {
+ types::F32 => OperandSize::Size32,
+ types::F64 => OperandSize::Size64,
+ _ => panic!("unexpected type {:?} for fmin/fmax", output_ty),
+ };
+ ctx.emit(Inst::xmm_min_max_seq(op_size, is_min, lhs, dst));
+ } else {
+                // X64's implementation of floating point min and floating point max does not
+                // propagate NaNs and +0's in a way that is friendly to the SIMD spec. For the
+                // scalar approach we use jumps to handle the cases where NaN and +0 propagation
+                // is not consistent with what is needed. However, for packed floating point min
+                // and max we use a different approach to avoid the sequence of jumps that would
+                // be required on a per-lane basis. Because we do not need to lower labels and
+                // jumps, but do need ctx for creating temporaries, we implement the lowering
+                // here in lower.rs instead of emit.rs, as is done for the scalar case.
+                // The outline of the approach is as follows:
+                //
+                // First we perform the min/max in both directions. This is because when an
+                // operand's lane contains a NaN, or when the corresponding lanes of the two
+                // operands contain 0s with mismatched signs, x64 returns the second operand
+                // regardless of its contents. So, to make sure we capture NaNs and normalize
+                // NaNs and 0 values, we perform the operation in both directions and merge the
+                // results. Then we normalize the results: we create a mask for the lanes
+                // containing NaNs and use it to turn NaNs into quiet NaNs and to normalize
+                // the 0s.
+ //
+ // The following sequence is generated for min:
+ //
+ // movap{s,d} %lhs, %tmp
+ // minp{s,d} %dst, %tmp
+                // minp{s,d} %lhs, %dst
+ // orp{s,d} %dst, %tmp
+ // cmpp{s,d} %tmp, %dst, $3
+                // orp{s,d} %dst, %tmp
+                // psrl{d,q} {$10, $13}, %dst
+ // andnp{s,d} %tmp, %dst
+ //
+ // and for max the sequence is:
+ //
+ // movap{s,d} %lhs, %tmp
+                // maxp{s,d} %dst, %tmp
+                // maxp{s,d} %lhs, %dst
+ // xorp{s,d} %tmp, %dst
+ // orp{s,d} %dst, %tmp
+ // subp{s,d} %dst, %tmp
+ // cmpp{s,d} %tmp, %dst, $3
+                // psrl{d,q} {$10, $13}, %dst
+ // andnp{s,d} %tmp, %dst
+
+ if is_min {
+ let (mov_op, min_op, or_op, cmp_op, shift_op, shift_by, andn_op) =
+ match output_ty {
+ types::F32X4 => (
+ SseOpcode::Movaps,
+ SseOpcode::Minps,
+ SseOpcode::Orps,
+ SseOpcode::Cmpps,
+ SseOpcode::Psrld,
+ 10,
+ SseOpcode::Andnps,
+ ),
+ types::F64X2 => (
+ SseOpcode::Movapd,
+ SseOpcode::Minpd,
+ SseOpcode::Orpd,
+ SseOpcode::Cmppd,
+ SseOpcode::Psrlq,
+ 13,
+ SseOpcode::Andnpd,
+ ),
+ _ => unimplemented!("unsupported op type {:?}", output_ty),
+ };
+
+ // Copy lhs into tmp
+ let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, output_ty);
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1));
+
+ // Perform min in reverse direction
+ ctx.emit(Inst::xmm_rm_r(min_op, RegMem::from(dst), tmp_xmm1));
+
+ // Perform min in original direction
+ ctx.emit(Inst::xmm_rm_r(min_op, RegMem::reg(lhs), dst));
+
+                    // X64 handles propagation of -0's and NaNs differently between left and right
+                    // operands. After doing the min in both directions, this OR will
+                    // guarantee capture of -0's and NaNs in our tmp register.
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::from(dst), tmp_xmm1));
+
+ // Compare unordered to create mask for lanes containing NaNs and then use
+ // that mask to saturate the NaN containing lanes in the tmp register with 1s.
+ // TODO: Would a check for NaN and then a jump be better here in the
+ // common case than continuing on to normalize NaNs that might not exist?
+ let cond = FcmpImm::from(FloatCC::Unordered);
+ ctx.emit(Inst::xmm_rm_r_imm(
+ cmp_op,
+ RegMem::reg(tmp_xmm1.to_reg()),
+ dst,
+ cond.encode(),
+ false,
+ ));
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+                    // The dst register holds a mask for lanes containing NaNs.
+                    // We take that mask and shift in preparation for creating a different mask
+                    // to normalize NaNs (create a quiet NaN) by zeroing out the appropriate
+                    // number of least significant bits. We shift right each lane by 10 bits
+                    // (1 sign + 8 exp. + 1 MSB sig.) for F32X4 and by 13 bits (1 sign +
+                    // 11 exp. + 1 MSB sig.) for F64X2.
+ ctx.emit(Inst::xmm_rmi_reg(shift_op, RegMemImm::imm(shift_by), dst));
+
+ // Finally we do a nand with the tmp register to produce the final results
+ // in the dst.
+ ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+ } else {
+ let (
+ mov_op,
+ max_op,
+ xor_op,
+ or_op,
+ sub_op,
+ cmp_op,
+ shift_op,
+ shift_by,
+ andn_op,
+ ) = match output_ty {
+ types::F32X4 => (
+ SseOpcode::Movaps,
+ SseOpcode::Maxps,
+ SseOpcode::Xorps,
+ SseOpcode::Orps,
+ SseOpcode::Subps,
+ SseOpcode::Cmpps,
+ SseOpcode::Psrld,
+ 10,
+ SseOpcode::Andnps,
+ ),
+ types::F64X2 => (
+ SseOpcode::Movapd,
+ SseOpcode::Maxpd,
+ SseOpcode::Xorpd,
+ SseOpcode::Orpd,
+ SseOpcode::Subpd,
+ SseOpcode::Cmppd,
+ SseOpcode::Psrlq,
+ 13,
+ SseOpcode::Andnpd,
+ ),
+ _ => unimplemented!("unsupported op type {:?}", output_ty),
+ };
+
+ // Copy lhs into tmp.
+ let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, types::F32);
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1));
+
+ // Perform max in reverse direction.
+ ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // Perform max in original direction.
+ ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(lhs), dst));
+
+                    // XOR the two results to get their difference and store it in dst.
+                    // Max uses a different approach than min to account for potential
+                    // discrepancies with plus/minus 0.
+ ctx.emit(Inst::xmm_rm_r(xor_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+
+                    // X64 handles propagation of -0's and NaNs differently between left and right
+                    // operands. After doing the max in both directions, this OR will
+                    // guarantee capture of 0's and NaNs in our tmp register.
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // Capture NaNs and sign discrepancies.
+ ctx.emit(Inst::xmm_rm_r(sub_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // Compare unordered to create mask for lanes containing NaNs and then use
+ // that mask to saturate the NaN containing lanes in the tmp register with 1s.
+ let cond = FcmpImm::from(FloatCC::Unordered);
+ ctx.emit(Inst::xmm_rm_r_imm(
+ cmp_op,
+ RegMem::reg(tmp_xmm1.to_reg()),
+ dst,
+ cond.encode(),
+ false,
+ ));
+
+                    // The dst register holds a mask for lanes containing NaNs.
+                    // We take that mask and shift in preparation for creating a different mask
+                    // to normalize NaNs (create a quiet NaN) by zeroing out the appropriate
+                    // number of least significant bits. We shift right each lane by 10 bits
+                    // (1 sign + 8 exp. + 1 MSB sig.) for F32X4 and by 13 bits (1 sign +
+                    // 11 exp. + 1 MSB sig.) for F64X2.
+ ctx.emit(Inst::xmm_rmi_reg(shift_op, RegMemImm::imm(shift_by), dst));
+
+ // Finally we do a nand with the tmp register to produce the final results
+ // in the dst.
+ ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+ }
+ }
+ }
+
+ Opcode::FminPseudo | Opcode::FmaxPseudo => {
+ let lhs = input_to_reg_mem(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ let sse_opcode = match (ty, op) {
+ (types::F32X4, Opcode::FminPseudo) => SseOpcode::Minps,
+ (types::F32X4, Opcode::FmaxPseudo) => SseOpcode::Maxps,
+ (types::F64X2, Opcode::FminPseudo) => SseOpcode::Minpd,
+ (types::F64X2, Opcode::FmaxPseudo) => SseOpcode::Maxpd,
+ _ => unimplemented!("unsupported type {} for {}", ty, op),
+ };
+ ctx.emit(Inst::xmm_rm_r(sse_opcode, lhs, dst));
+ }
+
+ Opcode::Sqrt => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ let sse_op = match ty {
+ types::F32 => SseOpcode::Sqrtss,
+ types::F64 => SseOpcode::Sqrtsd,
+ types::F32X4 => SseOpcode::Sqrtps,
+ types::F64X2 => SseOpcode::Sqrtpd,
+ _ => panic!(
+ "invalid type: expected one of [F32, F64, F32X4, F64X2], found {}",
+ ty
+ ),
+ };
+
+ ctx.emit(Inst::xmm_unary_rm_r(sse_op, src, dst));
+ }
+
+ Opcode::Fpromote => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, src, dst));
+ }
+
+ Opcode::Fdemote => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst));
+ }
+
+ Opcode::FcvtFromSint => {
+ let output_ty = ty.unwrap();
+ if !output_ty.is_vector() {
+ let (ext_spec, src_size) = match ctx.input_ty(insn, 0) {
+ types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32),
+ types::I32 => (None, OperandSize::Size32),
+ types::I64 => (None, OperandSize::Size64),
+ _ => unreachable!(),
+ };
+
+ let src = match ext_spec {
+ Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)),
+ None => input_to_reg_mem(ctx, inputs[0]),
+ };
+
+ let opcode = if output_ty == types::F32 {
+ SseOpcode::Cvtsi2ss
+ } else {
+ assert_eq!(output_ty, types::F64);
+ SseOpcode::Cvtsi2sd
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst));
+ } else {
+ let ty = ty.unwrap();
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let opcode = match ctx.input_ty(insn, 0) {
+ types::I32X4 => SseOpcode::Cvtdq2ps,
+ _ => {
+ unimplemented!("unable to use type {} for op {}", ctx.input_ty(insn, 0), op)
+ }
+ };
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
+ }
+ }
+
+ Opcode::FcvtFromUint => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ let input_ty = ctx.input_ty(insn, 0);
+ if !ty.is_vector() {
+ match input_ty {
+ types::I8 | types::I16 | types::I32 => {
+                        // Conversion from an unsigned int smaller than 64 bits is easy: zero-extend +
+ // do a signed conversion (which won't overflow).
+ let opcode = if ty == types::F32 {
+ SseOpcode::Cvtsi2ss
+ } else {
+ assert_eq!(ty, types::F64);
+ SseOpcode::Cvtsi2sd
+ };
+
+ let src = RegMem::reg(extend_input_to_reg(
+ ctx,
+ inputs[0],
+ ExtSpec::ZeroExtendTo64,
+ ));
+ ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst));
+ }
+
+ types::I64 => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+
+ let src_copy = ctx.alloc_tmp(RegClass::I64, types::I64);
+ ctx.emit(Inst::gen_move(src_copy, src, types::I64));
+
+ let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let tmp_gpr2 = ctx.alloc_tmp(RegClass::I64, types::I64);
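+                        // The pseudo-instruction below expands (in emit.rs) along the lines of
+                        // the usual u64-to-float trick (rough sketch, not the exact expansion):
+                        // if the value fits in an i64, convert it directly; otherwise convert
+                        // ((x >> 1) | (x & 1)) as a signed value and double the result, the
+                        // low "sticky" bit keeping the rounding correct.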
+ ctx.emit(Inst::cvt_u64_to_float_seq(
+ ty == types::F64,
+ src_copy,
+ tmp_gpr1,
+ tmp_gpr2,
+ dst,
+ ));
+ }
+ _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
+ };
+ } else {
+ // Converting packed unsigned integers to packed floats requires a few steps.
+                // There is no single-instruction lowering for converting packed unsigned
+                // integers, but there is one for converting packed signed integers to float
+                // (cvtdq2ps). In the steps below we isolate the upper half (16 bits) and lower
+                // half (16 bits) of each lane and convert each half separately using cvtdq2ps,
+                // which is meant for signed integers. For this to work on the upper half we must
+                // first shift those bits right by 1 (divide by 2) so that the most significant
+                // bit is 0 and the value is not treated as negative; after the conversion we
+                // double the value. Finally we add the two converted halves, and the addition
+                // rounds correctly.
+ //
+ // Sequence:
+ // -> A = 0xffffffff
+ // -> Ah = 0xffff0000
+ // -> Al = 0x0000ffff
+ // -> Convert(Al) // Convert int to float
+                // -> Ah = Ah >> 1 // Shift right by 1 to ensure Ah's conversion isn't treated as signed
+ // -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
+ // -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion.
+ // -> dst = Ah + Al // Add the two floats together
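+                //
+                // As a worked example (illustrative only), for a lane holding 0xFFFF_FFFF:
+                // Al = 65535.0; Ah >> 1 = 0x7FFF_8000 converts to 2147450880.0, doubled to
+                // 4294901760.0; the final add of 65535.0 rounds to 4294967296.0, the nearest
+                // f32 to 4294967295.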
+
+ assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // Create a temporary register
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
+ ctx.emit(Inst::xmm_unary_rm_r(
+ SseOpcode::Movapd,
+ RegMem::reg(src),
+ tmp,
+ ));
+ ctx.emit(Inst::gen_move(dst, src, ty));
+
+ // Get the low 16 bits
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(16), tmp));
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(16), tmp));
+
+ // Get the high 16 bits
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst));
+
+ // Convert the low 16 bits
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(tmp), tmp));
+
+ // Shift the high bits by 1, convert, and double to get the correct value.
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(dst), dst));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Addps,
+ RegMem::reg(dst.to_reg()),
+ dst,
+ ));
+
+ // Add together the two converted values.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Addps,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+ }
+
+ Opcode::FcvtToUint | Opcode::FcvtToUintSat | Opcode::FcvtToSint | Opcode::FcvtToSintSat => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let input_ty = ctx.input_ty(insn, 0);
+ if !input_ty.is_vector() {
+ let src_size = if input_ty == types::F32 {
+ OperandSize::Size32
+ } else {
+ assert_eq!(input_ty, types::F64);
+ OperandSize::Size64
+ };
+
+ let output_ty = ty.unwrap();
+ let dst_size = if output_ty == types::I32 {
+ OperandSize::Size32
+ } else {
+ assert_eq!(output_ty, types::I64);
+ OperandSize::Size64
+ };
+
+ let to_signed = op == Opcode::FcvtToSint || op == Opcode::FcvtToSintSat;
+ let is_sat = op == Opcode::FcvtToUintSat || op == Opcode::FcvtToSintSat;
+
+ let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty);
+ ctx.emit(Inst::gen_move(src_copy, src, input_ty));
+
+ let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty);
+ let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty);
+
+ if to_signed {
+ ctx.emit(Inst::cvt_float_to_sint_seq(
+ src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm,
+ ));
+ } else {
+ ctx.emit(Inst::cvt_float_to_uint_seq(
+ src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm,
+ ));
+ }
+ } else {
+ if op == Opcode::FcvtToSintSat {
+ // Sets destination to zero if float is NaN
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
+ ctx.emit(Inst::xmm_unary_rm_r(
+ SseOpcode::Movapd,
+ RegMem::reg(src),
+ tmp,
+ ));
+ ctx.emit(Inst::gen_move(dst, src, input_ty));
+ let cond = FcmpImm::from(FloatCC::Equal);
+ ctx.emit(Inst::xmm_rm_r_imm(
+ SseOpcode::Cmpps,
+ RegMem::reg(tmp.to_reg()),
+ tmp,
+ cond.encode(),
+ false,
+ ));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Andps,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+
+                    // Set the top bit of tmp for the (non-NaN) lanes whose float is positive;
+                    // this is used below to distinguish positive overflow from negative overflow.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pxor,
+ RegMem::reg(dst.to_reg()),
+ tmp,
+ ));
+
+ // Convert the packed float to packed doubleword.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Cvttps2dq,
+ RegMem::reg(dst.to_reg()),
+ dst,
+ ));
+
+                    // AND the converted result into tmp: its top bit is then set only for lanes
+                    // that overflowed in the positive direction (cvttps2dq returned 0x80000000
+                    // for a positive input); the arithmetic shift below broadcasts that bit
+                    // across each lane.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pand,
+ RegMem::reg(dst.to_reg()),
+ tmp,
+ ));
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrad, RegMemImm::imm(31), tmp));
+
+ // On overflow 0x80000000 is returned to a lane.
+ // Below sets positive overflow lanes to 0x7FFFFFFF
+ // Keeps negative overflow lanes as is.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pxor,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
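+                    // Net effect per lane (summary for reference): NaN -> 0; inputs above
+                    // i32::MAX -> 0x7FFF_FFFF; inputs below i32::MIN -> 0x8000_0000; everything
+                    // else -> the truncated value.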
+ } else if op == Opcode::FcvtToUintSat {
+ unimplemented!("f32x4.convert_i32x4_u");
+ } else {
+                    // Since this branch is only taken for vector types, and neither
+                    // Opcode::FcvtToUint nor Opcode::FcvtToSint has a vector variant,
+                    // they cannot reach here. The first two branches cover all
+                    // reachable cases.
+ unreachable!();
+ }
+ }
+ }
+
+ Opcode::Bitcast => {
+ let input_ty = ctx.input_ty(insn, 0);
+ let output_ty = ctx.output_ty(insn, 0);
+ match (input_ty, output_ty) {
+ (types::F32, types::I32) => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_to_gpr(
+ SseOpcode::Movd,
+ src,
+ dst,
+ OperandSize::Size32,
+ ));
+ }
+ (types::I32, types::F32) => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ src,
+ OperandSize::Size32,
+ dst,
+ ));
+ }
+ (types::F64, types::I64) => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_to_gpr(
+ SseOpcode::Movq,
+ src,
+ dst,
+ OperandSize::Size64,
+ ));
+ }
+ (types::I64, types::F64) => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gpr_to_xmm(
+ SseOpcode::Movq,
+ src,
+ OperandSize::Size64,
+ dst,
+ ));
+ }
+ _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
+ }
+ }
+
+ Opcode::Fabs | Opcode::Fneg => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // In both cases, generate a constant and apply a single binary instruction:
+ // - to compute the absolute value, set all bits to 1 but the MSB to 0, and bit-AND the
+ // src with it.
+ // - to compute the negated value, set all bits to 0 but the MSB to 1, and bit-XOR the
+ // src with it.
+ let output_ty = ty.unwrap();
+ if !output_ty.is_vector() {
+ let (val, opcode) = match output_ty {
+ types::F32 => match op {
+ Opcode::Fabs => (0x7fffffff, SseOpcode::Andps),
+ Opcode::Fneg => (0x80000000, SseOpcode::Xorps),
+ _ => unreachable!(),
+ },
+ types::F64 => match op {
+ Opcode::Fabs => (0x7fffffffffffffff, SseOpcode::Andpd),
+ Opcode::Fneg => (0x8000000000000000, SseOpcode::Xorpd),
+ _ => unreachable!(),
+ },
+ _ => panic!("unexpected type {:?} for Fabs", output_ty),
+ };
+
+ for inst in Inst::gen_constant(dst, val, output_ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+
+ ctx.emit(Inst::xmm_rm_r(opcode, src, dst));
+ } else {
+ // Eventually vector constants should be available in `gen_constant` and this block
+ // can be merged with the one above (TODO).
+ if output_ty.bits() == 128 {
+ // Move the `lhs` to the same register as `dst`; this may not emit an actual move
+ // but ensures that the registers are the same to match x86's read-write operand
+ // encoding.
+ let src = put_input_in_reg(ctx, inputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, output_ty));
+
+ // Generate an all 1s constant in an XMM register. This uses CMPPS but could
+ // have used CMPPD with the same effect.
+ let tmp = ctx.alloc_tmp(RegClass::V128, output_ty);
+ let cond = FcmpImm::from(FloatCC::Equal);
+ let cmpps = Inst::xmm_rm_r_imm(
+ SseOpcode::Cmpps,
+ RegMem::reg(tmp.to_reg()),
+ tmp,
+ cond.encode(),
+ false,
+ );
+ ctx.emit(cmpps);
+
+ // Shift the all 1s constant to generate the mask.
+ let lane_bits = output_ty.lane_bits();
+ let (shift_opcode, opcode, shift_by) = match (op, lane_bits) {
+ (Opcode::Fabs, 32) => (SseOpcode::Psrld, SseOpcode::Andps, 1),
+ (Opcode::Fabs, 64) => (SseOpcode::Psrlq, SseOpcode::Andpd, 1),
+ (Opcode::Fneg, 32) => (SseOpcode::Pslld, SseOpcode::Xorps, 31),
+ (Opcode::Fneg, 64) => (SseOpcode::Psllq, SseOpcode::Xorpd, 63),
+ _ => unreachable!(
+ "unexpected opcode and lane size: {:?}, {} bits",
+ op, lane_bits
+ ),
+ };
+ let shift = Inst::xmm_rmi_reg(shift_opcode, RegMemImm::imm(shift_by), tmp);
+ ctx.emit(shift);
+
+ // Apply shifted mask (XOR or AND).
+ let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst);
+ ctx.emit(mask);
+ } else {
+ panic!("unexpected type {:?} for Fabs", output_ty);
+ }
+ }
+ }
+
+ Opcode::Fcopysign => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+
+ let ty = ty.unwrap();
+
+ // We're going to generate the following sequence:
+ //
+ // movabs $INT_MIN, tmp_gpr1
+ // mov{d,q} tmp_gpr1, tmp_xmm1
+ // movap{s,d} tmp_xmm1, dst
+ // andnp{s,d} src_1, dst
+ // movap{s,d} src_2, tmp_xmm2
+ // andp{s,d} tmp_xmm1, tmp_xmm2
+ // orp{s,d} tmp_xmm2, dst
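+            //
+            // i.e., bitwise: dst = (lhs & !SIGN_MASK) | (rhs & SIGN_MASK), where SIGN_MASK
+            // has only the sign bit set.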
+
+ let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, types::F32);
+ let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, types::F32);
+
+ let (sign_bit_cst, mov_op, and_not_op, and_op, or_op) = match ty {
+ types::F32 => (
+ 0x8000_0000,
+ SseOpcode::Movaps,
+ SseOpcode::Andnps,
+ SseOpcode::Andps,
+ SseOpcode::Orps,
+ ),
+ types::F64 => (
+ 0x8000_0000_0000_0000,
+ SseOpcode::Movapd,
+ SseOpcode::Andnpd,
+ SseOpcode::Andpd,
+ SseOpcode::Orpd,
+ ),
+ _ => {
+ panic!("unexpected type {:?} for copysign", ty);
+ }
+ };
+
+ for inst in Inst::gen_constant(tmp_xmm1, sign_bit_cst, ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+ ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst));
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2));
+ ctx.emit(Inst::xmm_rm_r(
+ and_op,
+ RegMem::reg(tmp_xmm1.to_reg()),
+ tmp_xmm2,
+ ));
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst));
+ }
+
+ Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {
+ // TODO use ROUNDSS/ROUNDSD after sse4.1.
+
+ // Lower to VM calls when there's no access to SSE4.1.
+ let ty = ty.unwrap();
+ let libcall = match (ty, op) {
+ (types::F32, Opcode::Ceil) => LibCall::CeilF32,
+ (types::F64, Opcode::Ceil) => LibCall::CeilF64,
+ (types::F32, Opcode::Floor) => LibCall::FloorF32,
+ (types::F64, Opcode::Floor) => LibCall::FloorF64,
+ (types::F32, Opcode::Nearest) => LibCall::NearestF32,
+ (types::F64, Opcode::Nearest) => LibCall::NearestF64,
+ (types::F32, Opcode::Trunc) => LibCall::TruncF32,
+ (types::F64, Opcode::Trunc) => LibCall::TruncF64,
+ _ => panic!(
+ "unexpected type/opcode {:?}/{:?} in Ceil/Floor/Nearest/Trunc",
+ ty, op
+ ),
+ };
+
+ emit_vm_call(ctx, flags, triple, libcall, insn, inputs, outputs)?;
+ }
+
+ Opcode::Load
+ | Opcode::Uload8
+ | Opcode::Sload8
+ | Opcode::Uload16
+ | Opcode::Sload16
+ | Opcode::Uload32
+ | Opcode::Sload32
+ | Opcode::LoadComplex
+ | Opcode::Uload8Complex
+ | Opcode::Sload8Complex
+ | Opcode::Uload16Complex
+ | Opcode::Sload16Complex
+ | Opcode::Uload32Complex
+ | Opcode::Sload32Complex => {
+ let offset = ctx.data(insn).load_store_offset().unwrap();
+
+ let elem_ty = match op {
+ Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
+ types::I8
+ }
+ Opcode::Sload16
+ | Opcode::Uload16
+ | Opcode::Sload16Complex
+ | Opcode::Uload16Complex => types::I16,
+ Opcode::Sload32
+ | Opcode::Uload32
+ | Opcode::Sload32Complex
+ | Opcode::Uload32Complex => types::I32,
+ Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
+ _ => unimplemented!(),
+ };
+
+ let ext_mode = ExtMode::new(elem_ty.bits(), 64);
+
+ let sign_extend = match op {
+ Opcode::Sload8
+ | Opcode::Sload8Complex
+ | Opcode::Sload16
+ | Opcode::Sload16Complex
+ | Opcode::Sload32
+ | Opcode::Sload32Complex => true,
+ _ => false,
+ };
+
+ let amode = match op {
+ Opcode::Load
+ | Opcode::Uload8
+ | Opcode::Sload8
+ | Opcode::Uload16
+ | Opcode::Sload16
+ | Opcode::Uload32
+ | Opcode::Sload32 => {
+ assert_eq!(inputs.len(), 1, "only one input for load operands");
+ lower_to_amode(ctx, inputs[0], offset)
+ }
+
+ Opcode::LoadComplex
+ | Opcode::Uload8Complex
+ | Opcode::Sload8Complex
+ | Opcode::Uload16Complex
+ | Opcode::Sload16Complex
+ | Opcode::Uload32Complex
+ | Opcode::Sload32Complex => {
+ assert_eq!(
+ inputs.len(),
+ 2,
+ "can't handle more than two inputs in complex load"
+ );
+ let base = put_input_in_reg(ctx, inputs[0]);
+ let index = put_input_in_reg(ctx, inputs[1]);
+ let shift = 0;
+ let flags = ctx.memflags(insn).expect("load should have memflags");
+ Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
+ }
+
+ _ => unreachable!(),
+ };
+
+ let dst = get_output_reg(ctx, outputs[0]);
+ let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
+ match (sign_extend, is_xmm) {
+ (true, false) => {
+ // The load is sign-extended only when the output size is lower than 64 bits,
+ // so ext-mode is defined in this case.
+ ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(amode), dst));
+ }
+ (false, false) => {
+ if elem_ty.bytes() == 8 {
+ // Use a plain load.
+ ctx.emit(Inst::mov64_m_r(amode, dst))
+ } else {
+ // Use a zero-extended load.
+ ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(amode), dst))
+ }
+ }
+ (_, true) => {
+ ctx.emit(match elem_ty {
+ types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst),
+ types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst),
+ _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
+ Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst)
+ } // TODO Specialize for different types: MOVUPD, MOVDQU
+ _ => unreachable!("unexpected type for load: {:?}", elem_ty),
+ });
+ }
+ }
+ }
+
+ Opcode::Store
+ | Opcode::Istore8
+ | Opcode::Istore16
+ | Opcode::Istore32
+ | Opcode::StoreComplex
+ | Opcode::Istore8Complex
+ | Opcode::Istore16Complex
+ | Opcode::Istore32Complex => {
+ let offset = ctx.data(insn).load_store_offset().unwrap();
+
+ let elem_ty = match op {
+ Opcode::Istore8 | Opcode::Istore8Complex => types::I8,
+ Opcode::Istore16 | Opcode::Istore16Complex => types::I16,
+ Opcode::Istore32 | Opcode::Istore32Complex => types::I32,
+ Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
+ _ => unreachable!(),
+ };
+
+ let addr = match op {
+ Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
+                    assert_eq!(inputs.len(), 2, "expected exactly two inputs for store: value and address");
+ lower_to_amode(ctx, inputs[1], offset)
+ }
+
+ Opcode::StoreComplex
+ | Opcode::Istore8Complex
+ | Opcode::Istore16Complex
+ | Opcode::Istore32Complex => {
+ assert_eq!(
+ inputs.len(),
+ 3,
+                        "can't handle more than two address inputs in complex store"
+ );
+ let base = put_input_in_reg(ctx, inputs[1]);
+ let index = put_input_in_reg(ctx, inputs[2]);
+ let shift = 0;
+ let flags = ctx.memflags(insn).expect("store should have memflags");
+ Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
+ }
+
+ _ => unreachable!(),
+ };
+
+ let src = put_input_in_reg(ctx, inputs[0]);
+
+ ctx.emit(match elem_ty {
+ types::F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
+ types::F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr),
+ _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
+ // TODO Specialize for different types: MOVUPD, MOVDQU, etc.
+ Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr)
+ }
+ _ => Inst::mov_r_m(elem_ty.bytes() as u8, src, addr),
+ });
+ }
+
+ Opcode::AtomicRmw => {
+ // This is a simple, general-case atomic update, based on a loop involving
+ // `cmpxchg`. Note that we could do much better than this in the case where the old
+ // value at the location (that is to say, the SSA `Value` computed by this CLIF
+ // instruction) is not required. In that case, we could instead implement this
+ // using a single `lock`-prefixed x64 read-modify-write instruction. Also, even in
+ // the case where the old value is required, for the `add` and `sub` cases, we can
+ // use the single instruction `lock xadd`. However, those improvements have been
+ // left for another day.
+ // TODO: filed as https://github.com/bytecodealliance/wasmtime/issues/2153
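+            //
+            // Conceptually, the AtomicRmwSeq pseudo-instruction expands to roughly (sketch):
+            //
+            //     loop {
+            //         old = *addr;
+            //         new = op(old, arg2);
+            //         if lock_cmpxchg(addr, expected: old, new) succeeded { break }
+            //     }
+            //
+            // with the final `old` value left in %rax.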
+ let dst = get_output_reg(ctx, outputs[0]);
+ let mut addr = put_input_in_reg(ctx, inputs[0]);
+ let mut arg2 = put_input_in_reg(ctx, inputs[1]);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ // Make sure that both args are in virtual regs, since in effect we have to do a
+ // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not
+ // guaranteed safe if either is in a real reg.
+ addr = ctx.ensure_in_vreg(addr, types::I64);
+ arg2 = ctx.ensure_in_vreg(arg2, types::I64);
+
+ // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq`
+ // operates at whatever width is specified by `ty`, so there's no need to
+ // zero-extend `arg2` in the case of `ty` being I8/I16/I32.
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::r9()),
+ addr,
+ types::I64,
+ ));
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::r10()),
+ arg2,
+ types::I64,
+ ));
+
+ // Now the AtomicRmwSeq (pseudo-) instruction itself
+ let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
+ ctx.emit(Inst::AtomicRmwSeq { ty: ty_access, op });
+
+ // And finally, copy the preordained AtomicRmwSeq output reg to its destination.
+ ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
+ }
+
+ Opcode::AtomicCas => {
+ // This is very similar to, but not identical to, the `AtomicRmw` case. As with
+ // `AtomicRmw`, there's no need to zero-extend narrow values here.
+ let dst = get_output_reg(ctx, outputs[0]);
+ let addr = lower_to_amode(ctx, inputs[0], 0);
+ let expected = put_input_in_reg(ctx, inputs[1]);
+ let replacement = put_input_in_reg(ctx, inputs[2]);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ // Move the expected value into %rax. Because there's only one fixed register on
+ // the input side, we don't have to use `ensure_in_vreg`, as is necessary in the
+ // `AtomicRmw` case.
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::rax()),
+ expected,
+ types::I64,
+ ));
+ ctx.emit(Inst::LockCmpxchg {
+ ty: ty_access,
+ src: replacement,
+ dst: addr.into(),
+ });
+ // And finally, copy the old value at the location to its destination reg.
+ ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
+ }
+
+ Opcode::AtomicLoad => {
+ // This is a normal load. The x86-TSO memory model provides sufficient sequencing
+ // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
+ // need for any fence instructions.
+ let data = get_output_reg(ctx, outputs[0]);
+ let addr = lower_to_amode(ctx, inputs[0], 0);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ let rm = RegMem::mem(addr);
+ if ty_access == types::I64 {
+ ctx.emit(Inst::mov64_rm_r(rm, data));
+ } else {
+ let ext_mode = ExtMode::new(ty_access.bits(), 64).expect(&format!(
+ "invalid extension during AtomicLoad: {} -> {}",
+ ty_access.bits(),
+ 64
+ ));
+ ctx.emit(Inst::movzx_rm_r(ext_mode, rm, data));
+ }
+ }
+
+ Opcode::AtomicStore => {
+ // This is a normal store, followed by an `mfence` instruction.
+ let data = put_input_in_reg(ctx, inputs[0]);
+ let addr = lower_to_amode(ctx, inputs[1], 0);
+ let ty_access = ctx.input_ty(insn, 0);
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ ctx.emit(Inst::mov_r_m(ty_access.bytes() as u8, data, addr));
+ ctx.emit(Inst::Fence {
+ kind: FenceKind::MFence,
+ });
+ }
+
+ Opcode::Fence => {
+ ctx.emit(Inst::Fence {
+ kind: FenceKind::MFence,
+ });
+ }
+
+ Opcode::FuncAddr => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let (extname, _) = ctx.call_target(insn).unwrap();
+ let extname = extname.clone();
+ ctx.emit(Inst::LoadExtName {
+ dst,
+ name: Box::new(extname),
+ offset: 0,
+ });
+ }
+
+ Opcode::SymbolValue => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
+ let extname = extname.clone();
+ ctx.emit(Inst::LoadExtName {
+ dst,
+ name: Box::new(extname),
+ offset,
+ });
+ }
+
+ Opcode::StackAddr => {
+ let (stack_slot, offset) = match *ctx.data(insn) {
+ InstructionData::StackLoad {
+ opcode: Opcode::StackAddr,
+ stack_slot,
+ offset,
+ } => (stack_slot, offset),
+ _ => unreachable!(),
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+ let offset: i32 = offset.into();
+ let inst = ctx
+ .abi()
+ .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
+ ctx.emit(inst);
+ }
+
+ Opcode::Select => {
+ let flag_input = inputs[0];
+ if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
+ let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
+
+ // For equal, we flip the operands, because we can't test a conjunction of
+ // CPU flags with a single cmove; see InvertedEqualOrConditions doc comment.
+ let (lhs_input, rhs_input) = match cond_code {
+ FloatCC::Equal => (inputs[2], inputs[1]),
+ _ => (inputs[1], inputs[2]),
+ };
+
+ let ty = ctx.output_ty(insn, 0);
+ let rhs = put_input_in_reg(ctx, rhs_input);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let lhs = if is_int_or_ref_ty(ty) && ty.bytes() < 4 {
+                    // Special case: since the higher bits are undefined per CLIF semantics, we
+                    // can just apply a 32-bit cmove here. However, force the input into a
+                    // register rather than memory, so that the widened 4-byte cmove access
+                    // cannot read out of bounds past a narrower in-memory value.
+ RegMem::reg(put_input_in_reg(ctx, lhs_input))
+ } else {
+ input_to_reg_mem(ctx, lhs_input)
+ };
+
+ // We request inversion of Equal to NotEqual here: taking LHS if equal would mean
+ // take it if both CC::NP and CC::Z are set, the conjunction of which can't be
+ // modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
+ // select operation, and invert the equal to a not-equal here.
+ let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
+
+ if let FcmpCondResult::InvertedEqualOrConditions(_, _) = &fcmp_results {
+ // Keep this sync'd with the lowering of the select inputs above.
+ assert_eq!(cond_code, FloatCC::Equal);
+ }
+
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+
+ match fcmp_results {
+ FcmpCondResult::Condition(cc) => {
+ if is_int_or_ref_ty(ty) {
+ let size = u8::max(ty.bytes() as u8, 4);
+ ctx.emit(Inst::cmove(size, cc, lhs, dst));
+ } else {
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
+ }
+ }
+ FcmpCondResult::AndConditions(_, _) => {
+ unreachable!(
+ "can't AND with select; see above comment about inverting equal"
+ );
+ }
+ FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
+ | FcmpCondResult::OrConditions(cc1, cc2) => {
+ if is_int_or_ref_ty(ty) {
+ let size = u8::max(ty.bytes() as u8, 4);
+ ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst));
+ ctx.emit(Inst::cmove(size, cc2, lhs, dst));
+ } else {
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc1, lhs.clone(), dst));
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc2, lhs, dst));
+ }
+ }
+ }
+ } else {
+ let ty = ty.unwrap();
+
+ let mut size = ty.bytes() as u8;
+ let lhs = if is_int_or_ref_ty(ty) {
+ if size < 4 {
+                        // Special case: since the higher bits are undefined per CLIF semantics, we
+                        // can just apply a 32-bit cmove here. However, force the input into a
+                        // register rather than memory, so that the widened 4-byte cmove access
+                        // cannot read out of bounds past a narrower in-memory value.
+ size = 4;
+ RegMem::reg(put_input_in_reg(ctx, inputs[1]))
+ } else {
+ input_to_reg_mem(ctx, inputs[1])
+ }
+ } else {
+ input_to_reg_mem(ctx, inputs[1])
+ };
+
+ let rhs = put_input_in_reg(ctx, inputs[2]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
+ emit_cmp(ctx, icmp);
+ let cond_code = ctx.data(icmp).cond_code().unwrap();
+ CC::from_intcc(cond_code)
+ } else {
+ // The input is a boolean value, compare it against zero.
+ let size = ctx.input_ty(insn, 0).bytes() as u8;
+ let test = put_input_in_reg(ctx, flag_input);
+ ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
+ CC::NZ
+ };
+
+ // This doesn't affect the flags.
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+
+ if is_int_or_ref_ty(ty) {
+ ctx.emit(Inst::cmove(size, cc, lhs, dst));
+ } else {
+ debug_assert!(ty == types::F32 || ty == types::F64);
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
+ }
+ }
+ }
+
+ Opcode::Selectif | Opcode::SelectifSpectreGuard => {
+ let lhs = input_to_reg_mem(ctx, inputs[1]);
+ let rhs = put_input_in_reg(ctx, inputs[2]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.output_ty(insn, 0);
+
+ // Verification ensures that the input is always a single-def ifcmp.
+ let cmp_insn = ctx
+ .get_input(inputs[0].insn, inputs[0].input)
+ .inst
+ .unwrap()
+ .0;
+ debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
+ emit_cmp(ctx, cmp_insn);
+
+ let cc = CC::from_intcc(ctx.data(insn).cond_code().unwrap());
+
+ if is_int_or_ref_ty(ty) {
+ let size = ty.bytes() as u8;
+ if size == 1 {
+ // Sign-extend operands to 32, then do a cmove of size 4.
+ let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se));
+ ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst));
+ ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
+ } else {
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ ctx.emit(Inst::cmove(size, cc, lhs, dst));
+ }
+ } else {
+ debug_assert!(ty == types::F32 || ty == types::F64);
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
+ }
+ }
+
+ Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
+ let kind = match op {
+ Opcode::Udiv => DivOrRemKind::UnsignedDiv,
+ Opcode::Sdiv => DivOrRemKind::SignedDiv,
+ Opcode::Urem => DivOrRemKind::UnsignedRem,
+ Opcode::Srem => DivOrRemKind::SignedRem,
+ _ => unreachable!(),
+ };
+ let is_div = kind.is_div();
+
+ let input_ty = ctx.input_ty(insn, 0);
+ let size = input_ty.bytes() as u8;
+
+ let dividend = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::rax()),
+ dividend,
+ input_ty,
+ ));
+
+ if flags.avoid_div_traps() {
+ // A vcode meta-instruction is used to lower the inline checks, since they embed
+ // pc-relative offsets that must not change, thus requiring regalloc to not
+ // interfere by introducing spills and reloads.
+ //
+ // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
+ // regalloc is aware of the coalescing opportunity between rax/rdx and the
+ // destination register.
+ let divisor = put_input_in_reg(ctx, inputs[1]);
+
+ let divisor_copy = ctx.alloc_tmp(RegClass::I64, types::I64);
+ ctx.emit(Inst::gen_move(divisor_copy, divisor, types::I64));
+
+ let tmp = if op == Opcode::Sdiv && size == 8 {
+ Some(ctx.alloc_tmp(RegClass::I64, types::I64))
+ } else {
+ None
+ };
+ // TODO use xor
+ ctx.emit(Inst::imm(
+ OperandSize::Size32,
+ 0,
+ Writable::from_reg(regs::rdx()),
+ ));
+ ctx.emit(Inst::checked_div_or_rem_seq(kind, size, divisor_copy, tmp));
+ } else {
+ let divisor = input_to_reg_mem(ctx, inputs[1]);
+
+ // Fill in the high parts:
+ if kind.is_signed() {
+ // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
+ // signed opcodes.
+ ctx.emit(Inst::sign_extend_data(size));
+ } else if input_ty == types::I8 {
+ ctx.emit(Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::reg(regs::rax()),
+ Writable::from_reg(regs::rax()),
+ ));
+ } else {
+ // zero for unsigned opcodes.
+ ctx.emit(Inst::imm(
+ OperandSize::Size64,
+ 0,
+ Writable::from_reg(regs::rdx()),
+ ));
+ }
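+                // (For reference: in their 16/32/64-bit forms, `div`/`idiv` read the double-width
+                // dividend from rdx:rax and leave the quotient in rax and the remainder in rdx,
+                // hence the moves above and below.)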
+
+ // Emit the actual idiv.
+ ctx.emit(Inst::div(size, kind.is_signed(), divisor));
+ }
+
+ // Move the result back into the destination reg.
+ if is_div {
+ // The quotient is in rax.
+ ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
+ } else {
+ // The remainder is in rdx.
+ ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
+ }
+ }
+
+ Opcode::Umulhi | Opcode::Smulhi => {
+ let input_ty = ctx.input_ty(insn, 0);
+ let size = input_ty.bytes() as u8;
+
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // Move lhs in %rax.
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::rax()),
+ lhs,
+ input_ty,
+ ));
+
+ // Emit the actual mul or imul.
+ let signed = op == Opcode::Smulhi;
+ ctx.emit(Inst::mul_hi(size, signed, rhs));
+
+ // Read the result from the high part (stored in %rdx).
+ ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
+ }
+
+ Opcode::GetPinnedReg => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64));
+ }
+
+ Opcode::SetPinnedReg => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::pinned_reg()),
+ src,
+ types::I64,
+ ));
+ }
+
+ Opcode::Vconst => {
+ let used_constant = if let &InstructionData::UnaryConst {
+ constant_handle, ..
+ } = ctx.data(insn)
+ {
+ ctx.use_constant(VCodeConstantData::Pool(
+ constant_handle,
+ ctx.get_constant_data(constant_handle).clone(),
+ ))
+ } else {
+ unreachable!("vconst should always have unary_const format")
+ };
+ // TODO use Inst::gen_constant() instead.
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::xmm_load_const(used_constant, dst, ty));
+ }
+
+ Opcode::RawBitcast => {
+ // A raw_bitcast is just a mechanism for correcting the type of V128 values (see
+ // https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
+ // instruction should emit no machine code, but a move is still necessary to give the
+ // register allocator a definition for the output virtual register.
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ }
+
+ Opcode::Shuffle => {
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let lhs_ty = ctx.input_ty(insn, 0);
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let mask = match ctx.get_immediate(insn) {
+ Some(DataValue::V128(bytes)) => bytes.to_vec(),
+ _ => unreachable!("shuffle should always have a 16-byte immediate"),
+ };
+
+ // A mask-building helper: in 128-bit SIMD, mask bytes 0-15 indicate which lane to read
+ // from, and a 1 in the most significant bit zeroes the lane.
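+ // For example, shuffle index 20 selects lane 4 of the second input: below it becomes 4
+ // when both inputs are the same register, or 0x80 (zero this lane) in the lhs mask and 4
+ // in the rhs mask when they differ.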
+ let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
+
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ if rhs == lhs {
+ // If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
+ // register. We statically build `constructed_mask` to zero out any unknown lane
+ // indices (this may not be strictly necessary: the verifier should reject invalid
+ // mask values) and to remap the indexes so they all point into the `dst` vector.
+ let constructed_mask = mask
+ .iter()
+ // An index greater than 15 may still refer to a lane of the second operand, which
+ // here is the same register, so wrap it back into the 0-15 range.
+ .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
+ .map(zero_unknown_lane_index)
+ .collect();
+ let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ ctx.emit(Inst::xmm_load_const(constant, tmp, ty));
+ // After loading the constructed mask into a temporary register, we use it to
+ // shuffle the `dst` register (which, in this case, already holds the same value as
+ // `lhs` and `rhs`, so no second shuffle is needed).
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst));
+ } else {
+ // If `lhs` and `rhs` are different, we must shuffle each separately and then OR
+ // them together. This is necessary due to PSHUFB semantics. As in the case above,
+ // we build the `constructed_mask` for each case statically.
+
+ // PSHUFB the `lhs` argument into `tmp0`, placing zeroes for unused lanes.
+ let tmp0 = ctx.alloc_tmp(RegClass::V128, lhs_ty);
+ ctx.emit(Inst::gen_move(tmp0, lhs, lhs_ty));
+ let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
+ let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
+ let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ ctx.emit(Inst::xmm_load_const(constant, tmp1, ty));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp1), tmp0));
+
+ // PSHUFB the second argument, placing zeroes for unused lanes.
+ let constructed_mask = mask
+ .iter()
+ .map(|b| b.wrapping_sub(16))
+ .map(zero_unknown_lane_index)
+ .collect();
+ let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
+ let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ ctx.emit(Inst::xmm_load_const(constant, tmp2, ty));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp2), dst));
+
+ // OR the shuffled registers (the mechanism and lane-size for OR-ing the registers
+ // is not important).
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Orps, RegMem::from(tmp0), dst));
+
+ // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
+ }
+ }
+
+ Opcode::Swizzle => {
+ // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec
+ // requiring mask indexes greater than 15 to zero the corresponding output lane. For
+ // the spec discussion, see https://github.com/WebAssembly/simd/issues/93. The CLIF
+ // semantics match the Wasm SIMD semantics for this instruction.
+ // The instruction format maps to variables like: %dst = swizzle %src, %mask
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let swizzle_mask = put_input_in_reg(ctx, inputs[1]);
+
+ // Inform the register allocator that `src` and `dst` should be in the same register.
+ ctx.emit(Inst::gen_move(dst, src, ty));
+
+ // Create a mask which, when added to the swizzle mask with unsigned saturation, forces
+ // every out-of-bounds lane index to have its top bit set.
+ let zero_mask = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ static ZERO_MASK_VALUE: [u8; 16] = [
+ 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
+ 0x70, 0x70,
+ ];
+ let constant = ctx.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE));
+ ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
+
+ // Use the `zero_mask` on a writable `swizzle_mask`.
+ let swizzle_mask = Writable::from_reg(swizzle_mask);
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Paddusb,
+ RegMem::from(zero_mask),
+ swizzle_mask,
+ ));
+
+ // Shuffle `dst` using the fixed-up `swizzle_mask`.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pshufb,
+ RegMem::from(swizzle_mask),
+ dst,
+ ));
+ }
+
+ Opcode::Insertlane => {
+ // The instruction format maps to variables like: %dst = insertlane %in_vec, %src, %lane
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let in_vec = put_input_in_reg(ctx, inputs[0]);
+ let src_ty = ctx.input_ty(insn, 1);
+ debug_assert!(!src_ty.is_vector());
+ let src = input_to_reg_mem(ctx, inputs[1]);
+ let lane = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
+ *imm
+ } else {
+ unreachable!();
+ };
+ debug_assert!(lane < ty.lane_count() as u8);
+
+ ctx.emit(Inst::gen_move(dst, in_vec, ty));
+ emit_insert_lane(ctx, src, dst, lane, ty.lane_type());
+ }
+
+ Opcode::Extractlane => {
+ // The instruction format maps to variables like: %dst = extractlane %src, %lane
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert_eq!(src_ty.bits(), 128);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let lane = if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(insn) {
+ *imm
+ } else {
+ unreachable!();
+ };
+ debug_assert!(lane < src_ty.lane_count() as u8);
+
+ if !ty.is_float() {
+ let (sse_op, w_bit) = match ty.lane_bits() {
+ 8 => (SseOpcode::Pextrb, false),
+ 16 => (SseOpcode::Pextrw, false),
+ 32 => (SseOpcode::Pextrd, false),
+ 64 => (SseOpcode::Pextrd, true),
+ _ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()),
+ };
+ let src = RegMem::reg(src);
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit));
+ } else {
+ if lane == 0 {
+ // Nothing to extract for lane 0: leave the float where it is and only emit a move
+ // to define `dst`. The upper bits will remain unchanged; for correctness, this
+ // relies on Cranelift type checking to avoid using those bits.
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ } else {
+ // Otherwise, shuffle the bits in `lane` to the lowest lane.
+ let sse_op = SseOpcode::Pshufd;
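+ // The PSHUFD immediate is four 2-bit fields, one per destination 32-bit lane (bits
+ // 1:0 for lane 0, 3:2 for lane 1, and so on), each selecting a source 32-bit lane;
+ // only the low lane(s) matter here since the result is consumed as a scalar.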
+ let mask = match src_ty {
+ // Move the value at `lane` to lane 0, copying existing value at lane 0 to
+ // other lanes. Again, this relies on Cranelift type checking to avoid
+ // using those bits.
+ types::F32X4 => 0b00_00_00_00 | lane,
+ // Move the value at lane 1 (we know `lane` must be 1 because of the `if`
+ // statement above) to lane 0 and leave lane 1 unchanged. The Cranelift type
+ // checking assumption also applies here.
+ types::F64X2 => 0b11_10_11_10,
+ _ => unreachable!(),
+ };
+ let src = RegMem::reg(src);
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false));
+ }
+ }
+ }
+
+ Opcode::Splat | Opcode::LoadSplat => {
+ let ty = ty.unwrap();
+ assert_eq!(ty.bits(), 128);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert!(src_ty.bits() < 128);
+
+ let src = match op {
+ Opcode::Splat => input_to_reg_mem(ctx, inputs[0]),
+ Opcode::LoadSplat => {
+ let offset = ctx.data(insn).load_store_offset().unwrap();
+ let amode = lower_to_amode(ctx, inputs[0], offset);
+ RegMem::mem(amode)
+ }
+ _ => unreachable!(),
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // We know that splat will overwrite all of the lanes of `dst` but it takes several
+ // instructions to do so. Because of the multiple instructions, there is no good way to
+ // declare `dst` a `def` except with the following pseudo-instruction.
+ ctx.emit(Inst::xmm_uninit_value(dst));
+
+ // TODO: eventually many of these sequences could be optimized with AVX's VBROADCAST*
+ // and VPBROADCAST*.
+ match ty.lane_bits() {
+ 8 => {
+ emit_insert_lane(ctx, src, dst, 0, ty.lane_type());
+ // Initialize a register with all 0s.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
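+ // With an all-zero mask, PSHUFB copies source byte 0 into every destination byte,
+ // which broadcasts the lane inserted above.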
+ // Shuffle the lowest byte lane to all other lanes.
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst))
+ }
+ 16 => {
+ emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type());
+ emit_insert_lane(ctx, src, dst, 1, ty.lane_type());
+ // Shuffle the lowest two lanes to all other lanes.
+ ctx.emit(Inst::xmm_rm_r_imm(
+ SseOpcode::Pshufd,
+ RegMem::from(dst),
+ dst,
+ 0,
+ false,
+ ))
+ }
+ 32 => {
+ emit_insert_lane(ctx, src, dst, 0, ty.lane_type());
+ // Shuffle the lowest lane to all other lanes.
+ ctx.emit(Inst::xmm_rm_r_imm(
+ SseOpcode::Pshufd,
+ RegMem::from(dst),
+ dst,
+ 0,
+ false,
+ ))
+ }
+ 64 => {
+ emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type());
+ emit_insert_lane(ctx, src, dst, 1, ty.lane_type());
+ }
+ _ => panic!("Invalid type to splat: {}", ty),
+ }
+ }
+
+ Opcode::VanyTrue => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert_eq!(src_ty.bits(), 128);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ // Set the ZF if the result is all zeroes.
+ ctx.emit(Inst::xmm_cmp_rm_r(SseOpcode::Ptest, RegMem::reg(src), src));
+ // If the ZF is not set, place a 1 in `dst`.
+ ctx.emit(Inst::setcc(CC::NZ, dst));
+ }
+
+ Opcode::VallTrue => {
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert_eq!(src_ty.bits(), 128);
+ let src = input_to_reg_mem(ctx, inputs[0]);
+
+ let eq = |ty: Type| match ty.lane_bits() {
+ 8 => SseOpcode::Pcmpeqb,
+ 16 => SseOpcode::Pcmpeqw,
+ 32 => SseOpcode::Pcmpeqd,
+ 64 => SseOpcode::Pcmpeqq,
+ _ => panic!("Unable to find an instruction for {} for type: {}", op, ty),
+ };
+
+ // Initialize a register with all 0s.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
+ // Compare against the zeroed register: lanes of `src` that are all zero become all 1s in `tmp`.
+ ctx.emit(Inst::xmm_rm_r(eq(src_ty), src, tmp));
+ // Set the ZF if the result is all zeroes.
+ ctx.emit(Inst::xmm_cmp_rm_r(
+ SseOpcode::Ptest,
+ RegMem::from(tmp),
+ tmp.to_reg(),
+ ));
+ // If the ZF is set, place a 1 in `dst`.
+ ctx.emit(Inst::setcc(CC::Z, dst));
+ }
+
+ Opcode::VhighBits => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ debug_assert!(src_ty.is_vector() && src_ty.bits() == 128);
+ let dst = get_output_reg(ctx, outputs[0]);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+
+ // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for
+ // the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode,
+ // the instruction can access additional registers when used with a REX.R prefix. The
+ // default operand size is 64-bit in 64-bit mode" (PMOVMSKB in IA Software Development
+ // Manual, vol. 2). This being the case, we will always clear REX.W since its use is
+ // unnecessary (`OperandSize` is used for setting/clearing REX.W).
+ let size = OperandSize::Size32;
+
+ match src_ty {
+ types::I8X16 | types::B8X16 => {
+ ctx.emit(Inst::xmm_to_gpr(SseOpcode::Pmovmskb, src, dst, size))
+ }
+ types::I32X4 | types::B32X4 | types::F32X4 => {
+ ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movmskps, src, dst, size))
+ }
+ types::I64X2 | types::B64X2 | types::F64X2 => {
+ ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movmskpd, src, dst, size))
+ }
+ types::I16X8 | types::B16X8 => {
+ // There is no x86 instruction for extracting the high bit of 16-bit lanes so
+ // here we:
+ // - duplicate the 16-bit lanes of `src` into 8-bit lanes:
+ // PACKSSWB([x1, x2, ...], [x1, x2, ...]) = [x1', x2', ..., x1', x2', ...]
+ // - use PMOVMSKB to gather the high bits; the low and high bytes of the 16-bit result
+ // now duplicate each other
+ // - shift right by 8 to drop one copy of the duplicated mask.
+ let tmp = ctx.alloc_tmp(RegClass::V128, src_ty);
+ ctx.emit(Inst::gen_move(tmp, src, src_ty));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(src), tmp));
+ ctx.emit(Inst::xmm_to_gpr(
+ SseOpcode::Pmovmskb,
+ tmp.to_reg(),
+ dst,
+ size,
+ ));
+ ctx.emit(Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(8), dst));
+ }
+ _ => unimplemented!("unknown input type {} for {}", src_ty, op),
+ }
+ }
+
+ Opcode::IaddImm
+ | Opcode::ImulImm
+ | Opcode::UdivImm
+ | Opcode::SdivImm
+ | Opcode::UremImm
+ | Opcode::SremImm
+ | Opcode::IrsubImm
+ | Opcode::IaddCin
+ | Opcode::IaddIfcin
+ | Opcode::IaddCout
+ | Opcode::IaddCarry
+ | Opcode::IaddIfcarry
+ | Opcode::IsubBin
+ | Opcode::IsubIfbin
+ | Opcode::IsubBout
+ | Opcode::IsubIfbout
+ | Opcode::IsubBorrow
+ | Opcode::IsubIfborrow
+ | Opcode::BandImm
+ | Opcode::BorImm
+ | Opcode::BxorImm
+ | Opcode::RotlImm
+ | Opcode::RotrImm
+ | Opcode::IshlImm
+ | Opcode::UshrImm
+ | Opcode::SshrImm => {
+ panic!("ALU+imm and ALU+carry ops should not appear here!");
+ }
+ _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
+ }
+
+ Ok(())
+}
+
+//=============================================================================
+// Lowering-backend trait implementation.
+
+impl LowerBackend for X64Backend {
+ type MInst = Inst;
+
+ fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
+ lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.triple)
+ }
+
+ fn lower_branch_group<C: LowerCtx<I = Inst>>(
+ &self,
+ ctx: &mut C,
+ branches: &[IRInst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+ ) -> CodegenResult<()> {
+ // A block should end with at most two branches. The first may be a
+ // conditional branch; a conditional branch can be followed only by an
+ // unconditional branch or fallthrough. Otherwise, if only one branch,
+ // it may be an unconditional branch, a fallthrough, a return, or a
+ // trap. These conditions are verified by `is_ebb_basic()` during the
+ // verifier pass.
+ assert!(branches.len() <= 2);
+
+ if branches.len() == 2 {
+ // Must be a conditional branch followed by an unconditional branch.
+ let op0 = ctx.data(branches[0]).opcode();
+ let op1 = ctx.data(branches[1]).opcode();
+
+ trace!(
+ "lowering two-branch group: opcodes are {:?} and {:?}",
+ op0,
+ op1
+ );
+ assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
+
+ let taken = targets[0];
+ let not_taken = match op1 {
+ Opcode::Jump => targets[1],
+ Opcode::Fallthrough => fallthrough.unwrap(),
+ _ => unreachable!(), // assert above.
+ };
+
+ match op0 {
+ Opcode::Brz | Opcode::Brnz => {
+ let flag_input = InsnInput {
+ insn: branches[0],
+ input: 0,
+ };
+
+ let src_ty = ctx.input_ty(branches[0], 0);
+
+ if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
+ emit_cmp(ctx, icmp);
+
+ let cond_code = ctx.data(icmp).cond_code().unwrap();
+ let cond_code = if op0 == Opcode::Brz {
+ cond_code.inverse()
+ } else {
+ cond_code
+ };
+
+ let cc = CC::from_intcc(cond_code);
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ } else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
+ let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
+ let cond_code = if op0 == Opcode::Brz {
+ cond_code.inverse()
+ } else {
+ cond_code
+ };
+ match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) {
+ FcmpCondResult::Condition(cc) => {
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ }
+ FcmpCondResult::AndConditions(cc1, cc2) => {
+ ctx.emit(Inst::jmp_if(cc1.invert(), not_taken));
+ ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken));
+ }
+ FcmpCondResult::OrConditions(cc1, cc2) => {
+ ctx.emit(Inst::jmp_if(cc1, taken));
+ ctx.emit(Inst::jmp_cond(cc2, taken, not_taken));
+ }
+ FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+ }
+ } else if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
+ let src = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ );
+ let cc = match op0 {
+ Opcode::Brz => CC::Z,
+ Opcode::Brnz => CC::NZ,
+ _ => unreachable!(),
+ };
+ let size_bytes = src_ty.bytes() as u8;
+ ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src));
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ } else {
+ unimplemented!("brz/brnz with non-int type {:?}", src_ty);
+ }
+ }
+
+ Opcode::BrIcmp => {
+ let src_ty = ctx.input_ty(branches[0], 0);
+ if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
+ let lhs = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ );
+ let rhs = input_to_reg_mem_imm(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 1,
+ },
+ );
+ let cc = CC::from_intcc(ctx.data(branches[0]).cond_code().unwrap());
+ let byte_size = src_ty.bytes() as u8;
+ // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
+ // us dst - src at the machine instruction level, so invert operands.
+ ctx.emit(Inst::cmp_rmi_r(byte_size, rhs, lhs));
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ } else {
+ unimplemented!("bricmp with non-int type {:?}", src_ty);
+ }
+ }
+
+ _ => panic!("unexpected branch opcode: {:?}", op0),
+ }
+ } else {
+ assert_eq!(branches.len(), 1);
+
+ // Must be an unconditional branch or trap.
+ let op = ctx.data(branches[0]).opcode();
+ match op {
+ Opcode::Jump | Opcode::Fallthrough => {
+ ctx.emit(Inst::jmp_known(targets[0]));
+ }
+
+ Opcode::BrTable => {
+ let jt_size = targets.len() - 1;
+ assert!(jt_size <= u32::max_value() as usize);
+ let jt_size = jt_size as u32;
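+ // `targets[0]` is the default target; the remaining entries form the jump table
+ // proper, hence the `- 1` above and the `skip(1)` below.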
+
+ let idx = extend_input_to_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ ExtSpec::ZeroExtendTo32,
+ );
+
+ // Bounds-check (compute flags from idx - jt_size) and branch to default.
+ ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx));
+
+ // Emit the compound instruction that does:
+ //
+ // lea $jt, %rA
+ // movsbl [%rA, %rIndex, 2], %rB
+ // add %rB, %rA
+ // j *%rA
+ // [jt entries]
+ //
+ // This must be *one* instruction in the vcode because we cannot allow regalloc
+ // to insert any spills/fills in the middle of the sequence; otherwise, the
+ // lea PC-rel offset to the jumptable would be incorrect. (The alternative
+ // is to introduce a relocation pass for inlined jumptables, which is much
+ // worse.)
+
+ // This temporary is used as a signed integer of 64-bits (to hold addresses).
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ // This temporary is used as a signed integer of 32-bits (for the wasm-table
+ // index) and then 64-bits (address addend). The small lie about the I64 type
+ // is benign, since the temporary is dead after this instruction (and its
+ // Cranelift type is thus unused).
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+
+ let targets_for_term: Vec<MachLabel> = targets.to_vec();
+ let default_target = targets[0];
+
+ let jt_targets: Vec<MachLabel> = targets.iter().skip(1).cloned().collect();
+
+ ctx.emit(Inst::JmpTableSeq {
+ idx,
+ tmp1,
+ tmp2,
+ default_target,
+ targets: jt_targets,
+ targets_for_term,
+ });
+ }
+
+ _ => panic!("Unknown branch type {:?}", op),
+ }
+ }
+
+ Ok(())
+ }
+
+ fn maybe_pinned_reg(&self) -> Option<Reg> {
+ Some(regs::pinned_reg())
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/mod.rs b/third_party/rust/cranelift-codegen/src/isa/x64/mod.rs
new file mode 100644
index 0000000000..fd4444498d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/mod.rs
@@ -0,0 +1,149 @@
+//! x86-64 Instruction Set Architecture.
+
+use self::inst::EmitInfo;
+
+use super::TargetIsa;
+use crate::ir::{condcodes::IntCC, Function};
+use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
+use crate::isa::Builder as IsaBuilder;
+use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
+use crate::result::CodegenResult;
+use crate::settings::{self as shared_settings, Flags};
+use alloc::boxed::Box;
+use regalloc::{PrettyPrint, RealRegUniverse};
+use target_lexicon::Triple;
+
+mod abi;
+mod inst;
+mod lower;
+mod settings;
+
+/// An X64 backend.
+pub(crate) struct X64Backend {
+ triple: Triple,
+ flags: Flags,
+ x64_flags: x64_settings::Flags,
+ reg_universe: RealRegUniverse,
+}
+
+impl X64Backend {
+ /// Create a new X64 backend with the given (shared) flags.
+ fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self {
+ let reg_universe = create_reg_universe_systemv(&flags);
+ Self {
+ triple,
+ flags,
+ x64_flags,
+ reg_universe,
+ }
+ }
+
+ fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
+ // This performs lowering to VCode, register-allocates the code, computes
+ // block layout and finalizes branches. The result is ready for binary emission.
+ let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone());
+ let abi = Box::new(abi::X64ABICallee::new(&func, flags)?);
+ compile::compile::<Self>(&func, self, abi, emit_info)
+ }
+}
+
+impl MachBackend for X64Backend {
+ fn compile_function(
+ &self,
+ func: &Function,
+ want_disasm: bool,
+ ) -> CodegenResult<MachCompileResult> {
+ let flags = self.flags();
+ let vcode = self.compile_vcode(func, flags.clone())?;
+
+ let buffer = vcode.emit();
+ let buffer = buffer.finish();
+ let frame_size = vcode.frame_size();
+ let unwind_info = vcode.unwind_info()?;
+
+ let disasm = if want_disasm {
+ Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
+ } else {
+ None
+ };
+
+ Ok(MachCompileResult {
+ buffer,
+ frame_size,
+ disasm,
+ unwind_info,
+ })
+ }
+
+ fn flags(&self) -> &Flags {
+ &self.flags
+ }
+
+ fn name(&self) -> &'static str {
+ "x64"
+ }
+
+ fn triple(&self) -> Triple {
+ self.triple.clone()
+ }
+
+ fn reg_universe(&self) -> &RealRegUniverse {
+ &self.reg_universe
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> IntCC {
+ // Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
+ // overflow of an add.
+ IntCC::UnsignedGreaterThanOrEqual
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> IntCC {
+ // Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
+ // underflow of a subtract (carry is borrow for subtract).
+ IntCC::UnsignedGreaterThanOrEqual
+ }
+
+ #[cfg(feature = "unwind")]
+ fn emit_unwind_info(
+ &self,
+ result: &MachCompileResult,
+ kind: crate::machinst::UnwindInfoKind,
+ ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+ use crate::isa::unwind::UnwindInfo;
+ use crate::machinst::UnwindInfoKind;
+ Ok(match (result.unwind_info.as_ref(), kind) {
+ (Some(info), UnwindInfoKind::SystemV) => {
+ inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
+ }
+ (Some(_info), UnwindInfoKind::Windows) => {
+ //TODO inst::unwind::winx64::create_unwind_info(info.clone())?.map(|u| UnwindInfo::WindowsX64(u))
+ None
+ }
+ _ => None,
+ })
+ }
+
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ Some(inst::unwind::systemv::create_cie())
+ }
+}
+
+/// Create a new `isa::Builder`.
+pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
+ IsaBuilder {
+ triple,
+ setup: x64_settings::builder(),
+ constructor: isa_constructor,
+ }
+}
+
+fn isa_constructor(
+ triple: Triple,
+ shared_flags: Flags,
+ builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+ let isa_flags = x64_settings::Flags::new(&shared_flags, builder);
+ let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
+ Box::new(TargetIsaAdapter::new(backend))
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/settings.rs b/third_party/rust/cranelift-codegen/src/isa/x64/settings.rs
new file mode 100644
index 0000000000..c5371bb132
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/settings.rs
@@ -0,0 +1,9 @@
+//! x86 Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
+include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/abi.rs b/third_party/rust/cranelift-codegen/src/isa/x86/abi.rs
new file mode 100644
index 0000000000..5119bb3241
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/abi.rs
@@ -0,0 +1,1093 @@
+//! x86 ABI implementation.
+
+use super::super::settings as shared_settings;
+use super::registers::{FPR, GPR, RU};
+use super::settings as isa_settings;
+use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
+use crate::cursor::{Cursor, CursorPosition, EncCursor};
+use crate::ir;
+use crate::ir::immediates::Imm64;
+use crate::ir::stackslot::{StackOffset, StackSize};
+use crate::ir::types;
+use crate::ir::{
+ get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
+ ValueLoc,
+};
+use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
+use crate::regalloc::RegisterSet;
+use crate::result::CodegenResult;
+use crate::stack_layout::layout_stack;
+use alloc::borrow::Cow;
+use core::i32;
+use target_lexicon::{PointerWidth, Triple};
+
+/// Argument registers for x86-64
+static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
+
+/// Return value registers.
+static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
+
+/// Argument registers for x86-64, when using windows fastcall
+static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9];
+
+/// Return value registers for x86-64, when using windows fastcall
+static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
+
+/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used
+/// by the callee for temporary values.
+///
+/// [1] "Space is allocated on the call stack as a shadow store for callees to save" This shadow
+/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually
+/// used by the callee to save & restore the values of the arguments.
+///
+/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
+/// convention reserves spill space for parameters, you don’t have to use them as such"
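+///
+/// (The 32 bytes correspond to the four register-passed argument slots, rcx/rdx/r8/r9, at 8
+/// bytes each.)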
+const WIN_SHADOW_STACK_SPACE: StackSize = 32;
+
+/// Stack alignment requirement for functions.
+///
+/// 16 bytes is the perfect stack alignment, because:
+///
+/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which
+/// are aligned to 16 bytes in order to aid performance".
+/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a
+/// 16-byte aligned stack pointer.
+/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located
+/// higher up in the stack.
+const STACK_ALIGNMENT: u32 = 16;
+
+#[derive(Clone)]
+struct Args {
+ pointer_bytes: u8,
+ pointer_bits: u8,
+ pointer_type: ir::Type,
+ gpr: &'static [RU],
+ gpr_used: usize,
+ fpr_limit: usize,
+ fpr_used: usize,
+ offset: u32,
+ call_conv: CallConv,
+ shared_flags: shared_settings::Flags,
+ #[allow(dead_code)]
+ isa_flags: isa_settings::Flags,
+ assigning_returns: bool,
+}
+
+impl Args {
+ fn new(
+ bits: u8,
+ gpr: &'static [RU],
+ fpr_limit: usize,
+ call_conv: CallConv,
+ shared_flags: &shared_settings::Flags,
+ isa_flags: &isa_settings::Flags,
+ assigning_returns: bool,
+ ) -> Self {
+ let offset = if call_conv.extends_windows_fastcall() {
+ WIN_SHADOW_STACK_SPACE
+ } else {
+ 0
+ };
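+ // Under fastcall, stack arguments are laid out after the caller-allocated 32-byte shadow
+ // space, so start assigning stack offsets there.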
+
+ Self {
+ pointer_bytes: bits / 8,
+ pointer_bits: bits,
+ pointer_type: ir::Type::int(u16::from(bits)).unwrap(),
+ gpr,
+ gpr_used: 0,
+ fpr_limit,
+ fpr_used: 0,
+ offset,
+ call_conv,
+ shared_flags: shared_flags.clone(),
+ isa_flags: isa_flags.clone(),
+ assigning_returns,
+ }
+ }
+}
+
+impl ArgAssigner for Args {
+ fn assign(&mut self, arg: &AbiParam) -> ArgAction {
+ if let ArgumentPurpose::StructArgument(size) = arg.purpose {
+ if self.call_conv != CallConv::SystemV {
+ panic!(
+ "The sarg argument purpose is not yet implemented for non-systemv call conv {:?}",
+ self.call_conv,
+ );
+ }
+ let loc = ArgumentLoc::Stack(self.offset as i32);
+ self.offset += size;
+ debug_assert!(self.offset <= i32::MAX as u32);
+ return ArgAction::AssignAndChangeType(loc, types::SARG_T);
+ }
+
+ let ty = arg.value_type;
+
+ if ty.bits() > u16::from(self.pointer_bits) {
+ if !self.assigning_returns && self.call_conv.extends_windows_fastcall() {
+ // "Any argument that doesn't fit in 8 bytes, or isn't
+ // 1, 2, 4, or 8 bytes, must be passed by reference"
+ return ValueConversion::Pointer(self.pointer_type).into();
+ } else if !ty.is_vector() && !ty.is_float() {
+ // On SystemV large integers and booleans are broken down to fit in a register.
+ return ValueConversion::IntSplit.into();
+ }
+ }
+
+ // Vectors should stay in vector registers unless SIMD is disabled, in which case they are split.
+ if ty.is_vector() {
+ if self.shared_flags.enable_simd() {
+ let reg = FPR.unit(self.fpr_used);
+ self.fpr_used += 1;
+ return ArgumentLoc::Reg(reg).into();
+ }
+ return ValueConversion::VectorSplit.into();
+ }
+
+ // Small integers are extended to the size of a pointer register.
+ if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
+ match arg.extension {
+ ArgumentExtension::None => {}
+ ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
+ ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
+ }
+ }
+
+ // Handle special-purpose arguments.
+ if ty.is_int() && self.call_conv.extends_baldrdash() {
+ match arg.purpose {
+ // This is SpiderMonkey's `WasmTlsReg`.
+ ArgumentPurpose::VMContext => {
+ return ArgumentLoc::Reg(if self.pointer_bits == 64 {
+ RU::r14
+ } else {
+ RU::rsi
+ } as RegUnit)
+ .into();
+ }
+ // This is SpiderMonkey's `WasmTableCallSigReg`.
+ ArgumentPurpose::SignatureId => {
+ return ArgumentLoc::Reg(if self.pointer_bits == 64 {
+ RU::r10
+ } else {
+ RU::rcx
+ } as RegUnit)
+ .into()
+ }
+ _ => {}
+ }
+ }
+
+ // Try to use a GPR.
+ if !ty.is_float() && self.gpr_used < self.gpr.len() {
+ let reg = self.gpr[self.gpr_used] as RegUnit;
+ self.gpr_used += 1;
+ return ArgumentLoc::Reg(reg).into();
+ }
+
+ // Try to use an FPR.
+ let fpr_offset = if self.call_conv.extends_windows_fastcall() {
+ // Float and general registers on windows share the same parameter index.
+ // The used register depends entirely on the parameter index: Even if XMM0
+ // is not used for the first parameter, it cannot be used for the second parameter.
+ debug_assert_eq!(self.fpr_limit, self.gpr.len());
+ &mut self.gpr_used
+ } else {
+ &mut self.fpr_used
+ };
+
+ if ty.is_float() && *fpr_offset < self.fpr_limit {
+ let reg = FPR.unit(*fpr_offset);
+ *fpr_offset += 1;
+ return ArgumentLoc::Reg(reg).into();
+ }
+
+ // Assign a stack location.
+ let loc = ArgumentLoc::Stack(self.offset as i32);
+ self.offset += u32::from(self.pointer_bytes);
+ debug_assert!(self.offset <= i32::MAX as u32);
+ loc.into()
+ }
+}
+
+/// Legalize `sig`.
+pub fn legalize_signature(
+ sig: &mut Cow<ir::Signature>,
+ triple: &Triple,
+ _current: bool,
+ shared_flags: &shared_settings::Flags,
+ isa_flags: &isa_settings::Flags,
+) {
+ let bits;
+ let mut args;
+
+ match triple.pointer_width().unwrap() {
+ PointerWidth::U16 => panic!(),
+ PointerWidth::U32 => {
+ bits = 32;
+ args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false);
+ }
+ PointerWidth::U64 => {
+ bits = 64;
+ args = if sig.call_conv.extends_windows_fastcall() {
+ Args::new(
+ bits,
+ &ARG_GPRS_WIN_FASTCALL_X64[..],
+ 4,
+ sig.call_conv,
+ shared_flags,
+ isa_flags,
+ false,
+ )
+ } else {
+ Args::new(
+ bits,
+ &ARG_GPRS[..],
+ 8,
+ sig.call_conv,
+ shared_flags,
+ isa_flags,
+ false,
+ )
+ };
+ }
+ }
+
+ let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() {
+ // windows-x64 calling convention only uses XMM0 or RAX for return values
+ (&RET_GPRS_WIN_FASTCALL_X64[..], 1)
+ } else {
+ (&RET_GPRS[..], 2)
+ };
+
+ let mut rets = Args::new(
+ bits,
+ ret_regs,
+ ret_fpr_limit,
+ sig.call_conv,
+ shared_flags,
+ isa_flags,
+ true,
+ );
+
+ // If we don't have enough available return registers
+ // to fit all of the return values, we need to backtrack and start
+ // assigning locations all over again with a different strategy. In order to
+ // do that, we need a copy of the original assigner for the returns.
+ let mut backup_rets = rets.clone();
+
+ if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
+ if new_returns
+ .iter()
+ .filter(|r| r.purpose == ArgumentPurpose::Normal)
+ .any(|r| !r.location.is_reg())
+ {
+ // The return values couldn't all fit into available return
+ // registers. Introduce the use of a struct-return parameter.
+ debug_assert!(!sig.uses_struct_return_param());
+
+ // We're using the first register for the return pointer parameter.
+ let mut ret_ptr_param = AbiParam {
+ value_type: args.pointer_type,
+ purpose: ArgumentPurpose::StructReturn,
+ extension: ArgumentExtension::None,
+ location: ArgumentLoc::Unassigned,
+ legalized_to_pointer: false,
+ };
+ match args.assign(&ret_ptr_param) {
+ ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
+ ret_ptr_param.location = ArgumentLoc::Reg(reg);
+ sig.to_mut().params.push(ret_ptr_param);
+ }
+ _ => unreachable!("return pointer should always get a register assignment"),
+ }
+
+ // We're using the first return register for the return pointer (like
+ // sys v does).
+ let mut ret_ptr_return = AbiParam {
+ value_type: args.pointer_type,
+ purpose: ArgumentPurpose::StructReturn,
+ extension: ArgumentExtension::None,
+ location: ArgumentLoc::Unassigned,
+ legalized_to_pointer: false,
+ };
+ match backup_rets.assign(&ret_ptr_return) {
+ ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
+ ret_ptr_return.location = ArgumentLoc::Reg(reg);
+ sig.to_mut().returns.push(ret_ptr_return);
+ }
+ _ => unreachable!("return pointer should always get a register assignment"),
+ }
+
+ sig.to_mut().returns.retain(|ret| {
+ // Either this is the return pointer, in which case we want to keep
+ // it, or else assume that it is assigned for a reason and doesn't
+ // conflict with our return-pointer legalization.
+ debug_assert_eq!(
+ ret.location.is_assigned(),
+ ret.purpose != ArgumentPurpose::Normal
+ );
+ ret.location.is_assigned()
+ });
+
+ if let Some(new_returns) = legalize_args(&sig.returns, &mut backup_rets) {
+ sig.to_mut().returns = new_returns;
+ }
+ } else {
+ sig.to_mut().returns = new_returns;
+ }
+ }
+
+ if let Some(new_params) = legalize_args(&sig.params, &mut args) {
+ sig.to_mut().params = new_params;
+ }
+}
+
+/// Get register class for a type appearing in a legalized signature.
+pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
+ if ty.is_int() || ty.is_bool() || ty.is_ref() {
+ GPR
+ } else {
+ FPR
+ }
+}
+
+/// Get the set of allocatable registers for `func`.
+pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet {
+ let mut regs = RegisterSet::new();
+ regs.take(GPR, RU::rsp as RegUnit);
+ regs.take(GPR, RU::rbp as RegUnit);
+
+ // 32-bit arch only has 8 registers.
+ if triple.pointer_width().unwrap() != PointerWidth::U64 {
+ for i in 8..16 {
+ regs.take(GPR, GPR.unit(i));
+ regs.take(FPR, FPR.unit(i));
+ }
+ if flags.enable_pinned_reg() {
+ unimplemented!("Pinned register not implemented on x86-32.");
+ }
+ } else {
+ // Choose r15 as the pinned register on 64-bits: it is non-volatile on native ABIs and
+ // isn't the fixed output register of any instruction.
+ if flags.enable_pinned_reg() {
+ regs.take(GPR, RU::r15 as RegUnit);
+ }
+ }
+
+ regs
+}
+
+/// Get the set of callee-saved general-purpose registers.
+fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
+ match isa.triple().pointer_width().unwrap() {
+ PointerWidth::U16 => panic!(),
+ PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
+ PointerWidth::U64 => {
+ if call_conv.extends_windows_fastcall() {
+ // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are
+ // considered nonvolatile and must be saved and restored by a function that uses
+ // them."
+ // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
+ // RSP & RBP are not listed below, since they are restored automatically during
+ // a function call. If that wasn't the case, function calls (RET) would not work.
+ &[
+ RU::rbx,
+ RU::rdi,
+ RU::rsi,
+ RU::r12,
+ RU::r13,
+ RU::r14,
+ RU::r15,
+ ]
+ } else {
+ &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
+ }
+ }
+ }
+}
+
+/// Get the set of callee-saved floating-point (SIMD) registers.
+fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
+ match isa.triple().pointer_width().unwrap() {
+ PointerWidth::U16 => panic!(),
+ PointerWidth::U32 => &[],
+ PointerWidth::U64 => {
+ if call_conv.extends_windows_fastcall() {
+ // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved
+ // and restored by a function that uses them."
+ // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of
+ // February 5th, 2020.
+ &[
+ RU::xmm6,
+ RU::xmm7,
+ RU::xmm8,
+ RU::xmm9,
+ RU::xmm10,
+ RU::xmm11,
+ RU::xmm12,
+ RU::xmm13,
+ RU::xmm14,
+ RU::xmm15,
+ ]
+ } else {
+ &[]
+ }
+ }
+ }
+}
+
+/// Get the set of callee-saved registers that are used.
+fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet {
+ let mut all_callee_saved = RegisterSet::empty();
+ for reg in callee_saved_gprs(isa, func.signature.call_conv) {
+ all_callee_saved.free(GPR, *reg as RegUnit);
+ }
+ for reg in callee_saved_fprs(isa, func.signature.call_conv) {
+ all_callee_saved.free(FPR, *reg as RegUnit);
+ }
+
+ let mut used = RegisterSet::empty();
+ for value_loc in func.locations.values() {
+ // Note that `value_loc` here contains only a single unit of a potentially multi-unit
+ // register. We don't use registers that overlap each other in the x86 ISA, but in others
+ // we do. So this should not be blindly reused.
+ if let ValueLoc::Reg(ru) = *value_loc {
+ if GPR.contains(ru) {
+ if !used.is_avail(GPR, ru) {
+ used.free(GPR, ru);
+ }
+ } else if FPR.contains(ru) {
+ if !used.is_avail(FPR, ru) {
+ used.free(FPR, ru);
+ }
+ }
+ }
+ }
+
+ // regmove and regfill instructions may temporarily divert values into other registers,
+ // and these are not reflected in `func.locations`. Scan the function for such instructions
+ // and note which callee-saved registers they use.
+ //
+ // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
+ // to avoid this step.
+ for block in &func.layout {
+ for inst in func.layout.block_insts(block) {
+ match func.dfg[inst] {
+ ir::instructions::InstructionData::RegMove { dst, .. }
+ | ir::instructions::InstructionData::RegFill { dst, .. } => {
+ if GPR.contains(dst) {
+ if !used.is_avail(GPR, dst) {
+ used.free(GPR, dst);
+ }
+ } else if FPR.contains(dst) {
+ if !used.is_avail(FPR, dst) {
+ used.free(FPR, dst);
+ }
+ }
+ }
+ _ => (),
+ }
+ }
+ }
+
+ used.intersect(&all_callee_saved);
+ used
+}
+
+pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
+ match func.signature.call_conv {
+ // For now, just translate fast and cold as system_v.
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
+ system_v_prologue_epilogue(func, isa)
+ }
+ CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
+ CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => {
+ baldrdash_prologue_epilogue(func, isa)
+ }
+ CallConv::Probestack => unimplemented!("probestack calling convention"),
+ CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"),
+ }
+}
+
+fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
+ debug_assert!(
+ !isa.flags().enable_probestack(),
+ "baldrdash does not expect cranelift to emit stack probes"
+ );
+
+ let word_size = StackSize::from(isa.pointer_bytes());
+ let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
+ WIN_SHADOW_STACK_SPACE
+ } else {
+ 0
+ };
+
+ let bytes =
+ StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size;
+
+ let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
+ ss.offset = Some(-(bytes as StackOffset));
+ func.stack_slots.push(ss);
+
+ let is_leaf = func.is_leaf();
+ layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?;
+ Ok(())
+}
+
+/// Implementation of the fastcall-based Win64 calling convention described at [1]
+/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
+fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
+ if isa.triple().pointer_width().unwrap() != PointerWidth::U64 {
+ panic!("TODO: windows-fastcall: x86-32 not implemented yet");
+ }
+
+ // The reserved stack area is composed of:
+ // return address + frame pointer + all callee-saved registers
+ //
+ // Pushing the return address is an implicit function of the `call`
+ // instruction. Each of the others we will then push explicitly. Then we
+ // will adjust the stack pointer to make room for the rest of the required
+ // space for this frame.
+ let csrs = callee_saved_regs_used(isa, func);
+ let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32;
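+ // The `+ 2` accounts for the return address pushed by `call` and the frame pointer pushed
+ // in the prologue, as described above.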
+ let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32;
+ let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size;
+
+ // FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed
+ if fpsr_stack_size > 0 {
+ csr_stack_size = (csr_stack_size + 15) & !15;
+ }
+
+ func.create_stack_slot(ir::StackSlotData {
+ kind: ir::StackSlotKind::IncomingArg,
+ size: csr_stack_size,
+ offset: Some(-(csr_stack_size as StackOffset)),
+ });
+
+ let is_leaf = func.is_leaf();
+
+ // If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space
+ if !is_leaf {
+ // TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack
+ func.create_stack_slot(ir::StackSlotData {
+ kind: ir::StackSlotKind::ExplicitSlot,
+ size: WIN_SHADOW_STACK_SPACE,
+ offset: None,
+ });
+ }
+
+ let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
+
+ // Subtract the GPR saved register size from the local size because pushes are used for the saves
+ let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32);
+
+ // Add CSRs to function signature
+ let reg_type = isa.pointer_type();
+ let sp_arg_index = if fpsr_stack_size > 0 {
+ let sp_arg = ir::AbiParam::special_reg(
+ reg_type,
+ ir::ArgumentPurpose::CalleeSaved,
+ RU::rsp as RegUnit,
+ );
+ let index = func.signature.params.len();
+ func.signature.params.push(sp_arg);
+ Some(index)
+ } else {
+ None
+ };
+ let fp_arg = ir::AbiParam::special_reg(
+ reg_type,
+ ir::ArgumentPurpose::FramePointer,
+ RU::rbp as RegUnit,
+ );
+ func.signature.params.push(fp_arg);
+ func.signature.returns.push(fp_arg);
+
+ for gp_csr in csrs.iter(GPR) {
+ let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr);
+ func.signature.params.push(csr_arg);
+ func.signature.returns.push(csr_arg);
+ }
+
+ for fp_csr in csrs.iter(FPR) {
+ // The calling convention described in
+ // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
+ // preserving the low 128 bits of XMM6-XMM15.
+ let csr_arg =
+ ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr);
+ func.signature.params.push(csr_arg);
+ func.signature.returns.push(csr_arg);
+ }
+
+ // Set up the cursor and insert the prologue
+ let entry_block = func.layout.entry_block().expect("missing entry block");
+ let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
+ insert_common_prologue(
+ &mut pos,
+ local_stack_size,
+ reg_type,
+ &csrs,
+ sp_arg_index.is_some(),
+ isa,
+ );
+
+ // Reset the cursor and insert the epilogue
+ let mut pos = pos.at_position(CursorPosition::Nowhere);
+ insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
+
+ Ok(())
+}
+
+/// Insert a System V-compatible prologue and epilogue.
+fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
+ let pointer_width = isa.triple().pointer_width().unwrap();
+ let word_size = pointer_width.bytes() as usize;
+
+ let csrs = callee_saved_regs_used(isa, func);
+ assert!(
+ csrs.iter(FPR).len() == 0,
+ "SysV ABI does not have callee-save SIMD registers"
+ );
+
+ // The reserved stack area is composed of:
+ // return address + frame pointer + all callee-saved registers
+ //
+ // Pushing the return address is an implicit function of the `call`
+ // instruction. Each of the others we will then push explicitly. Then we
+ // will adjust the stack pointer to make room for the rest of the required
+ // space for this frame.
+ let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
+ func.create_stack_slot(ir::StackSlotData {
+ kind: ir::StackSlotKind::IncomingArg,
+ size: csr_stack_size as u32,
+ offset: Some(-csr_stack_size),
+ });
+
+ let is_leaf = func.is_leaf();
+ let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
+ let local_stack_size = i64::from(total_stack_size - csr_stack_size);
+
+ // Add CSRs to function signature
+ let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
+ // On X86-32 all parameters, including vmctx, are passed on stack, and we need
+ // to extract vmctx from the stack before we can save the frame pointer.
+ let sp_arg_index = if isa.pointer_bits() == 32 {
+ let sp_arg = ir::AbiParam::special_reg(
+ reg_type,
+ ir::ArgumentPurpose::CalleeSaved,
+ RU::rsp as RegUnit,
+ );
+ let index = func.signature.params.len();
+ func.signature.params.push(sp_arg);
+ Some(index)
+ } else {
+ None
+ };
+ let fp_arg = ir::AbiParam::special_reg(
+ reg_type,
+ ir::ArgumentPurpose::FramePointer,
+ RU::rbp as RegUnit,
+ );
+ func.signature.params.push(fp_arg);
+ func.signature.returns.push(fp_arg);
+
+ for csr in csrs.iter(GPR) {
+ let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
+ func.signature.params.push(csr_arg);
+ func.signature.returns.push(csr_arg);
+ }
+
+ // Set up the cursor and insert the prologue
+ let entry_block = func.layout.entry_block().expect("missing entry block");
+ let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
+ insert_common_prologue(
+ &mut pos,
+ local_stack_size,
+ reg_type,
+ &csrs,
+ sp_arg_index.is_some(),
+ isa,
+ );
+
+ // Reset the cursor and insert the epilogue
+ let mut pos = pos.at_position(CursorPosition::Nowhere);
+ insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
+
+ Ok(())
+}
+
+/// Insert the prologue for a given function.
+/// This is used by common calling conventions such as System V.
+fn insert_common_prologue(
+ pos: &mut EncCursor,
+ stack_size: i64,
+ reg_type: ir::types::Type,
+ csrs: &RegisterSet,
+ has_sp_param: bool,
+ isa: &dyn TargetIsa,
+) {
+ let sp = if has_sp_param {
+ let block = pos.current_block().expect("missing block under cursor");
+ let sp = pos.func.dfg.append_block_param(block, reg_type);
+ pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit);
+ Some(sp)
+ } else {
+ None
+ };
+
+ // If this is a leaf function with zero stack size, then there's no need to
+ // insert a stack check since it can't overflow anything and
+ // forward progress is guaranteed so long as loops are handled anyway.
+ //
+ // If this has a stack size it could stack overflow, or if it isn't a leaf
+ // it could be part of a long call chain which we need to check anyway.
+ //
+ // First we look for the stack limit as a special argument to the function;
+ // failing that, we check whether a stack-limit global value has been provided,
+ // which is then interpreted to compute the stack limit from the arguments or
+ // perhaps constants.
+ if stack_size > 0 || !pos.func.is_leaf() {
+ let scratch = ir::ValueLoc::Reg(RU::rax as RegUnit);
+ let stack_limit_arg = match pos.func.special_param(ArgumentPurpose::StackLimit) {
+ Some(arg) => {
+ let copy = pos.ins().copy(arg);
+ pos.func.locations[copy] = scratch;
+ Some(copy)
+ }
+ None => pos
+ .func
+ .stack_limit
+ .map(|gv| interpret_gv(pos, gv, sp, scratch)),
+ };
+ if let Some(stack_limit_arg) = stack_limit_arg {
+ insert_stack_check(pos, stack_size, stack_limit_arg);
+ }
+ }
+
+ // Append param to entry block
+ let block = pos.current_block().expect("missing block under cursor");
+ let fp = pos.func.dfg.append_block_param(block, reg_type);
+ pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
+
+ pos.ins().x86_push(fp);
+
+ let mov_sp_inst = pos
+ .ins()
+ .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);
+
+ let mut last_csr_push = None;
+ for reg in csrs.iter(GPR) {
+ // Append param to entry block
+ let csr_arg = pos.func.dfg.append_block_param(block, reg_type);
+
+ // Assign it a location
+ pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
+ last_csr_push = Some(pos.ins().x86_push(csr_arg));
+ }
+
+ // Allocate stack frame storage.
+ let mut adjust_sp_inst = None;
+ if stack_size > 0 {
+ if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2())
+ {
+ // Emit a stack probe.
+ let rax = RU::rax as RegUnit;
+ let rax_val = ir::ValueLoc::Reg(rax);
+
+ // The probestack function expects its input in %rax.
+ let arg = pos.ins().iconst(reg_type, stack_size);
+ pos.func.locations[arg] = rax_val;
+
+ // Call the probestack function.
+ let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
+
+ // Make the call.
+ let call = if !isa.flags().is_pic()
+ && isa.triple().pointer_width().unwrap() == PointerWidth::U64
+ && !pos.func.dfg.ext_funcs[callee].colocated
+ {
+ // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
+ // Use r11 as it may be clobbered under all supported calling conventions.
+ let r11 = RU::r11 as RegUnit;
+ let sig = pos.func.dfg.ext_funcs[callee].signature;
+ let addr = pos.ins().func_addr(reg_type, callee);
+ pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
+ pos.ins().call_indirect(sig, addr, &[arg])
+ } else {
+ // Otherwise just do a normal call.
+ pos.ins().call(callee, &[arg])
+ };
+
+ // If the probestack function doesn't adjust sp, do it ourselves.
+ if !isa.flags().probestack_func_adjusts_sp() {
+ let result = pos.func.dfg.inst_results(call)[0];
+ pos.func.locations[result] = rax_val;
+ adjust_sp_inst = Some(pos.ins().adjust_sp_down(result));
+ }
+ } else {
+ // Simply decrement the stack pointer.
+ adjust_sp_inst = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size)));
+ }
+ }
+
+ // With the stack pointer adjusted, save any callee-saved floating point registers via offset
+ // FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes
+ let mut last_fpr_save = None;
+
+ for (i, reg) in csrs.iter(FPR).enumerate() {
+ // Append param to entry block
+ let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
+
+ // Since regalloc has already run, we must assign a location.
+ pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
+
+ // Offset to where the register is saved relative to RSP, accounting for FPR save alignment
+ let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
+ + (stack_size % types::F64X2.bytes() as i64);
+
+ last_fpr_save = Some(pos.ins().store(
+ ir::MemFlags::trusted(),
+ csr_arg,
+ sp.expect("FPR save requires SP param"),
+ (stack_size - offset) as i32,
+ ));
+ }
+
+ pos.func.prologue_end = Some(
+ last_fpr_save
+ .or(adjust_sp_inst)
+ .or(last_csr_push)
+ .unwrap_or(mov_sp_inst),
+ );
+}
+
+/// Inserts code necessary to calculate `gv`.
+///
+/// Note that this is typically done with `ins().global_value(...)` but that
+/// requires legalization to run to encode it, and we're running super late
+/// here in the backend where legalization isn't possible. To get around this
+/// we manually interpret the `gv` specified and do register allocation for
+/// intermediate values.
+///
+/// This is an incomplete implementation of loading `GlobalValue` values to get
+/// compared to the stack pointer, but currently it serves enough functionality
+/// to get this implemented in `wasmtime` itself. This'll likely get expanded a
+/// bit over time!
+fn interpret_gv(
+ pos: &mut EncCursor,
+ gv: ir::GlobalValue,
+ sp: Option<ir::Value>,
+ scratch: ir::ValueLoc,
+) -> ir::Value {
+ match pos.func.global_values[gv] {
+ ir::GlobalValueData::VMContext => {
+ let vmctx_index = pos
+ .func
+ .signature
+ .special_param_index(ir::ArgumentPurpose::VMContext)
+ .expect("no vmcontext parameter found");
+ match pos.func.signature.params[vmctx_index] {
+ AbiParam {
+ location: ArgumentLoc::Reg(_),
+ ..
+ } => {
+ let entry = pos.func.layout.entry_block().unwrap();
+ pos.func.dfg.block_params(entry)[vmctx_index]
+ }
+ AbiParam {
+ location: ArgumentLoc::Stack(offset),
+ value_type,
+ ..
+ } => {
+ let offset =
+ offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8));
+ // The following access can be marked `trusted` because it is a load of an argument. We
+ // know it is safe because it was safe to write it in preparing this function call.
+ let ret =
+ pos.ins()
+ .load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset);
+ pos.func.locations[ret] = scratch;
+ return ret;
+ }
+ AbiParam {
+ location: ArgumentLoc::Unassigned,
+ ..
+ } => unreachable!(),
+ }
+ }
+ ir::GlobalValueData::Load {
+ base,
+ offset,
+ global_type,
+ readonly: _,
+ } => {
+ let base = interpret_gv(pos, base, sp, scratch);
+ let ret = pos
+ .ins()
+ .load(global_type, ir::MemFlags::trusted(), base, offset);
+ pos.func.locations[ret] = scratch;
+ return ret;
+ }
+ ref other => panic!("global value for stack limit not supported: {}", other),
+ }
+}
+
+/// Insert a check that generates a trap if the stack pointer goes
+/// below a value in `stack_limit_arg`.
+fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) {
+ use crate::ir::condcodes::IntCC;
+
+ // Our stack pointer, after subtracting `stack_size`, must not be below
+ // `stack_limit_arg`. To do this we're going to add `stack_size` to
+ // `stack_limit_arg` and see if the stack pointer is below that. The
+ // `stack_size + stack_limit_arg` computation might overflow, however, due
+ // to how stack limits may be loaded and set externally to trigger a trap.
+ //
+ // To handle this we'll need an extra comparison to see if the stack
+ // pointer is already below `stack_limit_arg`. Most of the time this
+ // isn't necessary though since the stack limit which triggers a trap is
+ // likely a sentinel somewhere around `usize::max_value()`. In that case we
+ // only conditionally emit this pre-flight check. That way most functions
+ // have only the one comparison, and are also guaranteed that adding
+ // `stack_size` to `stack_limit_arg` won't overflow.
+ //
+ // This does mean that code generators which use this stack check
+ // functionality need to ensure that values stored into the stack limit
+ // will never overflow if this threshold is added.
+ if stack_size >= 32 * 1024 {
+ let cflags = pos.ins().ifcmp_sp(stack_limit_arg);
+ pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
+ pos.ins().trapif(
+ IntCC::UnsignedGreaterThanOrEqual,
+ cflags,
+ ir::TrapCode::StackOverflow,
+ );
+ }
+
+ // Copy `stack_limit_arg` into %rax and use it to calculate
+ // an SP threshold.
+ let sp_threshold = pos.ins().iadd_imm(stack_limit_arg, stack_size);
+ pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit);
+
+ // If the stack pointer is currently at or below the SP threshold, then after
+ // allocating the current stack frame the stack pointer will reach the limit.
+ let cflags = pos.ins().ifcmp_sp(sp_threshold);
+ pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
+ pos.ins().trapif(
+ IntCC::UnsignedGreaterThanOrEqual,
+ cflags,
+ ir::TrapCode::StackOverflow,
+ );
+}
+
+/// Find all `return` instructions and insert epilogues before them.
+fn insert_common_epilogues(
+ pos: &mut EncCursor,
+ stack_size: i64,
+ reg_type: ir::types::Type,
+ csrs: &RegisterSet,
+ sp_arg_index: Option<usize>,
+) {
+ while let Some(block) = pos.next_block() {
+ pos.goto_last_inst(block);
+ if let Some(inst) = pos.current_inst() {
+ if pos.func.dfg[inst].opcode().is_return() {
+ insert_common_epilogue(inst, block, stack_size, pos, reg_type, csrs, sp_arg_index);
+ }
+ }
+ }
+}
+
+/// Insert an epilogue given a specific `return` instruction.
+/// This is used by common calling conventions such as System V.
+fn insert_common_epilogue(
+ inst: ir::Inst,
+ block: ir::Block,
+ stack_size: i64,
+ pos: &mut EncCursor,
+ reg_type: ir::types::Type,
+ csrs: &RegisterSet,
+ sp_arg_index: Option<usize>,
+) {
+ // Insert the pop of the frame pointer
+ let fp_pop = pos.ins().x86_pop(reg_type);
+ let fp_pop_inst = pos.prev_inst().unwrap();
+ pos.func.locations[fp_pop] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
+ pos.func.dfg.append_inst_arg(inst, fp_pop);
+
+ // Insert the CSR pops
+ let mut first_csr_pop_inst = None;
+ for reg in csrs.iter(GPR) {
+ let csr_pop = pos.ins().x86_pop(reg_type);
+ first_csr_pop_inst = pos.prev_inst();
+ assert!(first_csr_pop_inst.is_some());
+ pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
+ pos.func.dfg.append_inst_arg(inst, csr_pop);
+ }
+
+ // Insert the adjustment of SP
+ let mut sp_adjust_inst = None;
+ if stack_size > 0 {
+ pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
+ sp_adjust_inst = pos.prev_inst();
+ assert!(sp_adjust_inst.is_some());
+ }
+
+ let mut first_fpr_load = None;
+ if let Some(index) = sp_arg_index {
+ let sp = pos
+ .func
+ .dfg
+ .block_params(pos.func.layout.entry_block().unwrap())[index];
+
+ // Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads)
+ for (i, reg) in csrs.iter(FPR).enumerate() {
+ // Offset to where the register is saved relative to RSP, accounting for FPR save alignment
+ let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
+ + (stack_size % types::F64X2.bytes() as i64);
+
+ let value = pos.ins().load(
+ types::F64X2,
+ ir::MemFlags::trusted(),
+ sp,
+ (stack_size - offset) as i32,
+ );
+
+ first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
+
+ pos.func.locations[value] = ir::ValueLoc::Reg(reg);
+ pos.func.dfg.append_inst_arg(inst, value);
+ }
+ } else {
+ assert!(csrs.iter(FPR).len() == 0);
+ }
+
+ pos.func.epilogues_start.push((
+ first_fpr_load
+ .or(sp_adjust_inst)
+ .or(first_csr_pop_inst)
+ .unwrap_or(fp_pop_inst),
+ block,
+ ));
+}
+
+#[cfg(feature = "unwind")]
+pub fn create_unwind_info(
+ func: &ir::Function,
+ isa: &dyn TargetIsa,
+) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+ use crate::isa::unwind::UnwindInfo;
+
+ // Assumption: RBP is being used as the frame pointer for both calling conventions
+ // In the future, we should be omitting frame pointer as an optimization, so this will change
+ Ok(match func.signature.call_conv {
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
+ super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u))
+ }
+ CallConv::WindowsFastcall => {
+ super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u))
+ }
+ _ => None,
+ })
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/binemit.rs b/third_party/rust/cranelift-codegen/src/isa/x86/binemit.rs
new file mode 100644
index 0000000000..90ed8b7ef8
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/binemit.rs
@@ -0,0 +1,576 @@
+//! Emitting binary x86 machine code.
+
+use super::enc_tables::{needs_offset, needs_sib_byte};
+use super::registers::RU;
+use crate::binemit::{bad_encoding, CodeSink, Reloc};
+use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
+use crate::ir::{
+ Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode,
+ TrapCode,
+};
+use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa};
+use crate::regalloc::RegDiversions;
+use cranelift_codegen_shared::isa::x86::EncodingBits;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
+
+// Convert a stack base to the corresponding register.
+fn stk_base(base: StackBase) -> RegUnit {
+ let ru = match base {
+ StackBase::SP => RU::rsp,
+ StackBase::FP => RU::rbp,
+ StackBase::Zone => unimplemented!(),
+ };
+ ru as RegUnit
+}
+
+// Mandatory prefix bytes for Mp* opcodes.
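+// (Added note) Indexed below as `PREFIX[pp - 1]`: pp=1 selects 0x66, pp=2
+// selects 0xf3, pp=3 selects 0xf2, and pp=0 means no mandatory prefix.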
+const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
+
+// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
+const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
+
+// A REX prefix with no bits set: 0b0100WRXB.
+const BASE_REX: u8 = 0b0100_0000;
+
+// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
+// This is used for instructions that encode a register in the low 3 bits of the opcode and for
+// instructions that use the ModR/M `reg` field for something else.
+fn rex1(reg_b: RegUnit) -> u8 {
+ let b = ((reg_b >> 3) & 1) as u8;
+ BASE_REX | b
+}
+
+// Create a dual-register REX prefix, setting:
+//
+// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
+// REX.R = bit 3 of reg register.
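+//
+// Illustrative example (added; register unit numbers assume the x86 `RU`
+// enumeration where %rax..%r15 map to 0..15): `rex2(RU::r9 as RegUnit,
+// RU::rax as RegUnit)` sets only REX.B, yielding 0b0100_0001 = 0x41.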
+fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
+ let b = ((rm >> 3) & 1) as u8;
+ let r = ((reg >> 3) & 1) as u8;
+ BASE_REX | b | (r << 2)
+}
+
+// Create a three-register REX prefix, setting:
+//
+// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
+// REX.R = bit 3 of reg register.
+// REX.X = bit 3 of SIB index register.
+fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
+ let b = ((rm >> 3) & 1) as u8;
+ let r = ((reg >> 3) & 1) as u8;
+ let x = ((index >> 3) & 1) as u8;
+ BASE_REX | b | (x << 1) | (r << 2)
+}
+
+/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1
+/// in the Intel Software Development Manual, volume 2A. These bits can be used by different
+/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one.
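+///
+/// Note (added for clarity): unlike the REX bits, the EVEX R/X/B/R' bits are
+/// stored in inverted (ones' complement) form, which is why each register bit
+/// is negated before being packed below.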
+fn evex2(rm: RegUnit, reg: RegUnit) -> u8 {
+ let b = (!(rm >> 3) & 1) as u8;
+ let x = (!(rm >> 4) & 1) as u8;
+ let r = (!(reg >> 3) & 1) as u8;
+ let r_ = (!(reg >> 4) & 1) as u8;
+ 0x00 | r_ | (b << 1) | (x << 2) | (r << 3)
+}
+
+/// Determines whether a REX prefix should be emitted. A REX byte always has 0100 in bits 7:4; bits
+/// 3:0 correspond to WRXB. W allows certain instructions to declare a 64-bit operand size; because
+/// [needs_rex] is only used by [infer_rex] and we prevent [infer_rex] from using [w] in
+/// [Template::build], we do not need to check again whether [w] forces an inferred REX prefix--it
+/// always does and should be encoded like `.rex().w()`. The R, X, and B bits are extensions of the
+/// ModR/M or SIB fields; see section 2.2.1.2 in the Intel Software Development Manual.
+#[inline]
+fn needs_rex(rex: u8) -> bool {
+ rex != BASE_REX
+}
+
+// Emit a REX prefix.
+//
+// The R, X, and B bits are computed from registers using the functions above. The W bit is
+// extracted from `bits`.
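+//
+// For example (added note): a RexOp1 encoding with REX.W set, combined with
+// `rex2(RU::rax as RegUnit, RU::r8 as RegUnit)`, emits 0x4C (REX.W | REX.R).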
+fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(rex & 0xf8, BASE_REX);
+ let w = EncodingBits::from(bits).rex_w();
+ sink.put1(rex | (w << 3));
+}
+
+// Emit a single-byte opcode with no REX prefix.
+fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
+ debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
+ sink.put1(bits as u8);
+}
+
+// Emit a single-byte opcode with REX prefix.
+fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
+ rex_prefix(bits, rex, sink);
+ sink.put1(bits as u8);
+}
+
+/// Emit a single-byte opcode with inferred REX prefix.
+fn put_dynrexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*");
+ if needs_rex(rex) {
+ rex_prefix(bits, rex, sink);
+ }
+ sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode: 0F XX
+fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
+ debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode: 0F XX with REX prefix.
+fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
+ rex_prefix(bits, rex, sink);
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+}
+
+/// Emit two-byte opcode: 0F XX with inferred REX prefix.
+fn put_dynrexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(
+ bits & 0x0f00,
+ 0x0400,
+ "Invalid encoding bits for DynRexOp2*"
+ );
+ if needs_rex(rex) {
+ rex_prefix(bits, rex, sink);
+ }
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+}
+
+// Emit single-byte opcode with mandatory prefix.
+fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
+ sink.put1(bits as u8);
+}
+
+// Emit single-byte opcode with mandatory prefix and REX.
+fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ rex_prefix(bits, rex, sink);
+ sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode (0F XX) with mandatory prefix.
+fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
+fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*");
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ rex_prefix(bits, rex, sink);
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+}
+
+/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX.
+fn put_dynrexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(
+ bits & 0x0c00,
+ 0x0400,
+ "Invalid encoding bits for DynRexMp2*"
+ );
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ if needs_rex(rex) {
+ rex_prefix(bits, rex, sink);
+ }
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+}
+
+/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
+fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
+ debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ sink.put1(0x0f);
+ sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
+ sink.put1(bits as u8);
+}
+
+/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
+fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ rex_prefix(bits, rex, sink);
+ sink.put1(0x0f);
+ sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
+ sink.put1(bits as u8);
+}
+
+/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix.
+fn put_dynrexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+ debug_assert_eq!(
+ bits & 0x0800,
+ 0x0800,
+ "Invalid encoding bits for DynRexMp3*"
+ );
+ let enc = EncodingBits::from(bits);
+ sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+ if needs_rex(rex) {
+ rex_prefix(bits, rex, sink);
+ }
+ sink.put1(0x0f);
+ sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
+ sink.put1(bits as u8);
+}
+
+/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
+/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
+/// used together for certain classes of instructions; i.e., special care should be taken to ensure
+/// that instructions use a correct, applicable `EvexContext`. Table 2-39 contains cases where
+/// opcodes can result in an #UD.
+#[allow(dead_code)]
+enum EvexContext {
+ RoundingRegToRegFP {
+ rc: EvexRoundingControl,
+ },
+ NoRoundingFP {
+ sae: bool,
+ length: EvexVectorLength,
+ },
+ MemoryOp {
+ broadcast: bool,
+ length: EvexVectorLength,
+ },
+ Other {
+ length: EvexVectorLength,
+ },
+}
+
+impl EvexContext {
+ /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
+ fn bits(&self) -> u8 {
+ match self {
+ Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
+ Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
+ Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
+ Self::Other { length } => length.bits() << 1,
+ }
+ }
+}
+
+/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
+enum EvexVectorLength {
+ V128,
+ V256,
+ V512,
+}
+
+impl EvexVectorLength {
+ /// Encode the `L'` and `L` bits for merging with the P2 byte.
+ fn bits(&self) -> u8 {
+ match self {
+ Self::V128 => 0b00,
+ Self::V256 => 0b01,
+ Self::V512 => 0b10,
+ // 0b11 is reserved (#UD).
+ }
+ }
+}
+
+/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
+enum EvexRoundingControl {
+ RNE,
+ RD,
+ RU,
+ RZ,
+}
+
+impl EvexRoundingControl {
+ /// Encode the `L'` and `L` bits for merging with the P2 byte.
+ fn bits(&self) -> u8 {
+ match self {
+ Self::RNE => 0b00,
+ Self::RD => 0b01,
+ Self::RU => 0b10,
+ Self::RZ => 0b11,
+ }
+ }
+}
+
+/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
+/// Software Development Manual, volume 2A.
+#[allow(dead_code)]
+enum EvexMasking {
+ None,
+ Merging { k: u8 },
+ Zeroing { k: u8 },
+}
+
+impl EvexMasking {
+ /// Encode the `z` bit for merging with the P2 byte.
+ fn z_bit(&self) -> u8 {
+ match self {
+ Self::None | Self::Merging { .. } => 0,
+ Self::Zeroing { .. } => 1,
+ }
+ }
+
+ /// Encode the `aaa` bits for merging with the P2 byte.
+ fn aaa_bits(&self) -> u8 {
+ match self {
+ Self::None => 0b000,
+ Self::Merging { k } | Self::Zeroing { k } => {
+ debug_assert!(*k <= 7);
+ *k
+ }
+ }
+ }
+}
+
+/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe
+/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function
+/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are
+/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring
+/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix).
+fn put_evex<CS: CodeSink + ?Sized>(
+ bits: u16,
+ reg: RegUnit,
+ vvvvv: RegUnit,
+ rm: RegUnit,
+ context: EvexContext,
+ masking: EvexMasking,
+ sink: &mut CS,
+) {
+ let enc = EncodingBits::from(bits);
+
+ // EVEX prefix.
+ sink.put1(0x62);
+
+ debug_assert!(enc.mm() < 0b100);
+ let mut p0 = enc.mm() & 0b11;
+ p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset
+ sink.put1(p0);
+
+ let mut p1 = enc.pp() | 0b100; // bit 2 is always set
+ p1 |= (!(vvvvv as u8) & 0b1111) << 3;
+ p1 |= (enc.rex_w() & 0b1) << 7;
+ sink.put1(p1);
+
+ let mut p2 = masking.aaa_bits();
+ p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3;
+ p2 |= context.bits() << 4;
+ p2 |= masking.z_bit() << 7;
+ sink.put1(p2);
+
+ // Opcode
+ sink.put1(enc.opcode_byte());
+
+ // ModR/M byte placed in recipe
+}
+
+/// Emit a ModR/M byte for reg-reg operands.
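+///
+/// For example (added note, assuming %rax=0 and %rcx=1 in the `RU` numbering):
+/// `modrm_rr(1, 0, sink)` emits 0b11_000_001 = 0xc1, i.e. mod=11, reg=%rax,
+/// r/m=%rcx.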
+fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+ let reg = reg as u8 & 7;
+ let rm = rm as u8 & 7;
+ let mut b = 0b11000000;
+ b |= reg << 3;
+ b |= rm;
+ sink.put1(b);
+}
+
+/// Emit a ModR/M byte where the reg bits are part of the opcode.
+fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
+ let reg = (bits >> 12) as u8 & 7;
+ let rm = rm as u8 & 7;
+ let mut b = 0b11000000;
+ b |= reg << 3;
+ b |= rm;
+ sink.put1(b);
+}
+
+/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
+/// Registers %rsp and %rbp are invalid for `rm`: %rsp indicates a SIB byte, and %rbp indicates an
+/// absolute immediate 32-bit address (a RIP-relative address in 64-bit mode; see `modrm_riprel`).
+fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+ let reg = reg as u8 & 7;
+ let rm = rm as u8 & 7;
+ let mut b = 0b00000000;
+ b |= reg << 3;
+ b |= rm;
+ sink.put1(b);
+}
+
+/// Emit a mode 00 ModR/M byte with a RIP-relative displacement in 64-bit mode. The effective
+/// address is calculated by adding the displacement to the 64-bit RIP of the next instruction. See
+/// section 2.2.1.6 in the Intel Software Development Manual.
+fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+ modrm_rm(0b101, reg, sink)
+}
+
+/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
+/// displacement.
+/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
+fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+ let reg = reg as u8 & 7;
+ let rm = rm as u8 & 7;
+ let mut b = 0b01000000;
+ b |= reg << 3;
+ b |= rm;
+ sink.put1(b);
+}
+
+/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
+/// displacement.
+/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
+fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+ let reg = reg as u8 & 7;
+ let rm = rm as u8 & 7;
+ let mut b = 0b10000000;
+ b |= reg << 3;
+ b |= rm;
+ sink.put1(b);
+}
+
+/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
+fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+ modrm_rm(0b100, reg, sink);
+}
+
+/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
+/// displacement are present.
+fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+ modrm_disp8(0b100, reg, sink);
+}
+
+/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
+/// displacement are present.
+fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+ modrm_disp32(0b100, reg, sink);
+}
+
+/// Emit a SIB byte with a base register and no scale+index.
+fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
+ let base = base as u8 & 7;
+ // SIB SS_III_BBB.
+ let mut b = 0b00_100_000;
+ b |= base;
+ sink.put1(b);
+}
+
+/// Emit a SIB byte with a scale, base, and index.
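+///
+/// For example (added note, assuming %rax=0 and %rcx=1 in the `RU` numbering):
+/// `sib(2, 1, 0, sink)` encodes `[%rax + %rcx*4]` as 0b10_001_000 = 0x88.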
+fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
+ // SIB SS_III_BBB.
+ debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
+ let scale = scale & 3;
+ let index = index as u8 & 7;
+ let base = base as u8 & 7;
+ let b: u8 = (scale << 6) | (index << 3) | base;
+ sink.put1(b);
+}
+
+/// Get the low 4 bits of an opcode for an integer condition code.
+///
+/// Add this offset to a base opcode for:
+///
+/// ---- 0x70: Short conditional branch.
+/// 0x0f 0x80: Long conditional branch.
+/// 0x0f 0x90: SetCC.
+///
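+/// For example (added note): `Equal` maps to 0x4, so the short branch is 0x74
+/// (je), the long branch is 0x0f 0x84, and the SetCC form is 0x0f 0x94 (sete).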
+fn icc2opc(cond: IntCC) -> u16 {
+ use crate::ir::condcodes::IntCC::*;
+ match cond {
+ Overflow => 0x0,
+ NotOverflow => 0x1,
+ UnsignedLessThan => 0x2,
+ UnsignedGreaterThanOrEqual => 0x3,
+ Equal => 0x4,
+ NotEqual => 0x5,
+ UnsignedLessThanOrEqual => 0x6,
+ UnsignedGreaterThan => 0x7,
+ // 0x8 = Sign.
+ // 0x9 = !Sign.
+ // 0xa = Parity even.
+ // 0xb = Parity odd.
+ SignedLessThan => 0xc,
+ SignedGreaterThanOrEqual => 0xd,
+ SignedLessThanOrEqual => 0xe,
+ SignedGreaterThan => 0xf,
+ }
+}
+
+/// Get the low 4 bits of an opcode for a floating point condition code.
+///
+/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+///
+/// ZPC OSA
+/// UN 111 000
+/// GT 000 000
+/// LT 001 000
+/// EQ 100 000
+///
+/// Not all floating point condition codes are supported.
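+///
+/// For example (added note): `GreaterThan` maps to 0x7 (`ja`/`seta`), which is
+/// taken only when CF=0 and ZF=0, i.e. exactly the ordered greater-than row in
+/// the table above.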
+fn fcc2opc(cond: FloatCC) -> u16 {
+ use crate::ir::condcodes::FloatCC::*;
+ match cond {
+ Ordered => 0xb, // EQ|LT|GT => *np (P=0)
+ Unordered => 0xa, // UN => *p (P=1)
+ OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
+ UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
+ GreaterThan => 0x7, // GT => *a (C=0&Z=0)
+ GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
+ UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
+ UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
+ Equal | // EQ
+ NotEqual | // UN|LT|GT
+ LessThan | // LT
+ LessThanOrEqual | // LT|EQ
+ UnorderedOrGreaterThan | // UN|GT
+ UnorderedOrGreaterThanOrEqual // UN|GT|EQ
+ => panic!("{} not supported", cond),
+ }
+}
+
+/// Emit a single-byte branch displacement to `destination`.
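+///
+/// (Added note) The `+ 1` below, like the `+ 4` in `disp4`, accounts for the
+/// displacement being relative to the end of the displacement field, i.e. the
+/// address of the next instruction.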
+fn disp1<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
+ let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
+ sink.put1(delta as u8);
+}
+
+/// Emit a four-byte branch displacement to `destination`.
+fn disp4<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
+ let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
+ sink.put4(delta);
+}
+
+/// Emit a four-byte displacement to jump table `jt`.
+fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
+ let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
+ sink.put4(delta);
+ sink.reloc_jt(Reloc::X86PCRelRodata4, jt);
+}
+
+/// Emit a four-byte displacement to `constant`.
+fn const_disp4<CS: CodeSink + ?Sized>(constant: Constant, func: &Function, sink: &mut CS) {
+ let offset = func.dfg.constants.get_offset(constant);
+ let delta = offset.wrapping_sub(sink.offset() + 4);
+ sink.put4(delta);
+ sink.reloc_constant(Reloc::X86PCRelRodata4, offset);
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/enc_tables.rs b/third_party/rust/cranelift-codegen/src/isa/x86/enc_tables.rs
new file mode 100644
index 0000000000..976f1581e3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/enc_tables.rs
@@ -0,0 +1,1922 @@
+//! Encoding tables for x86 ISAs.
+
+use super::registers::*;
+use crate::bitset::BitSet;
+use crate::cursor::{Cursor, FuncCursor};
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::types::*;
+use crate::ir::{self, Function, Inst, InstBuilder, MemFlags};
+use crate::isa::constraints::*;
+use crate::isa::enc_tables::*;
+use crate::isa::encoding::base_size;
+use crate::isa::encoding::{Encoding, RecipeSizing};
+use crate::isa::RegUnit;
+use crate::isa::{self, TargetIsa};
+use crate::legalizer::expand_as_libcall;
+use crate::predicates;
+use crate::regalloc::RegDiversions;
+
+include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
+
+/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB).
+///
+/// Normal x86 instructions have only 3 bits for encoding a register.
+/// The REX prefix adds the REX.R, REX.X, and REX.B bits, which supply a fourth bit for each register field.
+pub fn is_extended_reg(reg: RegUnit) -> bool {
+ // Extended registers have the fourth bit set.
+ reg as u8 & 0b1000 != 0
+}
+
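+// (Added note) In the ModR/M r/m field the low three bits of %rsp/%r12 (0b100)
+// select a SIB byte rather than a base register, and %rbp/%r13 (0b101) in mode
+// 00 select RIP-relative/disp32 addressing, so addressing them directly needs
+// an extra SIB byte or a zero displacement byte, respectively.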
+pub fn needs_sib_byte(reg: RegUnit) -> bool {
+ reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
+}
+pub fn needs_offset(reg: RegUnit) -> bool {
+ reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit
+}
+pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
+ needs_sib_byte(reg) || needs_offset(reg)
+}
+
+fn test_input(
+ op_index: usize,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+ condition_func: fn(RegUnit) -> bool,
+) -> bool {
+ let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
+ condition_func(in_reg)
+}
+
+fn test_result(
+ result_index: usize,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+ condition_func: fn(RegUnit) -> bool,
+) -> bool {
+ let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations);
+ condition_func(out_reg)
+}
+
+fn size_plus_maybe_offset_for_inreg_0(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ let needs_offset = test_input(0, inst, divert, func, needs_offset);
+ sizing.base_size + if needs_offset { 1 } else { 0 }
+}
+fn size_plus_maybe_offset_for_inreg_1(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ let needs_offset = test_input(1, inst, divert, func, needs_offset);
+ sizing.base_size + if needs_offset { 1 } else { 0 }
+}
+fn size_plus_maybe_sib_for_inreg_0(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ let needs_sib = test_input(0, inst, divert, func, needs_sib_byte);
+ sizing.base_size + if needs_sib { 1 } else { 0 }
+}
+fn size_plus_maybe_sib_for_inreg_1(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ let needs_sib = test_input(1, inst, divert, func, needs_sib_byte);
+ sizing.base_size + if needs_sib { 1 } else { 0 }
+}
+fn size_plus_maybe_sib_or_offset_for_inreg_0(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset);
+ sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
+}
+fn size_plus_maybe_sib_or_offset_for_inreg_1(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset);
+ sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
+}
+
+/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1)
+/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset.
+fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
+ sizing: &RecipeSizing,
+ enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
+ || test_input(1, inst, divert, func, is_extended_reg);
+ size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func)
+ + if needs_rex { 1 } else { 0 }
+}
+
+/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1)
+/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB.
+fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1(
+ sizing: &RecipeSizing,
+ enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
+ || test_input(1, inst, divert, func, is_extended_reg);
+ size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
+}
+
+/// Calculates the size while inferring if the first input register (inreg0) and first output
+/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a
+/// SIB or offset.
+fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
+ sizing: &RecipeSizing,
+ enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
+ || test_result(0, inst, divert, func, is_extended_reg);
+ size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func)
+ + if needs_rex { 1 } else { 0 }
+}
+
+/// Calculates the size while inferring if the first input register (inreg0) and first output
+/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a
+/// SIB.
+fn size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
+ sizing: &RecipeSizing,
+ enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
+ || test_result(0, inst, divert, func, is_extended_reg);
+ size_plus_maybe_sib_for_inreg_0(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg.
+///
+/// A REX prefix is known to be emitted if either:
+/// 1. The EncodingBits specify that REX.W is to be set.
+/// 2. Registers are used that require REX.R or REX.B bits for encoding.
+fn size_with_inferred_rex_for_inreg0(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on the second operand.
+fn size_with_inferred_rex_for_inreg1(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(1, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on the third operand.
+fn size_with_inferred_rex_for_inreg2(
+ sizing: &RecipeSizing,
+ _: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(2, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers.
+///
+/// A REX prefix is known to be emitted if either:
+/// 1. The EncodingBits specify that REX.W is to be set.
+/// 2. Registers are used that require REX.R or REX.B bits for encoding.
+fn size_with_inferred_rex_for_inreg0_inreg1(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
+ || test_input(1, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand.
+fn size_with_inferred_rex_for_inreg1_inreg2(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
+ || test_input(2, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on a single
+/// input register and a single output register.
+fn size_with_inferred_rex_for_inreg0_outreg0(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
+ || test_result(0, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on a single output register.
+fn size_with_inferred_rex_for_outreg0(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_result(0, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
+///
+/// CMOV uses 3 inputs, with the REX prefix inferred from reg1 and reg2.
+fn size_with_inferred_rex_for_cmov(
+ sizing: &RecipeSizing,
+ _enc: Encoding,
+ inst: Inst,
+ divert: &RegDiversions,
+ func: &Function,
+) -> u8 {
+ // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+ let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
+ || test_input(2, inst, divert, func, is_extended_reg);
+ sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// If the value's definition is a constant immediate, returns its unpacked value, or None
+/// otherwise.
+fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option<i64> {
+ if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) {
+ if let ir::InstructionData::UnaryImm {
+ opcode: ir::Opcode::Iconst,
+ imm,
+ } = &pos.func.dfg[*inst]
+ {
+ let value: i64 = (*imm).into();
+ Some(value)
+ } else {
+ None
+ }
+ } else {
+ None
+ }
+}
+
+/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
+fn expand_sdivrem(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ let (x, y, is_srem) = match func.dfg[inst] {
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Sdiv,
+ args,
+ } => (args[0], args[1], false),
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Srem,
+ args,
+ } => (args[0], args[1], true),
+ _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
+ };
+
+ let old_block = func.layout.pp_block(inst);
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+ pos.func.dfg.clear_results(inst);
+
+ let avoid_div_traps = isa.flags().avoid_div_traps();
+
+ // If we can tolerate native division traps, sdiv doesn't need branching.
+ if !avoid_div_traps && !is_srem {
+ let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+ pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
+ pos.remove_inst();
+ return;
+ }
+
+ // Try to remove checks if the input value is an immediate other than 0 or -1. For these two
+ // immediates, we'd ideally replace conditional traps by traps, but this requires more
+ // manipulation of the dfg/cfg, which is out of scope here.
+ let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) {
+ (imm == 0, imm == -1)
+ } else {
+ (true, true)
+ };
+
+ // Put in an explicit division-by-zero trap if the environment requires it.
+ if avoid_div_traps && could_be_zero {
+ pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
+ }
+
+ if !could_be_minus_one {
+ let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+ let reuse = if is_srem {
+ [None, Some(result)]
+ } else {
+ [Some(result), None]
+ };
+ pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y);
+ pos.remove_inst();
+ return;
+ }
+
+ // block handling the nominal case.
+ let nominal = pos.func.dfg.make_block();
+
+ // block handling the -1 divisor case.
+ let minus_one = pos.func.dfg.make_block();
+
+ // Final block with one argument representing the final result value.
+ let done = pos.func.dfg.make_block();
+
+ // Move the `inst` result value onto the `done` block.
+ pos.func.dfg.attach_block_param(done, result);
+
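+ // (Added note) The control flow constructed below is:
+ //   entry:     if y == -1 goto minus_one else goto nominal
+ //   nominal:   x86_sdivmodx (still traps on division by zero), jump done
+ //   minus_one: srem -> result 0; sdiv -> trap on INT_MIN overflow, else -x
+ //   done:      a single block parameter carries the result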
+ // Start by checking for a -1 divisor which needs to be handled specially.
+ let is_m1 = pos.ins().ifcmp_imm(y, -1);
+ pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
+ pos.ins().jump(nominal, &[]);
+
+ // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
+ // by zero.
+ pos.insert_block(nominal);
+ let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+ let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
+ let divres = if is_srem { rem } else { quot };
+ pos.ins().jump(done, &[divres]);
+
+ // Now deal with the -1 divisor case.
+ pos.insert_block(minus_one);
+ let m1_result = if is_srem {
+ // x % -1 = 0.
+ pos.ins().iconst(ty, 0)
+ } else {
+ // Explicitly check for overflow: Trap when x == INT_MIN.
+ debug_assert!(avoid_div_traps, "Native trapping divide handled above");
+ let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
+ pos.ins()
+ .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow);
+ // x / -1 = -x.
+ pos.ins().irsub_imm(x, 0)
+ };
+
+ // Recycle the original instruction as a jump.
+ pos.func.dfg.replace(inst).jump(done, &[m1_result]);
+
+ // Finally insert a label for the completion.
+ pos.next_inst();
+ pos.insert_block(done);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, nominal);
+ cfg.recompute_block(pos.func, minus_one);
+ cfg.recompute_block(pos.func, done);
+}
+
+/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
+fn expand_udivrem(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ let (x, y, is_urem) = match func.dfg[inst] {
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Udiv,
+ args,
+ } => (args[0], args[1], false),
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Urem,
+ args,
+ } => (args[0], args[1], true),
+ _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
+ };
+ let avoid_div_traps = isa.flags().avoid_div_traps();
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+ pos.func.dfg.clear_results(inst);
+
+ // Put in an explicit division-by-zero trap if the environment requires it.
+ if avoid_div_traps {
+ let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) {
+ // Ideally, we'd just replace the conditional trap with a trap when the immediate is
+ // zero, but this requires more manipulation of the dfg/cfg, which is out of scope
+ // here.
+ imm == 0
+ } else {
+ true
+ };
+ if zero_check {
+ pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
+ }
+ }
+
+ // Now it is safe to execute the `x86_udivmodx` instruction.
+ let xhi = pos.ins().iconst(ty, 0);
+ let reuse = if is_urem {
+ [None, Some(result)]
+ } else {
+ [Some(result), None]
+ };
+ pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
+ pos.remove_inst();
+}
+
+/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
+/// instructions.
+fn expand_minmax(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Fmin,
+ args,
+ } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Fmax,
+ args,
+ } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
+ _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
+ };
+ let old_block = func.layout.pp_block(inst);
+
+ // We need to handle the following conditions, depending on how x and y compare:
+ //
+ // 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
+ // 2. EQ: We need to use `bitwise_opc` to make sure that
+ // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
+ // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
+
+ // block handling case 1) where operands are ordered but not equal.
+ let one_block = func.dfg.make_block();
+
+ // block handling case 3) where one operand is NaN.
+ let uno_block = func.dfg.make_block();
+
+ // block that handles the unordered or equal cases 2) and 3).
+ let ueq_block = func.dfg.make_block();
+
+ // block handling case 2) where operands are ordered and equal.
+ let eq_block = func.dfg.make_block();
+
+ // Final block with one argument representing the final result value.
+ let done = func.dfg.make_block();
+
+ // The basic blocks are laid out to minimize branching for the common cases:
+ //
+ // 1) One branch not taken, one jump.
+ // 2) One branch taken.
+ // 3) Two branches taken, one jump.
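+ //
+ // (Added note) Concretely, the control flow constructed below is:
+ //   entry:     if UEQ(x, y) goto ueq_block else goto one_block
+ //   one_block: x86 min/max, jump done
+ //   ueq_block: if UN(x, y) goto uno_block else goto eq_block
+ //   uno_block: fadd (propagates the NaN), jump done
+ //   eq_block:  bor/band to fix the sign of zero, jump done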
+
+ // Move the `inst` result value onto the `done` block.
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+ func.dfg.clear_results(inst);
+ func.dfg.attach_block_param(done, result);
+
+ // Test for case 1) ordered and not equal.
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+ let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
+ pos.ins().brnz(cmp_ueq, ueq_block, &[]);
+ pos.ins().jump(one_block, &[]);
+
+ // Handle the common ordered, not equal (LT|GT) case.
+ pos.insert_block(one_block);
+ let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
+ let one_result = pos.func.dfg.first_result(one_inst);
+ pos.ins().jump(done, &[one_result]);
+
+ // Case 3) Unordered.
+ // We know that at least one operand is a NaN that needs to be propagated. We simply use an
+ // `fadd` instruction which has the same NaN propagation semantics.
+ pos.insert_block(uno_block);
+ let uno_result = pos.ins().fadd(x, y);
+ pos.ins().jump(done, &[uno_result]);
+
+ // Case 2) or 3).
+ pos.insert_block(ueq_block);
+ // Test for case 3) (UN) one value is NaN.
+ // TODO: When we get support for flag values, we can reuse the above comparison.
+ let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
+ pos.ins().brnz(cmp_uno, uno_block, &[]);
+ pos.ins().jump(eq_block, &[]);
+
+ // We are now in case 2) where x and y compare EQ.
+ // We need a bitwise operation to get the sign right.
+ pos.insert_block(eq_block);
+ let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
+ let bw_result = pos.func.dfg.first_result(bw_inst);
+ // This should become a fall-through for this second most common case.
+ // Recycle the original instruction as a jump.
+ pos.func.dfg.replace(inst).jump(done, &[bw_result]);
+
+ // Finally insert a label for the completion.
+ pos.next_inst();
+ pos.insert_block(done);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, one_block);
+ cfg.recompute_block(pos.func, uno_block);
+ cfg.recompute_block(pos.func, ueq_block);
+ cfg.recompute_block(pos.func, eq_block);
+ cfg.recompute_block(pos.func, done);
+}
+
+/// This legalization converts a minimum/maximum operation into a sequence that matches the
+/// non-x86-friendly WebAssembly semantics of NaN handling. This logic is kept separate from
+/// [expand_minmax] above (the scalar version) for code clarity.
+fn expand_minmax_vector(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let ty = func.dfg.ctrl_typevar(inst);
+ debug_assert!(ty.is_vector());
+ let (x, y, x86_opcode, is_max) = match func.dfg[inst] {
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Fmin,
+ args,
+ } => (args[0], args[1], ir::Opcode::X86Fmin, false),
+ ir::InstructionData::Binary {
+ opcode: ir::Opcode::Fmax,
+ args,
+ } => (args[0], args[1], ir::Opcode::X86Fmax, true),
+ _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
+ };
+
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ // This sequence is complex due to how x86 handles NaNs and +0/-0. If x86 finds a NaN in
+ // either lane it returns the second operand; likewise, if both operands are in {+0.0, -0.0}
+ // it returns the second operand. To match the behavior of "return the minimum of the
+ // operands or a canonical NaN if either operand is NaN," we must compare in both
+ // directions.
+ let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y);
+ let forward = dfg.first_result(forward_inst);
+ let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x);
+ let backward = dfg.first_result(backward_inst);
+
+ let (value, mask) = if is_max {
+ // For maximum:
+ // Find any differences between the forward and backward `max` operation.
+ let difference = pos.ins().bxor(forward, backward);
+ // Merge in the differences.
+ let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference);
+ let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference);
+ // Discover which lanes have NaNs in them.
+ let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value);
+ (value, find_nan_lanes_mask)
+ } else {
+ // For minimum:
+ // If either lane is a NaN, we want to use these bits, not the second operand bits.
+ let propagate_nans = pos.ins().bor(backward, forward);
+ // Find which lanes contain a NaN with an unordered comparison, filling the mask with
+ // 1s.
+ let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans);
+ let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask);
+ // Then flood the value lane with all 1s if that lane is a NaN. This causes all NaNs
+ // along this code path to be quieted and negative: after the upcoming shift and and_not,
+ // all upper bits (sign, exponent, and payload MSB) will be 1s.
+ let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask);
+ (tmp, bitcast_find_nan_lanes_mask)
+ };
+
+ // During this lowering we will need to know how many bits to shift by and what type to
+ // convert to when using an integer shift. Recall that an IEEE754 number looks like:
+ // `[sign bit] [exponent bits] [significand bits]`
+ // A quiet NaN has all exponent bits set to 1 and the most significant bit of the
+ // significand set to 1; a signaling NaN has the same exponent but the MSB of the
+ // significand is set to 0. The payload of the NaN is the remaining significand bits, and
+ // WebAssembly assumes a canonical NaN is quiet and has 0s in its payload. To compute this
+ // canonical NaN, we create a mask for the top 10 bits on F32X4 (1 sign + 8 exp. + 1 MSB
+ // sig.) and the top 13 bits on F64X2 (1 sign + 11 exp. + 1 MSB sig.). This means that all
+ // NaNs produced with the mask will be negative (`-NaN`) which is allowed by the sign
+ // non-determinism in the spec: https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0
+ let (shift_by, ty_as_int) = match ty {
+ F32X4 => (10, I32X4),
+ F64X2 => (13, I64X2),
+ _ => unimplemented!("this legalization only understands 128-bit floating point types"),
+ };
+
+ // In order to clear the NaN payload for canonical NaNs, we shift right the NaN lanes (all
+ // 1s) leaving 0s in the top bits. Remember that non-NaN lanes are all 0s so this has
+ // little effect.
+ let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask);
+ let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by);
+ let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask);
+
+ // Finally, we replace the value with `value & ~shift_mask`. For non-NaN lanes, this is
+ // equivalent to `... & 1111...` but for NaN lanes this will only have 1s in the top bits,
+ // clearing the payload.
+ pos.func
+ .dfg
+ .replace(inst)
+ .band_not(value, shift_mask_as_float);
+}
+
+/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
+/// i64 with a pattern; the rest needs more code.
+///
+/// Note that this is the scalar implementation; for the vector implementation see
+/// [expand_fcvt_from_uint_vector].
+fn expand_fcvt_from_uint(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let x;
+ match func.dfg[inst] {
+ ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtFromUint,
+ arg,
+ } => x = arg,
+ _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
+ }
+ let xty = func.dfg.value_type(x);
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ // Conversion from an unsigned int smaller than 64bit is easy on x86-64.
+ match xty {
+ ir::types::I8 | ir::types::I16 | ir::types::I32 => {
+ // TODO: This should be guarded by an ISA check.
+ let wide = pos.ins().uextend(ir::types::I64, x);
+ pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
+ return;
+ }
+ ir::types::I64 => {}
+ _ => unimplemented!(),
+ }
+
+ let old_block = pos.func.layout.pp_block(inst);
+
+ // block handling the case where x >= 0.
+ let poszero_block = pos.func.dfg.make_block();
+
+ // block handling the case where x < 0.
+ let neg_block = pos.func.dfg.make_block();
+
+ // Final block with one argument representing the final result value.
+ let done = pos.func.dfg.make_block();
+
+ // Move the `inst` result value onto the `done` block.
+ pos.func.dfg.clear_results(inst);
+ pos.func.dfg.attach_block_param(done, result);
+
+ // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
+ let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
+ pos.ins().brnz(is_neg, neg_block, &[]);
+ pos.ins().jump(poszero_block, &[]);
+
+ // Easy case: just use a signed conversion.
+ pos.insert_block(poszero_block);
+ let posres = pos.ins().fcvt_from_sint(ty, x);
+ pos.ins().jump(done, &[posres]);
+
+ // Now handle the negative case.
+ pos.insert_block(neg_block);
+
+ // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
+ // back up on the FP side.
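+ // (Added note) Or-ing the LSB back in acts as a sticky bit, so the final
+ // doubling rounds the same way a direct 64-bit unsigned conversion would;
+ // e.g. x = u64::MAX halves to 2^63 - 1 (LSB already set), converts to 2^63
+ // as an f64, and doubles to 2^64, the correctly rounded result.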
+ let ihalf = pos.ins().ushr_imm(x, 1);
+ let lsb = pos.ins().band_imm(x, 1);
+ let ifinal = pos.ins().bor(ihalf, lsb);
+ let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
+ let negres = pos.ins().fadd(fhalf, fhalf);
+
+ // Recycle the original instruction as a jump.
+ pos.func.dfg.replace(inst).jump(done, &[negres]);
+
+ // Finally insert a label for the completion.
+ pos.next_inst();
+ pos.insert_block(done);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, poszero_block);
+ cfg.recompute_block(pos.func, neg_block);
+ cfg.recompute_block(pos.func, done);
+}
+
+/// To convert packed unsigned integers to their float equivalents, we must legalize to a special
+/// AVX512 instruction (using MCSR rounding) or use a long sequence of instructions. This logic is
+/// separate from [expand_fcvt_from_uint] above (the scalar version), only due to how the transform
+/// groups are set up; TODO if we change the SIMD legalization groups, then this logic could be
+/// merged into [expand_fcvt_from_uint] (see https://github.com/bytecodealliance/wasmtime/issues/1745).
+fn expand_fcvt_from_uint_vector(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtFromUint,
+ arg,
+ } = pos.func.dfg[inst]
+ {
+ let controlling_type = pos.func.dfg.ctrl_typevar(inst);
+ if controlling_type == F32X4 {
+ debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4);
+ let x86_isa = isa
+ .as_any()
+ .downcast_ref::<isa::x86::Isa>()
+ .expect("the target ISA must be x86 at this point");
+ if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() {
+ // If we have certain AVX512 features, we can lower this instruction simply.
+ pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg);
+ } else {
+ // Otherwise, we default to a very lengthy SSE4.1-compatible sequence: PXOR,
+ // PBLENDW, PSUB, CVTDQ2PS, PSRLD, CVTDQ2PS, ADDPS, ADDPS
+ let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg);
+ let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into());
+ let zero = pos.ins().vconst(I16X8, zero_constant);
+ let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55);
+ let bitcast_low = pos.ins().raw_bitcast(I32X4, low);
+ let high = pos.ins().isub(arg, bitcast_low);
+ let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low);
+ let shift_high = pos.ins().ushr_imm(high, 1);
+ let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high);
+ let double_high = pos.ins().fadd(convert_high, convert_high);
+ pos.func.dfg.replace(inst).fadd(double_high, convert_low);
+ }
+ } else {
+ unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
+ }
+ }
+}
+
+fn expand_fcvt_to_sint(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ use crate::ir::immediates::{Ieee32, Ieee64};
+
+ let x = match func.dfg[inst] {
+ ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtToSint,
+ arg,
+ } => arg,
+ _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
+ };
+ let old_block = func.layout.pp_block(inst);
+ let xty = func.dfg.value_type(x);
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+
+ // Final block after the bad value checks.
+ let done = func.dfg.make_block();
+
+ // block for checking failure cases.
+ let maybe_trap_block = func.dfg.make_block();
+
+ // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
+ // It produces an INT_MIN result instead.
+ func.dfg.replace(inst).x86_cvtt2si(ty, x);
+
+ let mut pos = FuncCursor::new(func).after_inst(inst);
+ pos.use_srcloc(inst);
+
+ let is_done = pos
+ .ins()
+ .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1));
+ pos.ins().brnz(is_done, done, &[]);
+ pos.ins().jump(maybe_trap_block, &[]);
+
+ // We now have the following possibilities:
+ //
+ // 1. INT_MIN was actually the correct conversion result.
+ // 2. The input was NaN -> trap bad_toint
+ // 3. The input was out of range -> trap int_ovf
+ //
+ pos.insert_block(maybe_trap_block);
+
+ // Check for NaN.
+ let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
+ pos.ins()
+ .trapnz(is_nan, ir::TrapCode::BadConversionToInteger);
+
+ // Check for case 1: INT_MIN is the correct result.
+ // Determine the smallest floating point number that would convert to INT_MIN.
+ let mut overflow_cc = FloatCC::LessThan;
+ let output_bits = ty.lane_bits();
+ let flimit = match xty {
+ ir::types::F32 =>
+ // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
+ // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+ {
+ pos.ins().f32const(if output_bits < 32 {
+ overflow_cc = FloatCC::LessThanOrEqual;
+ Ieee32::fcvt_to_sint_negative_overflow(output_bits)
+ } else {
+ Ieee32::pow2(output_bits - 1).neg()
+ })
+ }
+ ir::types::F64 =>
+ // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
+ // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+ {
+ pos.ins().f64const(if output_bits < 64 {
+ overflow_cc = FloatCC::LessThanOrEqual;
+ Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+ } else {
+ Ieee64::pow2(output_bits - 1).neg()
+ })
+ }
+ _ => panic!("Can't convert {}", xty),
+ };
+ let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
+ pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
+
+ // Finally, we could have a positive value that is too large.
+ let fzero = match xty {
+ ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
+ ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
+ _ => panic!("Can't convert {}", xty),
+ };
+ let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
+ pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
+
+ pos.ins().jump(done, &[]);
+ pos.insert_block(done);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, maybe_trap_block);
+ cfg.recompute_block(pos.func, done);
+}
+
+fn expand_fcvt_to_sint_sat(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ use crate::ir::immediates::{Ieee32, Ieee64};
+
+ let x = match func.dfg[inst] {
+ ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtToSintSat,
+ arg,
+ } => arg,
+ _ => panic!(
+ "Need fcvt_to_sint_sat: {}",
+ func.dfg.display_inst(inst, None)
+ ),
+ };
+
+ let old_block = func.layout.pp_block(inst);
+ let xty = func.dfg.value_type(x);
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+
+ // Final block after the bad value checks.
+ let done_block = func.dfg.make_block();
+ let intmin_block = func.dfg.make_block();
+ let minsat_block = func.dfg.make_block();
+ let maxsat_block = func.dfg.make_block();
+ func.dfg.clear_results(inst);
+ func.dfg.attach_block_param(done_block, result);
+
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or
+ // overflow. It produces an INT_MIN result instead.
+ let cvtt2si = pos.ins().x86_cvtt2si(ty, x);
+
+ let is_done = pos
+ .ins()
+ .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
+ pos.ins().brnz(is_done, done_block, &[cvtt2si]);
+ pos.ins().jump(intmin_block, &[]);
+
+ // We now have the following possibilities:
+ //
+ // 1. INT_MIN was actually the correct conversion result.
+ // 2. The input was NaN -> replace the result value with 0.
+ // 3. The input was out of range -> saturate the result to the min/max value.
+ pos.insert_block(intmin_block);
+
+ // Check for NaN, which is truncated to 0.
+ let zero = pos.ins().iconst(ty, 0);
+ let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
+ pos.ins().brnz(is_nan, done_block, &[zero]);
+ pos.ins().jump(minsat_block, &[]);
+
+ // Check for case 1: INT_MIN is the correct result.
+ // Determine the smallest floating point number that would convert to INT_MIN.
+ pos.insert_block(minsat_block);
+ let mut overflow_cc = FloatCC::LessThan;
+ let output_bits = ty.lane_bits();
+ let flimit = match xty {
+ ir::types::F32 =>
+ // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
+ // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+ {
+ pos.ins().f32const(if output_bits < 32 {
+ overflow_cc = FloatCC::LessThanOrEqual;
+ Ieee32::fcvt_to_sint_negative_overflow(output_bits)
+ } else {
+ Ieee32::pow2(output_bits - 1).neg()
+ })
+ }
+ ir::types::F64 =>
+ // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
+ // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+ {
+ pos.ins().f64const(if output_bits < 64 {
+ overflow_cc = FloatCC::LessThanOrEqual;
+ Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+ } else {
+ Ieee64::pow2(output_bits - 1).neg()
+ })
+ }
+ _ => panic!("Can't convert {}", xty),
+ };
+
+ let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
+ let min_imm = match ty {
+ ir::types::I32 => i32::min_value() as i64,
+ ir::types::I64 => i64::min_value(),
+ _ => panic!("Don't know the min value for {}", ty),
+ };
+ let min_value = pos.ins().iconst(ty, min_imm);
+ pos.ins().brnz(overflow, done_block, &[min_value]);
+ pos.ins().jump(maxsat_block, &[]);
+
+ // Finally, we could have a positive value that is too large.
+ pos.insert_block(maxsat_block);
+ let fzero = match xty {
+ ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
+ ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
+ _ => panic!("Can't convert {}", xty),
+ };
+
+ let max_imm = match ty {
+ ir::types::I32 => i32::max_value() as i64,
+ ir::types::I64 => i64::max_value(),
+ _ => panic!("Don't know the max value for {}", ty),
+ };
+ let max_value = pos.ins().iconst(ty, max_imm);
+
+ let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
+ pos.ins().brnz(overflow, done_block, &[max_value]);
+
+ // Recycle the original instruction.
+ pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]);
+
+ // Finally insert a label for the completion.
+ pos.next_inst();
+ pos.insert_block(done_block);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, intmin_block);
+ cfg.recompute_block(pos.func, minsat_block);
+ cfg.recompute_block(pos.func, maxsat_block);
+ cfg.recompute_block(pos.func, done_block);
+}
+
+/// This legalization converts a vector of 32-bit floating point lanes to signed integer lanes
+/// using CVTTPS2DQ (see encoding of `x86_cvtt2si`). This logic is separate from [expand_fcvt_to_sint_sat]
+/// above (the scalar version) only because of how the transform groups are set up; TODO: if we
+/// change the SIMD legalization groups, this logic could be merged into [expand_fcvt_to_sint_sat]
+/// (see https://github.com/bytecodealliance/wasmtime/issues/1745).
+fn expand_fcvt_to_sint_sat_vector(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtToSintSat,
+ arg,
+ } = pos.func.dfg[inst]
+ {
+ let controlling_type = pos.func.dfg.ctrl_typevar(inst);
+ if controlling_type == I32X4 {
+ debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
+ // We must both quiet any NaNs--setting that lane to 0--and saturate any
+ // lanes that might overflow during conversion to the highest/lowest signed integer
+ // allowed in that lane.
+
+ // Zero out NaN lanes: `fcmp eq` does not match a lane containing a NaN, so ANDing the
+ // comparison mask with the input clears those lanes; we also reuse the mask below to find
+ // differences, avoiding a second comparison.
+ let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg);
+ let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans);
+ let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast);
+
+ // Find differences with the zeroed lanes (we will only use the MSB: 1 if positive or
+ // NaN, 0 otherwise).
+ let differences = pos.ins().bxor(zeroed_nans_bitcast, arg);
+ let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences);
+
+ // Convert the numeric lanes. CVTTPS2DQ will mark overflows with 0x80000000 (MSB set).
+ let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy);
+
+ // Create a mask of all 1s only on positive overflow, 0s otherwise. This uses the MSB
+ // of `differences` (1 when positive or NaN) and the MSB of `converted` (1 on positive
+ // overflow).
+ let tmp = pos.ins().band(differences_bitcast, converted);
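+ // The arithmetic right shift by 31 broadcasts each lane's MSB across the whole lane.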
+ let mask = pos.ins().sshr_imm(tmp, 31);
+
+ // Apply the mask to create 0x7FFFFFFF for positive overflow. XOR of all 0s (all other
+ // cases) has no effect.
+ pos.func.dfg.replace(inst).bxor(converted, mask);
+ } else {
+ unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
+ }
+ }
+}
+
+fn expand_fcvt_to_uint(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ use crate::ir::immediates::{Ieee32, Ieee64};
+
+ let x = match func.dfg[inst] {
+ ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtToUint,
+ arg,
+ } => arg,
+ _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
+ };
+
+ let old_block = func.layout.pp_block(inst);
+ let xty = func.dfg.value_type(x);
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+
+ // block handling numbers < 2^(N-1).
+ let below_uint_max_block = func.dfg.make_block();
+
+ // block handling numbers < 0.
+ let below_zero_block = func.dfg.make_block();
+
+ // block handling numbers >= 2^(N-1).
+ let large = func.dfg.make_block();
+
+ // Final block after the bad value checks.
+ let done = func.dfg.make_block();
+
+ // Move the `inst` result value onto the `done` block.
+ func.dfg.clear_results(inst);
+ func.dfg.attach_block_param(done, result);
+
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
+ // the destination integer type.
+ let pow2nm1 = match xty {
+ ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
+ ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
+ _ => panic!("Can't convert {}", xty),
+ };
+ let is_large = pos.ins().ffcmp(x, pow2nm1);
+ pos.ins()
+ .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
+ pos.ins().jump(below_uint_max_block, &[]);
+
+ // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
+ // previous comparison.
+ pos.insert_block(below_uint_max_block);
+ pos.ins().trapff(
+ FloatCC::Unordered,
+ is_large,
+ ir::TrapCode::BadConversionToInteger,
+ );
+
+ // Now we know that x < 2^(N-1) and not NaN.
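+ // A non-negative signed conversion result is therefore also the correct unsigned
+ // result; a negative result means the truncated input was below zero, which is out of
+ // range for an unsigned conversion.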
+ let sres = pos.ins().x86_cvtt2si(ty, x);
+ let is_neg = pos.ins().ifcmp_imm(sres, 0);
+ pos.ins()
+ .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
+ pos.ins().jump(below_zero_block, &[]);
+
+ pos.insert_block(below_zero_block);
+ pos.ins().trap(ir::TrapCode::IntegerOverflow);
+
+ // Handle the case where x >= 2^(N-1) and not NaN.
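+ // Bias the input down by 2^(N-1) so it fits in the signed range, convert it, and add
+ // 2^(N-1) back as an integer; the wrapping add restores the unsigned bit pattern.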
+ pos.insert_block(large);
+ let adjx = pos.ins().fsub(x, pow2nm1);
+ let lres = pos.ins().x86_cvtt2si(ty, adjx);
+ let is_neg = pos.ins().ifcmp_imm(lres, 0);
+ pos.ins()
+ .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
+ let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
+
+ // Recycle the original instruction as a jump.
+ pos.func.dfg.replace(inst).jump(done, &[lfinal]);
+
+ // Finally insert a label for the completion.
+ pos.next_inst();
+ pos.insert_block(done);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, below_uint_max_block);
+ cfg.recompute_block(pos.func, below_zero_block);
+ cfg.recompute_block(pos.func, large);
+ cfg.recompute_block(pos.func, done);
+}
+
+fn expand_fcvt_to_uint_sat(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ use crate::ir::immediates::{Ieee32, Ieee64};
+
+ let x = match func.dfg[inst] {
+ ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtToUintSat,
+ arg,
+ } => arg,
+ _ => panic!(
+ "Need fcvt_to_uint_sat: {}",
+ func.dfg.display_inst(inst, None)
+ ),
+ };
+
+ let old_block = func.layout.pp_block(inst);
+ let xty = func.dfg.value_type(x);
+ let result = func.dfg.first_result(inst);
+ let ty = func.dfg.value_type(result);
+
+ // block handling numbers < 2^(N-1).
+ let below_pow2nm1_or_nan_block = func.dfg.make_block();
+ let below_pow2nm1_block = func.dfg.make_block();
+
+ // block handling numbers >= 2^(N-1).
+ let large = func.dfg.make_block();
+
+ // block handling numbers < 2^N.
+ let uint_large_block = func.dfg.make_block();
+
+ // Final block after the bad value checks.
+ let done = func.dfg.make_block();
+
+ // Move the `inst` result value onto the `done` block.
+ func.dfg.clear_results(inst);
+ func.dfg.attach_block_param(done, result);
+
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
+ // the destination integer type.
+ let pow2nm1 = match xty {
+ ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
+ ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
+ _ => panic!("Can't convert {}", xty),
+ };
+ let zero = pos.ins().iconst(ty, 0);
+ let is_large = pos.ins().ffcmp(x, pow2nm1);
+ pos.ins()
+ .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
+ pos.ins().jump(below_pow2nm1_or_nan_block, &[]);
+
+ // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
+ pos.insert_block(below_pow2nm1_or_nan_block);
+ pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
+ pos.ins().jump(below_pow2nm1_block, &[]);
+
+ // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're
+ // done; otherwise saturate to the minimum unsigned value, that is 0.
+ pos.insert_block(below_pow2nm1_block);
+ let sres = pos.ins().x86_cvtt2si(ty, x);
+ let is_neg = pos.ins().ifcmp_imm(sres, 0);
+ pos.ins()
+ .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
+ pos.ins().jump(done, &[zero]);
+
+ // Handle the case where x >= 2^(N-1) and not NaN.
+ pos.insert_block(large);
+ let adjx = pos.ins().fsub(x, pow2nm1);
+ let lres = pos.ins().x86_cvtt2si(ty, adjx);
+ let max_value = pos.ins().iconst(
+ ty,
+ match ty {
+ ir::types::I32 => u32::max_value() as i64,
+ ir::types::I64 => u64::max_value() as i64,
+ _ => panic!("Can't convert {}", ty),
+ },
+ );
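+ // If the adjusted conversion still came back negative, the input was >= 2^N, so
+ // saturate to the unsigned maximum.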
+ let is_neg = pos.ins().ifcmp_imm(lres, 0);
+ pos.ins()
+ .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
+ pos.ins().jump(uint_large_block, &[]);
+
+ pos.insert_block(uint_large_block);
+ let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
+
+ // Recycle the original instruction as a jump.
+ pos.func.dfg.replace(inst).jump(done, &[lfinal]);
+
+ // Finally insert a label for the completion.
+ pos.next_inst();
+ pos.insert_block(done);
+
+ cfg.recompute_block(pos.func, old_block);
+ cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block);
+ cfg.recompute_block(pos.func, below_pow2nm1_block);
+ cfg.recompute_block(pos.func, large);
+ cfg.recompute_block(pos.func, uint_large_block);
+ cfg.recompute_block(pos.func, done);
+}
+
+// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4.
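+// Each little-endian lane below is 0x4f000000, i.e. 2147483648.0 (2^31), the value that
+// `i32::MAX as f32` rounds up to.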
+static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [
+ 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f,
+];
+
+/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes
+/// using a long sequence of NaN quieting and truncation. This logic is separate from
+/// [expand_fcvt_to_uint_sat] above (the scalar version) only because of how the transform groups
+/// are set up; TODO: if we change the SIMD legalization groups, this logic could be merged into
+/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745).
+fn expand_fcvt_to_uint_sat_vector(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Unary {
+ opcode: ir::Opcode::FcvtToUintSat,
+ arg,
+ } = pos.func.dfg[inst]
+ {
+ let controlling_type = pos.func.dfg.ctrl_typevar(inst);
+ if controlling_type == I32X4 {
+ debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
+ // We must both quiet any NaNs--setting that lane to 0--and saturate any
+ // lanes that might overflow during conversion to the highest/lowest integer
+ // allowed in that lane.
+ let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into());
+ let max_signed_constant = pos
+ .func
+ .dfg
+ .constants
+ .insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into());
+ let zeroes = pos.ins().vconst(F32X4, zeroes_constant);
+ let max_signed = pos.ins().vconst(F32X4, max_signed_constant);
+ // Clamp the input to 0 for negative floating point numbers. TODO we need to
+ // convert NaNs to 0 but this doesn't do that?
+ let ge_zero = pos.ins().x86_fmax(arg, zeroes);
+ // Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert.
+ // For floating point numbers above this, CVTTPS2DQ returns the undefined value
+ // 0x80000000.
+ let minus_max_signed = pos.ins().fsub(ge_zero, max_signed);
+ let le_max_signed =
+ pos.ins()
+ .fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed);
+ // Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0.
+ // These lanes have the MSB set to 1 after the XOR. We are trying to calculate a
+ // valid, in-range addend.
+ let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed);
+ let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed);
+ let difference = pos
+ .ins()
+ .bxor(minus_max_signed_as_int, le_max_signed_as_int);
+ // Calculate the amount to add above 0x7FFFFFFF, zeroing out any lanes identified
+ // previously (MSB set to 1).
+ let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes);
+ let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int);
+ // Convert the original clamped number to an integer and add back in the addend
+ // (the part of the value above 0x7FFFFFFF, since CVTTPS2DQ overflows with these).
+ let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero);
+ pos.func.dfg.replace(inst).iadd(converted, addend);
+ } else {
+ unreachable!(
+ "{} should not be legalized in expand_fcvt_to_uint_sat_vector",
+ pos.func.dfg.display_inst(inst, None)
+ )
+ }
+ }
+}
+
+/// Convert shuffle instructions.
+fn convert_shuffle(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] {
+ // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1
+ // in the most significant position zeroes the lane.
+ let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
+
+ // We only have to worry about aliasing here because copies will be introduced later (in
+ // regalloc).
+ let a = pos.func.dfg.resolve_aliases(args[0]);
+ let b = pos.func.dfg.resolve_aliases(args[1]);
+ let mask = pos
+ .func
+ .dfg
+ .immediates
+ .get(mask)
+ .expect("The shuffle immediate should have been recorded before this point")
+ .clone();
+ if a == b {
+ // PSHUFB the first argument (since it is the same as the second).
+ let constructed_mask = mask
+ .iter()
+ // If the mask is greater than 15 it still may be referring to a lane in b.
+ .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
+ .map(zero_unknown_lane_index)
+ .collect();
+ let handle = pos.func.dfg.constants.insert(constructed_mask);
+ // Move the built mask into another XMM register.
+ let a_type = pos.func.dfg.value_type(a);
+ let mask_value = pos.ins().vconst(a_type, handle);
+ // Shuffle the single incoming argument.
+ pos.func.dfg.replace(inst).x86_pshufb(a, mask_value);
+ } else {
+ // PSHUFB the first argument, placing zeroes for unused lanes.
+ let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
+ let handle = pos.func.dfg.constants.insert(constructed_mask);
+ // Move the built mask into another XMM register.
+ let a_type = pos.func.dfg.value_type(a);
+ let mask_value = pos.ins().vconst(a_type, handle);
+ // Shuffle the first argument.
+ let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value);
+
+ // PSHUFB the second argument, placing zeroes for unused lanes.
+ let constructed_mask = mask
+ .iter()
+ .map(|b| b.wrapping_sub(16))
+ .map(zero_unknown_lane_index)
+ .collect();
+ let handle = pos.func.dfg.constants.insert(constructed_mask);
+ // Move the built mask into another XMM register.
+ let b_type = pos.func.dfg.value_type(b);
+ let mask_value = pos.ins().vconst(b_type, handle);
+ // Shuffle the second argument.
+ let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value);
+
+ // OR the vectors together to form the final shuffled value.
+ pos.func
+ .dfg
+ .replace(inst)
+ .bor(shuffled_first_arg, shuffled_second_arg);
+
+ // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
+ };
+ }
+}
+
+/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF
+/// extractlane instruction.
+fn convert_extractlane(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::BinaryImm8 {
+ opcode: ir::Opcode::Extractlane,
+ arg,
+ imm: lane,
+ } = pos.func.dfg[inst]
+ {
+ // NOTE: the following legalization assumes that the upper bits of the XMM register do
+ // not need to be zeroed during extractlane.
+ let value_type = pos.func.dfg.value_type(arg);
+ if value_type.lane_type().is_float() {
+ // Floats are already in XMM registers and can stay there.
+ let shuffled = if lane != 0 {
+ // Replace the extractlane with a PSHUFD to get the float in the right place.
+ match value_type {
+ F32X4 => {
+ // Move the selected lane to the 0 lane.
+ let shuffle_mask: u8 = 0b00_00_00_00 | lane;
+ pos.ins().x86_pshufd(arg, shuffle_mask)
+ }
+ F64X2 => {
+ assert_eq!(lane, 1);
+ // Because we know the lane == 1, we move the upper 64 bits to the lower
+ // 64 bits, leaving the top 64 bits as-is.
+ let shuffle_mask = 0b11_10_11_10;
+ let bitcast = pos.ins().raw_bitcast(F32X4, arg);
+ pos.ins().x86_pshufd(bitcast, shuffle_mask)
+ }
+ _ => unreachable!(),
+ }
+ } else {
+ // Remove the extractlane instruction, leaving the float where it is.
+ arg
+ };
+ // Then we must bitcast to the right type.
+ pos.func
+ .dfg
+ .replace(inst)
+ .raw_bitcast(value_type.lane_type(), shuffled);
+ } else {
+ // For non-floats, lower with the usual PEXTR* instruction.
+ pos.func.dfg.replace(inst).x86_pextr(arg, lane);
+ }
+ }
+}
+
+/// Because floats exist in XMM registers, we can keep them there when executing a CLIF
+/// insertlane instruction.
+fn convert_insertlane(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::TernaryImm8 {
+ opcode: ir::Opcode::Insertlane,
+ args: [vector, replacement],
+ imm: lane,
+ } = pos.func.dfg[inst]
+ {
+ let value_type = pos.func.dfg.value_type(vector);
+ if value_type.lane_type().is_float() {
+ // Floats are already in XMM registers and can stay there.
+ match value_type {
+ F32X4 => {
+ assert!(lane <= 3);
+ let immediate = 0b00_00_00_00 | lane << 4;
+ // Insert 32 bits from replacement (source lane 00, immediate bits 7:6) into vector
+ // at the destination lane (shifted into immediate bits 5:4).
+ pos.func
+ .dfg
+ .replace(inst)
+ .x86_insertps(vector, replacement, immediate)
+ }
+ F64X2 => {
+ let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
+ if lane == 0 {
+ // Move the lowest quadword in replacement to vector without changing
+ // the upper bits.
+ pos.func
+ .dfg
+ .replace(inst)
+ .x86_movsd(vector, replacement_as_vector)
+ } else {
+ assert_eq!(lane, 1);
+ // Move the low 64 bits of replacement vector to the high 64 bits of the
+ // vector.
+ pos.func
+ .dfg
+ .replace(inst)
+ .x86_movlhps(vector, replacement_as_vector)
+ }
+ }
+ _ => unreachable!(),
+ };
+ } else {
+ // For non-floats, lower with the usual PINSR* instruction.
+ pos.func
+ .dfg
+ .replace(inst)
+ .x86_pinsr(vector, replacement, lane);
+ }
+ }
+}
+
+/// For SIMD or scalar integer negation, convert `ineg` to `vconst + isub` or `iconst + isub`.
+fn convert_ineg(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Unary {
+ opcode: ir::Opcode::Ineg,
+ arg,
+ } = pos.func.dfg[inst]
+ {
+ let value_type = pos.func.dfg.value_type(arg);
+ let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() {
+ let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into());
+ pos.ins().vconst(value_type, zero_immediate) // this should be legalized to a PXOR
+ } else if value_type.is_int() {
+ pos.ins().iconst(value_type, 0)
+ } else {
+ panic!("Can't convert ineg of type {}", value_type)
+ };
+ pos.func.dfg.replace(inst).isub(zero_value, arg);
+ } else {
+ unreachable!()
+ }
+}
+
+fn expand_dword_to_xmm<'f>(
+ pos: &mut FuncCursor<'f>,
+ arg: ir::Value,
+ arg_type: ir::Type,
+) -> ir::Value {
+ if arg_type == I64 {
+ let (arg_lo, arg_hi) = pos.ins().isplit(arg);
+ let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
+ let arg = pos.ins().insertlane(arg, arg_hi, 1);
+ let arg = pos.ins().raw_bitcast(I64X2, arg);
+ arg
+ } else {
+ pos.ins().bitcast(I64X2, arg)
+ }
+}
+
+fn contract_dword_from_xmm<'f>(
+ pos: &mut FuncCursor<'f>,
+ inst: ir::Inst,
+ ret: ir::Value,
+ ret_type: ir::Type,
+) {
+ if ret_type == I64 {
+ let ret = pos.ins().raw_bitcast(I32X4, ret);
+ let ret_lo = pos.ins().extractlane(ret, 0);
+ let ret_hi = pos.ins().extractlane(ret, 1);
+ pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
+ } else {
+ let ret = pos.ins().extractlane(ret, 0);
+ pos.func.dfg.replace(inst).ireduce(ret_type, ret);
+ }
+}
+
+// Masks for i8x16 unsigned right shift.
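+// Row `s` (the 16 bytes at offset 16 * s) keeps only the low `8 - s` bits of every byte,
+// clearing the bits that the i16x8 shift below pulls in from the neighboring byte.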
+static USHR_MASKS: [u8; 128] = [
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
+ 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+ 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+];
+
+// Convert a vector unsigned right shift. x86 has implementations for i16x8 and up (see `x86_psrl`),
+// but for i8x16 we translate the shift to an i16x8 shift and mask off the upper bits. This same
+// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex`
+// has a varargs field that we can't modify with the CDSL in legalize.rs.
+fn convert_ushr(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Binary {
+ opcode: ir::Opcode::Ushr,
+ args: [arg0, arg1],
+ } = pos.func.dfg[inst]
+ {
+ // Note that for Wasm, the bounding of the shift index has happened during translation
+ let arg0_type = pos.func.dfg.value_type(arg0);
+ let arg1_type = pos.func.dfg.value_type(arg1);
+ assert!(!arg1_type.is_vector() && arg1_type.is_int());
+
+ // TODO it may be more clear to use scalar_to_vector here; the current issue is that
+ // scalar_to_vector has the restriction that the vector produced has a matching lane size
+ // (e.g. i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 ->
+ // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift
+ // index type--this could be relaxed since it is not really meaningful.
+ let shift_index = pos.ins().bitcast(I64X2, arg1);
+
+ if arg0_type == I8X16 {
+ // First, shift the vector using an I16X8 shift.
+ let bitcasted = pos.ins().raw_bitcast(I16X8, arg0);
+ let shifted = pos.ins().x86_psrl(bitcasted, shift_index);
+ let shifted = pos.ins().raw_bitcast(I8X16, shifted);
+
+ // Then, fixup the even lanes that have incorrect upper bits. This uses the 128 mask
+ // bytes as a table that we index into. It is a substantial code-size increase but
+ // reduces the instruction count slightly.
+ let masks = pos.func.dfg.constants.insert(USHR_MASKS.as_ref().into());
+ let mask_address = pos.ins().const_addr(isa.pointer_type(), masks);
+ let mask_offset = pos.ins().ishl_imm(arg1, 4);
+ let mask =
+ pos.ins()
+ .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0);
+ pos.func.dfg.replace(inst).band(shifted, mask);
+ } else if arg0_type.is_vector() {
+ // x86 has encodings for these shifts.
+ pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
+ } else if arg0_type == I64 {
+ // 64 bit shifts need to be legalized on x86_32.
+ let x86_isa = isa
+ .as_any()
+ .downcast_ref::<isa::x86::Isa>()
+ .expect("the target ISA must be x86 at this point");
+ if x86_isa.isa_flags.has_sse41() {
+ // if we have pinstrq/pextrq (SSE 4.1), legalize to that
+ let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
+ let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
+ let shifted = pos.ins().x86_psrl(value, amount);
+ contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
+ } else {
+ // otherwise legalize to libcall
+ expand_as_libcall(inst, func, isa);
+ }
+ } else {
+ // Everything else should be already legal.
+ unreachable!()
+ }
+ }
+}
+
+// Masks for i8x16 left shift.
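+// Row `s` (the 16 bytes at offset 16 * s) keeps only the high `8 - s` bits of every byte,
+// clearing the bits that the i16x8 shift below pushes in from the neighboring byte.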
+static SHL_MASKS: [u8; 128] = [
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
+ 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
+ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+ 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+ 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
+ 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+];
+
+// Convert a vector left shift. x86 has implementations for i16x8 and up (see `x86_psll`),
+// but for i8x16 we translate the shift to an i16x8 shift and mask off the lower bits. This same
+// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex`
+// has a varargs field that we can't modify with the CDSL in legalize.rs.
+fn convert_ishl(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Binary {
+ opcode: ir::Opcode::Ishl,
+ args: [arg0, arg1],
+ } = pos.func.dfg[inst]
+ {
+ // Note that for Wasm, the bounding of the shift index has happened during translation
+ let arg0_type = pos.func.dfg.value_type(arg0);
+ let arg1_type = pos.func.dfg.value_type(arg1);
+ assert!(!arg1_type.is_vector() && arg1_type.is_int());
+
+ // TODO it may be more clear to use scalar_to_vector here; the current issue is that
+ // scalar_to_vector has the restriction that the vector produced has a matching lane size
+ // (e.g. i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 ->
+ // i64x2). This matters because for some reason x86_psll only allows i64x2 as the shift
+ // index type--this could be relaxed since it is not really meaningful.
+ let shift_index = pos.ins().bitcast(I64X2, arg1);
+
+ if arg0_type == I8X16 {
+ // First, shift the vector using an I16X8 shift.
+ let bitcasted = pos.ins().raw_bitcast(I16X8, arg0);
+ let shifted = pos.ins().x86_psll(bitcasted, shift_index);
+ let shifted = pos.ins().raw_bitcast(I8X16, shifted);
+
+ // Then, fixup the odd lanes that have incorrect lower bits. This uses the 128 mask
+ // bytes as a table that we index into. It is a substantial code-size increase but
+ // reduces the instruction count slightly.
+ let masks = pos.func.dfg.constants.insert(SHL_MASKS.as_ref().into());
+ let mask_address = pos.ins().const_addr(isa.pointer_type(), masks);
+ let mask_offset = pos.ins().ishl_imm(arg1, 4);
+ let mask =
+ pos.ins()
+ .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0);
+ pos.func.dfg.replace(inst).band(shifted, mask);
+ } else if arg0_type.is_vector() {
+ // x86 has encodings for these shifts.
+ pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
+ } else if arg0_type == I64 {
+ // 64 bit shifts need to be legalized on x86_32.
+ let x86_isa = isa
+ .as_any()
+ .downcast_ref::<isa::x86::Isa>()
+ .expect("the target ISA must be x86 at this point");
+ if x86_isa.isa_flags.has_sse41() {
+ // if we have pinstrq/pextrq (SSE 4.1), legalize to that
+ let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
+ let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
+ let shifted = pos.ins().x86_psll(value, amount);
+ contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
+ } else {
+ // otherwise legalize to libcall
+ expand_as_libcall(inst, func, isa);
+ }
+ } else {
+ // Everything else should be already legal.
+ unreachable!()
+ }
+ }
+}
+
+/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2.
+fn convert_i64x2_imul(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+ pos.use_srcloc(inst);
+
+ if let ir::InstructionData::Binary {
+ opcode: ir::Opcode::Imul,
+ args: [arg0, arg1],
+ } = pos.func.dfg[inst]
+ {
+ let ty = pos.func.dfg.ctrl_typevar(inst);
+ if ty == I64X2 {
+ let x86_isa = isa
+ .as_any()
+ .downcast_ref::<isa::x86::Isa>()
+ .expect("the target ISA must be x86 at this point");
+ if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
+ // If we have certain AVX512 features, we can lower this instruction simply.
+ pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
+ } else {
+ // Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each
+ // 64-bit lane into 32-bit high and low sections using shifting and then performs
+ // the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 =
+ // concat(high1, low1), calculate (((high0 * low1) + (high1 * low0)) << 32) + (low0 * low1).
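+ // `x86_pmuludq` multiplies only the low 32 bits of each 64-bit lane, so shifting a
+ // lane right by 32 first exposes its high half to the multiply.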
+ let high0 = pos.ins().ushr_imm(arg0, 32);
+ let mul0 = pos.ins().x86_pmuludq(high0, arg1);
+ let high1 = pos.ins().ushr_imm(arg1, 32);
+ let mul1 = pos.ins().x86_pmuludq(high1, arg0);
+ let addhigh = pos.ins().iadd(mul0, mul1);
+ let high = pos.ins().ishl_imm(addhigh, 32);
+ let low = pos.ins().x86_pmuludq(arg0, arg1);
+ pos.func.dfg.replace(inst).iadd(low, high);
+ }
+ } else {
+ unreachable!(
+ "{} should be encodable; it cannot be legalized by convert_i64x2_imul",
+ pos.func.dfg.display_inst(inst, None)
+ );
+ }
+ }
+}
+
+fn expand_tls_value(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ isa: &dyn TargetIsa,
+) {
+ use crate::settings::TlsModel;
+
+ assert!(
+ isa.triple().architecture == target_lexicon::Architecture::X86_64,
+ "Not yet implemented for {:?}",
+ isa.triple(),
+ );
+
+ if let ir::InstructionData::UnaryGlobalValue {
+ opcode: ir::Opcode::TlsValue,
+ global_value,
+ } = func.dfg[inst]
+ {
+ let ctrl_typevar = func.dfg.ctrl_typevar(inst);
+ assert_eq!(ctrl_typevar, ir::types::I64);
+
+ match isa.flags().tls_model() {
+ TlsModel::None => panic!("tls_model flag is not set."),
+ TlsModel::ElfGd => {
+ func.dfg.replace(inst).x86_elf_tls_get_addr(global_value);
+ }
+ TlsModel::Macho => {
+ func.dfg.replace(inst).x86_macho_tls_get_addr(global_value);
+ }
+ model => unimplemented!("tls_value for tls model {:?}", model),
+ }
+ } else {
+ unreachable!();
+ }
+}
+
+fn expand_load_splat(
+ inst: ir::Inst,
+ func: &mut ir::Function,
+ _cfg: &mut ControlFlowGraph,
+ _isa: &dyn TargetIsa,
+) {
+ let mut pos = FuncCursor::new(func).at_inst(inst);
+
+ pos.use_srcloc(inst);
+
+ let (ptr, offset, flags) = match pos.func.dfg[inst] {
+ ir::InstructionData::Load {
+ opcode: ir::Opcode::LoadSplat,
+ arg,
+ offset,
+ flags,
+ } => (arg, offset, flags),
+ _ => panic!(
+ "Expected load_splat: {}",
+ pos.func.dfg.display_inst(inst, None)
+ ),
+ };
+ let ty = pos.func.dfg.ctrl_typevar(inst);
+ let load = pos.ins().load(ty.lane_type(), flags, ptr, offset);
+
+ pos.func.dfg.replace(inst).splat(ty, load);
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/mod.rs b/third_party/rust/cranelift-codegen/src/isa/x86/mod.rs
new file mode 100644
index 0000000000..cbdeb3069d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/mod.rs
@@ -0,0 +1,190 @@
+//! x86 Instruction Set Architectures.
+
+mod abi;
+mod binemit;
+mod enc_tables;
+mod registers;
+pub mod settings;
+#[cfg(feature = "unwind")]
+pub mod unwind;
+
+use super::super::settings as shared_settings;
+#[cfg(feature = "testing_hooks")]
+use crate::binemit::CodeSink;
+use crate::binemit::{emit_function, MemoryCodeSink};
+use crate::ir;
+use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
+use crate::isa::Builder as IsaBuilder;
+#[cfg(feature = "unwind")]
+use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit};
+use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
+use crate::regalloc;
+use crate::result::CodegenResult;
+use crate::timing;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::any::Any;
+use core::fmt;
+use target_lexicon::{PointerWidth, Triple};
+
+#[allow(dead_code)]
+struct Isa {
+ triple: Triple,
+ shared_flags: shared_settings::Flags,
+ isa_flags: settings::Flags,
+ cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
+}
+
+/// Get an ISA builder for creating x86 targets.
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+ IsaBuilder {
+ triple,
+ setup: settings::builder(),
+ constructor: isa_constructor,
+ }
+}
+
+fn isa_constructor(
+ triple: Triple,
+ shared_flags: shared_settings::Flags,
+ builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+ let level1 = match triple.pointer_width().unwrap() {
+ PointerWidth::U16 => unimplemented!("x86-16"),
+ PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
+ PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
+ };
+
+ let isa_flags = settings::Flags::new(&shared_flags, builder);
+
+ Box::new(Isa {
+ triple,
+ isa_flags,
+ shared_flags,
+ cpumode: level1,
+ })
+}
+
+impl TargetIsa for Isa {
+ fn name(&self) -> &'static str {
+ "x86"
+ }
+
+ fn triple(&self) -> &Triple {
+ &self.triple
+ }
+
+ fn flags(&self) -> &shared_settings::Flags {
+ &self.shared_flags
+ }
+
+ fn uses_cpu_flags(&self) -> bool {
+ true
+ }
+
+ fn uses_complex_addresses(&self) -> bool {
+ true
+ }
+
+ fn register_info(&self) -> RegInfo {
+ registers::INFO.clone()
+ }
+
+ #[cfg(feature = "unwind")]
+ fn map_dwarf_register(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
+ unwind::systemv::map_reg(self, reg).map(|r| r.0)
+ }
+
+ fn encoding_info(&self) -> EncInfo {
+ enc_tables::INFO.clone()
+ }
+
+ fn legal_encodings<'a>(
+ &'a self,
+ func: &'a ir::Function,
+ inst: &'a ir::InstructionData,
+ ctrl_typevar: ir::Type,
+ ) -> Encodings<'a> {
+ lookup_enclist(
+ ctrl_typevar,
+ inst,
+ func,
+ self.cpumode,
+ &enc_tables::LEVEL2[..],
+ &enc_tables::ENCLISTS[..],
+ &enc_tables::LEGALIZE_ACTIONS[..],
+ &enc_tables::RECIPE_PREDICATES[..],
+ &enc_tables::INST_PREDICATES[..],
+ self.isa_flags.predicate_view(),
+ )
+ }
+
+ fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
+ abi::legalize_signature(
+ sig,
+ &self.triple,
+ current,
+ &self.shared_flags,
+ &self.isa_flags,
+ )
+ }
+
+ fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
+ abi::regclass_for_abi_type(ty)
+ }
+
+ fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet {
+ abi::allocatable_registers(&self.triple, &self.shared_flags)
+ }
+
+ #[cfg(feature = "testing_hooks")]
+ fn emit_inst(
+ &self,
+ func: &ir::Function,
+ inst: ir::Inst,
+ divert: &mut regalloc::RegDiversions,
+ sink: &mut dyn CodeSink,
+ ) {
+ binemit::emit_inst(func, inst, divert, sink, self)
+ }
+
+ fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
+ emit_function(func, binemit::emit_inst, sink, self)
+ }
+
+ fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
+ let _tt = timing::prologue_epilogue();
+ abi::prologue_epilogue(func, self)
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
+ ir::condcodes::IntCC::UnsignedLessThan
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
+ ir::condcodes::IntCC::UnsignedLessThan
+ }
+
+ #[cfg(feature = "unwind")]
+ fn create_unwind_info(
+ &self,
+ func: &ir::Function,
+ ) -> CodegenResult<Option<super::unwind::UnwindInfo>> {
+ abi::create_unwind_info(func, self)
+ }
+
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ Some(unwind::systemv::create_cie())
+ }
+
+ fn as_any(&self) -> &dyn Any {
+ self as &dyn Any
+ }
+}
+
+impl fmt::Display for Isa {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/registers.rs b/third_party/rust/cranelift-codegen/src/isa/x86/registers.rs
new file mode 100644
index 0000000000..a7518b268b
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/registers.rs
@@ -0,0 +1,86 @@
+//! x86 register descriptions.
+
+use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
+
+include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::isa::RegUnit;
+ use alloc::string::{String, ToString};
+
+ #[test]
+ fn unit_encodings() {
+ fn gpr(unit: usize) -> Option<u16> {
+ Some(GPR.unit(unit))
+ }
+ // The encoding of integer registers is not alphabetical.
+ assert_eq!(INFO.parse_regunit("rax"), gpr(0));
+ assert_eq!(INFO.parse_regunit("rbx"), gpr(3));
+ assert_eq!(INFO.parse_regunit("rcx"), gpr(1));
+ assert_eq!(INFO.parse_regunit("rdx"), gpr(2));
+ assert_eq!(INFO.parse_regunit("rsi"), gpr(6));
+ assert_eq!(INFO.parse_regunit("rdi"), gpr(7));
+ assert_eq!(INFO.parse_regunit("rbp"), gpr(5));
+ assert_eq!(INFO.parse_regunit("rsp"), gpr(4));
+ assert_eq!(INFO.parse_regunit("r8"), gpr(8));
+ assert_eq!(INFO.parse_regunit("r15"), gpr(15));
+
+ fn fpr(unit: usize) -> Option<u16> {
+ Some(FPR.unit(unit))
+ }
+ assert_eq!(INFO.parse_regunit("xmm0"), fpr(0));
+ assert_eq!(INFO.parse_regunit("xmm15"), fpr(15));
+
+ // FIXME(#1306) Add these tests back in when FPR32 is re-added.
+ // fn fpr32(unit: usize) -> Option<u16> {
+ // Some(FPR32.unit(unit))
+ // }
+ // assert_eq!(INFO.parse_regunit("xmm0"), fpr32(0));
+ // assert_eq!(INFO.parse_regunit("xmm31"), fpr32(31));
+ }
+
+ #[test]
+ fn unit_names() {
+ fn gpr(ru: RegUnit) -> String {
+ INFO.display_regunit(GPR.first + ru).to_string()
+ }
+ assert_eq!(gpr(0), "%rax");
+ assert_eq!(gpr(3), "%rbx");
+ assert_eq!(gpr(1), "%rcx");
+ assert_eq!(gpr(2), "%rdx");
+ assert_eq!(gpr(6), "%rsi");
+ assert_eq!(gpr(7), "%rdi");
+ assert_eq!(gpr(5), "%rbp");
+ assert_eq!(gpr(4), "%rsp");
+ assert_eq!(gpr(8), "%r8");
+ assert_eq!(gpr(15), "%r15");
+
+ fn fpr(ru: RegUnit) -> String {
+ INFO.display_regunit(FPR.first + ru).to_string()
+ }
+ assert_eq!(fpr(0), "%xmm0");
+ assert_eq!(fpr(15), "%xmm15");
+
+ // FIXME(#1306) Add these tests back in when FPR32 is re-added.
+ // fn fpr32(ru: RegUnit) -> String {
+ // INFO.display_regunit(FPR32.first + ru).to_string()
+ // }
+ // assert_eq!(fpr32(0), "%xmm0");
+ // assert_eq!(fpr32(31), "%xmm31");
+ }
+
+ #[test]
+ fn regclasses() {
+ assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
+ assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
+ assert_eq!(GPR.intersect_index(FPR), None);
+ assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
+ assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
+ assert_eq!(ABCD.intersect_index(FPR), None);
+ assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
+ assert_eq!(FPR.intersect_index(GPR), None);
+ assert_eq!(FPR.intersect_index(ABCD), None);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/settings.rs b/third_party/rust/cranelift-codegen/src/isa/x86/settings.rs
new file mode 100644
index 0000000000..2d3a3f6698
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/settings.rs
@@ -0,0 +1,52 @@
+//! x86 Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
+include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
+
+#[cfg(test)]
+mod tests {
+ use super::{builder, Flags};
+ use crate::settings::{self, Configurable};
+
+ #[test]
+ fn presets() {
+ let shared = settings::Flags::new(settings::builder());
+
+ // Nehalem has SSE4.1 but not BMI1.
+ let mut b0 = builder();
+ b0.enable("nehalem").unwrap();
+ let f0 = Flags::new(&shared, b0);
+ assert_eq!(f0.has_sse41(), true);
+ assert_eq!(f0.has_bmi1(), false);
+
+ let mut b1 = builder();
+ b1.enable("haswell").unwrap();
+ let f1 = Flags::new(&shared, b1);
+ assert_eq!(f1.has_sse41(), true);
+ assert_eq!(f1.has_bmi1(), true);
+ }
+ #[test]
+ fn display_presets() {
+ // Spot check that the flags Display impl does not cause a panic
+ let shared = settings::Flags::new(settings::builder());
+
+ let b0 = builder();
+ let f0 = Flags::new(&shared, b0);
+ let _ = format!("{}", f0);
+
+ let mut b1 = builder();
+ b1.enable("nehalem").unwrap();
+ let f1 = Flags::new(&shared, b1);
+ let _ = format!("{}", f1);
+
+ let mut b2 = builder();
+ b2.enable("haswell").unwrap();
+ let f2 = Flags::new(&shared, b2);
+ let _ = format!("{}", f2);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/x86/unwind.rs
new file mode 100644
index 0000000000..2d6b29f04d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/unwind.rs
@@ -0,0 +1,535 @@
+//! Module for x86 unwind generation for supported ABIs.
+
+pub mod systemv;
+pub mod winx64;
+
+use crate::ir::{Function, InstructionData, Opcode, ValueLoc};
+use crate::isa::x86::registers::{FPR, RU};
+use crate::isa::{RegUnit, TargetIsa};
+use crate::result::CodegenResult;
+use alloc::vec::Vec;
+use std::collections::HashMap;
+
+use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
+
+pub(crate) fn create_unwind_info(
+ func: &Function,
+ isa: &dyn TargetIsa,
+) -> CodegenResult<Option<UnwindInfo<RegUnit>>> {
+ // Find last block based on max offset.
+ let last_block = func
+ .layout
+ .blocks()
+ .max_by_key(|b| func.offsets[*b])
+ .expect("at least a block");
+ // Find last instruction offset + size, and make it function size.
+ let function_size = func
+ .inst_offsets(last_block, &isa.encoding_info())
+ .fold(0, |_, (offset, _, size)| offset + size);
+
+ let entry_block = func.layout.entry_block().expect("missing entry block");
+ let prologue_end = func.prologue_end.unwrap();
+ let epilogues_start = func
+ .epilogues_start
+ .iter()
+ .map(|(i, b)| (*b, *i))
+ .collect::<HashMap<_, _>>();
+
+ let word_size = isa.pointer_bytes();
+
+ let mut stack_size = None;
+ let mut prologue_size = 0;
+ let mut prologue_unwind_codes = Vec::new();
+ let mut epilogues_unwind_codes = Vec::new();
+ let mut frame_register: Option<RegUnit> = None;
+
+ // Process only entry block and blocks with epilogues.
+ let mut blocks = func
+ .epilogues_start
+ .iter()
+ .map(|(_, b)| *b)
+ .collect::<Vec<_>>();
+ if !blocks.contains(&entry_block) {
+ blocks.push(entry_block);
+ }
+ blocks.sort_by_key(|b| func.offsets[*b]);
+
+ for block in blocks.iter() {
+ let mut in_prologue = block == &entry_block;
+ let mut in_epilogue = false;
+ let mut epilogue_pop_offsets = Vec::new();
+
+ let epilogue_start = epilogues_start.get(block);
+ let is_last_block = block == &last_block;
+
+ for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) {
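+ // Record unwind offsets at the end of each instruction, i.e. at the offset of the
+ // following instruction.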
+ let offset = offset + size;
+
+ let unwind_codes;
+ if in_prologue {
+ // Check for prologue end (inclusive)
+ if prologue_end == inst {
+ in_prologue = false;
+ }
+ prologue_size += size;
+ unwind_codes = &mut prologue_unwind_codes;
+ } else if !in_epilogue && epilogue_start == Some(&inst) {
+ // Now in an epilogue, emit a remember state instruction if not last block
+ in_epilogue = true;
+
+ epilogues_unwind_codes.push(Vec::new());
+ unwind_codes = epilogues_unwind_codes.last_mut().unwrap();
+
+ if !is_last_block {
+ unwind_codes.push((offset, UnwindCode::RememberState));
+ }
+ } else if in_epilogue {
+ unwind_codes = epilogues_unwind_codes.last_mut().unwrap();
+ } else {
+ // Ignore normal instructions
+ continue;
+ }
+
+ match func.dfg[inst] {
+ InstructionData::Unary { opcode, arg } => {
+ match opcode {
+ Opcode::X86Push => {
+ let reg = func.locations[arg].unwrap_reg();
+ unwind_codes.push((
+ offset,
+ UnwindCode::StackAlloc {
+ size: word_size.into(),
+ },
+ ));
+ unwind_codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg,
+ stack_offset: 0,
+ },
+ ));
+ }
+ Opcode::AdjustSpDown => {
+ let stack_size =
+ stack_size.expect("expected a previous stack size instruction");
+
+ // This is used when calling a stack check function
+ // We need to track the assignment to RAX which has the size of the stack
+ unwind_codes
+ .push((offset, UnwindCode::StackAlloc { size: stack_size }));
+ }
+ _ => {}
+ }
+ }
+ InstructionData::UnaryImm { opcode, imm } => {
+ match opcode {
+ Opcode::Iconst => {
+ let imm: i64 = imm.into();
+ assert!(imm <= core::u32::MAX as i64);
+ assert!(stack_size.is_none());
+
+ // This instruction should only appear in a prologue to pass an
+ // argument of the stack size to a stack check function.
+ // Record the stack size so we know what it is when we encounter the adjustment
+ // instruction (which will adjust via the register assigned to this instruction).
+ stack_size = Some(imm as u32);
+ }
+ Opcode::AdjustSpDownImm => {
+ let imm: i64 = imm.into();
+ assert!(imm <= core::u32::MAX as i64);
+
+ stack_size = Some(imm as u32);
+
+ unwind_codes
+ .push((offset, UnwindCode::StackAlloc { size: imm as u32 }));
+ }
+ Opcode::AdjustSpUpImm => {
+ let imm: i64 = imm.into();
+ assert!(imm <= core::u32::MAX as i64);
+
+ stack_size = Some(imm as u32);
+
+ unwind_codes
+ .push((offset, UnwindCode::StackDealloc { size: imm as u32 }));
+ }
+ _ => {}
+ }
+ }
+ InstructionData::Store {
+ opcode: Opcode::Store,
+ args: [arg1, arg2],
+ offset: stack_offset,
+ ..
+ } => {
+ if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
+ (func.locations[arg1], func.locations[arg2])
+ {
+ // If this is a save of an FPR, record an unwind operation
+ // Note: the stack_offset here is relative to an adjusted SP
+ if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
+ let stack_offset: i32 = stack_offset.into();
+ unwind_codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: src,
+ stack_offset: stack_offset as u32,
+ },
+ ));
+ }
+ }
+ }
+ InstructionData::CopySpecial { src, dst, .. } if frame_register.is_none() => {
+ // Check for change in CFA register (RSP is always the starting CFA)
+ if src == (RU::rsp as RegUnit) {
+ unwind_codes.push((offset, UnwindCode::SetFramePointer { reg: dst }));
+ frame_register = Some(dst);
+ }
+ }
+ InstructionData::NullAry { opcode } => match opcode {
+ Opcode::X86Pop => {
+ epilogue_pop_offsets.push(offset);
+ }
+ _ => {}
+ },
+ InstructionData::MultiAry { opcode, .. } if in_epilogue => match opcode {
+ Opcode::Return => {
+ let args = func.dfg.inst_args(inst);
+ for (i, arg) in args.iter().rev().enumerate() {
+ // Only walk back the args for the pop instructions encountered
+ if i >= epilogue_pop_offsets.len() {
+ break;
+ }
+
+ let offset = epilogue_pop_offsets[i];
+
+ let reg = func.locations[*arg].unwrap_reg();
+ unwind_codes.push((offset, UnwindCode::RestoreRegister { reg }));
+ unwind_codes.push((
+ offset,
+ UnwindCode::StackDealloc {
+ size: word_size.into(),
+ },
+ ));
+
+ if Some(reg) == frame_register {
+ unwind_codes.push((offset, UnwindCode::RestoreFramePointer));
+ // Keep frame_register assigned for next epilogue.
+ }
+ }
+ epilogue_pop_offsets.clear();
+
+ // TODO ensure unwind codes sorted by offsets ?
+
+ if !is_last_block {
+ unwind_codes.push((offset, UnwindCode::RestoreState));
+ }
+
+ in_epilogue = false;
+ }
+ _ => {}
+ },
+ _ => {}
+ };
+ }
+ }
+
+ Ok(Some(UnwindInfo {
+ prologue_size,
+ prologue_unwind_codes,
+ epilogues_unwind_codes,
+ function_size,
+ word_size,
+ initial_sp_offset: word_size,
+ }))
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_small_alloc() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::WindowsFastcall,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ prologue_size: 9,
+ prologue_unwind_codes: vec![
+ (2, UnwindCode::StackAlloc { size: 8 }),
+ (
+ 2,
+ UnwindCode::SaveRegister {
+ reg: RU::rbp.into(),
+ stack_offset: 0,
+ }
+ ),
+ (
+ 5,
+ UnwindCode::SetFramePointer {
+ reg: RU::rbp.into(),
+ }
+ ),
+ (9, UnwindCode::StackAlloc { size: 64 })
+ ],
+ epilogues_unwind_codes: vec![vec![
+ (13, UnwindCode::StackDealloc { size: 64 }),
+ (
+ 15,
+ UnwindCode::RestoreRegister {
+ reg: RU::rbp.into()
+ }
+ ),
+ (15, UnwindCode::StackDealloc { size: 8 }),
+ (15, UnwindCode::RestoreFramePointer)
+ ]],
+ function_size: 16,
+ word_size: 8,
+ initial_sp_offset: 8,
+ }
+ );
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_medium_alloc() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::WindowsFastcall,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ prologue_size: 27,
+ prologue_unwind_codes: vec![
+ (2, UnwindCode::StackAlloc { size: 8 }),
+ (
+ 2,
+ UnwindCode::SaveRegister {
+ reg: RU::rbp.into(),
+ stack_offset: 0,
+ }
+ ),
+ (
+ 5,
+ UnwindCode::SetFramePointer {
+ reg: RU::rbp.into(),
+ }
+ ),
+ (27, UnwindCode::StackAlloc { size: 10000 })
+ ],
+ epilogues_unwind_codes: vec![vec![
+ (34, UnwindCode::StackDealloc { size: 10000 }),
+ (
+ 36,
+ UnwindCode::RestoreRegister {
+ reg: RU::rbp.into()
+ }
+ ),
+ (36, UnwindCode::StackDealloc { size: 8 }),
+ (36, UnwindCode::RestoreFramePointer)
+ ]],
+ function_size: 37,
+ word_size: 8,
+ initial_sp_offset: 8,
+ }
+ );
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_large_alloc() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::WindowsFastcall,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ prologue_size: 27,
+ prologue_unwind_codes: vec![
+ (2, UnwindCode::StackAlloc { size: 8 }),
+ (
+ 2,
+ UnwindCode::SaveRegister {
+ reg: RU::rbp.into(),
+ stack_offset: 0,
+ }
+ ),
+ (
+ 5,
+ UnwindCode::SetFramePointer {
+ reg: RU::rbp.into(),
+ }
+ ),
+ (27, UnwindCode::StackAlloc { size: 1000000 })
+ ],
+ epilogues_unwind_codes: vec![vec![
+ (34, UnwindCode::StackDealloc { size: 1000000 }),
+ (
+ 36,
+ UnwindCode::RestoreRegister {
+ reg: RU::rbp.into()
+ }
+ ),
+ (36, UnwindCode::StackDealloc { size: 8 }),
+ (36, UnwindCode::RestoreFramePointer)
+ ]],
+ function_size: 37,
+ word_size: 8,
+ initial_sp_offset: 8,
+ }
+ );
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ prologue_size: 5,
+ prologue_unwind_codes: vec![
+ (2, UnwindCode::StackAlloc { size: 8 }),
+ (
+ 2,
+ UnwindCode::SaveRegister {
+ reg: RU::rbp.into(),
+ stack_offset: 0,
+ }
+ ),
+ (
+ 5,
+ UnwindCode::SetFramePointer {
+ reg: RU::rbp.into()
+ }
+ )
+ ],
+ epilogues_unwind_codes: vec![
+ vec![
+ (12, UnwindCode::RememberState),
+ (
+ 12,
+ UnwindCode::RestoreRegister {
+ reg: RU::rbp.into()
+ }
+ ),
+ (12, UnwindCode::StackDealloc { size: 8 }),
+ (12, UnwindCode::RestoreFramePointer),
+ (13, UnwindCode::RestoreState)
+ ],
+ vec![
+ (
+ 15,
+ UnwindCode::RestoreRegister {
+ reg: RU::rbp.into()
+ }
+ ),
+ (15, UnwindCode::StackDealloc { size: 8 }),
+ (15, UnwindCode::RestoreFramePointer)
+ ]
+ ],
+ function_size: 16,
+ word_size: 8,
+ initial_sp_offset: 8,
+ }
+ );
+ }
+
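+    // Builds a function that conditionally branches to one of two returning blocks,
+    // giving the compiled code two epilogues and therefore two epilogue unwind-code
+    // sequences in the expected `UnwindInfo` above.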
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/x86/unwind/systemv.rs
new file mode 100644
index 0000000000..f6333f5afb
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/unwind/systemv.rs
@@ -0,0 +1,234 @@
+//! Unwind information for System V ABI (x86-64).
+
+use crate::ir::Function;
+use crate::isa::{
+ unwind::systemv::{RegisterMappingError, UnwindInfo},
+ CallConv, RegUnit, TargetIsa,
+};
+use crate::result::CodegenResult;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
+
+/// Creates a new x86-64 common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry {
+ use gimli::write::CallFrameInstruction;
+
+ let mut entry = CommonInformationEntry::new(
+ Encoding {
+ address_size: 8,
+ format: Format::Dwarf32,
+ version: 1,
+ },
+ 1, // Code alignment factor
+ -8, // Data alignment factor
+ X86_64::RA,
+ );
+
+ // Every frame will start with the call frame address (CFA) at RSP+8
+ // It is +8 to account for the push of the return address by the call instruction
+ entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8));
+
+ // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP)
+ entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8));
+
+ entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result<Register, RegisterMappingError> {
+ if isa.name() != "x86" || isa.pointer_bits() != 64 {
+ return Err(RegisterMappingError::UnsupportedArchitecture);
+ }
+
+ // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
+ const X86_GP_REG_MAP: [gimli::Register; 16] = [
+ X86_64::RAX,
+ X86_64::RCX,
+ X86_64::RDX,
+ X86_64::RBX,
+ X86_64::RSP,
+ X86_64::RBP,
+ X86_64::RSI,
+ X86_64::RDI,
+ X86_64::R8,
+ X86_64::R9,
+ X86_64::R10,
+ X86_64::R11,
+ X86_64::R12,
+ X86_64::R13,
+ X86_64::R14,
+ X86_64::R15,
+ ];
+ const X86_XMM_REG_MAP: [gimli::Register; 16] = [
+ X86_64::XMM0,
+ X86_64::XMM1,
+ X86_64::XMM2,
+ X86_64::XMM3,
+ X86_64::XMM4,
+ X86_64::XMM5,
+ X86_64::XMM6,
+ X86_64::XMM7,
+ X86_64::XMM8,
+ X86_64::XMM9,
+ X86_64::XMM10,
+ X86_64::XMM11,
+ X86_64::XMM12,
+ X86_64::XMM13,
+ X86_64::XMM14,
+ X86_64::XMM15,
+ ];
+
+ let reg_info = isa.register_info();
+ let bank = reg_info
+ .bank_containing_regunit(reg)
+ .ok_or_else(|| RegisterMappingError::MissingBank)?;
+ match bank.name {
+ "IntRegs" => {
+            // The DWARF register numbering for x86-64 does not follow the hardware
+            // encoding order of the GP registers (e.g. DWARF 1 is RDX and 2 is RCX),
+            // so we map through a lookup table.
+ Ok(X86_GP_REG_MAP[(reg - bank.first_unit) as usize])
+ }
+ "FloatRegs" => Ok(X86_XMM_REG_MAP[(reg - bank.first_unit) as usize]),
+ _ => Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)),
+ }
+}
+
+pub(crate) fn create_unwind_info(
+ func: &Function,
+ isa: &dyn TargetIsa,
+) -> CodegenResult<Option<UnwindInfo>> {
+ // Only System V-like calling conventions are supported
+ match func.signature.call_conv {
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => {}
+ _ => return Ok(None),
+ }
+
+ if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 {
+ return Ok(None);
+ }
+
+ let unwind = match super::create_unwind_info(func, isa)? {
+ Some(u) => u,
+ None => {
+ return Ok(None);
+ }
+ };
+
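+    // Adapter from Cranelift `RegUnit`s to DWARF register numbers for
+    // `UnwindInfo::build`: `map` goes through `map_reg` above and `sp` reports
+    // the DWARF number of RSP.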
+ struct RegisterMapper<'a, 'b>(&'a (dyn TargetIsa + 'b));
+ impl<'a, 'b> crate::isa::unwind::systemv::RegisterMapper<RegUnit> for RegisterMapper<'a, 'b> {
+ fn map(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
+ Ok(map_reg(self.0, reg)?.0)
+ }
+ fn sp(&self) -> u16 {
+ X86_64::RSP.0
+ }
+ }
+ let map = RegisterMapper(isa);
+
+ Ok(Some(UnwindInfo::build(unwind, &map)?))
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use gimli::write::Address;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_simple_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match isa
+ .create_unwind_info(&context.func)
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(1234))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match isa
+ .create_unwind_info(&context.func)
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(4321))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, SameValue(Register(6))), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
+ }
+
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x86/unwind/winx64.rs b/third_party/rust/cranelift-codegen/src/isa/x86/unwind/winx64.rs
new file mode 100644
index 0000000000..ed046f9a87
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x86/unwind/winx64.rs
@@ -0,0 +1,268 @@
+//! Unwind information for Windows x64 ABI.
+
+use crate::ir::Function;
+use crate::isa::x86::registers::{FPR, GPR};
+use crate::isa::{unwind::winx64::UnwindInfo, CallConv, RegUnit, TargetIsa};
+use crate::result::CodegenResult;
+
+pub(crate) fn create_unwind_info(
+ func: &Function,
+ isa: &dyn TargetIsa,
+) -> CodegenResult<Option<UnwindInfo>> {
+ // Only Windows fastcall is supported for unwind information
+ if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
+ return Ok(None);
+ }
+
+ let unwind = match super::create_unwind_info(func, isa)? {
+ Some(u) => u,
+ None => {
+ return Ok(None);
+ }
+ };
+
+ Ok(Some(UnwindInfo::build::<RegisterMapper>(unwind)?))
+}
+
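+// Maps Cranelift register units onto the register numbering used by Windows x64
+// unwind codes: general-purpose registers by their index within the GPR bank and
+// XMM registers by their raw register unit number.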
+struct RegisterMapper;
+
+impl crate::isa::unwind::winx64::RegisterMapper for RegisterMapper {
+ fn map(reg: RegUnit) -> crate::isa::unwind::winx64::MappedRegister {
+ use crate::isa::unwind::winx64::MappedRegister;
+ if GPR.contains(reg) {
+ MappedRegister::Int(GPR.index_of(reg) as u8)
+ } else if FPR.contains(reg) {
+ MappedRegister::Xmm(reg as u8)
+ } else {
+ panic!()
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
+ use crate::isa::unwind::winx64::UnwindCode;
+ use crate::isa::x86::registers::RU;
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_wrong_calling_convention() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(CallConv::SystemV, None));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ assert_eq!(
+ create_unwind_info(&context.func, &*isa).expect("can create unwind info"),
+ None
+ );
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_small_alloc() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::WindowsFastcall,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ flags: 0,
+ prologue_size: 9,
+ frame_register: None,
+ frame_register_offset: 0,
+ unwind_codes: vec![
+ UnwindCode::PushRegister {
+ offset: 2,
+ reg: GPR.index_of(RU::rbp.into()) as u8
+ },
+ UnwindCode::StackAlloc {
+ offset: 9,
+ size: 64
+ }
+ ]
+ }
+ );
+
+ assert_eq!(unwind.emit_size(), 8);
+
+ let mut buf = [0u8; 8];
+ unwind.emit(&mut buf);
+
+ assert_eq!(
+ buf,
+ [
+ 0x01, // Version and flags (version 1, no flags)
+ 0x09, // Prologue size
+ 0x02, // Unwind code count (1 for stack alloc, 1 for push reg)
+ 0x00, // Frame register + offset (no frame register)
+ 0x09, // Prolog offset
+                0x72, // Operation 2 (small stack alloc), info = 0x7 (size = (0x7 * 8) + 8 = 64 bytes)
+ 0x02, // Prolog offset
+                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
+ ]
+ );
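+
+        // Illustrative decoding sketch of the UWOP_ALLOC_SMALL byte asserted above:
+        // the low nibble is the operation code and the high nibble is the "info"
+        // field, with the allocation size being info * 8 + 8 bytes.
+        let alloc_small = 0x72u8;
+        assert_eq!(alloc_small & 0xF, 2); // operation 2: small stack alloc
+        assert_eq!(u32::from(alloc_small >> 4) * 8 + 8, 64); // 7 * 8 + 8 = 64 bytes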
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_medium_alloc() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::WindowsFastcall,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ flags: 0,
+ prologue_size: 27,
+ frame_register: None,
+ frame_register_offset: 0,
+ unwind_codes: vec![
+ UnwindCode::PushRegister {
+ offset: 2,
+ reg: GPR.index_of(RU::rbp.into()) as u8
+ },
+ UnwindCode::StackAlloc {
+ offset: 27,
+ size: 10000
+ }
+ ]
+ }
+ );
+
+ assert_eq!(unwind.emit_size(), 12);
+
+ let mut buf = [0u8; 12];
+ unwind.emit(&mut buf);
+
+ assert_eq!(
+ buf,
+ [
+ 0x01, // Version and flags (version 1, no flags)
+ 0x1B, // Prologue size
+ 0x03, // Unwind code count (2 for stack alloc, 1 for push reg)
+ 0x00, // Frame register + offset (no frame register)
+ 0x1B, // Prolog offset
+                0x01, // Operation 1 (large stack alloc), size is a scaled 16-bit slot count (info = 0)
+ 0xE2, // Low size byte
+ 0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
+ 0x02, // Prolog offset
+ 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
+ 0x00, // Padding
+ 0x00, // Padding
+ ]
+ );
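+
+        // Illustrative decoding sketch of the scaled 16-bit UWOP_ALLOC_LARGE form
+        // asserted above (info = 0): the two size bytes are little-endian and count
+        // 8-byte slots.
+        let slots = u16::from_le_bytes([0xE2, 0x04]);
+        assert_eq!(u32::from(slots) * 8, 10000); // 0x04E2 slots * 8 = 10000 bytes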
+ }
+
+ #[test]
+ #[cfg_attr(feature = "x64", should_panic)] // TODO #2079
+ fn test_large_alloc() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::WindowsFastcall,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let unwind = create_unwind_info(&context.func, &*isa)
+ .expect("can create unwind info")
+ .expect("expected unwind info");
+
+ assert_eq!(
+ unwind,
+ UnwindInfo {
+ flags: 0,
+ prologue_size: 27,
+ frame_register: None,
+ frame_register_offset: 0,
+ unwind_codes: vec![
+ UnwindCode::PushRegister {
+ offset: 2,
+ reg: GPR.index_of(RU::rbp.into()) as u8
+ },
+ UnwindCode::StackAlloc {
+ offset: 27,
+ size: 1000000
+ }
+ ]
+ }
+ );
+
+ assert_eq!(unwind.emit_size(), 12);
+
+ let mut buf = [0u8; 12];
+ unwind.emit(&mut buf);
+
+ assert_eq!(
+ buf,
+ [
+ 0x01, // Version and flags (version 1, no flags)
+ 0x1B, // Prologue size
+ 0x04, // Unwind code count (3 for stack alloc, 1 for push reg)
+ 0x00, // Frame register + offset (no frame register)
+ 0x1B, // Prolog offset
+                0x11, // Operation 1 (large stack alloc), size is an unscaled 32-bit byte count (info = 1)
+ 0x40, // Byte 1 of size
+ 0x42, // Byte 2 of size
+ 0x0F, // Byte 3 of size
+ 0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
+ 0x02, // Prolog offset
+ 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
+ ]
+ );
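+
+        // Illustrative decoding sketch of the unscaled 32-bit UWOP_ALLOC_LARGE form
+        // asserted above (info = 1): the four size bytes are a little-endian byte
+        // count.
+        let size = u32::from_le_bytes([0x40, 0x42, 0x0F, 0x00]);
+        assert_eq!(size, 1_000_000); // 0x000F_4240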
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+}