path: root/third_party/rust/cranelift-codegen/src/isa/x64
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-28 14:29:10 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-28 14:29:10 +0000
commit    2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree      b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/cranelift-codegen/src/isa/x64
parent    Initial commit. (diff)
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/cranelift-codegen/src/isa/x64')
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/abi.rs                 |  794
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs           | 1215
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs           | 2819
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs     | 3593
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs            | 2733
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs           |  289
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs         |  125
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs |  204
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/lower.rs               | 3771
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/mod.rs                 |  149
-rw-r--r--  third_party/rust/cranelift-codegen/src/isa/x64/settings.rs            |    9
11 files changed, 15701 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/abi.rs b/third_party/rust/cranelift-codegen/src/isa/x64/abi.rs
new file mode 100644
index 0000000000..f4c7624f36
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/abi.rs
@@ -0,0 +1,794 @@
+//! Implementation of the standard x64 ABI.
+
+use crate::ir::types::*;
+use crate::ir::{self, types, MemFlags, TrapCode, Type};
+use crate::isa;
+use crate::isa::{x64::inst::*, CallConv};
+use crate::machinst::abi_impl::*;
+use crate::machinst::*;
+use crate::settings;
+use crate::{CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use args::*;
+use regalloc::{RealReg, Reg, RegClass, Set, Writable};
+use smallvec::{smallvec, SmallVec};
+use std::convert::TryFrom;
+
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+
+/// Offset in stack-arg area to callee-TLS slot in Baldrdash-2020 calling convention.
+static BALDRDASH_CALLEE_TLS_OFFSET: i64 = 0;
+/// Offset in stack-arg area to caller-TLS slot in Baldrdash-2020 calling convention.
+static BALDRDASH_CALLER_TLS_OFFSET: i64 = 8;
+
+/// Try to fill a Baldrdash register, returning it if it was found.
+fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option<ABIArg> {
+ if call_conv.extends_baldrdash() {
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext => {
+ // This is SpiderMonkey's `WasmTlsReg`.
+ Some(ABIArg::Reg(
+ regs::r14().to_real_reg(),
+ types::I64,
+ param.extension,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::SignatureId => {
+ // This is SpiderMonkey's `WasmTableCallSigReg`.
+ Some(ABIArg::Reg(
+ regs::r10().to_real_reg(),
+ types::I64,
+ param.extension,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::CalleeTLS => {
+ // This is SpiderMonkey's callee TLS slot in the extended frame of Wasm's ABI-2020.
+ assert!(call_conv == isa::CallConv::Baldrdash2020);
+ Some(ABIArg::Stack(
+ BALDRDASH_CALLEE_TLS_OFFSET,
+ ir::types::I64,
+ ir::ArgumentExtension::None,
+ param.purpose,
+ ))
+ }
+ &ir::ArgumentPurpose::CallerTLS => {
+ // This is SpiderMonkey's caller TLS slot in the extended frame of Wasm's ABI-2020.
+ assert!(call_conv == isa::CallConv::Baldrdash2020);
+ Some(ABIArg::Stack(
+ BALDRDASH_CALLER_TLS_OFFSET,
+ ir::types::I64,
+ ir::ArgumentExtension::None,
+ param.purpose,
+ ))
+ }
+ _ => None,
+ }
+ } else {
+ None
+ }
+}
+
+/// Support for the x64 ABI from the callee side (within a function body).
+pub(crate) type X64ABICallee = ABICalleeImpl<X64ABIMachineSpec>;
+
+/// Support for the x64 ABI from the caller side (at a callsite).
+pub(crate) type X64ABICaller = ABICallerImpl<X64ABIMachineSpec>;
+
+/// Implementation of ABI primitives for x64.
+pub(crate) struct X64ABIMachineSpec;
+
+impl ABIMachineSpec for X64ABIMachineSpec {
+ type I = Inst;
+
+ fn word_bits() -> u32 {
+ 64
+ }
+
+ /// Return required stack alignment in bytes.
+ fn stack_align(_call_conv: isa::CallConv) -> u32 {
+ 16
+ }
+
+ fn compute_arg_locs(
+ call_conv: isa::CallConv,
+ params: &[ir::AbiParam],
+ args_or_rets: ArgsOrRets,
+ add_ret_area_ptr: bool,
+ ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+ let is_baldrdash = call_conv.extends_baldrdash();
+ let has_baldrdash_tls = call_conv == isa::CallConv::Baldrdash2020;
+
+ let mut next_gpr = 0;
+ let mut next_vreg = 0;
+ let mut next_stack: u64 = 0;
+ let mut ret = vec![];
+
+ if args_or_rets == ArgsOrRets::Args && has_baldrdash_tls {
+ // Baldrdash ABI-2020 always has two stack-arg slots reserved, for the callee and
+ // caller TLS-register values, respectively.
+ next_stack = 16;
+ }
+
+ for i in 0..params.len() {
+ // Process returns backward, according to the SpiderMonkey ABI (which we
+ // adopt internally if `is_baldrdash` is set).
+ let param = match (args_or_rets, is_baldrdash) {
+ (ArgsOrRets::Args, _) => &params[i],
+ (ArgsOrRets::Rets, false) => &params[i],
+ (ArgsOrRets::Rets, true) => &params[params.len() - 1 - i],
+ };
+
+ // Validate "purpose".
+ match &param.purpose {
+ &ir::ArgumentPurpose::VMContext
+ | &ir::ArgumentPurpose::Normal
+ | &ir::ArgumentPurpose::StackLimit
+ | &ir::ArgumentPurpose::SignatureId
+ | &ir::ArgumentPurpose::CalleeTLS
+ | &ir::ArgumentPurpose::CallerTLS => {}
+ _ => panic!(
+ "Unsupported argument purpose {:?} in signature: {:?}",
+ param.purpose, params
+ ),
+ }
+
+ let intreg = in_int_reg(param.value_type);
+ let vecreg = in_vec_reg(param.value_type);
+ debug_assert!(intreg || vecreg);
+ debug_assert!(!(intreg && vecreg));
+
+ let (next_reg, candidate) = if intreg {
+ let candidate = match args_or_rets {
+ ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
+ ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i),
+ };
+ debug_assert!(candidate
+ .map(|r| r.get_class() == RegClass::I64)
+ .unwrap_or(true));
+ (&mut next_gpr, candidate)
+ } else {
+ let candidate = match args_or_rets {
+ ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
+ ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i),
+ };
+ debug_assert!(candidate
+ .map(|r| r.get_class() == RegClass::V128)
+ .unwrap_or(true));
+ (&mut next_vreg, candidate)
+ };
+
+ if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
+ assert!(intreg);
+ ret.push(param);
+ } else if let Some(reg) = candidate {
+ ret.push(ABIArg::Reg(
+ reg.to_real_reg(),
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ *next_reg += 1;
+ } else {
+ // Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
+ // stack alignment happens separately after all args.)
+ let size = (param.value_type.bits() / 8) as u64;
+ let size = std::cmp::max(size, 8);
+ // Align `next_stack` up to a multiple of `size` (which is a power of two).
+ debug_assert!(size.is_power_of_two());
+ next_stack = (next_stack + size - 1) & !(size - 1);
+ ret.push(ABIArg::Stack(
+ next_stack as i64,
+ param.value_type,
+ param.extension,
+ param.purpose,
+ ));
+ next_stack += size;
+ }
+ }
+
+ if args_or_rets == ArgsOrRets::Rets && is_baldrdash {
+ ret.reverse();
+ }
+
+ let extra_arg = if add_ret_area_ptr {
+ debug_assert!(args_or_rets == ArgsOrRets::Args);
+ if let Some(reg) = get_intreg_for_arg_systemv(&call_conv, next_gpr) {
+ ret.push(ABIArg::Reg(
+ reg.to_real_reg(),
+ types::I64,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ } else {
+ ret.push(ABIArg::Stack(
+ next_stack as i64,
+ types::I64,
+ ir::ArgumentExtension::None,
+ ir::ArgumentPurpose::Normal,
+ ));
+ next_stack += 8;
+ }
+ Some(ret.len() - 1)
+ } else {
+ None
+ };
+
+ next_stack = (next_stack + 15) & !15;
+
+ // To avoid overflow issues, limit the arg/return size to something reasonable.
+ if next_stack > STACK_ARG_RET_SIZE_LIMIT {
+ return Err(CodegenError::ImplLimitExceeded);
+ }
+
+ Ok((ret, next_stack as i64, extra_arg))
+ }
+
+ fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64 {
+ if call_conv.extends_baldrdash() {
+ let num_words = flags.baldrdash_prologue_words() as i64;
+ debug_assert!(num_words > 0, "baldrdash must set baldrdash_prologue_words");
+ num_words * 8
+ } else {
+ 16 // frame pointer + return address.
+ }
+ }
+
+ fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
+ let ext_kind = match ty {
+ types::B1
+ | types::B8
+ | types::I8
+ | types::B16
+ | types::I16
+ | types::B32
+ | types::I32 => ExtKind::SignExtend,
+ types::B64 | types::I64 | types::R64 | types::F32 | types::F64 => ExtKind::None,
+ _ if ty.bytes() == 16 => ExtKind::None,
+ _ => panic!("load_stack({})", ty),
+ };
+ Inst::load(ty, mem, into_reg, ext_kind)
+ }
+
+ fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I {
+ Inst::store(ty, from_reg, mem)
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I {
+ Inst::gen_move(to_reg, from_reg, ty)
+ }
+
+ /// Generate an integer-extend operation.
+ fn gen_extend(
+ to_reg: Writable<Reg>,
+ from_reg: Reg,
+ is_signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ ) -> Self::I {
+ let ext_mode = ExtMode::new(from_bits as u16, to_bits as u16)
+ .expect(&format!("invalid extension: {} -> {}", from_bits, to_bits));
+ if is_signed {
+ Inst::movsx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
+ } else {
+ Inst::movzx_rm_r(ext_mode, RegMem::reg(from_reg), to_reg)
+ }
+ }
+
+ fn gen_ret() -> Self::I {
+ Inst::ret()
+ }
+
+ fn gen_epilogue_placeholder() -> Self::I {
+ Inst::epilogue_placeholder()
+ }
+
+ fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]> {
+ let mut ret = SmallVec::new();
+ if from_reg != into_reg.to_reg() {
+ ret.push(Inst::gen_move(into_reg, from_reg, I64));
+ }
+ ret.push(Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(imm),
+ into_reg,
+ ));
+ ret
+ }
+
+ fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]> {
+ smallvec![
+ Inst::cmp_rmi_r(/* bytes = */ 8, RegMemImm::reg(regs::rsp()), limit_reg),
+ Inst::TrapIf {
+ // NBE == "> unsigned"; args above are reversed; this tests limit_reg > rsp.
+ cc: CC::NBE,
+ trap_code: TrapCode::StackOverflow,
+ },
+ ]
+ }
+
+ fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Self::I {
+ let mem: SyntheticAmode = mem.into();
+ Inst::lea(mem, into_reg)
+ }
+
+ fn get_stacklimit_reg() -> Reg {
+ debug_assert!(
+ !is_callee_save_systemv(regs::r10().to_real_reg())
+ && !is_callee_save_baldrdash(regs::r10().to_real_reg())
+ );
+
+ // As per comment on trait definition, we must return a caller-save
+ // register here.
+ regs::r10()
+ }
+
+ fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
+ // Only ever used for I64s; if that changes, see if the ExtKind below needs to be changed.
+ assert_eq!(ty, I64);
+ let simm32 = offset as u32;
+ let mem = Amode::imm_reg(simm32, base);
+ Inst::load(ty, mem, into_reg, ExtKind::None)
+ }
+
+ fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I {
+ let simm32 = offset as u32;
+ let mem = Amode::imm_reg(simm32, base);
+ Inst::store(ty, from_reg, mem)
+ }
+
+ fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]> {
+ let (alu_op, amount) = if amount >= 0 {
+ (AluRmiROpcode::Add, amount)
+ } else {
+ (AluRmiROpcode::Sub, -amount)
+ };
+
+ let amount = amount as u32;
+
+ smallvec![Inst::alu_rmi_r(
+ true,
+ alu_op,
+ RegMemImm::imm(amount),
+ Writable::from_reg(regs::rsp()),
+ )]
+ }
+
+ fn gen_nominal_sp_adj(offset: i32) -> Self::I {
+ Inst::VirtualSPOffsetAdj {
+ offset: offset as i64,
+ }
+ }
+
+ fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]> {
+ let r_rsp = regs::rsp();
+ let r_rbp = regs::rbp();
+ let w_rbp = Writable::from_reg(r_rbp);
+ let mut insts = SmallVec::new();
+ // RSP before the call will be 0 % 16. So here, it is 8 % 16.
+ insts.push(Inst::push64(RegMemImm::reg(r_rbp)));
+ // RSP is now 0 % 16
+ insts.push(Inst::mov_r_r(true, r_rsp, w_rbp));
+ insts
+ }
+
+ fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]> {
+ let mut insts = SmallVec::new();
+ insts.push(Inst::mov_r_r(
+ true,
+ regs::rbp(),
+ Writable::from_reg(regs::rsp()),
+ ));
+ insts.push(Inst::pop64(Writable::from_reg(regs::rbp())));
+ insts
+ }
+
+ fn gen_clobber_save(
+ call_conv: isa::CallConv,
+ _: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> (u64, SmallVec<[Self::I; 16]>) {
+ let mut insts = SmallVec::new();
+ // Find all clobbered registers that are callee-save. These are only I64
+ // registers (all XMM registers are caller-save) so we can compute the
+ // total size of the needed stack space easily.
+ let clobbered = get_callee_saves(&call_conv, clobbers);
+ let clobbered_size = 8 * clobbered.len() as u32;
+ let stack_size = clobbered_size + fixed_frame_storage_size;
+ // Align to 16 bytes.
+ let stack_size = (stack_size + 15) & !15;
+ // Adjust the stack pointer downward with one `sub rsp, IMM`
+ // instruction.
+ if stack_size > 0 {
+ insts.push(Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Sub,
+ RegMemImm::imm(stack_size),
+ Writable::from_reg(regs::rsp()),
+ ));
+ }
+ // Store each clobbered register in order at offsets from RSP.
+ let mut cur_offset = 0;
+ for reg in &clobbered {
+ let r_reg = reg.to_reg();
+ match r_reg.get_class() {
+ RegClass::I64 => {
+ insts.push(Inst::mov_r_m(
+ /* bytes = */ 8,
+ r_reg.to_reg(),
+ Amode::imm_reg(cur_offset, regs::rsp()),
+ ));
+ cur_offset += 8;
+ }
+ // No XMM regs are callee-save, so we do not need to implement
+ // this.
+ _ => unimplemented!(),
+ }
+ }
+
+ (clobbered_size as u64, insts)
+ }
+
+ fn gen_clobber_restore(
+ call_conv: isa::CallConv,
+ flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ _fixed_frame_storage_size: u32,
+ _outgoing_args_size: u32,
+ ) -> SmallVec<[Self::I; 16]> {
+ let mut insts = SmallVec::new();
+
+ let clobbered = get_callee_saves(&call_conv, clobbers);
+ let stack_size = 8 * clobbered.len() as u32;
+ let stack_size = (stack_size + 15) & !15;
+
+ // Restore regs by loading from offsets of RSP.
+ let mut cur_offset = 0;
+ for reg in &clobbered {
+ let rreg = reg.to_reg();
+ match rreg.get_class() {
+ RegClass::I64 => {
+ insts.push(Inst::mov64_m_r(
+ Amode::imm_reg(cur_offset, regs::rsp()),
+ Writable::from_reg(rreg.to_reg()),
+ ));
+ cur_offset += 8;
+ }
+ _ => unimplemented!(),
+ }
+ }
+ // Adjust RSP back upward.
+ if stack_size > 0 {
+ insts.push(Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(stack_size),
+ Writable::from_reg(regs::rsp()),
+ ));
+ }
+
+ // If this is Baldrdash-2020, restore the callee (i.e., our) TLS
+ // register. We may have allocated it for something else and clobbered
+ // it, but the ABI expects us to leave the TLS register unchanged.
+ if call_conv == isa::CallConv::Baldrdash2020 {
+ let off = BALDRDASH_CALLEE_TLS_OFFSET + Self::fp_to_arg_offset(call_conv, flags);
+ insts.push(Inst::mov64_m_r(
+ Amode::imm_reg(off as u32, regs::rbp()),
+ Writable::from_reg(regs::r14()),
+ ));
+ }
+
+ insts
+ }
+
+ /// Generate a call instruction/sequence.
+ fn gen_call(
+ dest: &CallDest,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: ir::Opcode,
+ tmp: Writable<Reg>,
+ _callee_conv: isa::CallConv,
+ _caller_conv: isa::CallConv,
+ ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> {
+ let mut insts = SmallVec::new();
+ match dest {
+ &CallDest::ExtName(ref name, RelocDistance::Near) => {
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::call_known(name.clone(), uses, defs, opcode),
+ ));
+ }
+ &CallDest::ExtName(ref name, RelocDistance::Far) => {
+ insts.push((
+ InstIsSafepoint::No,
+ Inst::LoadExtName {
+ dst: tmp,
+ name: Box::new(name.clone()),
+ offset: 0,
+ },
+ ));
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, opcode),
+ ));
+ }
+ &CallDest::Reg(reg) => {
+ insts.push((
+ InstIsSafepoint::Yes,
+ Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode),
+ ));
+ }
+ }
+ insts
+ }
+
+ fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
+ // We allocate in terms of 8-byte slots.
+ match (rc, ty) {
+ (RegClass::I64, _) => 1,
+ (RegClass::V128, types::F32) | (RegClass::V128, types::F64) => 1,
+ (RegClass::V128, _) => 2,
+ _ => panic!("Unexpected register class!"),
+ }
+ }
+
+ fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64 {
+ s.virtual_sp_offset
+ }
+
+ fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64 {
+ s.nominal_sp_to_fp
+ }
+
+ fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
+ let mut caller_saved = vec![
+ // Systemv calling convention:
+ // - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
+ Writable::from_reg(regs::rsi()),
+ Writable::from_reg(regs::rdi()),
+ Writable::from_reg(regs::rax()),
+ Writable::from_reg(regs::rcx()),
+ Writable::from_reg(regs::rdx()),
+ Writable::from_reg(regs::r8()),
+ Writable::from_reg(regs::r9()),
+ Writable::from_reg(regs::r10()),
+ Writable::from_reg(regs::r11()),
+ // - XMM: all the registers!
+ Writable::from_reg(regs::xmm0()),
+ Writable::from_reg(regs::xmm1()),
+ Writable::from_reg(regs::xmm2()),
+ Writable::from_reg(regs::xmm3()),
+ Writable::from_reg(regs::xmm4()),
+ Writable::from_reg(regs::xmm5()),
+ Writable::from_reg(regs::xmm6()),
+ Writable::from_reg(regs::xmm7()),
+ Writable::from_reg(regs::xmm8()),
+ Writable::from_reg(regs::xmm9()),
+ Writable::from_reg(regs::xmm10()),
+ Writable::from_reg(regs::xmm11()),
+ Writable::from_reg(regs::xmm12()),
+ Writable::from_reg(regs::xmm13()),
+ Writable::from_reg(regs::xmm14()),
+ Writable::from_reg(regs::xmm15()),
+ ];
+
+ if call_conv_of_callee.extends_baldrdash() {
+ caller_saved.push(Writable::from_reg(regs::r12()));
+ caller_saved.push(Writable::from_reg(regs::r13()));
+ // Not r14; implicitly preserved in the entry.
+ caller_saved.push(Writable::from_reg(regs::r15()));
+ caller_saved.push(Writable::from_reg(regs::rbx()));
+ }
+
+ caller_saved
+ }
+}
+
+impl From<StackAMode> for SyntheticAmode {
+ fn from(amode: StackAMode) -> Self {
+ // We enforce a 128 MB stack-frame size limit above, so these
+ // `expect()`s should never fail.
+ match amode {
+ StackAMode::FPOffset(off, _ty) => {
+ let off = i32::try_from(off)
+ .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first");
+ let simm32 = off as u32;
+ SyntheticAmode::Real(Amode::ImmReg {
+ simm32,
+ base: regs::rbp(),
+ flags: MemFlags::trusted(),
+ })
+ }
+ StackAMode::NominalSPOffset(off, _ty) => {
+ let off = i32::try_from(off).expect(
+ "Offset in NominalSPOffset is greater than 2GB; should hit impl limit first",
+ );
+ let simm32 = off as u32;
+ SyntheticAmode::nominal_sp_offset(simm32)
+ }
+ StackAMode::SPOffset(off, _ty) => {
+ let off = i32::try_from(off)
+ .expect("Offset in SPOffset is greater than 2GB; should hit impl limit first");
+ let simm32 = off as u32;
+ SyntheticAmode::Real(Amode::ImmReg {
+ simm32,
+ base: regs::rsp(),
+ flags: MemFlags::trusted(),
+ })
+ }
+ }
+ }
+}
+
+fn in_int_reg(ty: types::Type) -> bool {
+ match ty {
+ types::I8
+ | types::I16
+ | types::I32
+ | types::I64
+ | types::B1
+ | types::B8
+ | types::B16
+ | types::B32
+ | types::B64
+ | types::R64 => true,
+ types::R32 => panic!("unexpected 32-bit refs on x64!"),
+ _ => false,
+ }
+}
+
+fn in_vec_reg(ty: types::Type) -> bool {
+ match ty {
+ types::F32 | types::F64 => true,
+ _ if ty.is_vector() => true,
+ _ => false,
+ }
+}
+
+fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast
+ | CallConv::Cold
+ | CallConv::SystemV
+ | CallConv::BaldrdashSystemV
+ | CallConv::Baldrdash2020 => {}
+ _ => panic!("int args only supported for SysV calling convention"),
+ };
+ match idx {
+ 0 => Some(regs::rdi()),
+ 1 => Some(regs::rsi()),
+ 2 => Some(regs::rdx()),
+ 3 => Some(regs::rcx()),
+ 4 => Some(regs::r8()),
+ 5 => Some(regs::r9()),
+ _ => None,
+ }
+}
+
+fn get_fltreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast
+ | CallConv::Cold
+ | CallConv::SystemV
+ | CallConv::BaldrdashSystemV
+ | CallConv::Baldrdash2020 => {}
+ _ => panic!("float args only supported for SysV calling convention"),
+ };
+ match idx {
+ 0 => Some(regs::xmm0()),
+ 1 => Some(regs::xmm1()),
+ 2 => Some(regs::xmm2()),
+ 3 => Some(regs::xmm3()),
+ 4 => Some(regs::xmm4()),
+ 5 => Some(regs::xmm5()),
+ 6 => Some(regs::xmm6()),
+ 7 => Some(regs::xmm7()),
+ _ => None,
+ }
+}
+
+fn get_intreg_for_retval_systemv(
+ call_conv: &CallConv,
+ intreg_idx: usize,
+ retval_idx: usize,
+) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => match intreg_idx {
+ 0 => Some(regs::rax()),
+ 1 => Some(regs::rdx()),
+ _ => None,
+ },
+ CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
+ if intreg_idx == 0 && retval_idx == 0 {
+ Some(regs::rax())
+ } else {
+ None
+ }
+ }
+ CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
+ }
+}
+
+fn get_fltreg_for_retval_systemv(
+ call_conv: &CallConv,
+ fltreg_idx: usize,
+ retval_idx: usize,
+) -> Option<Reg> {
+ match call_conv {
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => match fltreg_idx {
+ 0 => Some(regs::xmm0()),
+ 1 => Some(regs::xmm1()),
+ _ => None,
+ },
+ CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => {
+ if fltreg_idx == 0 && retval_idx == 0 {
+ Some(regs::xmm0())
+ } else {
+ None
+ }
+ }
+ CallConv::WindowsFastcall | CallConv::BaldrdashWindows | CallConv::Probestack => todo!(),
+ }
+}
+
+fn is_callee_save_systemv(r: RealReg) -> bool {
+ use regs::*;
+ match r.get_class() {
+ RegClass::I64 => match r.get_hw_encoding() as u8 {
+ ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
+ _ => false,
+ },
+ RegClass::V128 => false,
+ _ => unimplemented!(),
+ }
+}
+
+fn is_callee_save_baldrdash(r: RealReg) -> bool {
+ use regs::*;
+ match r.get_class() {
+ RegClass::I64 => {
+ if r.get_hw_encoding() as u8 == ENC_R14 {
+ // r14 is the WasmTlsReg and is preserved implicitly.
+ false
+ } else {
+ // Defer to native for the other ones.
+ is_callee_save_systemv(r)
+ }
+ }
+ RegClass::V128 => false,
+ _ => unimplemented!(),
+ }
+}
+
+fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
+ let mut regs: Vec<Writable<RealReg>> = match call_conv {
+ CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
+ .iter()
+ .cloned()
+ .filter(|r| is_callee_save_baldrdash(r.to_reg()))
+ .collect(),
+ CallConv::BaldrdashWindows => {
+ todo!("baldrdash windows");
+ }
+ CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
+ .iter()
+ .cloned()
+ .filter(|r| is_callee_save_systemv(r.to_reg()))
+ .collect(),
+ CallConv::WindowsFastcall => todo!("windows fastcall"),
+ CallConv::Probestack => todo!("probestack?"),
+ };
+ // Sort registers for deterministic code output. We can do an unstable sort because the
+ // registers will be unique (there are no dups).
+ regs.sort_unstable_by_key(|r| r.to_reg().get_index());
+ regs
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs
new file mode 100644
index 0000000000..6a8f65feb3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/args.rs
@@ -0,0 +1,1215 @@
+//! Instruction operand sub-components (aka "parts"): definitions and printing.
+
+use super::regs::{self, show_ireg_sized};
+use super::EmitState;
+use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::MemFlags;
+use crate::machinst::*;
+use regalloc::{
+ PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
+ RegUsageMapper, Writable,
+};
+use std::fmt;
+use std::string::String;
+
+/// A possible addressing mode (amode) that can be used in instructions.
+/// These denote a 64-bit value only.
+#[derive(Clone, Debug)]
+pub enum Amode {
+ /// sign-extend-32-to-64(Immediate) + Register.
+ ImmReg {
+ simm32: u32,
+ base: Reg,
+ flags: MemFlags,
+ },
+
+ /// sign-extend-32-to-64(Immediate) + Register1 + (Register2 << Shift)
+ ImmRegRegShift {
+ simm32: u32,
+ base: Reg,
+ index: Reg,
+ shift: u8, /* 0 .. 3 only */
+ flags: MemFlags,
+ },
+
+ /// sign-extend-32-to-64(Immediate) + RIP (instruction pointer).
+ /// To wit: not supported in 32-bit mode.
+ RipRelative { target: MachLabel },
+}
+
+impl Amode {
+ pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self {
+ debug_assert!(base.get_class() == RegClass::I64);
+ Self::ImmReg {
+ simm32,
+ base,
+ flags: MemFlags::trusted(),
+ }
+ }
+
+ pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Reg, index: Reg, shift: u8) -> Self {
+ debug_assert!(base.get_class() == RegClass::I64);
+ debug_assert!(index.get_class() == RegClass::I64);
+ debug_assert!(shift <= 3);
+ Self::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ flags: MemFlags::trusted(),
+ }
+ }
+
+ pub(crate) fn rip_relative(target: MachLabel) -> Self {
+ Self::RipRelative { target }
+ }
+
+ pub(crate) fn with_flags(&self, flags: MemFlags) -> Self {
+ match self {
+ &Self::ImmReg { simm32, base, .. } => Self::ImmReg {
+ simm32,
+ base,
+ flags,
+ },
+ &Self::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ ..
+ } => Self::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ flags,
+ },
+ _ => panic!("Amode {:?} cannot take memflags", self),
+ }
+ }
+
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ Amode::ImmReg { base, .. } => {
+ collector.add_use(*base);
+ }
+ Amode::ImmRegRegShift { base, index, .. } => {
+ collector.add_use(*base);
+ collector.add_use(*index);
+ }
+ Amode::RipRelative { .. } => {
+ // RIP isn't involved in regalloc.
+ }
+ }
+ }
+
+ pub(crate) fn get_flags(&self) -> MemFlags {
+ match self {
+ Amode::ImmReg { flags, .. } => *flags,
+ Amode::ImmRegRegShift { flags, .. } => *flags,
+ Amode::RipRelative { .. } => MemFlags::trusted(),
+ }
+ }
+
+ pub(crate) fn can_trap(&self) -> bool {
+ !self.get_flags().notrap()
+ }
+}
+
+impl PrettyPrint for Amode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ Amode::ImmReg { simm32, base, .. } => {
+ format!("{}({})", *simm32 as i32, base.show_rru(mb_rru))
+ }
+ Amode::ImmRegRegShift {
+ simm32,
+ base,
+ index,
+ shift,
+ ..
+ } => format!(
+ "{}({},{},{})",
+ *simm32 as i32,
+ base.show_rru(mb_rru),
+ index.show_rru(mb_rru),
+ 1 << shift
+ ),
+ Amode::RipRelative { ref target } => format!("label{}(%rip)", target.get()),
+ }
+ }
+}
+
+/// A Memory Address. These denote a 64-bit value only.
+/// Used for usual addressing modes as well as addressing modes used during compilation, when the
+/// moving SP offset is not known.
+#[derive(Clone)]
+pub enum SyntheticAmode {
+ /// A real amode.
+ Real(Amode),
+
+ /// A (virtual) offset to the "nominal SP" value, which will be recomputed as we push and pop
+ /// within the function.
+ NominalSPOffset { simm32: u32 },
+}
+
+impl SyntheticAmode {
+ pub(crate) fn nominal_sp_offset(simm32: u32) -> Self {
+ SyntheticAmode::NominalSPOffset { simm32 }
+ }
+
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector),
+ SyntheticAmode::NominalSPOffset { .. } => {
+ // Nothing to do; the base is SP and isn't involved in regalloc.
+ }
+ }
+ }
+
+ pub(crate) fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ SyntheticAmode::Real(addr) => addr.map_uses(map),
+ SyntheticAmode::NominalSPOffset { .. } => {
+ // Nothing to do.
+ }
+ }
+ }
+
+ pub(crate) fn finalize(&self, state: &mut EmitState) -> Amode {
+ match self {
+ SyntheticAmode::Real(addr) => addr.clone(),
+ SyntheticAmode::NominalSPOffset { simm32 } => {
+ let off = *simm32 as i64 + state.virtual_sp_offset;
+ // TODO will require a sequence of add etc.
+ assert!(
+ off <= u32::max_value() as i64,
+ "amode finalize: add sequence NYI"
+ );
+ Amode::imm_reg(off as u32, regs::rsp())
+ }
+ }
+ }
+}
+
+impl Into<SyntheticAmode> for Amode {
+ fn into(self) -> SyntheticAmode {
+ SyntheticAmode::Real(self)
+ }
+}
+
+impl PrettyPrint for SyntheticAmode {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ match self {
+ SyntheticAmode::Real(addr) => addr.show_rru(mb_rru),
+ SyntheticAmode::NominalSPOffset { simm32 } => {
+ format!("rsp({} + virtual offset)", *simm32 as i32)
+ }
+ }
+ }
+}
+
+/// An operand which is either an integer Register, a value in Memory or an Immediate. This can
+/// denote an 8, 16, 32 or 64 bit value. For the Immediate form, in the 8- and 16-bit case, only
+/// the lower 8 or 16 bits of `simm32` are relevant. In the 64-bit case, the value denoted by
+/// `simm32` is its sign-extension out to 64 bits.
+#[derive(Clone)]
+pub enum RegMemImm {
+ Reg { reg: Reg },
+ Mem { addr: SyntheticAmode },
+ Imm { simm32: u32 },
+}
+
+impl RegMemImm {
+ pub(crate) fn reg(reg: Reg) -> Self {
+ debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
+ Self::Reg { reg }
+ }
+ pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
+ Self::Mem { addr: addr.into() }
+ }
+ pub(crate) fn imm(simm32: u32) -> Self {
+ Self::Imm { simm32 }
+ }
+
+ /// Asserts that in register mode, the reg class is the one that's expected.
+ pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) {
+ if let Self::Reg { reg } = self {
+ debug_assert_eq!(reg.get_class(), expected_reg_class);
+ }
+ }
+
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ Self::Reg { reg } => collector.add_use(*reg),
+ Self::Mem { addr } => addr.get_regs_as_uses(collector),
+ Self::Imm { .. } => {}
+ }
+ }
+
+ pub(crate) fn to_reg(&self) -> Option<Reg> {
+ match self {
+ Self::Reg { reg } => Some(*reg),
+ _ => None,
+ }
+ }
+}
+
+impl PrettyPrint for RegMemImm {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.show_rru_sized(mb_rru, 8)
+ }
+}
+
+impl PrettyPrintSized for RegMemImm {
+ fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
+ match self {
+ Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
+ Self::Mem { addr } => addr.show_rru(mb_rru),
+ Self::Imm { simm32 } => format!("${}", *simm32 as i32),
+ }
+ }
+}
+
+/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
+/// 32, 64, or 128 bit value.
+#[derive(Clone)]
+pub enum RegMem {
+ Reg { reg: Reg },
+ Mem { addr: SyntheticAmode },
+}
+
+impl RegMem {
+ pub(crate) fn reg(reg: Reg) -> Self {
+ debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
+ Self::Reg { reg }
+ }
+ pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
+ Self::Mem { addr: addr.into() }
+ }
+ /// Asserts that in register mode, the reg class is the one that's expected.
+ pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) {
+ if let Self::Reg { reg } = self {
+ debug_assert_eq!(reg.get_class(), expected_reg_class);
+ }
+ }
+ /// Add the regs mentioned by `self` to `collector`.
+ pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
+ match self {
+ RegMem::Reg { reg } => collector.add_use(*reg),
+ RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
+ }
+ }
+ pub(crate) fn to_reg(&self) -> Option<Reg> {
+ match self {
+ RegMem::Reg { reg } => Some(*reg),
+ _ => None,
+ }
+ }
+}
+
+impl From<Writable<Reg>> for RegMem {
+ fn from(r: Writable<Reg>) -> Self {
+ RegMem::reg(r.to_reg())
+ }
+}
+
+impl PrettyPrint for RegMem {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ self.show_rru_sized(mb_rru, 8)
+ }
+}
+
+impl PrettyPrintSized for RegMem {
+ fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
+ match self {
+ RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
+ RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
+ }
+ }
+}
+
+/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
+#[derive(Copy, Clone, PartialEq)]
+pub enum AluRmiROpcode {
+ Add,
+ Sub,
+ And,
+ Or,
+ Xor,
+ /// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
+ Mul,
+}
+
+impl fmt::Debug for AluRmiROpcode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ AluRmiROpcode::Add => "add",
+ AluRmiROpcode::Sub => "sub",
+ AluRmiROpcode::And => "and",
+ AluRmiROpcode::Or => "or",
+ AluRmiROpcode::Xor => "xor",
+ AluRmiROpcode::Mul => "imul",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for AluRmiROpcode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+#[derive(Clone, PartialEq)]
+pub enum UnaryRmROpcode {
+ /// Bit-scan reverse.
+ Bsr,
+ /// Bit-scan forward.
+ Bsf,
+}
+
+impl fmt::Debug for UnaryRmROpcode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
+ UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
+ }
+ }
+}
+
+impl fmt::Display for UnaryRmROpcode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+pub(crate) enum InstructionSet {
+ SSE,
+ SSE2,
+ SSSE3,
+ SSE41,
+ SSE42,
+}
+
+/// Some SSE operations requiring 2 operands r/m and r.
+#[derive(Clone, Copy, PartialEq)]
+pub enum SseOpcode {
+ Addps,
+ Addpd,
+ Addss,
+ Addsd,
+ Andps,
+ Andpd,
+ Andnps,
+ Andnpd,
+ Comiss,
+ Comisd,
+ Cmpps,
+ Cmppd,
+ Cmpss,
+ Cmpsd,
+ Cvtdq2ps,
+ Cvtsd2ss,
+ Cvtsd2si,
+ Cvtsi2ss,
+ Cvtsi2sd,
+ Cvtss2si,
+ Cvtss2sd,
+ Cvttps2dq,
+ Cvttss2si,
+ Cvttsd2si,
+ Divps,
+ Divpd,
+ Divss,
+ Divsd,
+ Insertps,
+ Maxps,
+ Maxpd,
+ Maxss,
+ Maxsd,
+ Minps,
+ Minpd,
+ Minss,
+ Minsd,
+ Movaps,
+ Movapd,
+ Movd,
+ Movdqa,
+ Movdqu,
+ Movlhps,
+ Movmskps,
+ Movmskpd,
+ Movq,
+ Movss,
+ Movsd,
+ Movups,
+ Movupd,
+ Mulps,
+ Mulpd,
+ Mulss,
+ Mulsd,
+ Orps,
+ Orpd,
+ Pabsb,
+ Pabsw,
+ Pabsd,
+ Packsswb,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Paddsb,
+ Paddsw,
+ Paddusb,
+ Paddusw,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pcmpeqb,
+ Pcmpeqw,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpgtb,
+ Pcmpgtw,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pextrb,
+ Pextrw,
+ Pextrd,
+ Pinsrb,
+ Pinsrw,
+ Pinsrd,
+ Pmaxsb,
+ Pmaxsw,
+ Pmaxsd,
+ Pmaxub,
+ Pmaxuw,
+ Pmaxud,
+ Pminsb,
+ Pminsw,
+ Pminsd,
+ Pminub,
+ Pminuw,
+ Pminud,
+ Pmovmskb,
+ Pmulld,
+ Pmullw,
+ Pmuludq,
+ Por,
+ Pshufb,
+ Pshufd,
+ Psllw,
+ Pslld,
+ Psllq,
+ Psraw,
+ Psrad,
+ Psrlw,
+ Psrld,
+ Psrlq,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Psubsb,
+ Psubsw,
+ Psubusb,
+ Psubusw,
+ Ptest,
+ Pxor,
+ Rcpss,
+ Roundss,
+ Roundsd,
+ Rsqrtss,
+ Sqrtps,
+ Sqrtpd,
+ Sqrtss,
+ Sqrtsd,
+ Subps,
+ Subpd,
+ Subss,
+ Subsd,
+ Ucomiss,
+ Ucomisd,
+ Xorps,
+ Xorpd,
+}
+
+impl SseOpcode {
+ /// Which `InstructionSet` is the first supporting this opcode?
+ pub(crate) fn available_from(&self) -> InstructionSet {
+ use InstructionSet::*;
+ match self {
+ SseOpcode::Addps
+ | SseOpcode::Addss
+ | SseOpcode::Andps
+ | SseOpcode::Andnps
+ | SseOpcode::Comiss
+ | SseOpcode::Cmpps
+ | SseOpcode::Cmpss
+ | SseOpcode::Cvtsi2ss
+ | SseOpcode::Cvtss2si
+ | SseOpcode::Cvttss2si
+ | SseOpcode::Divps
+ | SseOpcode::Divss
+ | SseOpcode::Maxps
+ | SseOpcode::Maxss
+ | SseOpcode::Minps
+ | SseOpcode::Minss
+ | SseOpcode::Movaps
+ | SseOpcode::Movlhps
+ | SseOpcode::Movmskps
+ | SseOpcode::Movss
+ | SseOpcode::Movups
+ | SseOpcode::Mulps
+ | SseOpcode::Mulss
+ | SseOpcode::Orps
+ | SseOpcode::Rcpss
+ | SseOpcode::Rsqrtss
+ | SseOpcode::Sqrtps
+ | SseOpcode::Sqrtss
+ | SseOpcode::Subps
+ | SseOpcode::Subss
+ | SseOpcode::Ucomiss
+ | SseOpcode::Xorps => SSE,
+
+ SseOpcode::Addpd
+ | SseOpcode::Addsd
+ | SseOpcode::Andpd
+ | SseOpcode::Andnpd
+ | SseOpcode::Cmppd
+ | SseOpcode::Cmpsd
+ | SseOpcode::Comisd
+ | SseOpcode::Cvtdq2ps
+ | SseOpcode::Cvtsd2ss
+ | SseOpcode::Cvtsd2si
+ | SseOpcode::Cvtsi2sd
+ | SseOpcode::Cvtss2sd
+ | SseOpcode::Cvttps2dq
+ | SseOpcode::Cvttsd2si
+ | SseOpcode::Divpd
+ | SseOpcode::Divsd
+ | SseOpcode::Maxpd
+ | SseOpcode::Maxsd
+ | SseOpcode::Minpd
+ | SseOpcode::Minsd
+ | SseOpcode::Movapd
+ | SseOpcode::Movd
+ | SseOpcode::Movmskpd
+ | SseOpcode::Movq
+ | SseOpcode::Movsd
+ | SseOpcode::Movupd
+ | SseOpcode::Movdqa
+ | SseOpcode::Movdqu
+ | SseOpcode::Mulpd
+ | SseOpcode::Mulsd
+ | SseOpcode::Orpd
+ | SseOpcode::Packsswb
+ | SseOpcode::Paddb
+ | SseOpcode::Paddd
+ | SseOpcode::Paddq
+ | SseOpcode::Paddw
+ | SseOpcode::Paddsb
+ | SseOpcode::Paddsw
+ | SseOpcode::Paddusb
+ | SseOpcode::Paddusw
+ | SseOpcode::Pand
+ | SseOpcode::Pandn
+ | SseOpcode::Pavgb
+ | SseOpcode::Pavgw
+ | SseOpcode::Pcmpeqb
+ | SseOpcode::Pcmpeqw
+ | SseOpcode::Pcmpeqd
+ | SseOpcode::Pcmpgtb
+ | SseOpcode::Pcmpgtw
+ | SseOpcode::Pcmpgtd
+ | SseOpcode::Pextrw
+ | SseOpcode::Pinsrw
+ | SseOpcode::Pmaxsw
+ | SseOpcode::Pmaxub
+ | SseOpcode::Pminsw
+ | SseOpcode::Pminub
+ | SseOpcode::Pmovmskb
+ | SseOpcode::Pmullw
+ | SseOpcode::Pmuludq
+ | SseOpcode::Por
+ | SseOpcode::Pshufd
+ | SseOpcode::Psllw
+ | SseOpcode::Pslld
+ | SseOpcode::Psllq
+ | SseOpcode::Psraw
+ | SseOpcode::Psrad
+ | SseOpcode::Psrlw
+ | SseOpcode::Psrld
+ | SseOpcode::Psrlq
+ | SseOpcode::Psubb
+ | SseOpcode::Psubd
+ | SseOpcode::Psubq
+ | SseOpcode::Psubw
+ | SseOpcode::Psubsb
+ | SseOpcode::Psubsw
+ | SseOpcode::Psubusb
+ | SseOpcode::Psubusw
+ | SseOpcode::Pxor
+ | SseOpcode::Sqrtpd
+ | SseOpcode::Sqrtsd
+ | SseOpcode::Subpd
+ | SseOpcode::Subsd
+ | SseOpcode::Ucomisd
+ | SseOpcode::Xorpd => SSE2,
+
+ SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd | SseOpcode::Pshufb => SSSE3,
+
+ SseOpcode::Insertps
+ | SseOpcode::Pcmpeqq
+ | SseOpcode::Pextrb
+ | SseOpcode::Pextrd
+ | SseOpcode::Pinsrb
+ | SseOpcode::Pinsrd
+ | SseOpcode::Pmaxsb
+ | SseOpcode::Pmaxsd
+ | SseOpcode::Pmaxuw
+ | SseOpcode::Pmaxud
+ | SseOpcode::Pminsb
+ | SseOpcode::Pminsd
+ | SseOpcode::Pminuw
+ | SseOpcode::Pminud
+ | SseOpcode::Pmulld
+ | SseOpcode::Ptest
+ | SseOpcode::Roundss
+ | SseOpcode::Roundsd => SSE41,
+
+ SseOpcode::Pcmpgtq => SSE42,
+ }
+ }
+
+ /// Returns the src operand size for an instruction.
+ pub(crate) fn src_size(&self) -> u8 {
+ match self {
+ SseOpcode::Movd => 4,
+ _ => 8,
+ }
+ }
+}
+
+impl fmt::Debug for SseOpcode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ SseOpcode::Addps => "addps",
+ SseOpcode::Addpd => "addpd",
+ SseOpcode::Addss => "addss",
+ SseOpcode::Addsd => "addsd",
+ SseOpcode::Andpd => "andpd",
+ SseOpcode::Andps => "andps",
+ SseOpcode::Andnps => "andnps",
+ SseOpcode::Andnpd => "andnpd",
+ SseOpcode::Cmpps => "cmpps",
+ SseOpcode::Cmppd => "cmppd",
+ SseOpcode::Cmpss => "cmpss",
+ SseOpcode::Cmpsd => "cmpsd",
+ SseOpcode::Comiss => "comiss",
+ SseOpcode::Comisd => "comisd",
+ SseOpcode::Cvtdq2ps => "cvtdq2ps",
+ SseOpcode::Cvtsd2ss => "cvtsd2ss",
+ SseOpcode::Cvtsd2si => "cvtsd2si",
+ SseOpcode::Cvtsi2ss => "cvtsi2ss",
+ SseOpcode::Cvtsi2sd => "cvtsi2sd",
+ SseOpcode::Cvtss2si => "cvtss2si",
+ SseOpcode::Cvtss2sd => "cvtss2sd",
+ SseOpcode::Cvttps2dq => "cvttps2dq",
+ SseOpcode::Cvttss2si => "cvttss2si",
+ SseOpcode::Cvttsd2si => "cvttsd2si",
+ SseOpcode::Divps => "divps",
+ SseOpcode::Divpd => "divpd",
+ SseOpcode::Divss => "divss",
+ SseOpcode::Divsd => "divsd",
+ SseOpcode::Insertps => "insertps",
+ SseOpcode::Maxps => "maxps",
+ SseOpcode::Maxpd => "maxpd",
+ SseOpcode::Maxss => "maxss",
+ SseOpcode::Maxsd => "maxsd",
+ SseOpcode::Minps => "minps",
+ SseOpcode::Minpd => "minpd",
+ SseOpcode::Minss => "minss",
+ SseOpcode::Minsd => "minsd",
+ SseOpcode::Movaps => "movaps",
+ SseOpcode::Movapd => "movapd",
+ SseOpcode::Movd => "movd",
+ SseOpcode::Movdqa => "movdqa",
+ SseOpcode::Movdqu => "movdqu",
+ SseOpcode::Movlhps => "movlhps",
+ SseOpcode::Movmskps => "movmskps",
+ SseOpcode::Movmskpd => "movmskpd",
+ SseOpcode::Movq => "movq",
+ SseOpcode::Movss => "movss",
+ SseOpcode::Movsd => "movsd",
+ SseOpcode::Movups => "movups",
+ SseOpcode::Movupd => "movupd",
+ SseOpcode::Mulps => "mulps",
+ SseOpcode::Mulpd => "mulpd",
+ SseOpcode::Mulss => "mulss",
+ SseOpcode::Mulsd => "mulsd",
+ SseOpcode::Orpd => "orpd",
+ SseOpcode::Orps => "orps",
+ SseOpcode::Pabsb => "pabsb",
+ SseOpcode::Pabsw => "pabsw",
+ SseOpcode::Pabsd => "pabsd",
+ SseOpcode::Packsswb => "packsswb",
+ SseOpcode::Paddb => "paddb",
+ SseOpcode::Paddd => "paddd",
+ SseOpcode::Paddq => "paddq",
+ SseOpcode::Paddw => "paddw",
+ SseOpcode::Paddsb => "paddsb",
+ SseOpcode::Paddsw => "paddsw",
+ SseOpcode::Paddusb => "paddusb",
+ SseOpcode::Paddusw => "paddusw",
+ SseOpcode::Pand => "pand",
+ SseOpcode::Pandn => "pandn",
+ SseOpcode::Pavgb => "pavgb",
+ SseOpcode::Pavgw => "pavgw",
+ SseOpcode::Pcmpeqb => "pcmpeqb",
+ SseOpcode::Pcmpeqw => "pcmpeqw",
+ SseOpcode::Pcmpeqd => "pcmpeqd",
+ SseOpcode::Pcmpeqq => "pcmpeqq",
+ SseOpcode::Pcmpgtb => "pcmpgtb",
+ SseOpcode::Pcmpgtw => "pcmpgtw",
+ SseOpcode::Pcmpgtd => "pcmpgtd",
+ SseOpcode::Pcmpgtq => "pcmpgtq",
+ SseOpcode::Pextrb => "pextrb",
+ SseOpcode::Pextrw => "pextrw",
+ SseOpcode::Pextrd => "pextrd",
+ SseOpcode::Pinsrb => "pinsrb",
+ SseOpcode::Pinsrw => "pinsrw",
+ SseOpcode::Pinsrd => "pinsrd",
+ SseOpcode::Pmaxsb => "pmaxsb",
+ SseOpcode::Pmaxsw => "pmaxsw",
+ SseOpcode::Pmaxsd => "pmaxsd",
+ SseOpcode::Pmaxub => "pmaxub",
+ SseOpcode::Pmaxuw => "pmaxuw",
+ SseOpcode::Pmaxud => "pmaxud",
+ SseOpcode::Pminsb => "pminsb",
+ SseOpcode::Pminsw => "pminsw",
+ SseOpcode::Pminsd => "pminsd",
+ SseOpcode::Pminub => "pminub",
+ SseOpcode::Pminuw => "pminuw",
+ SseOpcode::Pminud => "pminud",
+ SseOpcode::Pmovmskb => "pmovmskb",
+ SseOpcode::Pmulld => "pmulld",
+ SseOpcode::Pmullw => "pmullw",
+ SseOpcode::Pmuludq => "pmuludq",
+ SseOpcode::Por => "por",
+ SseOpcode::Pshufb => "pshufb",
+ SseOpcode::Pshufd => "pshufd",
+ SseOpcode::Psllw => "psllw",
+ SseOpcode::Pslld => "pslld",
+ SseOpcode::Psllq => "psllq",
+ SseOpcode::Psraw => "psraw",
+ SseOpcode::Psrad => "psrad",
+ SseOpcode::Psrlw => "psrlw",
+ SseOpcode::Psrld => "psrld",
+ SseOpcode::Psrlq => "psrlq",
+ SseOpcode::Psubb => "psubb",
+ SseOpcode::Psubd => "psubd",
+ SseOpcode::Psubq => "psubq",
+ SseOpcode::Psubw => "psubw",
+ SseOpcode::Psubsb => "psubsb",
+ SseOpcode::Psubsw => "psubsw",
+ SseOpcode::Psubusb => "psubusb",
+ SseOpcode::Psubusw => "psubusw",
+ SseOpcode::Ptest => "ptest",
+ SseOpcode::Pxor => "pxor",
+ SseOpcode::Rcpss => "rcpss",
+ SseOpcode::Roundss => "roundss",
+ SseOpcode::Roundsd => "roundsd",
+ SseOpcode::Rsqrtss => "rsqrtss",
+ SseOpcode::Sqrtps => "sqrtps",
+ SseOpcode::Sqrtpd => "sqrtpd",
+ SseOpcode::Sqrtss => "sqrtss",
+ SseOpcode::Sqrtsd => "sqrtsd",
+ SseOpcode::Subps => "subps",
+ SseOpcode::Subpd => "subpd",
+ SseOpcode::Subss => "subss",
+ SseOpcode::Subsd => "subsd",
+ SseOpcode::Ucomiss => "ucomiss",
+ SseOpcode::Ucomisd => "ucomisd",
+ SseOpcode::Xorps => "xorps",
+ SseOpcode::Xorpd => "xorpd",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for SseOpcode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
+/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to which
+/// values can be extended.
+#[derive(Clone, PartialEq)]
+pub enum ExtKind {
+ None,
+ SignExtend,
+ ZeroExtend,
+}
+
+/// These indicate ways of extending (widening) a value, using the Intel
+/// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
+#[derive(Clone, PartialEq)]
+pub enum ExtMode {
+ /// Byte -> Longword.
+ BL,
+ /// Byte -> Quadword.
+ BQ,
+ /// Word -> Longword.
+ WL,
+ /// Word -> Quadword.
+ WQ,
+ /// Longword -> Quadword.
+ LQ,
+}
+
+impl ExtMode {
+ /// Calculate the `ExtMode` from passed bit lengths of the from/to types.
+ pub(crate) fn new(from_bits: u16, to_bits: u16) -> Option<ExtMode> {
+ match (from_bits, to_bits) {
+ (1, 8) | (1, 16) | (1, 32) | (8, 16) | (8, 32) => Some(ExtMode::BL),
+ (1, 64) | (8, 64) => Some(ExtMode::BQ),
+ (16, 32) => Some(ExtMode::WL),
+ (16, 64) => Some(ExtMode::WQ),
+ (32, 64) => Some(ExtMode::LQ),
+ _ => None,
+ }
+ }
+
+ /// Return the source register size in bytes.
+ pub(crate) fn src_size(&self) -> u8 {
+ match self {
+ ExtMode::BL | ExtMode::BQ => 1,
+ ExtMode::WL | ExtMode::WQ => 2,
+ ExtMode::LQ => 4,
+ }
+ }
+
+ /// Return the destination register size in bytes.
+ pub(crate) fn dst_size(&self) -> u8 {
+ match self {
+ ExtMode::BL | ExtMode::WL => 4,
+ ExtMode::BQ | ExtMode::WQ | ExtMode::LQ => 8,
+ }
+ }
+}
+
+impl fmt::Debug for ExtMode {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ ExtMode::BL => "bl",
+ ExtMode::BQ => "bq",
+ ExtMode::WL => "wl",
+ ExtMode::WQ => "wq",
+ ExtMode::LQ => "lq",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for ExtMode {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// These indicate the form of a scalar shift or rotate: shift left, logical (unsigned) shift
+/// right, arithmetic (signed) shift right, rotate left, or rotate right.
+#[derive(Clone)]
+pub enum ShiftKind {
+ ShiftLeft,
+ /// Inserts zeros in the most significant bits.
+ ShiftRightLogical,
+ /// Replicates the sign bit in the most significant bits.
+ ShiftRightArithmetic,
+ RotateLeft,
+ RotateRight,
+}
+
+impl fmt::Debug for ShiftKind {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ ShiftKind::ShiftLeft => "shl",
+ ShiftKind::ShiftRightLogical => "shr",
+ ShiftKind::ShiftRightArithmetic => "sar",
+ ShiftKind::RotateLeft => "rol",
+ ShiftKind::RotateRight => "ror",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for ShiftKind {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// The kind of division or remainder instruction this is.
+#[derive(Clone)]
+pub enum DivOrRemKind {
+ SignedDiv,
+ UnsignedDiv,
+ SignedRem,
+ UnsignedRem,
+}
+
+impl DivOrRemKind {
+ pub(crate) fn is_signed(&self) -> bool {
+ match self {
+ DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true,
+ _ => false,
+ }
+ }
+
+ pub(crate) fn is_div(&self) -> bool {
+ match self {
+ DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true,
+ _ => false,
+ }
+ }
+}
+
+/// These indicate condition code tests. Not all are represented since not all are useful in
+/// compiler-generated code.
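+/// The discriminant values are the hardware condition-code encodings (the 4-bit "tttn" field),
+/// so `get_enc` can return them directly.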
+#[derive(Copy, Clone)]
+#[repr(u8)]
+pub enum CC {
+ /// overflow
+ O = 0,
+ /// no overflow
+ NO = 1,
+
+ /// < unsigned
+ B = 2,
+ /// >= unsigned
+ NB = 3,
+
+ /// zero
+ Z = 4,
+ /// not-zero
+ NZ = 5,
+
+ /// <= unsigned
+ BE = 6,
+ /// > unsigned
+ NBE = 7,
+
+ /// negative
+ S = 8,
+ /// not-negative
+ NS = 9,
+
+ /// < signed
+ L = 12,
+ /// >= signed
+ NL = 13,
+
+ /// <= signed
+ LE = 14,
+ /// > signed
+ NLE = 15,
+
+ /// parity
+ P = 10,
+
+ /// not parity
+ NP = 11,
+}
+
+impl CC {
+ pub(crate) fn from_intcc(intcc: IntCC) -> Self {
+ match intcc {
+ IntCC::Equal => CC::Z,
+ IntCC::NotEqual => CC::NZ,
+ IntCC::SignedGreaterThanOrEqual => CC::NL,
+ IntCC::SignedGreaterThan => CC::NLE,
+ IntCC::SignedLessThanOrEqual => CC::LE,
+ IntCC::SignedLessThan => CC::L,
+ IntCC::UnsignedGreaterThanOrEqual => CC::NB,
+ IntCC::UnsignedGreaterThan => CC::NBE,
+ IntCC::UnsignedLessThanOrEqual => CC::BE,
+ IntCC::UnsignedLessThan => CC::B,
+ IntCC::Overflow => CC::O,
+ IntCC::NotOverflow => CC::NO,
+ }
+ }
+
+ pub(crate) fn invert(&self) -> Self {
+ match self {
+ CC::O => CC::NO,
+ CC::NO => CC::O,
+
+ CC::B => CC::NB,
+ CC::NB => CC::B,
+
+ CC::Z => CC::NZ,
+ CC::NZ => CC::Z,
+
+ CC::BE => CC::NBE,
+ CC::NBE => CC::BE,
+
+ CC::S => CC::NS,
+ CC::NS => CC::S,
+
+ CC::L => CC::NL,
+ CC::NL => CC::L,
+
+ CC::LE => CC::NLE,
+ CC::NLE => CC::LE,
+
+ CC::P => CC::NP,
+ CC::NP => CC::P,
+ }
+ }
+
+ pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self {
+ match floatcc {
+ FloatCC::Ordered => CC::NP,
+ FloatCC::Unordered => CC::P,
+ // Alias for NE
+ FloatCC::OrderedNotEqual => CC::NZ,
+ // Alias for E
+ FloatCC::UnorderedOrEqual => CC::Z,
+ // Alias for A
+ FloatCC::GreaterThan => CC::NBE,
+ // Alias for AE
+ FloatCC::GreaterThanOrEqual => CC::NB,
+ FloatCC::UnorderedOrLessThan => CC::B,
+ FloatCC::UnorderedOrLessThanOrEqual => CC::BE,
+ FloatCC::Equal
+ | FloatCC::NotEqual
+ | FloatCC::LessThan
+ | FloatCC::LessThanOrEqual
+ | FloatCC::UnorderedOrGreaterThan
+ | FloatCC::UnorderedOrGreaterThanOrEqual => panic!(
+ "{:?} can't be lowered to a CC code; treat as special case.",
+ floatcc
+ ),
+ }
+ }
+
+ pub(crate) fn get_enc(self) -> u8 {
+ self as u8
+ }
+}
+
+impl fmt::Debug for CC {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ let name = match self {
+ CC::O => "o",
+ CC::NO => "no",
+ CC::B => "b",
+ CC::NB => "nb",
+ CC::Z => "z",
+ CC::NZ => "nz",
+ CC::BE => "be",
+ CC::NBE => "nbe",
+ CC::S => "s",
+ CC::NS => "ns",
+ CC::L => "l",
+ CC::NL => "nl",
+ CC::LE => "le",
+ CC::NLE => "nle",
+ CC::P => "p",
+ CC::NP => "np",
+ };
+ write!(fmt, "{}", name)
+ }
+}
+
+impl fmt::Display for CC {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ fmt::Debug::fmt(self, f)
+ }
+}
+
+/// Encodes the ways that floats can be compared. This is used as an immediate in comparisons such
+/// as `cmpps`; it is distinguished from other float comparisons (e.g. `ucomiss`) in that those
+/// report their result through EFLAGS, whereas [FcmpImm] is passed as an immediate operand.
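+/// The discriminants are the predicate values expected in the imm8 operand of `cmpps`, `cmppd`,
+/// `cmpss` and `cmpsd`.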
+pub(crate) enum FcmpImm {
+ Equal = 0x00,
+ LessThan = 0x01,
+ LessThanOrEqual = 0x02,
+ Unordered = 0x03,
+ NotEqual = 0x04,
+ UnorderedOrGreaterThanOrEqual = 0x05,
+ UnorderedOrGreaterThan = 0x06,
+ Ordered = 0x07,
+}
+
+impl FcmpImm {
+ pub(crate) fn encode(self) -> u8 {
+ self as u8
+ }
+}
+
+impl From<FloatCC> for FcmpImm {
+ fn from(cond: FloatCC) -> Self {
+ match cond {
+ FloatCC::Equal => FcmpImm::Equal,
+ FloatCC::LessThan => FcmpImm::LessThan,
+ FloatCC::LessThanOrEqual => FcmpImm::LessThanOrEqual,
+ FloatCC::Unordered => FcmpImm::Unordered,
+ FloatCC::NotEqual => FcmpImm::NotEqual,
+ FloatCC::UnorderedOrGreaterThanOrEqual => FcmpImm::UnorderedOrGreaterThanOrEqual,
+ FloatCC::UnorderedOrGreaterThan => FcmpImm::UnorderedOrGreaterThan,
+ FloatCC::Ordered => FcmpImm::Ordered,
+ _ => panic!("unable to create comparison predicate for {}", cond),
+ }
+ }
+}
+
+/// An operand's size in bits.
+#[derive(Clone, Copy, PartialEq)]
+pub enum OperandSize {
+ Size32,
+ Size64,
+}
+
+impl OperandSize {
+ pub(crate) fn from_bytes(num_bytes: u32) -> Self {
+ match num_bytes {
+ 1 | 2 | 4 => OperandSize::Size32,
+ 8 => OperandSize::Size64,
+ _ => unreachable!(),
+ }
+ }
+
+ pub(crate) fn to_bytes(&self) -> u8 {
+ match self {
+ Self::Size32 => 4,
+ Self::Size64 => 8,
+ }
+ }
+
+ pub(crate) fn to_bits(&self) -> u8 {
+ match self {
+ Self::Size32 => 32,
+ Self::Size64 => 64,
+ }
+ }
+}
+
+/// An x64 memory fence kind.
+#[derive(Clone)]
+#[allow(dead_code)]
+pub enum FenceKind {
+ /// `mfence` instruction ("Memory Fence")
+ MFence,
+ /// `lfence` instruction ("Load Fence")
+ LFence,
+ /// `sfence` instruction ("Store Fence")
+ SFence,
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs
new file mode 100644
index 0000000000..dd4125a2da
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit.rs
@@ -0,0 +1,2819 @@
+use crate::binemit::{Addend, Reloc};
+use crate::ir::immediates::{Ieee32, Ieee64};
+use crate::ir::TrapCode;
+use crate::isa::x64::inst::args::*;
+use crate::isa::x64::inst::*;
+use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
+use core::convert::TryInto;
+use log::debug;
+use regalloc::{Reg, RegClass, Writable};
+
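+// These two helpers check whether the low 8 bits of `x`, once sign-extended, reproduce the full
+// 32-/64-bit value -- i.e. whether `x` can be encoded as a sign-extended imm8. The arithmetic
+// `(xs << N) >> N` shifts sign-extend the low byte in place: 0xFFFF_FFF8 (-8) passes, while
+// 0x80 (+128) does not.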
+fn low8_will_sign_extend_to_64(x: u32) -> bool {
+ let xs = (x as i32) as i64;
+ xs == ((xs << 56) >> 56)
+}
+
+fn low8_will_sign_extend_to_32(x: u32) -> bool {
+ let xs = x as i32;
+ xs == ((xs << 24) >> 24)
+}
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+// For all of the routines that take both a memory-or-reg operand (sometimes
+// called "E" in the Intel documentation) and a reg-only operand ("G" in
+// Intelese), the order is always G first, then E.
+//
+// "enc" in the following means "hardware register encoding number".
+
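+// A ModR/M byte is laid out as: bits 7..6 = mod, bits 5..3 = reg (the "G" operand, or an opcode
+// extension), bits 2..0 = r/m (the "E" operand). For example, encode_modrm(0b11, 0 /* rax */,
+// 1 /* rcx */) yields 0xC1.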
+#[inline(always)]
+fn encode_modrm(m0d: u8, enc_reg_g: u8, rm_e: u8) -> u8 {
+ debug_assert!(m0d < 4);
+ debug_assert!(enc_reg_g < 8);
+ debug_assert!(rm_e < 8);
+ ((m0d & 3) << 6) | ((enc_reg_g & 7) << 3) | (rm_e & 7)
+}
+
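+// A SIB byte is laid out as: bits 7..6 = scale (the `shift` here), bits 5..3 = index register,
+// bits 2..0 = base register.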
+#[inline(always)]
+fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 {
+ debug_assert!(shift < 4);
+ debug_assert!(enc_index < 8);
+ debug_assert!(enc_base < 8);
+ ((shift & 3) << 6) | ((enc_index & 7) << 3) | (enc_base & 7)
+}
+
+/// Get the encoding number of a GPR.
+#[inline(always)]
+fn int_reg_enc(reg: Reg) -> u8 {
+ debug_assert!(reg.is_real());
+ debug_assert_eq!(reg.get_class(), RegClass::I64);
+ reg.get_hw_encoding()
+}
+
+/// Get the encoding number of any register.
+#[inline(always)]
+fn reg_enc(reg: Reg) -> u8 {
+ debug_assert!(reg.is_real());
+ reg.get_hw_encoding()
+}
+
+/// A small bit field to record a REX prefix specification:
+/// - bit 0 set to 1 indicates REX.W must be 0 (cleared).
+/// - bit 1 set to 1 indicates the REX prefix must always be emitted.
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+struct RexFlags(u8);
+
+impl RexFlags {
+ /// By default, set the W field, and don't always emit.
+ #[inline(always)]
+ fn set_w() -> Self {
+ Self(0)
+ }
+ /// Creates a new RexFlags value for which the REX.W bit will be cleared.
+ #[inline(always)]
+ fn clear_w() -> Self {
+ Self(1)
+ }
+
+ #[inline(always)]
+ fn always_emit(&mut self) -> &mut Self {
+ self.0 = self.0 | 2;
+ self
+ }
+
+ #[inline(always)]
+ fn must_clear_w(&self) -> bool {
+ (self.0 & 1) != 0
+ }
+ #[inline(always)]
+ fn must_always_emit(&self) -> bool {
+ (self.0 & 2) != 0
+ }
+
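+ // A REX prefix byte has the form 0b0100_WRXB: W selects a 64-bit operand size, while R, X and
+ // B supply the fourth (high) bit of the ModR/M reg field, the SIB index, and the ModR/M r/m
+ // (or SIB base) field respectively. 0x40 is the "empty" prefix, which may be omitted unless
+ // the instruction requires it anyway (e.g. to address SPL/BPL/SIL/DIL as byte registers).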
+ #[inline(always)]
+ fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
+ let w = if self.must_clear_w() { 0 } else { 1 };
+ let r = (enc_g >> 3) & 1;
+ let x = 0;
+ let b = (enc_e >> 3) & 1;
+ let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
+ if rex != 0x40 || self.must_always_emit() {
+ sink.put1(rex);
+ }
+ }
+
+ #[inline(always)]
+ fn emit_three_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_index: u8, enc_base: u8) {
+ let w = if self.must_clear_w() { 0 } else { 1 };
+ let r = (enc_g >> 3) & 1;
+ let x = (enc_index >> 3) & 1;
+ let b = (enc_base >> 3) & 1;
+ let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
+ if rex != 0x40 || self.must_always_emit() {
+ sink.put1(rex);
+ }
+ }
+}
+
+/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
+/// covers only the small set of possibilities that we actually need.
+enum LegacyPrefixes {
+ /// No prefix bytes
+ None,
+ /// Operand Size Override -- here, denoting "16-bit operation"
+ _66,
+ /// The Lock prefix
+ _F0,
+ /// Operand size override and Lock
+ _66F0,
+ /// REPNE, but no specific meaning here -- is just an opcode extension
+ _F2,
+ /// REP/REPE, but no specific meaning here -- is just an opcode extension
+ _F3,
+}
+
+impl LegacyPrefixes {
+ #[inline(always)]
+ fn emit(&self, sink: &mut MachBuffer<Inst>) {
+ match self {
+ LegacyPrefixes::_66 => sink.put1(0x66),
+ LegacyPrefixes::_F0 => sink.put1(0xF0),
+ LegacyPrefixes::_66F0 => {
+ // I don't think the order matters, but in any case, this is the same order that
+ // the GNU assembler uses.
+ sink.put1(0x66);
+ sink.put1(0xF0);
+ }
+ LegacyPrefixes::_F2 => sink.put1(0xF2),
+ LegacyPrefixes::_F3 => sink.put1(0xF3),
+ LegacyPrefixes::None => (),
+ }
+ }
+}
+
+/// This is the core 'emit' function for instructions that reference memory.
+///
+/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
+/// create and emit:
+/// - first the legacy prefixes, if any
+/// - then the REX prefix, if needed
+/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
+/// - then the MOD/RM byte,
+/// - then optionally, a SIB byte,
+/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
+///
+/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
+/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
+/// instructions will require their own emitter functions.
+///
+/// This will also work for 32-bit x86 instructions, assuming no REX prefix is provided.
+///
+/// The opcode bytes are written in big-endian order (most-significant byte first) for the
+/// convenience of callers. For example, if the opcode bytes to be emitted are, in this order,
+/// F3 0F 27, then the caller should pass `opcodes` == 0xF3_0F_27 and `num_opcodes` == 3.
+///
+/// The register operand is represented here not as a `Reg` but as its hardware encoding, `enc_g`.
+/// `rex` can specify special handling for the REX prefix. By default, the REX prefix will
+/// indicate a 64-bit operation and will be deleted if it is redundant (0x40). Note that for a
+/// 64-bit operation, the REX prefix will normally never be redundant, since REX.W must be 1 to
+/// indicate a 64-bit operation.
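+///
+/// As a concrete illustration, a 64-bit load such as `mov 16(%rdi), %rax` -- emitted via
+/// `Inst::Mov64MR` below with opcode 0x8B -- produces the bytes 48 8B 47 10: REX.W, the opcode,
+/// ModRM 0x47 (mod=01, reg=rax, r/m=rdi) and the 8-bit displacement.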
+fn emit_std_enc_mem(
+ sink: &mut MachBuffer<Inst>,
+ state: &EmitState,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ mut num_opcodes: usize,
+ enc_g: u8,
+ mem_e: &Amode,
+ rex: RexFlags,
+) {
+ // General comment for this function: the registers in `mem_e` must be
+ // 64-bit integer registers, because they are part of an address
+ // expression. But `enc_g` can be derived from a register of any class.
+
+ let srcloc = state.cur_srcloc();
+ if srcloc != SourceLoc::default() && mem_e.can_trap() {
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+ }
+
+ prefixes.emit(sink);
+
+ match mem_e {
+ Amode::ImmReg { simm32, base, .. } => {
+ // First, the REX byte.
+ let enc_e = int_reg_enc(*base);
+ rex.emit_two_op(sink, enc_g, enc_e);
+
+ // Now the opcode(s). These include any other prefixes the caller
+ // hands to us.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // Now the mod/rm and associated immediates. This is
+ // significantly complicated due to the multiple special cases.
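+            // As a reminder of why the special cases exist: an r/m value of 0b100 (rsp/r12)
+            // means "a SIB byte follows", so those registers need the SIB byte 0x24 (scale=0,
+            // index=none, base=0b100); and mod=00 with r/m=0b101 (rbp/r13) means
+            // disp32-only/RIP-relative, so those registers must use the mod=01 form with a zero
+            // disp8 even when the offset is zero.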
+ if *simm32 == 0
+ && enc_e != regs::ENC_RSP
+ && enc_e != regs::ENC_RBP
+ && enc_e != regs::ENC_R12
+ && enc_e != regs::ENC_R13
+ {
+ // FIXME JRS 2020Feb11: those four tests can surely be
+ // replaced by a single mask-and-compare check. We should do
+ // that because this routine is likely to be hot.
+ sink.put1(encode_modrm(0, enc_g & 7, enc_e & 7));
+ } else if *simm32 == 0 && (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12) {
+ sink.put1(encode_modrm(0, enc_g & 7, 4));
+ sink.put1(0x24);
+ } else if low8_will_sign_extend_to_32(*simm32)
+ && enc_e != regs::ENC_RSP
+ && enc_e != regs::ENC_R12
+ {
+ sink.put1(encode_modrm(1, enc_g & 7, enc_e & 7));
+ sink.put1((simm32 & 0xFF) as u8);
+ } else if enc_e != regs::ENC_RSP && enc_e != regs::ENC_R12 {
+ sink.put1(encode_modrm(2, enc_g & 7, enc_e & 7));
+ sink.put4(*simm32);
+ } else if (enc_e == regs::ENC_RSP || enc_e == regs::ENC_R12)
+ && low8_will_sign_extend_to_32(*simm32)
+ {
+ // REX.B distinguishes RSP from R12
+ sink.put1(encode_modrm(1, enc_g & 7, 4));
+ sink.put1(0x24);
+ sink.put1((simm32 & 0xFF) as u8);
+ } else if enc_e == regs::ENC_R12 || enc_e == regs::ENC_RSP {
+                // TODO: still awaiting a test case that exercises the RSP variant of this path.
+ // REX.B distinguishes RSP from R12
+ sink.put1(encode_modrm(2, enc_g & 7, 4));
+ sink.put1(0x24);
+ sink.put4(*simm32);
+ } else {
+ unreachable!("ImmReg");
+ }
+ }
+
+ Amode::ImmRegRegShift {
+ simm32,
+ base: reg_base,
+ index: reg_index,
+ shift,
+ ..
+ } => {
+ let enc_base = int_reg_enc(*reg_base);
+ let enc_index = int_reg_enc(*reg_index);
+
+ // The rex byte.
+ rex.emit_three_op(sink, enc_g, enc_index, enc_base);
+
+ // All other prefixes and opcodes.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // modrm, SIB, immediates.
+ if low8_will_sign_extend_to_32(*simm32) && enc_index != regs::ENC_RSP {
+ sink.put1(encode_modrm(1, enc_g & 7, 4));
+ sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
+ sink.put1(*simm32 as u8);
+ } else if enc_index != regs::ENC_RSP {
+ sink.put1(encode_modrm(2, enc_g & 7, 4));
+ sink.put1(encode_sib(*shift, enc_index & 7, enc_base & 7));
+ sink.put4(*simm32);
+ } else {
+ panic!("ImmRegRegShift");
+ }
+ }
+
+ Amode::RipRelative { ref target } => {
+ // First, the REX byte, with REX.B = 0.
+ rex.emit_two_op(sink, enc_g, 0);
+
+ // Now the opcode(s). These include any other prefixes the caller
+ // hands to us.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // RIP-relative is mod=00, rm=101.
+ sink.put1(encode_modrm(0, enc_g & 7, 0b101));
+
+ let offset = sink.cur_offset();
+ sink.use_label_at_offset(offset, *target, LabelUse::JmpRel32);
+ sink.put4(0);
+ }
+ }
+}
+
+/// This is the core 'emit' function for instructions that do not reference memory.
+///
+/// This is conceptually the same as `emit_std_enc_mem` above, except it is for the case where the
+/// E operand is a register rather than memory. Hence it is much simpler.
+fn emit_std_enc_enc(
+ sink: &mut MachBuffer<Inst>,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ mut num_opcodes: usize,
+ enc_g: u8,
+ enc_e: u8,
+ rex: RexFlags,
+) {
+ // EncG and EncE can be derived from registers of any class, and they
+ // don't even have to be from the same class. For example, for an
+ // integer-to-FP conversion insn, one might be RegClass::I64 and the other
+ // RegClass::V128.
+
+ // The legacy prefixes.
+ prefixes.emit(sink);
+
+ // The rex byte.
+ rex.emit_two_op(sink, enc_g, enc_e);
+
+ // All other prefixes and opcodes.
+ while num_opcodes > 0 {
+ num_opcodes -= 1;
+ sink.put1(((opcodes >> (num_opcodes << 3)) & 0xFF) as u8);
+ }
+
+ // Now the mod/rm byte. The instruction we're generating doesn't access
+ // memory, so there is no SIB byte or immediate -- we're done.
+ sink.put1(encode_modrm(3, enc_g & 7, enc_e & 7));
+}
+
+// These are merely wrappers for the above two functions that facilitate passing
+// actual `Reg`s rather than their encodings.
+
+fn emit_std_reg_mem(
+ sink: &mut MachBuffer<Inst>,
+ state: &EmitState,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ num_opcodes: usize,
+ reg_g: Reg,
+ mem_e: &Amode,
+ rex: RexFlags,
+) {
+ let enc_g = reg_enc(reg_g);
+ emit_std_enc_mem(
+ sink,
+ state,
+ prefixes,
+ opcodes,
+ num_opcodes,
+ enc_g,
+ mem_e,
+ rex,
+ );
+}
+
+fn emit_std_reg_reg(
+ sink: &mut MachBuffer<Inst>,
+ prefixes: LegacyPrefixes,
+ opcodes: u32,
+ num_opcodes: usize,
+ reg_g: Reg,
+ reg_e: Reg,
+ rex: RexFlags,
+) {
+ let enc_g = reg_enc(reg_g);
+ let enc_e = reg_enc(reg_e);
+ emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
+}
+
+/// Write a suitable number of bytes of an immediate to the sink. Note that a `size` of 8 still
+/// emits only four bytes, relying on the instruction to sign-extend the imm32.
+fn emit_simm(sink: &mut MachBuffer<Inst>, size: u8, simm32: u32) {
+ match size {
+ 8 | 4 => sink.put4(simm32),
+ 2 => sink.put2(simm32 as u16),
+ 1 => sink.put1(simm32 as u8),
+ _ => unreachable!(),
+ }
+}
+
+/// A small helper to generate a signed conversion instruction.
+fn emit_signed_cvt(
+ sink: &mut MachBuffer<Inst>,
+ info: &EmitInfo,
+ state: &mut EmitState,
+ src: Reg,
+ dst: Writable<Reg>,
+ to_f64: bool,
+) {
+    // A plain signed conversion (cvtsi2ss/cvtsi2sd) from a 64-bit GPR; callers rely on this
+    // doing the right thing on its own for non-negative inputs.
+ let op = if to_f64 {
+ SseOpcode::Cvtsi2sd
+ } else {
+ SseOpcode::Cvtsi2ss
+ };
+ let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst);
+ inst.emit(sink, info, state);
+}
+
+/// Emits a one-way conditional jump (Jcc rel32) if `cc` is set (true).
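+///
+/// The jump is encoded as the two-byte opcode 0F 8x followed by a 32-bit displacement, so the
+/// label fixup is recorded at `cond_start + 2`, where the displacement field begins.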
+fn one_way_jmp(sink: &mut MachBuffer<Inst>, cc: CC, label: MachLabel) {
+ let cond_start = sink.cur_offset();
+ let cond_disp_off = cond_start + 2;
+ sink.use_label_at_offset(cond_disp_off, label, LabelUse::JmpRel32);
+ sink.put1(0x0F);
+ sink.put1(0x80 + cc.get_enc());
+ sink.put4(0x0);
+}
+
+/// Emits a relocation, attaching the current source location as well.
+fn emit_reloc(
+ sink: &mut MachBuffer<Inst>,
+ state: &EmitState,
+ kind: Reloc,
+ name: &ExternalName,
+ addend: Addend,
+) {
+ let srcloc = state.cur_srcloc();
+ sink.add_reloc(srcloc, kind, name, addend);
+}
+
+/// The top-level emit function.
+///
+/// Important! Do not add improved (shortened) encoding cases to existing
+/// instructions without also adding tests for those improved encodings. That
+/// is a dangerous game that leads to hard-to-track-down errors in the emitted
+/// code.
+///
+/// For all instructions, make sure to have test coverage for all of the
+/// following situations. Do this by creating the cross product resulting from
+/// applying the following rules to each operand:
+///
+/// (1) for any insn that mentions a register: one test using a register from
+/// the group [rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi] and a second one
+/// using a register from the group [r8, r9, r10, r11, r12, r13, r14, r15].
+/// This helps detect incorrect REX prefix construction.
+///
+/// (2) for any insn that mentions a byte register: one test for each of the
+/// four encoding groups [al, cl, dl, bl], [spl, bpl, sil, dil],
+/// [r8b .. r11b] and [r12b .. r15b]. This checks that
+/// apparently-redundant REX prefixes are retained when required.
+///
+/// (3) for any insn that contains an immediate field, check the following
+/// cases: field is zero, field is in simm8 range (-128 .. 127), field is
+/// in simm32 range (-0x8000_0000 .. 0x7FFF_FFFF). This is because some
+/// instructions that require a 32-bit immediate have a short-form encoding
+/// when the imm is in simm8 range.
+///
+/// Rules (1), (2) and (3) don't apply for registers within address expressions
+/// (`Addr`s). Those are already pretty well tested, and the registers in them
+/// don't have any effect on the containing instruction (apart from possibly
+/// requiring REX prefix bits).
+///
+/// When choosing registers for a test, avoid using registers with the same
+/// offset within a given group. For example, don't use rax and r8, since they
+/// both have the lowest 3 bits as 000, and so the test won't detect errors
+/// where those 3-bit register sub-fields are confused by the emitter. Instead
+/// use (eg) rax (lo3 = 000) and r9 (lo3 = 001). Similarly, don't use (eg) cl
+/// and bpl since they have the same offset in their group; use instead (eg) cl
+/// and sil.
+///
+/// For all instructions, also add a test that uses only low-half registers
+/// (rax .. rdi, xmm0 .. xmm7) etc, so as to check that any redundant REX
+/// prefixes are correctly omitted. This low-half restriction must apply to
+/// _all_ registers in the insn, even those in address expressions.
+///
+/// Following these rules creates large numbers of test cases, but it's the
+/// only way to make the emitter reliable.
+///
+/// Known possible improvements:
+///
+/// * there's a shorter encoding for shl/shr/sar when the shift amount is exactly 1. (Do we
+/// care?)
+pub(crate) fn emit(
+ inst: &Inst,
+ sink: &mut MachBuffer<Inst>,
+ info: &EmitInfo,
+ state: &mut EmitState,
+) {
+ if let Some(iset_requirement) = inst.isa_requirement() {
+ match iset_requirement {
+ // Cranelift assumes SSE2 at least.
+ InstructionSet::SSE | InstructionSet::SSE2 => {}
+ InstructionSet::SSSE3 => assert!(info.isa_flags.has_ssse3()),
+ InstructionSet::SSE41 => assert!(info.isa_flags.has_sse41()),
+ InstructionSet::SSE42 => assert!(info.isa_flags.has_sse42()),
+ }
+ }
+
+ match inst {
+ Inst::AluRmiR {
+ is_64,
+ op,
+ src,
+ dst: reg_g,
+ } => {
+ let rex = if *is_64 {
+ RexFlags::set_w()
+ } else {
+ RexFlags::clear_w()
+ };
+
+ if *op == AluRmiROpcode::Mul {
+ // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
+ // we have to special-case it.
+ match src {
+ RegMemImm::Reg { reg: reg_e } => {
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ 0x0FAF,
+ 2,
+ reg_g.to_reg(),
+ *reg_e,
+ rex,
+ );
+ }
+
+ RegMemImm::Mem { addr } => {
+ let amode = addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x0FAF,
+ 2,
+ reg_g.to_reg(),
+ &amode,
+ rex,
+ );
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+ let opcode = if use_imm8 { 0x6B } else { 0x69 };
+ // Yes, really, reg_g twice.
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcode,
+ 1,
+ reg_g.to_reg(),
+ reg_g.to_reg(),
+ rex,
+ );
+ emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32);
+ }
+ }
+ } else {
+ let (opcode_r, opcode_m, subopcode_i) = match op {
+ AluRmiROpcode::Add => (0x01, 0x03, 0),
+ AluRmiROpcode::Sub => (0x29, 0x2B, 5),
+ AluRmiROpcode::And => (0x21, 0x23, 4),
+ AluRmiROpcode::Or => (0x09, 0x0B, 1),
+ AluRmiROpcode::Xor => (0x31, 0x33, 6),
+ AluRmiROpcode::Mul => panic!("unreachable"),
+ };
+
+ match src {
+ RegMemImm::Reg { reg: reg_e } => {
+ // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
+ // duality). Do this too, so as to be able to compare generated machine
+ // code easily.
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcode_r,
+ 1,
+ *reg_e,
+ reg_g.to_reg(),
+ rex,
+ );
+ // NB: if this is ever extended to handle byte size ops, be sure to retain
+ // redundant REX prefixes.
+ }
+
+ RegMemImm::Mem { addr } => {
+ // Here we revert to the "normal" G-E ordering.
+ let amode = addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ opcode_m,
+ 1,
+ reg_g.to_reg(),
+ &amode,
+ rex,
+ );
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+ let opcode = if use_imm8 { 0x83 } else { 0x81 };
+ // And also here we use the "normal" G-E ordering.
+ let enc_g = int_reg_enc(reg_g.to_reg());
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ opcode,
+ 1,
+ subopcode_i,
+ enc_g,
+ rex,
+ );
+ emit_simm(sink, if use_imm8 { 1 } else { 4 }, *simm32);
+ }
+ }
+ }
+ }
+
+ Inst::UnaryRmR { size, op, src, dst } => {
+ let (prefix, rex_flags) = match size {
+ 2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!(),
+ };
+
+ let (opcode, num_opcodes) = match op {
+ UnaryRmROpcode::Bsr => (0x0fbd, 2),
+ UnaryRmROpcode::Bsf => (0x0fbc, 2),
+ };
+
+ match src {
+ RegMem::Reg { reg: src } => emit_std_reg_reg(
+ sink,
+ prefix,
+ opcode,
+ num_opcodes,
+ dst.to_reg(),
+ *src,
+ rex_flags,
+ ),
+ RegMem::Mem { addr: src } => {
+ let amode = src.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ num_opcodes,
+ dst.to_reg(),
+ &amode,
+ rex_flags,
+ );
+ }
+ }
+ }
+
+ Inst::Not { size, src } => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ let subopcode = 2;
+ let src = int_reg_enc(src.to_reg());
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
+ }
+
+ Inst::Neg { size, src } => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ let subopcode = 3;
+ let src = int_reg_enc(src.to_reg());
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
+ }
+
+ Inst::Div {
+ size,
+ signed,
+ divisor,
+ } => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ let loc = state.cur_srcloc();
+ sink.add_trap(loc, TrapCode::IntegerDivisionByZero);
+
+ let subopcode = if *signed { 7 } else { 6 };
+ match divisor {
+ RegMem::Reg { reg } => {
+ let src = int_reg_enc(*reg);
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
+ }
+ RegMem::Mem { addr: src } => {
+ let amode = src.finalize(state);
+ emit_std_enc_mem(sink, state, prefix, opcode, 1, subopcode, &amode, rex_flags);
+ }
+ }
+ }
+
+ Inst::MulHi { size, signed, rhs } => {
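+            // One-operand mul/imul (F7 /4 and /5): multiplies rax by the operand and leaves the
+            // low half of the product in rax and the high half in rdx (hence "MulHi").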
+ let (prefix, rex_flags) = match size {
+ 2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!(),
+ };
+
+ let subopcode = if *signed { 5 } else { 4 };
+ match rhs {
+ RegMem::Reg { reg } => {
+ let src = int_reg_enc(*reg);
+ emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags)
+ }
+ RegMem::Mem { addr: src } => {
+ let amode = src.finalize(state);
+ emit_std_enc_mem(sink, state, prefix, 0xF7, 1, subopcode, &amode, rex_flags);
+ }
+ }
+ }
+
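+        // These encode cbw (66 98), cwd (66 99), cdq (99) and cqo (48 99) respectively: cbw
+        // sign-extends al into ax, while cwd/cdq/cqo broadcast the sign of ax/eax/rax into
+        // dx/edx/rdx, typically ahead of a signed divide.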
+ Inst::SignExtendData { size } => match size {
+ 1 => {
+ sink.put1(0x66);
+ sink.put1(0x98);
+ }
+ 2 => {
+ sink.put1(0x66);
+ sink.put1(0x99);
+ }
+ 4 => sink.put1(0x99),
+ 8 => {
+ sink.put1(0x48);
+ sink.put1(0x99);
+ }
+ _ => unreachable!(),
+ },
+
+ Inst::CheckedDivOrRemSeq {
+ kind,
+ size,
+ divisor,
+ tmp,
+ } => {
+ // Generates the following code sequence:
+ //
+ // ;; check divide by zero:
+ // cmp 0 %divisor
+ // jnz $after_trap
+ // ud2
+ // $after_trap:
+ //
+ // ;; for signed modulo/div:
+ // cmp -1 %divisor
+ // jnz $do_op
+ // ;; for signed modulo, result is 0
+ // mov #0, %rdx
+ // j $done
+ // ;; for signed div, check for integer overflow against INT_MIN of the right size
+ // cmp INT_MIN, %rax
+ // jnz $do_op
+ // ud2
+ //
+ // $do_op:
+ // ;; if signed
+ // cdq ;; sign-extend from rax into rdx
+ // ;; else
+ // mov #0, %rdx
+ // idiv %divisor
+ //
+ // $done:
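+            // Note that for the 64-bit signed case the INT_MIN comparison below needs the
+            // constant 0x8000_0000_0000_0000 materialized in `tmp` first, since it does not fit
+            // in an imm32.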
+ debug_assert!(info.flags().avoid_div_traps());
+
+ // Check if the divisor is zero, first.
+ let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg());
+ inst.emit(sink, info, state);
+
+ let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero);
+ inst.emit(sink, info, state);
+
+ let (do_op, done_label) = if kind.is_signed() {
+ // Now check if the divisor is -1.
+ let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor.to_reg());
+ inst.emit(sink, info, state);
+
+ let do_op = sink.get_label();
+
+ // If not equal, jump to do-op.
+ one_way_jmp(sink, CC::NZ, do_op);
+
+ // Here, divisor == -1.
+ if !kind.is_div() {
+ // x % -1 = 0; put the result into the destination, $rdx.
+ let done_label = sink.get_label();
+
+ let inst = Inst::imm(
+ OperandSize::from_bytes(*size as u32),
+ 0,
+ Writable::from_reg(regs::rdx()),
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done_label);
+ inst.emit(sink, info, state);
+
+ (Some(do_op), Some(done_label))
+ } else {
+ // Check for integer overflow.
+ if *size == 8 {
+ let tmp = tmp.expect("temporary for i64 sdiv");
+
+ let inst = Inst::imm(OperandSize::Size64, 0x8000000000000000, tmp);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
+ inst.emit(sink, info, state);
+ } else {
+ let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax());
+ inst.emit(sink, info, state);
+ }
+
+ // If not equal, jump over the trap.
+ let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+
+ (Some(do_op), None)
+ }
+ } else {
+ (None, None)
+ };
+
+ if let Some(do_op) = do_op {
+ sink.bind_label(do_op);
+ }
+
+ assert!(
+ *size > 1,
+ "CheckedDivOrRemSeq for i8 is not yet implemented"
+ );
+
+ // Fill in the high parts:
+ if kind.is_signed() {
+                // For signed opcodes, broadcast the sign bit of rax into rdx (cdq/cqo and friends).
+ let inst = Inst::sign_extend_data(*size);
+ inst.emit(sink, info, state);
+ } else {
+ // zero for unsigned opcodes.
+ let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
+ inst.emit(sink, info, state);
+ }
+
+ let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor.to_reg()));
+ inst.emit(sink, info, state);
+
+ // Lowering takes care of moving the result back into the right register, see comment
+ // there.
+
+ if let Some(done) = done_label {
+ sink.bind_label(done);
+ }
+ }
+
+ Inst::Imm {
+ dst_is_64,
+ simm64,
+ dst,
+ } => {
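+            // Depending on `dst_is_64` and the value, this uses one of three encodings: the
+            // sign-extending REX.W C7 /0 imm32 form (7 bytes), the full movabs REX.W B8+rd imm64
+            // form (10 bytes), or the plain 32-bit B8+rd imm32 form (5 or 6 bytes).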
+ let enc_dst = int_reg_enc(dst.to_reg());
+ if *dst_is_64 {
+ if low32_will_sign_extend_to_64(*simm64) {
+ // Sign-extended move imm32.
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ 0xC7,
+ 1,
+ /* subopcode */ 0,
+ enc_dst,
+ RexFlags::set_w(),
+ );
+ sink.put4(*simm64 as u32);
+ } else {
+ sink.put1(0x48 | ((enc_dst >> 3) & 1));
+ sink.put1(0xB8 | (enc_dst & 7));
+ sink.put8(*simm64);
+ }
+ } else {
+ if ((enc_dst >> 3) & 1) == 1 {
+ sink.put1(0x41);
+ }
+ sink.put1(0xB8 | (enc_dst & 7));
+ sink.put4(*simm64 as u32);
+ }
+ }
+
+ Inst::MovRR { is_64, src, dst } => {
+ let rex = if *is_64 {
+ RexFlags::set_w()
+ } else {
+ RexFlags::clear_w()
+ };
+ emit_std_reg_reg(sink, LegacyPrefixes::None, 0x89, 1, *src, dst.to_reg(), rex);
+ }
+
+ Inst::MovzxRmR { ext_mode, src, dst } => {
+ let (opcodes, num_opcodes, mut rex_flags) = match ext_mode {
+ ExtMode::BL => {
+ // MOVZBL is (REX.W==0) 0F B6 /r
+ (0x0FB6, 2, RexFlags::clear_w())
+ }
+ ExtMode::BQ => {
+ // MOVZBQ is (REX.W==1) 0F B6 /r
+ // I'm not sure why the Intel manual offers different
+ // encodings for MOVZBQ than for MOVZBL. AIUI they should
+ // achieve the same, since MOVZBL is just going to zero out
+ // the upper half of the destination anyway.
+ (0x0FB6, 2, RexFlags::set_w())
+ }
+ ExtMode::WL => {
+ // MOVZWL is (REX.W==0) 0F B7 /r
+ (0x0FB7, 2, RexFlags::clear_w())
+ }
+ ExtMode::WQ => {
+ // MOVZWQ is (REX.W==1) 0F B7 /r
+ (0x0FB7, 2, RexFlags::set_w())
+ }
+ ExtMode::LQ => {
+ // This is just a standard 32 bit load, and we rely on the
+ // default zero-extension rule to perform the extension.
+ // Note that in reg/reg mode, gcc seems to use the swapped form R/RM, which we
+ // don't do here, since it's the same encoding size.
+ // MOV r/m32, r32 is (REX.W==0) 8B /r
+ (0x8B, 1, RexFlags::clear_w())
+ }
+ };
+
+ match src {
+ RegMem::Reg { reg: src } => {
+ match ext_mode {
+ ExtMode::BL | ExtMode::BQ => {
+ // A redundant REX prefix must be emitted for certain register inputs.
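+                        // Without a REX prefix, encodings 4 through 7 in a byte context name
+                        // ah/ch/dh/bh; with any REX present they name spl/bpl/sil/dil, which is
+                        // what is wanted here.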
+ let enc_src = int_reg_enc(*src);
+ if enc_src >= 4 && enc_src <= 7 {
+ rex_flags.always_emit();
+ };
+ }
+ _ => {}
+ }
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ *src,
+ rex_flags,
+ )
+ }
+
+ RegMem::Mem { addr: src } => {
+ let src = &src.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ src,
+ rex_flags,
+ )
+ }
+ }
+ }
+
+ Inst::Mov64MR { src, dst } => {
+ let src = &src.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x8B,
+ 1,
+ dst.to_reg(),
+ src,
+ RexFlags::set_w(),
+ )
+ }
+
+ Inst::LoadEffectiveAddress { addr, dst } => {
+ let amode = addr.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x8D,
+ 1,
+ dst.to_reg(),
+ &amode,
+ RexFlags::set_w(),
+ );
+ }
+
+ Inst::MovsxRmR { ext_mode, src, dst } => {
+ let (opcodes, num_opcodes, mut rex_flags) = match ext_mode {
+ ExtMode::BL => {
+ // MOVSBL is (REX.W==0) 0F BE /r
+ (0x0FBE, 2, RexFlags::clear_w())
+ }
+ ExtMode::BQ => {
+ // MOVSBQ is (REX.W==1) 0F BE /r
+ (0x0FBE, 2, RexFlags::set_w())
+ }
+ ExtMode::WL => {
+ // MOVSWL is (REX.W==0) 0F BF /r
+ (0x0FBF, 2, RexFlags::clear_w())
+ }
+ ExtMode::WQ => {
+ // MOVSWQ is (REX.W==1) 0F BF /r
+ (0x0FBF, 2, RexFlags::set_w())
+ }
+ ExtMode::LQ => {
+ // MOVSLQ is (REX.W==1) 63 /r
+ (0x63, 1, RexFlags::set_w())
+ }
+ };
+
+ match src {
+ RegMem::Reg { reg: src } => {
+ match ext_mode {
+ ExtMode::BL | ExtMode::BQ => {
+ // A redundant REX prefix must be emitted for certain register inputs.
+ let enc_src = int_reg_enc(*src);
+ if enc_src >= 4 && enc_src <= 7 {
+ rex_flags.always_emit();
+ };
+ }
+ _ => {}
+ }
+ emit_std_reg_reg(
+ sink,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ *src,
+ rex_flags,
+ )
+ }
+
+ RegMem::Mem { addr: src } => {
+ let src = &src.finalize(state);
+
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ opcodes,
+ num_opcodes,
+ dst.to_reg(),
+ src,
+ rex_flags,
+ )
+ }
+ }
+ }
+
+ Inst::MovRM { size, src, dst } => {
+ let dst = &dst.finalize(state);
+
+ match size {
+ 1 => {
+ // This is one of the few places where the presence of a
+ // redundant REX prefix changes the meaning of the
+ // instruction.
+ let mut rex = RexFlags::clear_w();
+
+ let enc_src = int_reg_enc(*src);
+ if enc_src >= 4 && enc_src <= 7 {
+ rex.always_emit();
+ };
+
+ // MOV r8, r/m8 is (REX.W==0) 88 /r
+ emit_std_reg_mem(sink, state, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
+ }
+
+ 2 => {
+ // MOV r16, r/m16 is 66 (REX.W==0) 89 /r
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::_66,
+ 0x89,
+ 1,
+ *src,
+ dst,
+ RexFlags::clear_w(),
+ )
+ }
+
+ 4 => {
+ // MOV r32, r/m32 is (REX.W==0) 89 /r
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x89,
+ 1,
+ *src,
+ dst,
+ RexFlags::clear_w(),
+ )
+ }
+
+ 8 => {
+ // MOV r64, r/m64 is (REX.W==1) 89 /r
+ emit_std_reg_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0x89,
+ 1,
+ *src,
+ dst,
+ RexFlags::set_w(),
+ )
+ }
+
+ _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
+ }
+ }
+
+ Inst::ShiftR {
+ size,
+ kind,
+ num_bits,
+ dst,
+ } => {
+ let enc_dst = int_reg_enc(dst.to_reg());
+ let subopcode = match kind {
+ ShiftKind::RotateLeft => 0,
+ ShiftKind::RotateRight => 1,
+ ShiftKind::ShiftLeft => 4,
+ ShiftKind::ShiftRightLogical => 5,
+ ShiftKind::ShiftRightArithmetic => 7,
+ };
+
+ match num_bits {
+ None => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xD2, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ // SHL/SHR/SAR %cl, reg8 is (REX.W==0) D2 /subopcode
+ // SHL/SHR/SAR %cl, reg16 is 66 (REX.W==0) D3 /subopcode
+ // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
+ // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
+ }
+
+ Some(num_bits) => {
+ let (opcode, prefix, rex_flags) = match size {
+ 1 => (0xC0, LegacyPrefixes::None, RexFlags::clear_w()),
+ 2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("{}", size),
+ };
+
+ // SHL/SHR/SAR $ib, reg8 is (REX.W==0) C0 /subopcode
+ // SHL/SHR/SAR $ib, reg16 is 66 (REX.W==0) C1 /subopcode
+ // SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
+ // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
+ // When the shift amount is 1, there's an even shorter encoding, but we don't
+ // bother with that nicety here.
+ emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
+ sink.put1(*num_bits);
+ }
+ }
+ }
+
+ Inst::XmmRmiReg { opcode, src, dst } => {
+ let rex = RexFlags::clear_w();
+ let prefix = LegacyPrefixes::_66;
+ if let RegMemImm::Imm { simm32 } = src {
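+                // In the immediate forms, the xmm destination goes in ModRM's r/m field and the
+                // "reg" field carries the /digit opcode extension below; the shift amount follows
+                // as an imm8.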
+ let (opcode_bytes, reg_digit) = match opcode {
+ SseOpcode::Psllw => (0x0F71, 6),
+ SseOpcode::Pslld => (0x0F72, 6),
+ SseOpcode::Psllq => (0x0F73, 6),
+ SseOpcode::Psraw => (0x0F71, 4),
+ SseOpcode::Psrad => (0x0F72, 4),
+ SseOpcode::Psrlw => (0x0F71, 2),
+ SseOpcode::Psrld => (0x0F72, 2),
+ SseOpcode::Psrlq => (0x0F73, 2),
+ _ => panic!("invalid opcode: {}", opcode),
+ };
+ let dst_enc = reg_enc(dst.to_reg());
+ emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex);
+ let imm = (*simm32)
+ .try_into()
+ .expect("the immediate must be convertible to a u8");
+ sink.put1(imm);
+ } else {
+ let opcode_bytes = match opcode {
+ SseOpcode::Psllw => 0x0FF1,
+ SseOpcode::Pslld => 0x0FF2,
+ SseOpcode::Psllq => 0x0FF3,
+ SseOpcode::Psraw => 0x0FE1,
+ SseOpcode::Psrad => 0x0FE2,
+ SseOpcode::Psrlw => 0x0FD1,
+ SseOpcode::Psrld => 0x0FD2,
+ SseOpcode::Psrlq => 0x0FD3,
+ _ => panic!("invalid opcode: {}", opcode),
+ };
+
+ match src {
+ RegMemImm::Reg { reg } => {
+ emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst.to_reg(), *reg, rex);
+ }
+ RegMemImm::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode_bytes,
+ 2,
+ dst.to_reg(),
+ addr,
+ rex,
+ );
+ }
+ RegMemImm::Imm { .. } => unreachable!(),
+ }
+ };
+ }
+
+ Inst::CmpRmiR {
+ size,
+ src: src_e,
+ dst: reg_g,
+ } => {
+ let mut prefix = LegacyPrefixes::None;
+ if *size == 2 {
+ prefix = LegacyPrefixes::_66;
+ }
+
+ let mut rex = match size {
+ 8 => RexFlags::set_w(),
+ 4 | 2 => RexFlags::clear_w(),
+ 1 => {
+ let mut rex = RexFlags::clear_w();
+ // Here, a redundant REX prefix changes the meaning of the instruction.
+ let enc_g = int_reg_enc(*reg_g);
+ if enc_g >= 4 && enc_g <= 7 {
+ rex.always_emit();
+ }
+ rex
+ }
+ _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
+ };
+
+ match src_e {
+ RegMemImm::Reg { reg: reg_e } => {
+ if *size == 1 {
+ // Check whether the E register forces the use of a redundant REX.
+ let enc_e = int_reg_enc(*reg_e);
+ if enc_e >= 4 && enc_e <= 7 {
+ rex.always_emit();
+ }
+ }
+
+ // Use the swapped operands encoding, to stay consistent with the output of
+ // gcc/llvm.
+ let opcode = if *size == 1 { 0x38 } else { 0x39 };
+ emit_std_reg_reg(sink, prefix, opcode, 1, *reg_e, *reg_g, rex);
+ }
+
+ RegMemImm::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ // Whereas here we revert to the "normal" G-E ordering.
+ let opcode = if *size == 1 { 0x3A } else { 0x3B };
+ emit_std_reg_mem(sink, state, prefix, opcode, 1, *reg_g, addr, rex);
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ // FIXME JRS 2020Feb11: there are shorter encodings for
+ // cmp $imm, rax/eax/ax/al.
+ let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+
+ // And also here we use the "normal" G-E ordering.
+ let opcode = if *size == 1 {
+ 0x80
+ } else if use_imm8 {
+ 0x83
+ } else {
+ 0x81
+ };
+
+ let enc_g = int_reg_enc(*reg_g);
+ emit_std_enc_enc(sink, prefix, opcode, 1, 7 /*subopcode*/, enc_g, rex);
+ emit_simm(sink, if use_imm8 { 1 } else { *size }, *simm32);
+ }
+ }
+ }
+
+ Inst::Setcc { cc, dst } => {
+ let opcode = 0x0f90 + cc.get_enc() as u32;
+ let mut rex_flags = RexFlags::clear_w();
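+            // SETcc writes a byte register; always requesting a REX prefix (even one with no
+            // payload bits set) makes encodings 4-7 refer to spl/bpl/sil/dil rather than
+            // ah/ch/dh/bh. For the remaining registers the extra prefix is redundant but harmless.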
+ rex_flags.always_emit();
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ opcode,
+ 2,
+ 0,
+ reg_enc(dst.to_reg()),
+ rex_flags,
+ );
+ }
+
+ Inst::Cmove {
+ size,
+ cc,
+ src,
+ dst: reg_g,
+ } => {
+ let (prefix, rex_flags) = match size {
+ 2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
+ 4 => (LegacyPrefixes::None, RexFlags::clear_w()),
+ 8 => (LegacyPrefixes::None, RexFlags::set_w()),
+ _ => unreachable!("invalid size spec for cmove"),
+ };
+ let opcode = 0x0F40 + cc.get_enc() as u32;
+ match src {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ 2,
+ reg_g.to_reg(),
+ addr,
+ rex_flags,
+ );
+ }
+ }
+ }
+
+ Inst::XmmCmove {
+ is_64,
+ cc,
+ src,
+ dst,
+ } => {
+ // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that
+ // this doesn't clobber flags. Make sure to not do so here.
+ let next = sink.get_label();
+
+ // Jump if cc is *not* set.
+ one_way_jmp(sink, cc.invert(), next);
+
+ let op = if *is_64 {
+ SseOpcode::Movsd
+ } else {
+ SseOpcode::Movss
+ };
+ let inst = Inst::xmm_unary_rm_r(op, src.clone(), *dst);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(next);
+ }
+
+ Inst::Push64 { src } => {
+ match src {
+ RegMemImm::Reg { reg } => {
+ let enc_reg = int_reg_enc(*reg);
+ let rex = 0x40 | ((enc_reg >> 3) & 1);
+ if rex != 0x40 {
+ sink.put1(rex);
+ }
+ sink.put1(0x50 | (enc_reg & 7));
+ }
+
+ RegMemImm::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_enc_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 6, /*subopcode*/
+ addr,
+ RexFlags::clear_w(),
+ );
+ }
+
+ RegMemImm::Imm { simm32 } => {
+ if low8_will_sign_extend_to_64(*simm32) {
+ sink.put1(0x6A);
+ sink.put1(*simm32 as u8);
+ } else {
+ sink.put1(0x68);
+ sink.put4(*simm32);
+ }
+ }
+ }
+ }
+
+ Inst::Pop64 { dst } => {
+ let enc_dst = int_reg_enc(dst.to_reg());
+ if enc_dst >= 8 {
+ // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant here.
+ sink.put1(0x41);
+ }
+ sink.put1(0x58 + (enc_dst & 7));
+ }
+
+ Inst::CallKnown { dest, opcode, .. } => {
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
+ }
+ sink.put1(0xE8);
+ // The addend adjusts for the difference between the end of the instruction and the
+ // beginning of the immediate field.
+ emit_reloc(sink, state, Reloc::X86CallPCRel4, &dest, -4);
+ sink.put4(0);
+ if opcode.is_call() {
+ let loc = state.cur_srcloc();
+ sink.add_call_site(loc, *opcode);
+ }
+ }
+
+ Inst::CallUnknown { dest, opcode, .. } => {
+ let start_offset = sink.cur_offset();
+ match dest {
+ RegMem::Reg { reg } => {
+ let reg_enc = int_reg_enc(*reg);
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 2, /*subopcode*/
+ reg_enc,
+ RexFlags::clear_w(),
+ );
+ }
+
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_enc_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 2, /*subopcode*/
+ addr,
+ RexFlags::clear_w(),
+ );
+ }
+ }
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
+ }
+ if opcode.is_call() {
+ let loc = state.cur_srcloc();
+ sink.add_call_site(loc, *opcode);
+ }
+ }
+
+ Inst::Ret {} => sink.put1(0xC3),
+
+ Inst::JmpKnown { dst } => {
+ let br_start = sink.cur_offset();
+ let br_disp_off = br_start + 1;
+ let br_end = br_start + 5;
+
+ sink.use_label_at_offset(br_disp_off, *dst, LabelUse::JmpRel32);
+ sink.add_uncond_branch(br_start, br_end, *dst);
+
+ sink.put1(0xE9);
+ // Placeholder for the label value.
+ sink.put4(0x0);
+ }
+
+ Inst::JmpIf { cc, taken } => {
+ let cond_start = sink.cur_offset();
+ let cond_disp_off = cond_start + 2;
+
+ sink.use_label_at_offset(cond_disp_off, *taken, LabelUse::JmpRel32);
+ // Since this is not a terminator, don't enroll in the branch inversion mechanism.
+
+ sink.put1(0x0F);
+ sink.put1(0x80 + cc.get_enc());
+ // Placeholder for the label value.
+ sink.put4(0x0);
+ }
+
+ Inst::JmpCond {
+ cc,
+ taken,
+ not_taken,
+ } => {
+ // If taken.
+ let cond_start = sink.cur_offset();
+ let cond_disp_off = cond_start + 2;
+ let cond_end = cond_start + 6;
+
+ sink.use_label_at_offset(cond_disp_off, *taken, LabelUse::JmpRel32);
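+            // `inverted` is the machine code of the opposite-sense Jcc, handed to the buffer so
+            // that its branch-simplification logic can substitute it if it later decides to
+            // invert this conditional (e.g. when the taken target becomes the fallthrough block).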
+ let inverted: [u8; 6] = [0x0F, 0x80 + (cc.invert().get_enc()), 0x00, 0x00, 0x00, 0x00];
+ sink.add_cond_branch(cond_start, cond_end, *taken, &inverted[..]);
+
+ sink.put1(0x0F);
+ sink.put1(0x80 + cc.get_enc());
+ // Placeholder for the label value.
+ sink.put4(0x0);
+
+ // If not taken.
+ let uncond_start = sink.cur_offset();
+ let uncond_disp_off = uncond_start + 1;
+ let uncond_end = uncond_start + 5;
+
+ sink.use_label_at_offset(uncond_disp_off, *not_taken, LabelUse::JmpRel32);
+ sink.add_uncond_branch(uncond_start, uncond_end, *not_taken);
+
+ sink.put1(0xE9);
+ // Placeholder for the label value.
+ sink.put4(0x0);
+ }
+
+ Inst::JmpUnknown { target } => {
+ match target {
+ RegMem::Reg { reg } => {
+ let reg_enc = int_reg_enc(*reg);
+ emit_std_enc_enc(
+ sink,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 4, /*subopcode*/
+ reg_enc,
+ RexFlags::clear_w(),
+ );
+ }
+
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_enc_mem(
+ sink,
+ state,
+ LegacyPrefixes::None,
+ 0xFF,
+ 1,
+ 4, /*subopcode*/
+ addr,
+ RexFlags::clear_w(),
+ );
+ }
+ }
+ }
+
+ Inst::JmpTableSeq {
+ idx,
+ tmp1,
+ tmp2,
+ ref targets,
+ default_target,
+ ..
+ } => {
+ // This sequence is *one* instruction in the vcode, and is expanded only here at
+ // emission time, because we cannot allow the regalloc to insert spills/reloads in
+ // the middle; we depend on hardcoded PC-rel addressing below.
+ //
+ // We don't have to worry about emitting islands, because the only label-use type has a
+ // maximum range of 2 GB. If we later consider using shorter-range label references,
+ // this will need to be revisited.
+
+            // Save the index in a tmp: the live range of `idx` only goes to the start of this
+            // sequence, so `tmp1` or `tmp2` may overwrite it.
+
+ // We generate the following sequence:
+ // ;; generated by lowering: cmp #jmp_table_size, %idx
+ // jnb $default_target
+ // movl %idx, %tmp2
+ // lea start_of_jump_table_offset(%rip), %tmp1
+ // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4
+ // addq %tmp2, %tmp1
+ // j *%tmp1
+ // $start_of_jump_table:
+ // -- jump table entries
+ one_way_jmp(sink, CC::NB, *default_target); // idx unsigned >= jmp table size
+
+ // Copy the index (and make sure to clear the high 32-bits lane of tmp2).
+ let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2);
+ inst.emit(sink, info, state);
+
+ // Load base address of jump table.
+ let start_of_jumptable = sink.get_label();
+ let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), *tmp1);
+ inst.emit(sink, info, state);
+
+ // Load value out of the jump table. It's a relative offset to the target block, so it
+ // might be negative; use a sign-extension.
+ let inst = Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
+ *tmp2,
+ );
+ inst.emit(sink, info, state);
+
+ // Add base of jump table to jump-table-sourced block offset.
+ let inst = Inst::alu_rmi_r(
+ true, /* is_64 */
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp2.to_reg()),
+ *tmp1,
+ );
+ inst.emit(sink, info, state);
+
+ // Branch to computed address.
+ let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg()));
+ inst.emit(sink, info, state);
+
+ // Emit jump table (table of 32-bit offsets).
+ sink.bind_label(start_of_jumptable);
+ let jt_off = sink.cur_offset();
+ for &target in targets.iter() {
+ let word_off = sink.cur_offset();
+ // off_into_table is an addend here embedded in the label to be later patched at
+ // the end of codegen. The offset is initially relative to this jump table entry;
+ // with the extra addend, it'll be relative to the jump table's start, after
+ // patching.
+ let off_into_table = word_off - jt_off;
+ sink.use_label_at_offset(word_off, target, LabelUse::PCRel32);
+ sink.put4(off_into_table);
+ }
+ }
+
+ Inst::TrapIf { cc, trap_code } => {
+ let else_label = sink.get_label();
+
+ // Jump over if the invert of CC is set (i.e. CC is not set).
+ one_way_jmp(sink, cc.invert(), else_label);
+
+ // Trap!
+ let inst = Inst::trap(*trap_code);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(else_label);
+ }
+
+ Inst::XmmUnaryRmR {
+ op,
+ src: src_e,
+ dst: reg_g,
+ } => {
+ let rex = RexFlags::clear_w();
+
+ let (prefix, opcode, num_opcodes) = match op {
+ SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
+ SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
+ SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),
+ SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28, 2),
+ SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F, 2),
+ SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F, 2),
+ SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
+ SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
+ SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10, 2),
+ SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10, 2),
+ SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
+ SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
+ SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
+ SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
+ SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
+ SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
+ SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51, 2),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+
+ match src_e {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(
+ sink,
+ prefix,
+ opcode,
+ num_opcodes,
+ reg_g.to_reg(),
+ *reg_e,
+ rex,
+ );
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ num_opcodes,
+ reg_g.to_reg(),
+ addr,
+ rex,
+ );
+ }
+ };
+ }
+
+ Inst::XmmRmR {
+ op,
+ src: src_e,
+ dst: reg_g,
+ } => {
+ let rex = RexFlags::clear_w();
+ let (prefix, opcode, length) = match op {
+ SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
+ SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
+ SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
+ SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
+ SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
+ SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
+ SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
+ SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
+ SseOpcode::Cvttps2dq => (LegacyPrefixes::_F3, 0x0F5B, 2),
+ SseOpcode::Cvtdq2ps => (LegacyPrefixes::None, 0x0F5B, 2),
+ SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
+ SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
+ SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
+ SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2),
+ SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2),
+ SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2),
+ SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2),
+ SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2),
+ SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2),
+ SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2),
+ SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2),
+ SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2),
+ SseOpcode::Movlhps => (LegacyPrefixes::None, 0x0F16, 2),
+ SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10, 2),
+ SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2),
+ SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2),
+ SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2),
+ SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2),
+ SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2),
+ SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2),
+ SseOpcode::Packsswb => (LegacyPrefixes::_66, 0x0F63, 2),
+ SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2),
+ SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2),
+ SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2),
+ SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
+ SseOpcode::Paddsb => (LegacyPrefixes::_66, 0x0FEC, 2),
+ SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2),
+ SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2),
+ SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2),
+ SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
+ SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
+ SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
+ SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
+ SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
+ SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
+ SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
+ SseOpcode::Pcmpeqq => (LegacyPrefixes::_66, 0x0F3829, 3),
+ SseOpcode::Pcmpgtb => (LegacyPrefixes::_66, 0x0F64, 2),
+ SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2),
+ SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2),
+ SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3),
+ SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3),
+ SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2),
+ SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3),
+ SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2),
+ SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3),
+ SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3),
+ SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3),
+ SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2),
+ SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3),
+ SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2),
+ SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3),
+ SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3),
+ SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
+ SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
+ SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
+ SseOpcode::Por => (LegacyPrefixes::_66, 0x0FEB, 2),
+ SseOpcode::Pshufb => (LegacyPrefixes::_66, 0x0F3800, 3),
+ SseOpcode::Psubb => (LegacyPrefixes::_66, 0x0FF8, 2),
+ SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
+ SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
+ SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
+ SseOpcode::Psubsb => (LegacyPrefixes::_66, 0x0FE8, 2),
+ SseOpcode::Psubsw => (LegacyPrefixes::_66, 0x0FE9, 2),
+ SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
+ SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
+ SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
+ SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
+ SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
+ SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
+ SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
+ SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
+ SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+
+ match src_e {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ length,
+ reg_g.to_reg(),
+ addr,
+ rex,
+ );
+ }
+ }
+ }
+
+ Inst::XmmMinMaxSeq {
+ size,
+ is_min,
+ lhs,
+ rhs_dst,
+ } => {
+ // Generates the following sequence:
+ // cmpss/cmpsd %lhs, %rhs_dst
+ // jnz do_min_max
+ // jp propagate_nan
+ //
+ // ;; ordered and equal: propagate the sign bit (for -0 vs 0):
+ // {and,or}{ss,sd} %lhs, %rhs_dst
+ // j done
+ //
+ // ;; to get the desired NaN behavior (signalling NaN transformed into a quiet NaN, the
+ // ;; NaN value is returned), we add both inputs.
+ // propagate_nan:
+ // add{ss,sd} %lhs, %rhs_dst
+ // j done
+ //
+ // do_min_max:
+ // {min,max}{ss,sd} %lhs, %rhs_dst
+ //
+ // done:
+ let done = sink.get_label();
+ let propagate_nan = sink.get_label();
+ let do_min_max = sink.get_label();
+
+ let (add_op, cmp_op, and_op, or_op, min_max_op) = match size {
+ OperandSize::Size32 => (
+ SseOpcode::Addss,
+ SseOpcode::Ucomiss,
+ SseOpcode::Andps,
+ SseOpcode::Orps,
+ if *is_min {
+ SseOpcode::Minss
+ } else {
+ SseOpcode::Maxss
+ },
+ ),
+ OperandSize::Size64 => (
+ SseOpcode::Addsd,
+ SseOpcode::Ucomisd,
+ SseOpcode::Andpd,
+ SseOpcode::Orpd,
+ if *is_min {
+ SseOpcode::Minsd
+ } else {
+ SseOpcode::Maxsd
+ },
+ ),
+ };
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(*lhs), rhs_dst.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NZ, do_min_max);
+ one_way_jmp(sink, CC::P, propagate_nan);
+
+ // Ordered and equal. The operands are bit-identical unless they are zero
+ // and negative zero. These instructions merge the sign bits in that
+ // case, and are no-ops otherwise.
+ let op = if *is_min { or_op } else { and_op };
+ let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+
+ // x86's min/max are not symmetric; if either operand is a NaN, they return the
+ // read-only operand: perform an addition between the two operands, which has the
+ // desired NaN propagation effects.
+ sink.bind_label(propagate_nan);
+ let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst);
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::P, done);
+
+ sink.bind_label(do_min_max);
+ let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(done);
+ }
+
+ Inst::XmmRmRImm {
+ op,
+ src,
+ dst,
+ imm,
+ is64,
+ } => {
+ let (prefix, opcode, len) = match op {
+ SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
+ SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2),
+ SseOpcode::Cmpss => (LegacyPrefixes::_F3, 0x0FC2, 2),
+ SseOpcode::Cmpsd => (LegacyPrefixes::_F2, 0x0FC2, 2),
+ SseOpcode::Insertps => (LegacyPrefixes::_66, 0x0F3A21, 3),
+ SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
+ SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
+ SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
+ SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3),
+ SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
+ SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
+ SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+ let rex = if *is64 {
+ RexFlags::set_w()
+ } else {
+ RexFlags::clear_w()
+ };
+ let regs_swapped = match *op {
+ // These opcodes (and not the SSE2 version of PEXTRW) flip the operand
+ // encoding: `dst` in ModRM's r/m, `src` in ModRM's reg field.
+ SseOpcode::Pextrb | SseOpcode::Pextrd => true,
+ // The rest of the opcodes have the customary encoding: `dst` in ModRM's reg,
+ // `src` in ModRM's r/m field.
+ _ => false,
+ };
+ match src {
+ RegMem::Reg { reg } => {
+ if regs_swapped {
+ emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex);
+ } else {
+ emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), *reg, rex);
+ }
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ assert!(
+ !regs_swapped,
+ "No existing way to encode a mem argument in the ModRM r/m field."
+ );
+ emit_std_reg_mem(sink, state, prefix, opcode, len, dst.to_reg(), addr, rex);
+ }
+ }
+ sink.put1(*imm);
+ }
+
+ Inst::XmmLoadConst { src, dst, ty } => {
+ let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src));
+ let load = Inst::load(*ty, load_offset, *dst, ExtKind::None);
+ load.emit(sink, info, state);
+ }
+
+ Inst::XmmUninitializedValue { .. } => {
+ // This instruction format only exists to declare a register as a `def`; no code is
+ // emitted.
+ }
+
+ Inst::XmmMovRM { op, src, dst } => {
+ let (prefix, opcode) = match op {
+ SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
+ SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
+ SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F),
+ SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
+ SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
+ SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
+ SseOpcode::Movups => (LegacyPrefixes::None, 0x0F11),
+ SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11),
+ _ => unimplemented!("Opcode {:?} not implemented", op),
+ };
+ let dst = &dst.finalize(state);
+ emit_std_reg_mem(
+ sink,
+ state,
+ prefix,
+ opcode,
+ 2,
+ *src,
+ dst,
+ RexFlags::clear_w(),
+ );
+ }
+
+ Inst::XmmToGpr {
+ op,
+ src,
+ dst,
+ dst_size,
+ } => {
+ let (prefix, opcode, dst_first) = match op {
+ SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true),
+ SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true),
+ // Movd and movq use the same opcode; the presence of the REX prefix (set below)
+ // actually determines which is used.
+ SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false),
+ SseOpcode::Movmskps => (LegacyPrefixes::None, 0x0F50, true),
+ SseOpcode::Movmskpd => (LegacyPrefixes::_66, 0x0F50, true),
+ SseOpcode::Pmovmskb => (LegacyPrefixes::_66, 0x0FD7, true),
+ _ => panic!("unexpected opcode {:?}", op),
+ };
+ let rex = match dst_size {
+ OperandSize::Size32 => RexFlags::clear_w(),
+ OperandSize::Size64 => RexFlags::set_w(),
+ };
+
+ let (src, dst) = if dst_first {
+ (dst.to_reg(), *src)
+ } else {
+ (*src, dst.to_reg())
+ };
+
+ emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
+ }
+
+ Inst::GprToXmm {
+ op,
+ src: src_e,
+ dst: reg_g,
+ src_size,
+ } => {
+ let (prefix, opcode) = match op {
+ // Movd and movq use the same opcode; the presence of the REX prefix (set below)
+ // actually determines which is used.
+ SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E),
+ SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A),
+ SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A),
+ _ => panic!("unexpected opcode {:?}", op),
+ };
+ let rex = match *src_size {
+ OperandSize::Size32 => RexFlags::clear_w(),
+ OperandSize::Size64 => RexFlags::set_w(),
+ };
+ match src_e {
+ RegMem::Reg { reg: reg_e } => {
+ emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(sink, state, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
+ }
+ }
+ }
+
+ Inst::XmmCmpRmR { op, src, dst } => {
+ let rex = RexFlags::clear_w();
+ let (prefix, opcode, len) = match op {
+ SseOpcode::Ptest => (LegacyPrefixes::_66, 0x0F3817, 3),
+ SseOpcode::Ucomisd => (LegacyPrefixes::_66, 0x0F2E, 2),
+ SseOpcode::Ucomiss => (LegacyPrefixes::None, 0x0F2E, 2),
+ _ => unimplemented!("Emit xmm cmp rm r"),
+ };
+
+ match src {
+ RegMem::Reg { reg } => {
+ emit_std_reg_reg(sink, prefix, opcode, len, *dst, *reg, rex);
+ }
+ RegMem::Mem { addr } => {
+ let addr = &addr.finalize(state);
+ emit_std_reg_mem(sink, state, prefix, opcode, len, *dst, addr, rex);
+ }
+ }
+ }
+
+ Inst::CvtUint64ToFloatSeq {
+ to_f64,
+ src,
+ dst,
+ tmp_gpr1,
+ tmp_gpr2,
+ } => {
+ // Note: this sequence is specific to 64-bit mode; a 32-bit mode would require a
+ // different sequence.
+ //
+ // Emit the following sequence:
+ //
+ // cmp 0, %src
+ // jl handle_negative
+ //
+ // ;; handle positive, which can't overflow
+ // cvtsi2sd/cvtsi2ss %src, %dst
+ // j done
+ //
+ // ;; handle negative: see below for an explanation of what it's doing.
+ // handle_negative:
+ // mov %src, %tmp_gpr1
+ // shr $1, %tmp_gpr1
+ // mov %src, %tmp_gpr2
+ // and $1, %tmp_gpr2
+ // or %tmp_gpr1, %tmp_gpr2
+ // cvtsi2sd/cvtsi2ss %tmp_gpr2, %dst
+ // addsd/addss %dst, %dst
+ //
+ // done:
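+            // The `or` of the LSB acts as a sticky bit ("round to odd"), so the halve-convert-
+            // double path rounds the same way a direct conversion of the full value would. As an
+            // illustration, for src == 0x8000_0000_0000_0000 (2^63), tmp_gpr2 becomes 2^62, which
+            // converts exactly, and the final add doubles it back to 2^63.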
+
+ assert_ne!(src, tmp_gpr1);
+ assert_ne!(src, tmp_gpr2);
+ assert_ne!(tmp_gpr1, tmp_gpr2);
+
+ let handle_negative = sink.get_label();
+ let done = sink.get_label();
+
+            // If x, seen as a signed int64, is not negative, a plain signed conversion will do
+            // the right thing.
+            // TODO: use `test src, src` here instead of comparing against an immediate zero.
+ let inst = Inst::cmp_rmi_r(8, RegMemImm::imm(0), src.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::L, handle_negative);
+
+ // Handle a positive int64, which is the "easy" case: a signed conversion will do the
+ // right thing.
+ emit_signed_cvt(sink, info, state, src.to_reg(), *dst, *to_f64);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(handle_negative);
+
+ // Divide x by two to get it in range for the signed conversion, keep the LSB, and
+ // scale it back up on the FP side.
+ let inst = Inst::gen_move(*tmp_gpr1, src.to_reg(), types::I64);
+ inst.emit(sink, info, state);
+
+ // tmp_gpr1 := src >> 1
+ let inst = Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(1), *tmp_gpr1);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::alu_rmi_r(
+ true, /* 64bits */
+ AluRmiROpcode::And,
+ RegMemImm::imm(1),
+ *tmp_gpr2,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::alu_rmi_r(
+ true, /* 64bits */
+ AluRmiROpcode::Or,
+ RegMemImm::reg(tmp_gpr1.to_reg()),
+ *tmp_gpr2,
+ );
+ inst.emit(sink, info, state);
+
+ emit_signed_cvt(sink, info, state, tmp_gpr2.to_reg(), *dst, *to_f64);
+
+ let add_op = if *to_f64 {
+ SseOpcode::Addsd
+ } else {
+ SseOpcode::Addss
+ };
+ let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(done);
+ }
+
+ Inst::CvtFloatToSintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ } => {
+ // Emits the following common sequence:
+ //
+ // cvttss2si/cvttsd2si %src, %dst
+ // cmp %dst, 1
+ // jno done
+ //
+ // Then, for saturating conversions:
+ //
+ // ;; check for NaN
+ // cmpss/cmpsd %src, %src
+ // jnp not_nan
+ // xor %dst, %dst
+ //
+ // ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is
+ // ;; already in %dst.
+ // xorpd %tmp_xmm, %tmp_xmm
+ // cmpss/cmpsd %src, %tmp_xmm
+ // jnb done
+ // mov/movaps $INT_MAX, %dst
+ //
+ // done:
+ //
+ // Then, for non-saturating conversions:
+ //
+ // ;; check for NaN
+ // cmpss/cmpsd %src, %src
+ // jnp not_nan
+ // ud2 trap BadConversionToInteger
+ //
+ // ;; check if INT_MIN was the correct result, against a magic constant:
+ // not_nan:
+ // movaps/mov $magic, %tmp_gpr
+ // movq/movd %tmp_gpr, %tmp_xmm
+ // cmpss/cmpsd %tmp_xmm, %src
+ // jnb/jnbe $check_positive
+ // ud2 trap IntegerOverflow
+ //
+ // ;; if positive, it was a real overflow
+ // check_positive:
+ // xorpd %tmp_xmm, %tmp_xmm
+            // ucomiss/ucomisd %src, %tmp_xmm
+ // jnb done
+ // ud2 trap IntegerOverflow
+ //
+ // done:
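+            //
+            // For intuition only (not emitted code): cvttss2si/cvttsd2si return the "integer
+            // indefinite" value INT_MIN both on overflow and on NaN, so a slow path is only
+            // needed when %dst == INT_MIN. Roughly, as a Rust-flavored sketch (with a
+            // hypothetical `cvt` standing in for the hardware truncation):
+            //
+            //   let t = cvt(src);              // INT_MIN on NaN or out-of-range input
+            //   if t != i64::MIN { return t; } // fast path: no overflow possible
+            //   // slow path: NaN, a genuine INT_MIN input, or a real overflow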
+
+ let src = src.to_reg();
+
+ let (cast_op, cmp_op, trunc_op) = match src_size {
+ OperandSize::Size64 => (SseOpcode::Movq, SseOpcode::Ucomisd, SseOpcode::Cvttsd2si),
+ OperandSize::Size32 => (SseOpcode::Movd, SseOpcode::Ucomiss, SseOpcode::Cvttss2si),
+ };
+
+ let done = sink.get_label();
+ let not_nan = sink.get_label();
+
+ // The truncation.
+ let inst = Inst::xmm_to_gpr(trunc_op, src, *dst, *dst_size);
+ inst.emit(sink, info, state);
+
+            // Compare against 1: `dst - 1` sets OF exactly when dst is INT_MIN, which is the
+            // value the truncation produces on overflow (and on NaN).
+ let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(1), dst.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NO, done); // no overflow => done
+
+ // Check for NaN.
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src);
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NP, not_nan); // go to not_nan if not a NaN
+
+ if *is_saturating {
+ // For NaN, emit 0.
+ let inst = Inst::alu_rmi_r(
+ *dst_size == OperandSize::Size64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(dst.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+
+ sink.bind_label(not_nan);
+
+ // If the input was positive, saturate to INT_MAX.
+
+ // Zero out tmp_xmm.
+ let inst =
+ Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
+ inst.emit(sink, info, state);
+
+ // Jump if >= to done.
+ one_way_jmp(sink, CC::NB, done);
+
+ // Otherwise, put INT_MAX.
+ if *dst_size == OperandSize::Size64 {
+ let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
+ inst.emit(sink, info, state);
+ } else {
+ let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
+ inst.emit(sink, info, state);
+ }
+ } else {
+ let check_positive = sink.get_label();
+
+ let inst = Inst::trap(TrapCode::BadConversionToInteger);
+ inst.emit(sink, info, state);
+
+                // Check whether INT_MIN was the correct result: determine the smallest
+                // floating-point number that would convert to INT_MIN, put it in a temporary
+                // register, and compare it against the src register.
+                // If the src register is less than (or, in some cases, less than or equal to)
+                // that threshold, trap!
+
+ sink.bind_label(not_nan);
+
+ let mut no_overflow_cc = CC::NB; // >=
+ let output_bits = dst_size.to_bits();
+ match *src_size {
+ OperandSize::Size32 => {
+ let cst = Ieee32::pow2(output_bits - 1).neg().bits();
+ let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
+ inst.emit(sink, info, state);
+ }
+ OperandSize::Size64 => {
+ // An f64 can represent `i32::min_value() - 1` exactly with precision to spare,
+ // so there are values less than -2^(N-1) that convert correctly to INT_MIN.
+ let cst = if output_bits < 64 {
+ no_overflow_cc = CC::NBE; // >
+ Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+ } else {
+ Ieee64::pow2(output_bits - 1).neg()
+ };
+ let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
+ inst.emit(sink, info, state);
+ }
+ }
+
+ let inst =
+ Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src);
+ inst.emit(sink, info, state);
+
+ // jump over trap if src >= or > threshold
+ one_way_jmp(sink, no_overflow_cc, check_positive);
+
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+
+ // If positive, it was a real overflow.
+
+ sink.bind_label(check_positive);
+
+ // Zero out the tmp_xmm register.
+ let inst =
+ Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src
+
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(done);
+ }
+
+ Inst::CvtFloatToUintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ } => {
+ // The only difference in behavior between saturating and non-saturating is how we
+ // handle errors. Emits the following sequence:
+ //
+ // movaps/mov 2**(int_width - 1), %tmp_gpr
+ // movq/movd %tmp_gpr, %tmp_xmm
+            // ucomiss/ucomisd %tmp_xmm, %src
+ // jnb is_large
+ //
+ // ;; check for NaN inputs
+ // jnp not_nan
+ // -- non-saturating: ud2 trap BadConversionToInteger
+ // -- saturating: xor %dst, %dst; j done
+ //
+ // not_nan:
+ // cvttss2si/cvttsd2si %src, %dst
+ // cmp 0, %dst
+ // jnl done
+ // -- non-saturating: ud2 trap IntegerOverflow
+ // -- saturating: xor %dst, %dst; j done
+ //
+ // is_large:
+ // subss/subsd %tmp_xmm, %src ; <-- we clobber %src here
+            // cvttss2si/cvttsd2si %src, %dst
+ // cmp 0, %dst
+ // jnl next_is_large
+ // -- non-saturating: ud2 trap IntegerOverflow
+ // -- saturating: movaps $UINT_MAX, %dst; j done
+ //
+ // next_is_large:
+            // add 2**(int_width - 1), %dst ;; 2 instructions for 64-bit integers
+ //
+ // done:
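+            //
+            // For intuition only (not emitted code): this is the usual unsigned-truncation
+            // trick of rebiasing large inputs by 2^(N-1). A rough Rust-flavored sketch for
+            // the f64 -> u64 case, ignoring the NaN/overflow checks above:
+            //
+            //   let threshold = 2f64.powi(63);
+            //   if src < threshold { src as i64 as u64 }                   // small inputs
+            //   else { ((src - threshold) as i64 as u64) + (1u64 << 63) }  // large inputs, rebiased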
+
+ assert_ne!(tmp_xmm, src, "tmp_xmm clobbers src!");
+
+ let (sub_op, cast_op, cmp_op, trunc_op) = if *src_size == OperandSize::Size64 {
+ (
+ SseOpcode::Subsd,
+ SseOpcode::Movq,
+ SseOpcode::Ucomisd,
+ SseOpcode::Cvttsd2si,
+ )
+ } else {
+ (
+ SseOpcode::Subss,
+ SseOpcode::Movd,
+ SseOpcode::Ucomiss,
+ SseOpcode::Cvttss2si,
+ )
+ };
+
+ let done = sink.get_label();
+
+ let cst = if *src_size == OperandSize::Size64 {
+ Ieee64::pow2(dst_size.to_bits() - 1).bits()
+ } else {
+ Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64
+ };
+
+ let inst = Inst::imm(*src_size, cst, *tmp_gpr);
+ inst.emit(sink, info, state);
+
+ let inst =
+ Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm.to_reg()), src.to_reg());
+ inst.emit(sink, info, state);
+
+ let handle_large = sink.get_label();
+ one_way_jmp(sink, CC::NB, handle_large); // jump to handle_large if src >= large_threshold
+
+ let not_nan = sink.get_label();
+ one_way_jmp(sink, CC::NP, not_nan); // jump over trap if not NaN
+
+ if *is_saturating {
+ // Emit 0.
+ let inst = Inst::alu_rmi_r(
+ *dst_size == OperandSize::Size64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(dst.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+ } else {
+ // Trap.
+ let inst = Inst::trap(TrapCode::BadConversionToInteger);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(not_nan);
+
+            // Actual truncation for small inputs: if the result is negative, then we had an
+            // overflow.
+
+ let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(0), dst.to_reg());
+ inst.emit(sink, info, state);
+
+ one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done
+
+ if *is_saturating {
+ // The input was "small" (< 2**(width -1)), so the only way to get an integer
+ // overflow is because the input was too small: saturate to the min value, i.e. 0.
+ let inst = Inst::alu_rmi_r(
+ *dst_size == OperandSize::Size64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(dst.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+ } else {
+ // Trap.
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+ }
+
+ // Now handle large inputs.
+
+ sink.bind_label(handle_large);
+
+ let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::cmp_rmi_r(dst_size.to_bytes(), RegMemImm::imm(0), dst.to_reg());
+ inst.emit(sink, info, state);
+
+ let next_is_large = sink.get_label();
+ one_way_jmp(sink, CC::NL, next_is_large); // if dst >= 0, jump to next_is_large
+
+ if *is_saturating {
+ // The input was "large" (>= 2**(width -1)), so the only way to get an integer
+ // overflow is because the input was too large: saturate to the max value.
+ let inst = Inst::imm(
+ OperandSize::Size64,
+ if *dst_size == OperandSize::Size64 {
+ u64::max_value()
+ } else {
+ u32::max_value() as u64
+ },
+ *dst,
+ );
+ inst.emit(sink, info, state);
+
+ let inst = Inst::jmp_known(done);
+ inst.emit(sink, info, state);
+ } else {
+ let inst = Inst::trap(TrapCode::IntegerOverflow);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(next_is_large);
+
+ if *dst_size == OperandSize::Size64 {
+ let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
+ inst.emit(sink, info, state);
+
+ let inst = Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp_gpr.to_reg()),
+ *dst,
+ );
+ inst.emit(sink, info, state);
+ } else {
+ let inst =
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(1 << 31), *dst);
+ inst.emit(sink, info, state);
+ }
+
+ sink.bind_label(done);
+ }
+
+ Inst::LoadExtName { dst, name, offset } => {
+ // The full address can be encoded in the register, with a relocation.
+ // Generates: movabsq $name, %dst
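+            // Encoding note: this is REX.W + (B8+rd) io, i.e. `mov r64, imm64`; the low three
+            // bits of the destination register select the opcode byte and bit 3 goes into
+            // REX.B, which is why the bytes are assembled by hand here.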
+ let enc_dst = int_reg_enc(dst.to_reg());
+ sink.put1(0x48 | ((enc_dst >> 3) & 1));
+ sink.put1(0xB8 | (enc_dst & 7));
+ emit_reloc(sink, state, Reloc::Abs8, name, *offset);
+ if info.flags().emit_all_ones_funcaddrs() {
+ sink.put8(u64::max_value());
+ } else {
+ sink.put8(0);
+ }
+ }
+
+ Inst::LockCmpxchg { ty, src, dst } => {
+ // lock cmpxchg{b,w,l,q} %src, (dst)
+ // Note that 0xF0 is the Lock prefix.
+ let (prefix, rex, opcodes) = match *ty {
+ types::I8 => {
+ let mut rex_flags = RexFlags::clear_w();
+ let enc_src = int_reg_enc(*src);
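+                    // Without a REX prefix, encodings 4..=7 would name AH/CH/DH/BH rather
+                    // than SPL/BPL/SIL/DIL, so force a REX byte for those registers.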
+ if enc_src >= 4 && enc_src <= 7 {
+ rex_flags.always_emit();
+ };
+ (LegacyPrefixes::_F0, rex_flags, 0x0FB0)
+ }
+ types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),
+ types::I32 => (LegacyPrefixes::_F0, RexFlags::clear_w(), 0x0FB1),
+ types::I64 => (LegacyPrefixes::_F0, RexFlags::set_w(), 0x0FB1),
+ _ => unreachable!(),
+ };
+ let amode = dst.finalize(state);
+ emit_std_reg_mem(sink, state, prefix, opcodes, 2, *src, &amode, rex);
+ }
+
+ Inst::AtomicRmwSeq { ty, op } => {
+ // Emit this:
+ //
+ // mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
+ // again:
+ // movq %rax, %r11 // rax = old value, r11 = old value
+ // `op`q %r10, %r11 // rax = old value, r11 = new value
+ // lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value
+ // jnz again // If this is taken, rax will have a "revised" old value
+ //
+ // Operand conventions:
+ // IN: %r9 (addr), %r10 (2nd arg for `op`)
+ // OUT: %rax (old value), %r11 (trashed), %rflags (trashed)
+ //
+ // In the case where the operation is 'xchg', the "`op`q" instruction is instead
+ // movq %r10, %r11
+            // so that we simply write the "2nd arg for `op`" into the destination.
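+            //
+            // For intuition only (not emitted code): this is a standard compare-and-swap retry
+            // loop. A rough Rust-level sketch, assuming `mem: &AtomicU64`, a second argument
+            // `arg: u64`, and `op` as a closure:
+            //
+            //   let mut old = mem.load(Ordering::SeqCst);
+            //   loop {
+            //       let new = op(old, arg);
+            //       match mem.compare_exchange(old, new, Ordering::SeqCst, Ordering::SeqCst) {
+            //           Ok(_) => break,          // store succeeded; `old` is the value to return
+            //           Err(seen) => old = seen, // lock cmpxchg leaves the observed value in %rax
+            //       }
+            //   }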
+ let rax = regs::rax();
+ let r9 = regs::r9();
+ let r10 = regs::r10();
+ let r11 = regs::r11();
+ let rax_w = Writable::from_reg(rax);
+ let r11_w = Writable::from_reg(r11);
+ let amode = Amode::imm_reg(0, r9);
+ let again_label = sink.get_label();
+
+ // mov{zbq,zwq,zlq,q} (%r9), %rax
+ // No need to call `add_trap` here, since the `i1` emit will do that.
+ let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend);
+ i1.emit(sink, info, state);
+
+ // again:
+ sink.bind_label(again_label);
+
+ // movq %rax, %r11
+ let i2 = Inst::mov_r_r(true, rax, r11_w);
+ i2.emit(sink, info, state);
+
+ // opq %r10, %r11
+ let r10_rmi = RegMemImm::reg(r10);
+ let i3 = if *op == inst_common::AtomicRmwOp::Xchg {
+ Inst::mov_r_r(true, r10, r11_w)
+ } else {
+ let alu_op = match op {
+ inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add,
+ inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub,
+ inst_common::AtomicRmwOp::And => AluRmiROpcode::And,
+ inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or,
+ inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor,
+ inst_common::AtomicRmwOp::Xchg => unreachable!(),
+ };
+ Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w)
+ };
+ i3.emit(sink, info, state);
+
+ // lock cmpxchg{b,w,l,q} %r11, (%r9)
+ // No need to call `add_trap` here, since the `i4` emit will do that.
+ let i4 = Inst::LockCmpxchg {
+ ty: *ty,
+ src: r11,
+ dst: amode.into(),
+ };
+ i4.emit(sink, info, state);
+
+ // jnz again
+ one_way_jmp(sink, CC::NZ, again_label);
+ }
+
+ Inst::Fence { kind } => {
+ sink.put1(0x0F);
+ sink.put1(0xAE);
+ match kind {
+ FenceKind::MFence => sink.put1(0xF0), // mfence = 0F AE F0
+ FenceKind::LFence => sink.put1(0xE8), // lfence = 0F AE E8
+ FenceKind::SFence => sink.put1(0xF8), // sfence = 0F AE F8
+ }
+ }
+
+ Inst::Hlt => {
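+            // Note: 0xCC is the one-byte int3 (breakpoint) encoding rather than the 0xF4 `hlt`
+            // opcode, presumably because `hlt` is privileged.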
+ sink.put1(0xcc);
+ }
+
+ Inst::Ud2 { trap_code } => {
+ let cur_srcloc = state.cur_srcloc();
+ sink.add_trap(cur_srcloc, *trap_code);
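+            // The ud2 itself is the two bytes 0F 0B emitted just below, hence the
+            // `UpcomingBytes(2)` extent for the stack map.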
+ if let Some(s) = state.take_stack_map() {
+ sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
+ }
+ sink.put1(0x0f);
+ sink.put1(0x0b);
+ }
+
+ Inst::VirtualSPOffsetAdj { offset } => {
+ debug!(
+ "virtual sp offset adjusted by {} -> {}",
+ offset,
+ state.virtual_sp_offset + offset
+ );
+ state.virtual_sp_offset += offset;
+ }
+
+ Inst::Nop { len } => {
+ // These encodings can all be found in Intel's architecture manual, at the NOP
+ // instruction description.
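+            // Each iteration below greedily emits the largest recommended multi-byte NOP form
+            // (at most 9 bytes) until the requested length is covered.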
+ let mut len = *len;
+ while len != 0 {
+ let emitted = u8::min(len, 9);
+ match emitted {
+ 0 => {}
+ 1 => sink.put1(0x90), // NOP
+ 2 => {
+ // 66 NOP
+ sink.put1(0x66);
+ sink.put1(0x90);
+ }
+ 3 => {
+ // NOP [EAX]
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x00);
+ }
+ 4 => {
+                        // NOP 0[EAX], with 0 a 1-byte immediate.
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x40);
+ sink.put1(0x00);
+ }
+ 5 => {
+ // NOP [EAX, EAX, 1]
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x44);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 6 => {
+ // 66 NOP [EAX, EAX, 1]
+ sink.put1(0x66);
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x44);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 7 => {
+                        // NOP 0[EAX], with 0 a 4-byte immediate.
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x80);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 8 => {
+                        // NOP 0[EAX, EAX, 1], with 0 a 4-byte immediate.
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x84);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ 9 => {
+                        // 66 NOP 0[EAX, EAX, 1], with 0 a 4-byte immediate.
+ sink.put1(0x66);
+ sink.put1(0x0F);
+ sink.put1(0x1F);
+ sink.put1(0x84);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ sink.put1(0x00);
+ }
+ _ => unreachable!(),
+ }
+ len -= emitted;
+ }
+ }
+
+ Inst::EpiloguePlaceholder => {
+ // Generate no code.
+ }
+ }
+
+ state.clear_post_insn();
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs
new file mode 100644
index 0000000000..06092d498a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/emit_tests.rs
@@ -0,0 +1,3593 @@
+//! Tests for the emitter
+//!
+//! See comments at the top of `fn x64_emit` for advice on how to create reliable test cases.
+//!
+//! to see stdout: cargo test -- --nocapture
+//!
+//! for this specific case, as of 24 Aug 2020:
+//!
+//! cd to the top of your wasmtime tree, then:
+//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \
+//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \
+//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \
+//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit
+
+use super::*;
+use crate::isa::test_utils;
+use crate::isa::x64;
+use alloc::vec::Vec;
+
+#[test]
+fn test_x64_emit() {
+ let rax = regs::rax();
+ let rbx = regs::rbx();
+ let rcx = regs::rcx();
+ let rdx = regs::rdx();
+ let rsi = regs::rsi();
+ let rdi = regs::rdi();
+ let rsp = regs::rsp();
+ let rbp = regs::rbp();
+ let r8 = regs::r8();
+ let r9 = regs::r9();
+ let r10 = regs::r10();
+ let r11 = regs::r11();
+ let r12 = regs::r12();
+ let r13 = regs::r13();
+ let r14 = regs::r14();
+ let r15 = regs::r15();
+
+ let xmm0 = regs::xmm0();
+ let xmm1 = regs::xmm1();
+ let xmm2 = regs::xmm2();
+ let xmm3 = regs::xmm3();
+ let xmm4 = regs::xmm4();
+ let xmm5 = regs::xmm5();
+ let xmm6 = regs::xmm6();
+ let xmm7 = regs::xmm7();
+ let xmm8 = regs::xmm8();
+ let xmm9 = regs::xmm9();
+ let xmm10 = regs::xmm10();
+ let xmm11 = regs::xmm11();
+ let xmm12 = regs::xmm12();
+ let xmm13 = regs::xmm13();
+ let xmm14 = regs::xmm14();
+ let xmm15 = regs::xmm15();
+
+ // And Writable<> versions of the same:
+ let w_rax = Writable::<Reg>::from_reg(rax);
+ let w_rbx = Writable::<Reg>::from_reg(rbx);
+ let w_rcx = Writable::<Reg>::from_reg(rcx);
+ let w_rdx = Writable::<Reg>::from_reg(rdx);
+ let w_rsi = Writable::<Reg>::from_reg(rsi);
+ let w_rdi = Writable::<Reg>::from_reg(rdi);
+ let _w_rsp = Writable::<Reg>::from_reg(rsp);
+ let _w_rbp = Writable::<Reg>::from_reg(rbp);
+ let w_r8 = Writable::<Reg>::from_reg(r8);
+ let w_r9 = Writable::<Reg>::from_reg(r9);
+ let _w_r10 = Writable::<Reg>::from_reg(r10);
+ let w_r11 = Writable::<Reg>::from_reg(r11);
+ let w_r12 = Writable::<Reg>::from_reg(r12);
+ let w_r13 = Writable::<Reg>::from_reg(r13);
+ let w_r14 = Writable::<Reg>::from_reg(r14);
+ let w_r15 = Writable::<Reg>::from_reg(r15);
+
+ let w_xmm0 = Writable::<Reg>::from_reg(xmm0);
+ let w_xmm1 = Writable::<Reg>::from_reg(xmm1);
+ let w_xmm2 = Writable::<Reg>::from_reg(xmm2);
+ let w_xmm3 = Writable::<Reg>::from_reg(xmm3);
+ let w_xmm4 = Writable::<Reg>::from_reg(xmm4);
+ let w_xmm5 = Writable::<Reg>::from_reg(xmm5);
+ let w_xmm6 = Writable::<Reg>::from_reg(xmm6);
+ let w_xmm7 = Writable::<Reg>::from_reg(xmm7);
+ let w_xmm8 = Writable::<Reg>::from_reg(xmm8);
+ let w_xmm9 = Writable::<Reg>::from_reg(xmm9);
+ let w_xmm10 = Writable::<Reg>::from_reg(xmm10);
+ let w_xmm11 = Writable::<Reg>::from_reg(xmm11);
+ let w_xmm12 = Writable::<Reg>::from_reg(xmm12);
+ let w_xmm13 = Writable::<Reg>::from_reg(xmm13);
+ let w_xmm14 = Writable::<Reg>::from_reg(xmm14);
+ let w_xmm15 = Writable::<Reg>::from_reg(xmm15);
+
+ let mut insns = Vec::<(Inst, &str, &str)>::new();
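+    // Each entry is (instruction, expected encoding as a hex byte string, expected
+    // pretty-printed assembly).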
+
+ // ========================================================
+ // Cases aimed at checking Addr-esses: IR (Imm + Reg)
+ //
+ // These are just a bunch of loads with all supported (by the emitter)
+ // permutations of address formats.
+ //
+ // Addr_IR, offset zero
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rax), w_rdi),
+ "488B38",
+ "movq 0(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rbx), w_rdi),
+ "488B3B",
+ "movq 0(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rcx), w_rdi),
+ "488B39",
+ "movq 0(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rdx), w_rdi),
+ "488B3A",
+ "movq 0(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rbp), w_rdi),
+ "488B7D00",
+ "movq 0(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rsp), w_rdi),
+ "488B3C24",
+ "movq 0(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rsi), w_rdi),
+ "488B3E",
+ "movq 0(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, rdi), w_rdi),
+ "488B3F",
+ "movq 0(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r8), w_rdi),
+ "498B38",
+ "movq 0(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r9), w_rdi),
+ "498B39",
+ "movq 0(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r10), w_rdi),
+ "498B3A",
+ "movq 0(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r11), w_rdi),
+ "498B3B",
+ "movq 0(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r12), w_rdi),
+ "498B3C24",
+ "movq 0(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r13), w_rdi),
+ "498B7D00",
+ "movq 0(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r14), w_rdi),
+ "498B3E",
+ "movq 0(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0, r15), w_rdi),
+ "498B3F",
+ "movq 0(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset max simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rax), w_rdi),
+ "488B787F",
+ "movq 127(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rbx), w_rdi),
+ "488B7B7F",
+ "movq 127(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rcx), w_rdi),
+ "488B797F",
+ "movq 127(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rdx), w_rdi),
+ "488B7A7F",
+ "movq 127(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rbp), w_rdi),
+ "488B7D7F",
+ "movq 127(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rsp), w_rdi),
+ "488B7C247F",
+ "movq 127(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rsi), w_rdi),
+ "488B7E7F",
+ "movq 127(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, rdi), w_rdi),
+ "488B7F7F",
+ "movq 127(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r8), w_rdi),
+ "498B787F",
+ "movq 127(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r9), w_rdi),
+ "498B797F",
+ "movq 127(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r10), w_rdi),
+ "498B7A7F",
+ "movq 127(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r11), w_rdi),
+ "498B7B7F",
+ "movq 127(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r12), w_rdi),
+ "498B7C247F",
+ "movq 127(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r13), w_rdi),
+ "498B7D7F",
+ "movq 127(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r14), w_rdi),
+ "498B7E7F",
+ "movq 127(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(127, r15), w_rdi),
+ "498B7F7F",
+ "movq 127(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset min simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rax), w_rdi),
+ "488B7880",
+ "movq -128(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbx), w_rdi),
+ "488B7B80",
+ "movq -128(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rcx), w_rdi),
+ "488B7980",
+ "movq -128(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdx), w_rdi),
+ "488B7A80",
+ "movq -128(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rbp), w_rdi),
+ "488B7D80",
+ "movq -128(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsp), w_rdi),
+ "488B7C2480",
+ "movq -128(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rsi), w_rdi),
+ "488B7E80",
+ "movq -128(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, rdi), w_rdi),
+ "488B7F80",
+ "movq -128(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r8), w_rdi),
+ "498B7880",
+ "movq -128(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r9), w_rdi),
+ "498B7980",
+ "movq -128(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r10), w_rdi),
+ "498B7A80",
+ "movq -128(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r11), w_rdi),
+ "498B7B80",
+ "movq -128(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r12), w_rdi),
+ "498B7C2480",
+ "movq -128(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r13), w_rdi),
+ "498B7D80",
+ "movq -128(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r14), w_rdi),
+ "498B7E80",
+ "movq -128(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-128i32 as u32, r15), w_rdi),
+ "498B7F80",
+ "movq -128(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset smallest positive simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rax), w_rdi),
+ "488BB880000000",
+ "movq 128(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rbx), w_rdi),
+ "488BBB80000000",
+ "movq 128(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rcx), w_rdi),
+ "488BB980000000",
+ "movq 128(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rdx), w_rdi),
+ "488BBA80000000",
+ "movq 128(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rbp), w_rdi),
+ "488BBD80000000",
+ "movq 128(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rsp), w_rdi),
+ "488BBC2480000000",
+ "movq 128(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rsi), w_rdi),
+ "488BBE80000000",
+ "movq 128(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, rdi), w_rdi),
+ "488BBF80000000",
+ "movq 128(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r8), w_rdi),
+ "498BB880000000",
+ "movq 128(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r9), w_rdi),
+ "498BB980000000",
+ "movq 128(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r10), w_rdi),
+ "498BBA80000000",
+ "movq 128(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r11), w_rdi),
+ "498BBB80000000",
+ "movq 128(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r12), w_rdi),
+ "498BBC2480000000",
+ "movq 128(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r13), w_rdi),
+ "498BBD80000000",
+ "movq 128(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r14), w_rdi),
+ "498BBE80000000",
+ "movq 128(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(128, r15), w_rdi),
+ "498BBF80000000",
+ "movq 128(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset smallest negative simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rax), w_rdi),
+ "488BB87FFFFFFF",
+ "movq -129(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbx), w_rdi),
+ "488BBB7FFFFFFF",
+ "movq -129(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rcx), w_rdi),
+ "488BB97FFFFFFF",
+ "movq -129(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdx), w_rdi),
+ "488BBA7FFFFFFF",
+ "movq -129(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rbp), w_rdi),
+ "488BBD7FFFFFFF",
+ "movq -129(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsp), w_rdi),
+ "488BBC247FFFFFFF",
+ "movq -129(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rsi), w_rdi),
+ "488BBE7FFFFFFF",
+ "movq -129(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, rdi), w_rdi),
+ "488BBF7FFFFFFF",
+ "movq -129(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r8), w_rdi),
+ "498BB87FFFFFFF",
+ "movq -129(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r9), w_rdi),
+ "498BB97FFFFFFF",
+ "movq -129(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r10), w_rdi),
+ "498BBA7FFFFFFF",
+ "movq -129(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r11), w_rdi),
+ "498BBB7FFFFFFF",
+ "movq -129(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r12), w_rdi),
+ "498BBC247FFFFFFF",
+ "movq -129(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r13), w_rdi),
+ "498BBD7FFFFFFF",
+ "movq -129(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r14), w_rdi),
+ "498BBE7FFFFFFF",
+ "movq -129(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-129i32 as u32, r15), w_rdi),
+ "498BBF7FFFFFFF",
+ "movq -129(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset large positive simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rax), w_rdi),
+ "488BB877207317",
+ "movq 393420919(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbx), w_rdi),
+ "488BBB77207317",
+ "movq 393420919(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rcx), w_rdi),
+ "488BB977207317",
+ "movq 393420919(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdx), w_rdi),
+ "488BBA77207317",
+ "movq 393420919(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rbp), w_rdi),
+ "488BBD77207317",
+ "movq 393420919(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsp), w_rdi),
+ "488BBC2477207317",
+ "movq 393420919(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rsi), w_rdi),
+ "488BBE77207317",
+ "movq 393420919(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, rdi), w_rdi),
+ "488BBF77207317",
+ "movq 393420919(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r8), w_rdi),
+ "498BB877207317",
+ "movq 393420919(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r9), w_rdi),
+ "498BB977207317",
+ "movq 393420919(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r10), w_rdi),
+ "498BBA77207317",
+ "movq 393420919(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r11), w_rdi),
+ "498BBB77207317",
+ "movq 393420919(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r12), w_rdi),
+ "498BBC2477207317",
+ "movq 393420919(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r13), w_rdi),
+ "498BBD77207317",
+ "movq 393420919(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r14), w_rdi),
+ "498BBE77207317",
+ "movq 393420919(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(0x17732077, r15), w_rdi),
+ "498BBF77207317",
+ "movq 393420919(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Addr_IR, offset large negative simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rax), w_rdi),
+ "488BB8D9A6BECE",
+ "movq -826366247(%rax), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbx), w_rdi),
+ "488BBBD9A6BECE",
+ "movq -826366247(%rbx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rcx), w_rdi),
+ "488BB9D9A6BECE",
+ "movq -826366247(%rcx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdx), w_rdi),
+ "488BBAD9A6BECE",
+ "movq -826366247(%rdx), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rbp), w_rdi),
+ "488BBDD9A6BECE",
+ "movq -826366247(%rbp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsp), w_rdi),
+ "488BBC24D9A6BECE",
+ "movq -826366247(%rsp), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rsi), w_rdi),
+ "488BBED9A6BECE",
+ "movq -826366247(%rsi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, rdi), w_rdi),
+ "488BBFD9A6BECE",
+ "movq -826366247(%rdi), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r8), w_rdi),
+ "498BB8D9A6BECE",
+ "movq -826366247(%r8), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r9), w_rdi),
+ "498BB9D9A6BECE",
+ "movq -826366247(%r9), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r10), w_rdi),
+ "498BBAD9A6BECE",
+ "movq -826366247(%r10), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r11), w_rdi),
+ "498BBBD9A6BECE",
+ "movq -826366247(%r11), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r12), w_rdi),
+ "498BBC24D9A6BECE",
+ "movq -826366247(%r12), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r13), w_rdi),
+ "498BBDD9A6BECE",
+ "movq -826366247(%r13), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r14), w_rdi),
+ "498BBED9A6BECE",
+ "movq -826366247(%r14), %rdi",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg(-0x31415927i32 as u32, r15), w_rdi),
+ "498BBFD9A6BECE",
+ "movq -826366247(%r15), %rdi",
+ ));
+
+ // ========================================================
+ // Cases aimed at checking Addr-esses: IRRS (Imm + Reg + (Reg << Shift))
+ // Note these don't check the case where the index reg is RSP, since we
+ // don't encode any of those.
+ //
+ // Addr_IRRS, offset max simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rax, 0), w_r11),
+ "4C8B5C007F",
+ "movq 127(%rax,%rax,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rax, 1), w_r11),
+ "4C8B5C477F",
+ "movq 127(%rdi,%rax,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rax, 2), w_r11),
+ "4D8B5C807F",
+ "movq 127(%r8,%rax,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rax, 3), w_r11),
+ "4D8B5CC77F",
+ "movq 127(%r15,%rax,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rax, rdi, 3), w_r11),
+ "4C8B5CF87F",
+ "movq 127(%rax,%rdi,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, rdi, rdi, 2), w_r11),
+ "4C8B5CBF7F",
+ "movq 127(%rdi,%rdi,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r8, rdi, 1), w_r11),
+ "4D8B5C787F",
+ "movq 127(%r8,%rdi,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(127, r15, rdi, 0), w_r11),
+ "4D8B5C3F7F",
+ "movq 127(%r15,%rdi,1), %r11",
+ ));
+
+ // ========================================================
+ // Addr_IRRS, offset min simm8
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r8, 2), w_r11),
+ "4E8B5C8080",
+ "movq -128(%rax,%r8,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r8, 3), w_r11),
+ "4E8B5CC780",
+ "movq -128(%rdi,%r8,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r8, 0), w_r11),
+ "4F8B5C0080",
+ "movq -128(%r8,%r8,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r8, 1), w_r11),
+ "4F8B5C4780",
+ "movq -128(%r15,%r8,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rax, r15, 1), w_r11),
+ "4E8B5C7880",
+ "movq -128(%rax,%r15,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, rdi, r15, 0), w_r11),
+ "4E8B5C3F80",
+ "movq -128(%rdi,%r15,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r8, r15, 3), w_r11),
+ "4F8B5CF880",
+ "movq -128(%r8,%r15,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(-128i32 as u32, r15, r15, 2), w_r11),
+ "4F8B5CBF80",
+ "movq -128(%r15,%r15,4), %r11",
+ ));
+
+ // ========================================================
+ // Addr_IRRS, offset large positive simm32
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rax, 0), w_r11),
+ "4C8B9C00BE25664F",
+ "movq 1332094398(%rax,%rax,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rax, 1), w_r11),
+ "4C8B9C47BE25664F",
+ "movq 1332094398(%rdi,%rax,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rax, 2), w_r11),
+ "4D8B9C80BE25664F",
+ "movq 1332094398(%r8,%rax,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rax, 3), w_r11),
+ "4D8B9CC7BE25664F",
+ "movq 1332094398(%r15,%rax,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rax, rdi, 3), w_r11),
+ "4C8B9CF8BE25664F",
+ "movq 1332094398(%rax,%rdi,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, rdi, rdi, 2), w_r11),
+ "4C8B9CBFBE25664F",
+ "movq 1332094398(%rdi,%rdi,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r8, rdi, 1), w_r11),
+ "4D8B9C78BE25664F",
+ "movq 1332094398(%r8,%rdi,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(0x4f6625be, r15, rdi, 0), w_r11),
+ "4D8B9C3FBE25664F",
+ "movq 1332094398(%r15,%rdi,1), %r11",
+ ));
+
+ // ========================================================
+ // Addr_IRRS, offset large negative simm32
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r8, 2),
+ w_r11,
+ ),
+ "4E8B9C8070E9B2D9",
+ "movq -642586256(%rax,%r8,4), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r8, 3),
+ w_r11,
+ ),
+ "4E8B9CC770E9B2D9",
+ "movq -642586256(%rdi,%r8,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r8, 0),
+ w_r11,
+ ),
+ "4F8B9C0070E9B2D9",
+ "movq -642586256(%r8,%r8,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r8, 1),
+ w_r11,
+ ),
+ "4F8B9C4770E9B2D9",
+ "movq -642586256(%r15,%r8,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rax, r15, 1),
+ w_r11,
+ ),
+ "4E8B9C7870E9B2D9",
+ "movq -642586256(%rax,%r15,2), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, rdi, r15, 0),
+ w_r11,
+ ),
+ "4E8B9C3F70E9B2D9",
+ "movq -642586256(%rdi,%r15,1), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r8, r15, 3),
+ w_r11,
+ ),
+ "4F8B9CF870E9B2D9",
+ "movq -642586256(%r8,%r15,8), %r11",
+ ));
+ insns.push((
+ Inst::mov64_m_r(
+ Amode::imm_reg_reg_shift(-0x264d1690i32 as u32, r15, r15, 2),
+ w_r11,
+ ),
+ "4F8B9CBF70E9B2D9",
+ "movq -642586256(%r15,%r15,4), %r11",
+ ));
+
+ // End of test cases for Addr
+ // ========================================================
+
+ // ========================================================
+ // General tests for each insn. Don't forget to follow the
+ // guidelines commented just prior to `fn x64_emit`.
+ //
+ // Alu_RMI_R
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Add, RegMemImm::reg(r15), w_rdx),
+ "4C01FA",
+ "addq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::reg(rcx), w_r8),
+ "4101C8",
+ "addl %ecx, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::reg(rcx), w_rsi),
+ "01CE",
+ "addl %ecx, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rdx,
+ ),
+ "48035763",
+ "addq 99(%rdi), %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_r8,
+ ),
+ "44034763",
+ "addl 99(%rdi), %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rsi,
+ ),
+ "037763",
+ "addl 99(%rdi), %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-127i32 as u32),
+ w_rdx,
+ ),
+ "4883C281",
+ "addq $-127, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-129i32 as u32),
+ w_rdx,
+ ),
+ "4881C27FFFFFFF",
+ "addq $-129, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Add, RegMemImm::imm(76543210), w_rdx),
+ "4881C2EAF48F04",
+ "addq $76543210, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-127i32 as u32),
+ w_r8,
+ ),
+ "4183C081",
+ "addl $-127, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-129i32 as u32),
+ w_r8,
+ ),
+ "4181C07FFFFFFF",
+ "addl $-129, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-76543210i32 as u32),
+ w_r8,
+ ),
+ "4181C0160B70FB",
+ "addl $-76543210, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-127i32 as u32),
+ w_rsi,
+ ),
+ "83C681",
+ "addl $-127, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Add,
+ RegMemImm::imm(-129i32 as u32),
+ w_rsi,
+ ),
+ "81C67FFFFFFF",
+ "addl $-129, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Add, RegMemImm::imm(76543210), w_rsi),
+ "81C6EAF48F04",
+ "addl $76543210, %esi",
+ ));
+ // This is pretty feeble
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Sub, RegMemImm::reg(r15), w_rdx),
+ "4C29FA",
+ "subq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::And, RegMemImm::reg(r15), w_rdx),
+ "4C21FA",
+ "andq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Or, RegMemImm::reg(r15), w_rdx),
+ "4C09FA",
+ "orq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Xor, RegMemImm::reg(r15), w_rdx),
+ "4C31FA",
+ "xorq %r15, %rdx",
+ ));
+ // Test all mul cases, though
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Mul, RegMemImm::reg(r15), w_rdx),
+ "490FAFD7",
+ "imulq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::reg(rcx), w_r8),
+ "440FAFC1",
+ "imull %ecx, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::reg(rcx), w_rsi),
+ "0FAFF1",
+ "imull %ecx, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Mul,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rdx,
+ ),
+ "480FAF5763",
+ "imulq 99(%rdi), %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_r8,
+ ),
+ "440FAF4763",
+ "imull 99(%rdi), %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::mem(Amode::imm_reg(99, rdi)),
+ w_rsi,
+ ),
+ "0FAF7763",
+ "imull 99(%rdi), %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-127i32 as u32),
+ w_rdx,
+ ),
+ "486BD281",
+ "imulq $-127, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ true,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-129i32 as u32),
+ w_rdx,
+ ),
+ "4869D27FFFFFFF",
+ "imulq $-129, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(true, AluRmiROpcode::Mul, RegMemImm::imm(76543210), w_rdx),
+ "4869D2EAF48F04",
+ "imulq $76543210, %rdx",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-127i32 as u32),
+ w_r8,
+ ),
+ "456BC081",
+ "imull $-127, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-129i32 as u32),
+ w_r8,
+ ),
+ "4569C07FFFFFFF",
+ "imull $-129, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-76543210i32 as u32),
+ w_r8,
+ ),
+ "4569C0160B70FB",
+ "imull $-76543210, %r8d",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-127i32 as u32),
+ w_rsi,
+ ),
+ "6BF681",
+ "imull $-127, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(-129i32 as u32),
+ w_rsi,
+ ),
+ "69F67FFFFFFF",
+ "imull $-129, %esi",
+ ));
+ insns.push((
+ Inst::alu_rmi_r(false, AluRmiROpcode::Mul, RegMemImm::imm(76543210), w_rsi),
+ "69F6EAF48F04",
+ "imull $76543210, %esi",
+ ));
+
+ // ========================================================
+ // UnaryRmR
+
+ insns.push((
+ Inst::unary_rm_r(4, UnaryRmROpcode::Bsr, RegMem::reg(rsi), w_rdi),
+ "0FBDFE",
+ "bsrl %esi, %edi",
+ ));
+ insns.push((
+ Inst::unary_rm_r(8, UnaryRmROpcode::Bsr, RegMem::reg(r15), w_rax),
+ "490FBDC7",
+ "bsrq %r15, %rax",
+ ));
+
+ // ========================================================
+ // Not
+ insns.push((
+ Inst::not(4, Writable::from_reg(regs::rsi())),
+ "F7D6",
+ "notl %esi",
+ ));
+ insns.push((
+ Inst::not(8, Writable::from_reg(regs::r15())),
+ "49F7D7",
+ "notq %r15",
+ ));
+ insns.push((
+ Inst::not(4, Writable::from_reg(regs::r14())),
+ "41F7D6",
+ "notl %r14d",
+ ));
+ insns.push((
+ Inst::not(2, Writable::from_reg(regs::rdi())),
+ "66F7D7",
+ "notw %di",
+ ));
+
+ // ========================================================
+ // Neg
+ insns.push((
+ Inst::neg(4, Writable::from_reg(regs::rsi())),
+ "F7DE",
+ "negl %esi",
+ ));
+ insns.push((
+ Inst::neg(8, Writable::from_reg(regs::r15())),
+ "49F7DF",
+ "negq %r15",
+ ));
+ insns.push((
+ Inst::neg(4, Writable::from_reg(regs::r14())),
+ "41F7DE",
+ "negl %r14d",
+ ));
+ insns.push((
+ Inst::neg(2, Writable::from_reg(regs::rdi())),
+ "66F7DF",
+ "negw %di",
+ ));
+
+ // ========================================================
+ // Div
+ insns.push((
+ Inst::div(4, true /*signed*/, RegMem::reg(regs::rsi())),
+ "F7FE",
+ "idiv %esi",
+ ));
+ insns.push((
+ Inst::div(8, true /*signed*/, RegMem::reg(regs::r15())),
+ "49F7FF",
+ "idiv %r15",
+ ));
+ insns.push((
+ Inst::div(4, false /*signed*/, RegMem::reg(regs::r14())),
+ "41F7F6",
+ "div %r14d",
+ ));
+ insns.push((
+ Inst::div(8, false /*signed*/, RegMem::reg(regs::rdi())),
+ "48F7F7",
+ "div %rdi",
+ ));
+
+ // ========================================================
+ // MulHi
+ insns.push((
+ Inst::mul_hi(4, true /*signed*/, RegMem::reg(regs::rsi())),
+ "F7EE",
+ "imul %esi",
+ ));
+ insns.push((
+ Inst::mul_hi(8, true /*signed*/, RegMem::reg(regs::r15())),
+ "49F7EF",
+ "imul %r15",
+ ));
+ insns.push((
+ Inst::mul_hi(4, false /*signed*/, RegMem::reg(regs::r14())),
+ "41F7E6",
+ "mul %r14d",
+ ));
+ insns.push((
+ Inst::mul_hi(8, false /*signed*/, RegMem::reg(regs::rdi())),
+ "48F7E7",
+ "mul %rdi",
+ ));
+
+ // ========================================================
+ // cbw
+ insns.push((Inst::sign_extend_data(1), "6698", "cbw"));
+
+ // ========================================================
+ // cdq family: SignExtendRaxRdx
+ insns.push((Inst::sign_extend_data(2), "6699", "cwd"));
+ insns.push((Inst::sign_extend_data(4), "99", "cdq"));
+ insns.push((Inst::sign_extend_data(8), "4899", "cqo"));
+
+ // ========================================================
+ // Imm_R
+ //
+ insns.push((
+ Inst::imm(OperandSize::Size32, 1234567, w_r14),
+ "41BE87D61200",
+ "movl $1234567, %r14d",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size32, -126i64 as u64, w_r14),
+ "41BE82FFFFFF",
+ "movl $-126, %r14d",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, 1234567898765, w_r14),
+ "49BE8D26FB711F010000",
+ "movabsq $1234567898765, %r14",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, -126i64 as u64, w_r14),
+ "49C7C682FFFFFF",
+ "movabsq $-126, %r14",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size32, 1234567, w_rcx),
+ "B987D61200",
+ "movl $1234567, %ecx",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size32, -126i64 as u64, w_rcx),
+ "B982FFFFFF",
+ "movl $-126, %ecx",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, 1234567898765, w_rsi),
+ "48BE8D26FB711F010000",
+ "movabsq $1234567898765, %rsi",
+ ));
+ insns.push((
+ Inst::imm(OperandSize::Size64, -126i64 as u64, w_rbx),
+ "48C7C382FFFFFF",
+ "movabsq $-126, %rbx",
+ ));
+
+ // ========================================================
+ // Mov_R_R
+ insns.push((
+ Inst::mov_r_r(false, rbx, w_rsi),
+ "89DE",
+ "movl %ebx, %esi",
+ ));
+ insns.push((
+ Inst::mov_r_r(false, rbx, w_r9),
+ "4189D9",
+ "movl %ebx, %r9d",
+ ));
+ insns.push((
+ Inst::mov_r_r(false, r11, w_rsi),
+ "4489DE",
+ "movl %r11d, %esi",
+ ));
+ insns.push((
+ Inst::mov_r_r(false, r12, w_r9),
+ "4589E1",
+ "movl %r12d, %r9d",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, rbx, w_rsi),
+ "4889DE",
+ "movq %rbx, %rsi",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, rbx, w_r9),
+ "4989D9",
+ "movq %rbx, %r9",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, r11, w_rsi),
+ "4C89DE",
+ "movq %r11, %rsi",
+ ));
+ insns.push((
+ Inst::mov_r_r(true, r12, w_r9),
+ "4D89E1",
+ "movq %r12, %r9",
+ ));
+
+ // ========================================================
+ // MovZX_RM_R
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rdi), w_rdi),
+ "400FB6FF",
+ "movzbl %dil, %edi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(rax), w_rsi),
+ "0FB6F0",
+ "movzbl %al, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BL, RegMem::reg(r15), w_rsi),
+ "410FB6F7",
+ "movzbl %r15b, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FB671F9",
+ "movzbl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FB658F9",
+ "movzbl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FB64AF9",
+ "movzbl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FB653F9",
+ "movzbl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(rax), w_rsi),
+ "480FB6F0",
+ "movzbq %al, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::BQ, RegMem::reg(r10), w_rsi),
+ "490FB6F2",
+ "movzbq %r10b, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FB671F9",
+ "movzbq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FB658F9",
+ "movzbq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FB64AF9",
+ "movzbq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FB653F9",
+ "movzbq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi),
+ "0FB7F1",
+ "movzwl %cx, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WL, RegMem::reg(r10), w_rsi),
+ "410FB7F2",
+ "movzwl %r10w, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FB771F9",
+ "movzwl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FB758F9",
+ "movzwl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FB74AF9",
+ "movzwl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FB753F9",
+ "movzwl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi),
+ "480FB7F1",
+ "movzwq %cx, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::WQ, RegMem::reg(r11), w_rsi),
+ "490FB7F3",
+ "movzwq %r11w, %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FB771F9",
+ "movzwq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FB758F9",
+ "movzwq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FB74AF9",
+ "movzwq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FB753F9",
+ "movzwq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi),
+ "8BF1",
+ "movl %ecx, %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "8B71F9",
+ "movl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "418B58F9",
+ "movl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "458B4AF9",
+ "movl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movzx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "418B53F9",
+ "movl -7(%r11), %edx",
+ ));
+
+ // ========================================================
+ // Mov64_M_R
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_rcx),
+ "488B8C18B3000000",
+ "movq 179(%rax,%rbx,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, rbx, 0), w_r8),
+ "4C8B8418B3000000",
+ "movq 179(%rax,%rbx,1), %r8",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_rcx),
+ "4A8B8C08B3000000",
+ "movq 179(%rax,%r9,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, rax, r9, 0), w_r8),
+ "4E8B8408B3000000",
+ "movq 179(%rax,%r9,1), %r8",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_rcx),
+ "498B8C1AB3000000",
+ "movq 179(%r10,%rbx,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, rbx, 0), w_r8),
+ "4D8B841AB3000000",
+ "movq 179(%r10,%rbx,1), %r8",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_rcx),
+ "4B8B8C0AB3000000",
+ "movq 179(%r10,%r9,1), %rcx",
+ ));
+ insns.push((
+ Inst::mov64_m_r(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
+ "4F8B840AB3000000",
+ "movq 179(%r10,%r9,1), %r8",
+ ));
+
+ // ========================================================
+ // LoadEffectiveAddress
+ insns.push((
+ Inst::lea(Amode::imm_reg(42, r10), w_r8),
+ "4D8D422A",
+ "lea 42(%r10), %r8",
+ ));
+ insns.push((
+ Inst::lea(Amode::imm_reg(42, r10), w_r15),
+ "4D8D7A2A",
+ "lea 42(%r10), %r15",
+ ));
+ insns.push((
+ Inst::lea(Amode::imm_reg_reg_shift(179, r10, r9, 0), w_r8),
+ "4F8D840AB3000000",
+ "lea 179(%r10,%r9,1), %r8",
+ ));
+ insns.push((
+ Inst::lea(Amode::rip_relative(MachLabel::from_block(0)), w_rdi),
+ "488D3D00000000",
+ "lea label0(%rip), %rdi",
+ ));
+
+ // ========================================================
+ // MovSX_RM_R
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rdi), w_rdi),
+ "400FBEFF",
+ "movsbl %dil, %edi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rcx), w_rsi),
+ "0FBEF1",
+ "movsbl %cl, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(r14), w_rsi),
+ "410FBEF6",
+ "movsbl %r14b, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FBE71F9",
+ "movsbl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FBE58F9",
+ "movsbl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FBE4AF9",
+ "movsbl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FBE53F9",
+ "movsbl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(rcx), w_rsi),
+ "480FBEF1",
+ "movsbq %cl, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::BQ, RegMem::reg(r15), w_rsi),
+ "490FBEF7",
+ "movsbq %r15b, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FBE71F9",
+ "movsbq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FBE58F9",
+ "movsbq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FBE4AF9",
+ "movsbq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::BQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FBE53F9",
+ "movsbq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(rcx), w_rsi),
+ "0FBFF1",
+ "movswl %cx, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WL, RegMem::reg(r14), w_rsi),
+ "410FBFF6",
+ "movswl %r14w, %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "0FBF71F9",
+ "movswl -7(%rcx), %esi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "410FBF58F9",
+ "movswl -7(%r8), %ebx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "450FBF4AF9",
+ "movswl -7(%r10), %r9d",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WL,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "410FBF53F9",
+ "movswl -7(%r11), %edx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(rcx), w_rsi),
+ "480FBFF1",
+ "movswq %cx, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::WQ, RegMem::reg(r13), w_rsi),
+ "490FBFF5",
+ "movswq %r13w, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "480FBF71F9",
+ "movswq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "490FBF58F9",
+ "movswq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D0FBF4AF9",
+ "movswq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::WQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "490FBF53F9",
+ "movswq -7(%r11), %rdx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(rcx), w_rsi),
+ "4863F1",
+ "movslq %ecx, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(ExtMode::LQ, RegMem::reg(r15), w_rsi),
+ "4963F7",
+ "movslq %r15d, %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, rcx)),
+ w_rsi,
+ ),
+ "486371F9",
+ "movslq -7(%rcx), %rsi",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r8)),
+ w_rbx,
+ ),
+ "496358F9",
+ "movslq -7(%r8), %rbx",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r10)),
+ w_r9,
+ ),
+ "4D634AF9",
+ "movslq -7(%r10), %r9",
+ ));
+ insns.push((
+ Inst::movsx_rm_r(
+ ExtMode::LQ,
+ RegMem::mem(Amode::imm_reg(-7i32 as u32, r11)),
+ w_rdx,
+ ),
+ "496353F9",
+ "movslq -7(%r11), %rdx",
+ ));
+
+ // ========================================================
+ // Mov_R_M. Byte stores are tricky. Check everything carefully.
+ insns.push((
+ Inst::mov_r_m(8, rax, Amode::imm_reg(99, rdi)),
+ "48894763",
+ "movq %rax, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rbx, Amode::imm_reg(99, r8)),
+ "49895863",
+ "movq %rbx, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rcx, Amode::imm_reg(99, rsi)),
+ "48894E63",
+ "movq %rcx, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rdx, Amode::imm_reg(99, r9)),
+ "49895163",
+ "movq %rdx, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rsi, Amode::imm_reg(99, rax)),
+ "48897063",
+ "movq %rsi, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rdi, Amode::imm_reg(99, r15)),
+ "49897F63",
+ "movq %rdi, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rsp, Amode::imm_reg(99, rcx)),
+ "48896163",
+ "movq %rsp, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, rbp, Amode::imm_reg(99, r14)),
+ "49896E63",
+ "movq %rbp, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r8, Amode::imm_reg(99, rdi)),
+ "4C894763",
+ "movq %r8, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r9, Amode::imm_reg(99, r8)),
+ "4D894863",
+ "movq %r9, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r10, Amode::imm_reg(99, rsi)),
+ "4C895663",
+ "movq %r10, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r11, Amode::imm_reg(99, r9)),
+ "4D895963",
+ "movq %r11, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r12, Amode::imm_reg(99, rax)),
+ "4C896063",
+ "movq %r12, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r13, Amode::imm_reg(99, r15)),
+ "4D896F63",
+ "movq %r13, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r14, Amode::imm_reg(99, rcx)),
+ "4C897163",
+ "movq %r14, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(8, r15, Amode::imm_reg(99, r14)),
+ "4D897E63",
+ "movq %r15, 99(%r14)",
+ ));
+ //
+ insns.push((
+ Inst::mov_r_m(4, rax, Amode::imm_reg(99, rdi)),
+ "894763",
+ "movl %eax, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rbx, Amode::imm_reg(99, r8)),
+ "41895863",
+ "movl %ebx, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rcx, Amode::imm_reg(99, rsi)),
+ "894E63",
+ "movl %ecx, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rdx, Amode::imm_reg(99, r9)),
+ "41895163",
+ "movl %edx, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rsi, Amode::imm_reg(99, rax)),
+ "897063",
+ "movl %esi, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rdi, Amode::imm_reg(99, r15)),
+ "41897F63",
+ "movl %edi, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rsp, Amode::imm_reg(99, rcx)),
+ "896163",
+ "movl %esp, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, rbp, Amode::imm_reg(99, r14)),
+ "41896E63",
+ "movl %ebp, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r8, Amode::imm_reg(99, rdi)),
+ "44894763",
+ "movl %r8d, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r9, Amode::imm_reg(99, r8)),
+ "45894863",
+ "movl %r9d, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r10, Amode::imm_reg(99, rsi)),
+ "44895663",
+ "movl %r10d, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r11, Amode::imm_reg(99, r9)),
+ "45895963",
+ "movl %r11d, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r12, Amode::imm_reg(99, rax)),
+ "44896063",
+ "movl %r12d, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r13, Amode::imm_reg(99, r15)),
+ "45896F63",
+ "movl %r13d, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r14, Amode::imm_reg(99, rcx)),
+ "44897163",
+ "movl %r14d, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(4, r15, Amode::imm_reg(99, r14)),
+ "45897E63",
+ "movl %r15d, 99(%r14)",
+ ));
+ //
+ insns.push((
+ Inst::mov_r_m(2, rax, Amode::imm_reg(99, rdi)),
+ "66894763",
+ "movw %ax, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rbx, Amode::imm_reg(99, r8)),
+ "6641895863",
+ "movw %bx, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rcx, Amode::imm_reg(99, rsi)),
+ "66894E63",
+ "movw %cx, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rdx, Amode::imm_reg(99, r9)),
+ "6641895163",
+ "movw %dx, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rsi, Amode::imm_reg(99, rax)),
+ "66897063",
+ "movw %si, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rdi, Amode::imm_reg(99, r15)),
+ "6641897F63",
+ "movw %di, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rsp, Amode::imm_reg(99, rcx)),
+ "66896163",
+ "movw %sp, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, rbp, Amode::imm_reg(99, r14)),
+ "6641896E63",
+ "movw %bp, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r8, Amode::imm_reg(99, rdi)),
+ "6644894763",
+ "movw %r8w, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r9, Amode::imm_reg(99, r8)),
+ "6645894863",
+ "movw %r9w, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r10, Amode::imm_reg(99, rsi)),
+ "6644895663",
+ "movw %r10w, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r11, Amode::imm_reg(99, r9)),
+ "6645895963",
+ "movw %r11w, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r12, Amode::imm_reg(99, rax)),
+ "6644896063",
+ "movw %r12w, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r13, Amode::imm_reg(99, r15)),
+ "6645896F63",
+ "movw %r13w, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r14, Amode::imm_reg(99, rcx)),
+ "6644897163",
+ "movw %r14w, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(2, r15, Amode::imm_reg(99, r14)),
+ "6645897E63",
+ "movw %r15w, 99(%r14)",
+ ));
+ //
+ insns.push((
+ Inst::mov_r_m(1, rax, Amode::imm_reg(99, rdi)),
+ "884763",
+ "movb %al, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rbx, Amode::imm_reg(99, r8)),
+ "41885863",
+ "movb %bl, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rcx, Amode::imm_reg(99, rsi)),
+ "884E63",
+ "movb %cl, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rdx, Amode::imm_reg(99, r9)),
+ "41885163",
+ "movb %dl, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rsi, Amode::imm_reg(99, rax)),
+ "40887063",
+ "movb %sil, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rdi, Amode::imm_reg(99, r15)),
+ "41887F63",
+ "movb %dil, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rsp, Amode::imm_reg(99, rcx)),
+ "40886163",
+ "movb %spl, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, rbp, Amode::imm_reg(99, r14)),
+ "41886E63",
+ "movb %bpl, 99(%r14)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r8, Amode::imm_reg(99, rdi)),
+ "44884763",
+ "movb %r8b, 99(%rdi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r9, Amode::imm_reg(99, r8)),
+ "45884863",
+ "movb %r9b, 99(%r8)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r10, Amode::imm_reg(99, rsi)),
+ "44885663",
+ "movb %r10b, 99(%rsi)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r11, Amode::imm_reg(99, r9)),
+ "45885963",
+ "movb %r11b, 99(%r9)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r12, Amode::imm_reg(99, rax)),
+ "44886063",
+ "movb %r12b, 99(%rax)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r13, Amode::imm_reg(99, r15)),
+ "45886F63",
+ "movb %r13b, 99(%r15)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r14, Amode::imm_reg(99, rcx)),
+ "44887163",
+ "movb %r14b, 99(%rcx)",
+ ));
+ insns.push((
+ Inst::mov_r_m(1, r15, Amode::imm_reg(99, r14)),
+ "45887E63",
+ "movb %r15b, 99(%r14)",
+ ));
+
+ // ========================================================
+ // Shift_R
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_rdi),
+ "D3E7",
+ "shll %cl, %edi",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_r12),
+ "41D3E4",
+ "shll %cl, %r12d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, Some(2), w_r8),
+ "41C1E002",
+ "shll $2, %r8d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftLeft, Some(31), w_r13),
+ "41C1E51F",
+ "shll $31, %r13d",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_r13),
+ "49D3E5",
+ "shlq %cl, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_rdi),
+ "48D3E7",
+ "shlq %cl, %rdi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, Some(2), w_r8),
+ "49C1E002",
+ "shlq $2, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, Some(3), w_rbx),
+ "48C1E303",
+ "shlq $3, %rbx",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftLeft, Some(63), w_r13),
+ "49C1E53F",
+ "shlq $63, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightLogical, None, w_rdi),
+ "D3EF",
+ "shrl %cl, %edi",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(2), w_r8),
+ "41C1E802",
+ "shrl $2, %r8d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(31), w_r13),
+ "41C1ED1F",
+ "shrl $31, %r13d",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightLogical, None, w_rdi),
+ "48D3EF",
+ "shrq %cl, %rdi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(2), w_r8),
+ "49C1E802",
+ "shrq $2, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(63), w_r13),
+ "49C1ED3F",
+ "shrq $63, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, None, w_rdi),
+ "D3FF",
+ "sarl %cl, %edi",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
+ "41C1F802",
+ "sarl $2, %r8d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(31), w_r13),
+ "41C1FD1F",
+ "sarl $31, %r13d",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, None, w_rdi),
+ "48D3FF",
+ "sarq %cl, %rdi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
+ "49C1F802",
+ "sarq $2, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(63), w_r13),
+ "49C1FD3F",
+ "sarq $63, %r13",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::RotateLeft, None, w_r8),
+ "49D3C0",
+ "rolq %cl, %r8",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::RotateLeft, Some(3), w_r9),
+ "41C1C103",
+ "roll $3, %r9d",
+ ));
+ insns.push((
+ Inst::shift_r(4, ShiftKind::RotateRight, None, w_rsi),
+ "D3CE",
+ "rorl %cl, %esi",
+ ));
+ insns.push((
+ Inst::shift_r(8, ShiftKind::RotateRight, Some(5), w_r15),
+ "49C1CF05",
+ "rorq $5, %r15",
+ ));
+ insns.push((
+ Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
+ "D2CE",
+ "rorb %cl, %sil",
+ ));
+ insns.push((
+ Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
+ "41C0CF05",
+ "rorb $5, %r15b",
+ ));
+ insns.push((
+ Inst::shift_r(2, ShiftKind::RotateRight, None, w_rsi),
+ "66D3CE",
+ "rorw %cl, %si",
+ ));
+ insns.push((
+ Inst::shift_r(2, ShiftKind::RotateRight, Some(5), w_r15),
+ "6641C1CF05",
+ "rorw $5, %r15w",
+ ));
+
+ // ========================================================
+ // CmpRMIR
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::reg(r15), rdx),
+ "4C39FA",
+ "cmpq %r15, %rdx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::reg(rcx), r8),
+ "4939C8",
+ "cmpq %rcx, %r8",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::reg(rcx), rsi),
+ "4839CE",
+ "cmpq %rcx, %rsi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "483B5763",
+ "cmpq 99(%rdi), %rdx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "4C3B4763",
+ "cmpq 99(%rdi), %r8",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "483B7763",
+ "cmpq 99(%rdi), %rsi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::imm(76543210), rdx),
+ "4881FAEAF48F04",
+ "cmpq $76543210, %rdx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::imm(-76543210i32 as u32), r8),
+ "4981F8160B70FB",
+ "cmpq $-76543210, %r8",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(8, RegMemImm::imm(76543210), rsi),
+ "4881FEEAF48F04",
+ "cmpq $76543210, %rsi",
+ ));
+ //
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::reg(r15), rdx),
+ "4439FA",
+ "cmpl %r15d, %edx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::reg(rcx), r8),
+ "4139C8",
+ "cmpl %ecx, %r8d",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::reg(rcx), rsi),
+ "39CE",
+ "cmpl %ecx, %esi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "3B5763",
+ "cmpl 99(%rdi), %edx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "443B4763",
+ "cmpl 99(%rdi), %r8d",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "3B7763",
+ "cmpl 99(%rdi), %esi",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::imm(76543210), rdx),
+ "81FAEAF48F04",
+ "cmpl $76543210, %edx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::imm(-76543210i32 as u32), r8),
+ "4181F8160B70FB",
+ "cmpl $-76543210, %r8d",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(4, RegMemImm::imm(76543210), rsi),
+ "81FEEAF48F04",
+ "cmpl $76543210, %esi",
+ ));
+ //
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::reg(r15), rdx),
+ "664439FA",
+ "cmpw %r15w, %dx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::reg(rcx), r8),
+ "664139C8",
+ "cmpw %cx, %r8w",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::reg(rcx), rsi),
+ "6639CE",
+ "cmpw %cx, %si",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "663B5763",
+ "cmpw 99(%rdi), %dx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "66443B4763",
+ "cmpw 99(%rdi), %r8w",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "663B7763",
+ "cmpw 99(%rdi), %si",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::imm(23210), rdx),
+ "6681FAAA5A",
+ "cmpw $23210, %dx",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::imm(-7654i32 as u32), r8),
+ "664181F81AE2",
+ "cmpw $-7654, %r8w",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(2, RegMemImm::imm(7654), rsi),
+ "6681FEE61D",
+ "cmpw $7654, %si",
+ ));
+ //
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r15), rdx),
+ "4438FA",
+ "cmpb %r15b, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), r8),
+ "4138C8",
+ "cmpb %cl, %r8b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), rsi),
+ "4038CE",
+ "cmpb %cl, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rdx),
+ "3A5763",
+ "cmpb 99(%rdi), %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), r8),
+ "443A4763",
+ "cmpb 99(%rdi), %r8b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::mem(Amode::imm_reg(99, rdi)), rsi),
+ "403A7763",
+ "cmpb 99(%rdi), %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::imm(70), rdx),
+ "80FA46",
+ "cmpb $70, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::imm(-76i32 as u32), r8),
+ "4180F8B4",
+ "cmpb $-76, %r8b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::imm(76), rsi),
+ "4080FE4C",
+ "cmpb $76, %sil",
+ ));
+ // Extra byte-cases (paranoia!) for cmp_rmi_r for first operand = R
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rax), rbx),
+ "38C3",
+ "cmpb %al, %bl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbx), rax),
+ "38D8",
+ "cmpb %bl, %al",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), rdx),
+ "38CA",
+ "cmpb %cl, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), rsi),
+ "4038CE",
+ "cmpb %cl, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), r10),
+ "4138CA",
+ "cmpb %cl, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rcx), r14),
+ "4138CE",
+ "cmpb %cl, %r14b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), rdx),
+ "4038EA",
+ "cmpb %bpl, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), rsi),
+ "4038EE",
+ "cmpb %bpl, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), r10),
+ "4138EA",
+ "cmpb %bpl, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(rbp), r14),
+ "4138EE",
+ "cmpb %bpl, %r14b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), rdx),
+ "4438CA",
+ "cmpb %r9b, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), rsi),
+ "4438CE",
+ "cmpb %r9b, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), r10),
+ "4538CA",
+ "cmpb %r9b, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r9), r14),
+ "4538CE",
+ "cmpb %r9b, %r14b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), rdx),
+ "4438EA",
+ "cmpb %r13b, %dl",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), rsi),
+ "4438EE",
+ "cmpb %r13b, %sil",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), r10),
+ "4538EA",
+ "cmpb %r13b, %r10b",
+ ));
+ insns.push((
+ Inst::cmp_rmi_r(1, RegMemImm::reg(r13), r14),
+ "4538EE",
+ "cmpb %r13b, %r14b",
+ ));
+
+ // ========================================================
+ // SetCC
+ insns.push((Inst::setcc(CC::O, w_rsi), "400F90C6", "seto %sil"));
+ insns.push((Inst::setcc(CC::NLE, w_rsi), "400F9FC6", "setnle %sil"));
+ insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b"));
+ insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
+ insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp %r9b"));
+ insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp %r8b"));
+ // ========================================================
+ // Cmove
+ insns.push((
+ Inst::cmove(2, CC::O, RegMem::reg(rdi), w_rsi),
+ "660F40F7",
+ "cmovow %di, %si",
+ ));
+ insns.push((
+ Inst::cmove(
+ 2,
+ CC::NO,
+ RegMem::mem(Amode::imm_reg_reg_shift(37, rdi, rsi, 2)),
+ w_r15,
+ ),
+ "66440F417CB725",
+ "cmovnow 37(%rdi,%rsi,4), %r15w",
+ ));
+ insns.push((
+ Inst::cmove(4, CC::LE, RegMem::reg(rdi), w_rsi),
+ "0F4EF7",
+ "cmovlel %edi, %esi",
+ ));
+ insns.push((
+ Inst::cmove(4, CC::NLE, RegMem::mem(Amode::imm_reg(0, r15)), w_rsi),
+ "410F4F37",
+ "cmovnlel 0(%r15), %esi",
+ ));
+ insns.push((
+ Inst::cmove(8, CC::Z, RegMem::reg(rdi), w_r14),
+ "4C0F44F7",
+ "cmovzq %rdi, %r14",
+ ));
+ insns.push((
+ Inst::cmove(8, CC::NZ, RegMem::mem(Amode::imm_reg(13, rdi)), w_r14),
+ "4C0F45770D",
+ "cmovnzq 13(%rdi), %r14",
+ ));
+
+ // ========================================================
+ // Push64
+ insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi"));
+ insns.push((Inst::push64(RegMemImm::reg(r8)), "4150", "pushq %r8"));
+ insns.push((
+ Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
+ "FFB4CE41010000",
+ "pushq 321(%rsi,%rcx,8)",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::mem(Amode::imm_reg_reg_shift(321, r9, rbx, 2))),
+ "41FFB49941010000",
+ "pushq 321(%r9,%rbx,4)",
+ ));
+ insns.push((Inst::push64(RegMemImm::imm(0)), "6A00", "pushq $0"));
+ insns.push((Inst::push64(RegMemImm::imm(127)), "6A7F", "pushq $127"));
+ insns.push((
+ Inst::push64(RegMemImm::imm(128)),
+ "6880000000",
+ "pushq $128",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(0x31415927)),
+ "6827594131",
+ "pushq $826366247",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(-128i32 as u32)),
+ "6A80",
+ "pushq $-128",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(-129i32 as u32)),
+ "687FFFFFFF",
+ "pushq $-129",
+ ));
+ insns.push((
+ Inst::push64(RegMemImm::imm(-0x75c4e8a1i32 as u32)),
+ "685F173B8A",
+ "pushq $-1975838881",
+ ));
+
+ // ========================================================
+ // Pop64
+ insns.push((Inst::pop64(w_rax), "58", "popq %rax"));
+ insns.push((Inst::pop64(w_rdi), "5F", "popq %rdi"));
+ insns.push((Inst::pop64(w_r8), "4158", "popq %r8"));
+ insns.push((Inst::pop64(w_r15), "415F", "popq %r15"));
+
+ // ========================================================
+ // CallKnown
+ insns.push((
+ Inst::call_known(
+ ExternalName::User {
+ namespace: 0,
+ index: 0,
+ },
+ Vec::new(),
+ Vec::new(),
+ Opcode::Call,
+ ),
+ "E800000000",
+ "call User { namespace: 0, index: 0 }",
+ ));
+
+ // ========================================================
+ // CallUnknown
+ fn call_unknown(rm: RegMem) -> Inst {
+ Inst::call_unknown(rm, Vec::new(), Vec::new(), Opcode::CallIndirect)
+ }
+
+ insns.push((call_unknown(RegMem::reg(rbp)), "FFD5", "call *%rbp"));
+ insns.push((call_unknown(RegMem::reg(r11)), "41FFD3", "call *%r11"));
+ insns.push((
+ call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
+ "FF94CE41010000",
+ "call *321(%rsi,%rcx,8)",
+ ));
+ insns.push((
+ call_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
+ "41FF949241010000",
+ "call *321(%r10,%rdx,4)",
+ ));
+
+ // ========================================================
+ // Ret
+ insns.push((Inst::ret(), "C3", "ret"));
+
+ // ========================================================
+ // JmpKnown skipped for now
+
+ // ========================================================
+ // JmpCondSymm isn't a real instruction
+
+ // ========================================================
+ // JmpCond skipped for now
+
+ // ========================================================
+ // JmpCondCompound isn't a real instruction
+
+ // ========================================================
+ // JmpUnknown
+ insns.push((Inst::jmp_unknown(RegMem::reg(rbp)), "FFE5", "jmp *%rbp"));
+ insns.push((
+ Inst::jmp_unknown(RegMem::reg(r11)),
+ "41FFE3",
+ "jmp *%r11",
+ ));
+ insns.push((
+ Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, rsi, rcx, 3))),
+ "FFA4CE41010000",
+ "jmp *321(%rsi,%rcx,8)",
+ ));
+ insns.push((
+ Inst::jmp_unknown(RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rdx, 2))),
+ "41FFA49241010000",
+ "jmp *321(%r10,%rdx,4)",
+ ));
+
+ // ========================================================
+ // XMM_CMP_RM_R
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm1), xmm2),
+ "0F2ED1",
+ "ucomiss %xmm1, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm0), xmm9),
+ "440F2EC8",
+ "ucomiss %xmm0, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm13), xmm4),
+ "66410F2EE5",
+ "ucomisd %xmm13, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm11), xmm12),
+ "66450F2EE3",
+ "ucomisd %xmm11, %xmm12",
+ ));
+
+ // ========================================================
+ // XMM_RM_R: float binary ops
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0),
+ "F30F58C1",
+ "addss %xmm1, %xmm0",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
+ "F3450F58EB",
+ "addss %xmm11, %xmm13",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(
+ SseOpcode::Addss,
+ RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)),
+ w_xmm0,
+ ),
+ "F3410F5844927B",
+ "addss 123(%r10,%rdx,4), %xmm0",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4),
+ "F2410F58E7",
+ "addsd %xmm15, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1),
+ "F30F5CC8",
+ "subss %xmm0, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
+ "F3410F5CCC",
+ "subss %xmm12, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(
+ SseOpcode::Subss,
+ RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)),
+ w_xmm10,
+ ),
+ "F3450F5C94C241010000",
+ "subss 321(%r10,%rax,8), %xmm10",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14),
+ "F2440F5CF5",
+ "subsd %xmm5, %xmm14",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4),
+ "F30F59E5",
+ "mulss %xmm5, %xmm4",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4),
+ "F20F59E5",
+ "mulsd %xmm5, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7),
+ "F3410F5EF8",
+ "divss %xmm8, %xmm7",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4),
+ "F20F5EE5",
+ "divsd %xmm5, %xmm4",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12),
+ "440F54E3",
+ "andps %xmm3, %xmm12",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11),
+ "440F55DC",
+ "andnps %xmm4, %xmm11",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15),
+ "440F56F9",
+ "orps %xmm1, %xmm15",
+ ));
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4),
+ "0F56E5",
+ "orps %xmm5, %xmm4",
+ ));
+
+ // ========================================================
+ // XMM_RM_R: Integer Packed
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5),
+ "66410FFCE9",
+ "paddb %xmm9, %xmm5",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6),
+ "660FFDF7",
+ "paddw %xmm7, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13),
+ "66450FFEEC",
+ "paddd %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8),
+ "66440FD4C1",
+ "paddq %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5),
+ "66410FECE9",
+ "paddsb %xmm9, %xmm5",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6),
+ "660FEDF7",
+ "paddsw %xmm7, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13),
+ "66450FDCEC",
+ "paddusb %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8),
+ "66440FDDC1",
+ "paddusw %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5),
+ "66410FE8E9",
+ "psubsb %xmm9, %xmm5",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6),
+ "660FE9F7",
+ "psubsw %xmm7, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13),
+ "66450FD8EC",
+ "psubusb %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8),
+ "66440FD9C1",
+ "psubusw %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
+ "66450FE0EC",
+ "pavgb %xmm12, %xmm13",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8),
+ "66440FE3C1",
+ "pavgw %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9),
+ "66440FF8CD",
+ "psubb %xmm5, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7),
+ "660FF9FE",
+ "psubw %xmm6, %xmm7",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12),
+ "66450FFAE5",
+ "psubd %xmm13, %xmm12",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1),
+ "66410FFBC8",
+ "psubq %xmm8, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6),
+ "66410F3840F7",
+ "pmulld %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1),
+ "66410FD5CE",
+ "pmullw %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
+ "66450FF4C8",
+ "pmuludq %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
+ "66410F383CF7",
+ "pmaxsb %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
+ "66410FEEF7",
+ "pmaxsw %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
+ "66410F383DF7",
+ "pmaxsd %xmm15, %xmm6",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
+ "66410FDECE",
+ "pmaxub %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
+ "66410F383ECE",
+ "pmaxuw %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
+ "66410F383FCE",
+ "pmaxud %xmm14, %xmm1",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
+ "66450F3838C8",
+ "pminsb %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
+ "66450FEAC8",
+ "pminsw %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
+ "66450F3839C8",
+ "pminsd %xmm8, %xmm9",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
+ "660FDAD3",
+ "pminub %xmm3, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
+ "660F383AD3",
+ "pminuw %xmm3, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
+ "660F383BD3",
+ "pminud %xmm3, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
+ "66410FEFD3",
+ "pxor %xmm11, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2),
+ "66410F3800D3",
+ "pshufb %xmm11, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(xmm11), w_xmm2),
+ "66410F63D3",
+ "packsswb %xmm11, %xmm2",
+ ));
+
+ // ========================================================
+ // XMM_RM_R: Integer Conversion
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8),
+ "440F5BC1",
+ "cvtdq2ps %xmm1, %xmm8",
+ ));
+
+ insns.push((
+ Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8),
+ "F3450F5BC1",
+ "cvttps2dq %xmm9, %xmm8",
+ ));
+
+ // XMM_Mov_R_M: float stores
+ insns.push((
+ Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),
+ "F3450F11BC2480000000",
+ "movss %xmm15, 128(%r12)",
+ ));
+ insns.push((
+ Inst::xmm_mov_r_m(SseOpcode::Movsd, xmm1, Amode::imm_reg(0, rsi)),
+ "F20F110E",
+ "movsd %xmm1, 0(%rsi)",
+ ));
+
+ // XmmUnary: moves and unary float ops
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movss, RegMem::reg(xmm13), w_xmm2),
+ "F3410F10D5",
+ "movss %xmm13, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movsd, RegMem::reg(xmm0), w_xmm1),
+ "F20F10C8",
+ "movsd %xmm0, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(
+ SseOpcode::Movsd,
+ RegMem::mem(Amode::imm_reg(0, rsi)),
+ w_xmm2,
+ ),
+ "F20F1016",
+ "movsd 0(%rsi), %xmm2",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movsd, RegMem::reg(xmm14), w_xmm3),
+ "F2410F10DE",
+ "movsd %xmm14, %xmm3",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Movaps, RegMem::reg(xmm5), w_xmm14),
+ "440F28F5",
+ "movaps %xmm5, %xmm14",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8),
+ "F3440F51C7",
+ "sqrtss %xmm7, %xmm8",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Sqrtsd, RegMem::reg(xmm1), w_xmm2),
+ "F20F51D1",
+ "sqrtsd %xmm1, %xmm2",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, RegMem::reg(xmm0), w_xmm1),
+ "F30F5AC8",
+ "cvtss2sd %xmm0, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, RegMem::reg(xmm1), w_xmm0),
+ "F20F5AC1",
+ "cvtsd2ss %xmm1, %xmm0",
+ ));
+
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Pabsb, RegMem::reg(xmm2), w_xmm1),
+ "660F381CCA",
+ "pabsb %xmm2, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Pabsw, RegMem::reg(xmm0), w_xmm0),
+ "660F381DC0",
+ "pabsw %xmm0, %xmm0",
+ ));
+ insns.push((
+ Inst::xmm_unary_rm_r(SseOpcode::Pabsd, RegMem::reg(xmm10), w_xmm11),
+ "66450F381EDA",
+ "pabsd %xmm10, %xmm11",
+ ));
+
+ // Xmm to int conversions, and conversely.
+
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movd, xmm0, w_rsi, OperandSize::Size32),
+ "660F7EC6",
+ "movd %xmm0, %esi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movq, xmm2, w_rdi, OperandSize::Size64),
+ "66480F7ED7",
+ "movq %xmm2, %rdi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttss2si, xmm0, w_rsi, OperandSize::Size32),
+ "F30F2CF0",
+ "cvttss2si %xmm0, %esi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttss2si, xmm0, w_rdi, OperandSize::Size64),
+ "F3480F2CF8",
+ "cvttss2si %xmm0, %rdi",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttsd2si, xmm0, w_rax, OperandSize::Size32),
+ "F20F2CC0",
+ "cvttsd2si %xmm0, %eax",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Cvttsd2si, xmm0, w_r15, OperandSize::Size64),
+ "F24C0F2CF8",
+ "cvttsd2si %xmm0, %r15",
+ ));
+
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Pmovmskb, xmm10, w_rax, OperandSize::Size32),
+ "66410FD7C2",
+ "pmovmskb %xmm10, %eax",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movmskps, xmm2, w_rax, OperandSize::Size32),
+ "0F50C2",
+ "movmskps %xmm2, %eax",
+ ));
+ insns.push((
+ Inst::xmm_to_gpr(SseOpcode::Movmskpd, xmm0, w_rcx, OperandSize::Size32),
+ "660F50C8",
+ "movmskpd %xmm0, %ecx",
+ ));
+
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::reg(rax),
+ OperandSize::Size32,
+ w_xmm15,
+ ),
+ "66440F6EF8",
+ "movd %eax, %xmm15",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::mem(Amode::imm_reg(2, r10)),
+ OperandSize::Size32,
+ w_xmm9,
+ ),
+ "66450F6E4A02",
+ "movd 2(%r10), %xmm9",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::reg(rsi),
+ OperandSize::Size32,
+ w_xmm1,
+ ),
+ "660F6ECE",
+ "movd %esi, %xmm1",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Movq,
+ RegMem::reg(rdi),
+ OperandSize::Size64,
+ w_xmm15,
+ ),
+ "664C0F6EFF",
+ "movq %rdi, %xmm15",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Cvtsi2ss,
+ RegMem::reg(rdi),
+ OperandSize::Size32,
+ w_xmm15,
+ ),
+ "F3440F2AFF",
+ "cvtsi2ss %edi, %xmm15",
+ ));
+ insns.push((
+ Inst::gpr_to_xmm(
+ SseOpcode::Cvtsi2sd,
+ RegMem::reg(rsi),
+ OperandSize::Size64,
+ w_xmm1,
+ ),
+ "F2480F2ACE",
+ "cvtsi2sd %rsi, %xmm1",
+ ));
+
+ // ========================================================
+ // XmmRmi
+ insns.push((
+ Inst::xmm_rmi_reg(SseOpcode::Psraw, RegMemImm::reg(xmm10), w_xmm1),
+ "66410FE1CA",
+ "psraw %xmm10, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(31), w_xmm1),
+ "660F72F11F",
+ "pslld $31, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rmi_reg(SseOpcode::Psrlq, RegMemImm::imm(1), w_xmm3),
+ "660F73D301",
+ "psrlq $1, %xmm3",
+ ));
+
+ // ========================================================
+ // XmmRmRImm
+ insns.push((
+ Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false),
+ "660FC2CD02",
+ "cmppd $2, %xmm5, %xmm1",
+ ));
+ insns.push((
+ Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false),
+ "410FC2FF00",
+ "cmpps $0, %xmm15, %xmm7",
+ ));
+
+ // ========================================================
+ // Pertaining to atomics.
+ let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into();
+ // `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing
+ // for retention of the apparently-redundant rex prefix in the 8-bit case.
+ let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into();
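+ // For instance, `%sil` can only be named when a REX prefix is present: the otherwise-empty
+ // `40` prefix byte in the expected encoding below exists purely to select %sil (rather than
+ // %dh), and the test checks that it is retained.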
+
+ // A general 8-bit case.
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: rbx,
+ dst: am1,
+ },
+ "F0410FB09C9241010000",
+ "lock cmpxchgb %bl, 321(%r10,%rdx,4)",
+ ));
+ // Check redundant rex retention in 8-bit cases.
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: rdx,
+ dst: am2.clone(),
+ },
+ "F00FB094F1C7CFFFFF",
+ "lock cmpxchgb %dl, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "F0400FB0B4F1C7CFFFFF",
+ "lock cmpxchgb %sil, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "F0440FB094F1C7CFFFFF",
+ "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I8,
+ src: r15,
+ dst: am2.clone(),
+ },
+ "F0440FB0BCF1C7CFFFFF",
+ "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)",
+ ));
+ // 16 bit cases
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I16,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "66F00FB1B4F1C7CFFFFF",
+ "lock cmpxchgw %si, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I16,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "66F0440FB194F1C7CFFFFF",
+ "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)",
+ ));
+ // 32 bit cases
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I32,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "F00FB1B4F1C7CFFFFF",
+ "lock cmpxchgl %esi, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I32,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "F0440FB194F1C7CFFFFF",
+ "lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)",
+ ));
+ // 64 bit cases
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I64,
+ src: rsi,
+ dst: am2.clone(),
+ },
+ "F0480FB1B4F1C7CFFFFF",
+ "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)",
+ ));
+ insns.push((
+ Inst::LockCmpxchg {
+ ty: types::I64,
+ src: r10,
+ dst: am2.clone(),
+ },
+ "F04C0FB194F1C7CFFFFF",
+ "lock cmpxchgq %r10, -12345(%rcx,%rsi,8)",
+ ));
+
+ // AtomicRmwSeq
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I8, op: inst_common::AtomicRmwOp::Or, },
+ "490FB6014989C34D09D3F0450FB0190F85EFFFFFFF",
+ "atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, },
+ "490FB7014989C34D21D366F0450FB1190F85EEFFFFFF",
+ "atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, },
+ "418B014989C34D89D3F0450FB1190F85EFFFFFFF",
+ "atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+ insns.push((
+ Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, },
+ "498B014989C34D01D3F04D0FB1190F85EFFFFFFF",
+ "atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }"
+ ));
+
+ // Fence
+ insns.push((
+ Inst::Fence {
+ kind: FenceKind::MFence,
+ },
+ "0FAEF0",
+ "mfence",
+ ));
+ insns.push((
+ Inst::Fence {
+ kind: FenceKind::LFence,
+ },
+ "0FAEE8",
+ "lfence",
+ ));
+ insns.push((
+ Inst::Fence {
+ kind: FenceKind::SFence,
+ },
+ "0FAEF8",
+ "sfence",
+ ));
+
+ // ========================================================
+ // Misc instructions.
+
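+ // Note: this backend uses Inst::Hlt as a debug trap, so it is expected to encode as 0xCC
+ // (int3) rather than as a literal `hlt` (0xF4).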
+ insns.push((Inst::Hlt, "CC", "hlt"));
+
+ let trap_code = TrapCode::UnreachableCodeReached;
+ insns.push((Inst::Ud2 { trap_code }, "0F0B", "ud2 unreachable"));
+
+ // ========================================================
+ // Actually run the tests!
+ let flags = settings::Flags::new(settings::builder());
+
+ use crate::settings::Configurable;
+ let mut isa_flag_builder = x64::settings::builder();
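+ // Some opcodes exercised above are not baseline SSE2: pshufb and pabs{b,w,d} are SSSE3,
+ // and pmulld plus several pmin*/pmax* variants are SSE4.1, so enable those feature sets.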
+ isa_flag_builder.enable("has_ssse3").unwrap();
+ isa_flag_builder.enable("has_sse41").unwrap();
+ let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder);
+
+ let rru = regs::create_reg_universe_systemv(&flags);
+ let emit_info = EmitInfo::new(flags, isa_flags);
+ for (insn, expected_encoding, expected_printing) in insns {
+ // Check the printed text is as expected.
+ let actual_printing = insn.show_rru(Some(&rru));
+ assert_eq!(expected_printing, actual_printing);
+ let mut sink = test_utils::TestCodeSink::new();
+ let mut buffer = MachBuffer::new();
+
+ insn.emit(&mut buffer, &emit_info, &mut Default::default());
+
+ // Allow one label just after the instruction (so the offset is 0).
+ let label = buffer.get_label();
+ buffer.bind_label(label);
+
+ let buffer = buffer.finish();
+ buffer.emit(&mut sink);
+ let actual_encoding = &sink.stringify();
+ assert_eq!(expected_encoding, actual_encoding, "{}", expected_printing);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs
new file mode 100644
index 0000000000..1172b22eff
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/mod.rs
@@ -0,0 +1,2733 @@
+//! This module defines x86_64-specific machine instruction types.
+
+use crate::binemit::{CodeOffset, StackMap};
+use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type};
+use crate::isa::x64::settings as x64_settings;
+use crate::machinst::*;
+use crate::{settings, settings::Flags, CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use regalloc::{
+ PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
+ RegUsageMapper, SpillSlot, VirtualReg, Writable,
+};
+use smallvec::SmallVec;
+use std::fmt;
+use std::string::{String, ToString};
+
+pub mod args;
+mod emit;
+#[cfg(test)]
+mod emit_tests;
+pub mod regs;
+pub mod unwind;
+
+use args::*;
+use regs::{create_reg_universe_systemv, show_ireg_sized};
+
+//=============================================================================
+// Instructions (top level): definition
+
+// Don't build these directly. Instead use the Inst:: functions to create them.
+
+/// Instructions. Destinations are on the RIGHT (a la AT&T syntax).
+#[derive(Clone)]
+pub enum Inst {
+ /// Nops of various sizes, including zero.
+ Nop { len: u8 },
+
+ // =====================================
+ // Integer instructions.
+ /// Integer arithmetic/bit-twiddling: (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
+ AluRmiR {
+ is_64: bool,
+ op: AluRmiROpcode,
+ src: RegMemImm,
+ dst: Writable<Reg>,
+ },
+
+ /// Instructions on GPRs that only read `src` and define `dst` (`dst` is not modified): bsr, etc.
+ UnaryRmR {
+ size: u8, // 2, 4 or 8
+ op: UnaryRmROpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// Bitwise not
+ Not {
+ size: u8, // 1, 2, 4 or 8
+ src: Writable<Reg>,
+ },
+
+ /// Integer negation
+ Neg {
+ size: u8, // 1, 2, 4 or 8
+ src: Writable<Reg>,
+ },
+
+ /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
+ Div {
+ size: u8, // 1, 2, 4 or 8
+ signed: bool,
+ divisor: RegMem,
+ },
+
+ /// The high bits (RDX) of a signed or unsigned multiply: RDX:RAX := RAX * rhs.
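+ /// e.g. for `size == 8`, the unsigned form is `mul %rbx`: the low half of the product is
+ /// left in %rax and the high half in %rdx.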
+ MulHi { size: u8, signed: bool, rhs: RegMem },
+
+ /// A synthetic sequence to implement the right inline checks for remainder and division,
+ /// assuming the dividend is in %rax.
+ /// Puts the result back into %rax if is_div, %rdx if !is_div, to mimic what the div
+ /// instruction does.
+ /// The generated code sequence is described in the emit's function match arm for this
+ /// instruction.
+ ///
+ /// Note: %rdx is marked as modified by this instruction, to avoid an early clobber problem
+ /// with the temporary and divisor registers. Make sure to zero %rdx right before this
+ /// instruction, or you might run into regalloc failures where %rdx is live before its first
+ /// def!
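+ ///
+ /// For example, a typical use is: put the dividend in %rax, zero %rdx immediately beforehand
+ /// (as noted above), then emit this sequence; the result is left in %rax for a division or
+ /// %rdx for a remainder.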
+ CheckedDivOrRemSeq {
+ kind: DivOrRemKind,
+ size: u8,
+ /// The divisor operand. Note it's marked as modified so that it gets assigned a register
+ /// different from the temporary.
+ divisor: Writable<Reg>,
+ tmp: Option<Writable<Reg>>,
+ },
+
+ /// Do a sign-extend based on the sign of the value in rax into rdx: (cwd cdq cqo)
+ /// or of %al into %ax (filling %ah with the sign of %al): (cbw)
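+ /// e.g. for `size == 4` this is `cdq`, which copies bit 31 of %eax into every bit of %edx.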
+ SignExtendData {
+ size: u8, // 1, 2, 4 or 8
+ },
+
+ /// Constant materialization: (imm32 imm64) reg.
+ /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg64.
+ Imm {
+ dst_is_64: bool,
+ simm64: u64,
+ dst: Writable<Reg>,
+ },
+
+ /// GPR to GPR move: mov (64 32) reg reg.
+ MovRR {
+ is_64: bool,
+ src: Reg,
+ dst: Writable<Reg>,
+ },
+
+ /// Zero-extended loads, except for 64 bits: movz (bl bq wl wq lq) addr reg.
+ /// Note that the lq variant doesn't really exist since the default zero-extend rule makes it
+ /// unnecessary. For that case we emit the equivalent "movl AM, reg32".
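+ /// (The emit tests above show this: the `ExtMode::LQ` form is printed as a plain `movl`.)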
+ MovzxRmR {
+ ext_mode: ExtMode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// A plain 64-bit integer load, since MovzxRmR can't represent that.
+ Mov64MR {
+ src: SyntheticAmode,
+ dst: Writable<Reg>,
+ },
+
+ /// Loads the memory address of addr into dst.
+ LoadEffectiveAddress {
+ addr: SyntheticAmode,
+ dst: Writable<Reg>,
+ },
+
+ /// Sign-extended loads and moves: movs (bl bq wl wq lq) addr reg.
+ MovsxRmR {
+ ext_mode: ExtMode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// Integer stores: mov (b w l q) reg addr.
+ MovRM {
+ size: u8, // 1, 2, 4 or 8.
+ src: Reg,
+ dst: SyntheticAmode,
+ },
+
+ /// Shifts and rotates: (shl shr sar rol ror) (b w l q) imm reg.
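+ /// e.g. `shift_r(4, ShiftKind::ShiftLeft, None, w_rdi)` prints as `shll %cl, %edi` in the
+ /// emit tests above.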
+ ShiftR {
+ size: u8, // 1, 2, 4 or 8
+ kind: ShiftKind,
+ /// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
+ num_bits: Option<u8>,
+ dst: Writable<Reg>,
+ },
+
+ /// SIMD shifts (arithmetic and logical): psra/psrl/psll, with a register, memory, or
+ /// immediate shift amount.
+ XmmRmiReg {
+ opcode: SseOpcode,
+ src: RegMemImm,
+ dst: Writable<Reg>,
+ },
+
+ /// Integer comparisons/tests: cmp (b w l q) (reg addr imm) reg.
+ CmpRmiR {
+ size: u8, // 1, 2, 4 or 8
+ src: RegMemImm,
+ dst: Reg,
+ },
+
+ /// Materializes the requested condition code in the destination reg.
+ Setcc { cc: CC, dst: Writable<Reg> },
+
+ /// Integer conditional move.
+ /// Overwrites the destination register.
+ Cmove {
+ /// Possible values are 2, 4 or 8. Checked in the related factory.
+ size: u8,
+ cc: CC,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ // =====================================
+ // Stack manipulation.
+ /// pushq (reg addr imm)
+ Push64 { src: RegMemImm },
+
+ /// popq reg
+ Pop64 { dst: Writable<Reg> },
+
+ // =====================================
+ // Floating-point operations.
+ /// XMM (scalar or vector) binary op: (add sub and or xor mul ...) (32 64) (reg addr) reg
+ XmmRmR {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt,
+ /// etc.
+ ///
+ /// This differs from XmmRmR in that the dst register of XmmUnaryRmR is not used in the
+ /// computation of the instruction dst value and so does not have to be a previously valid
+ /// value. This is characteristic of mov instructions.
+ XmmUnaryRmR {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// XMM (scalar or vector) unary op (from xmm to reg/mem): stores, movd, movq
+ XmmMovRM {
+ op: SseOpcode,
+ src: Reg,
+ dst: SyntheticAmode,
+ },
+
+ /// XMM (vector) unary op (to move a constant value into an xmm register): movups
+ XmmLoadConst {
+ src: VCodeConstant,
+ dst: Writable<Reg>,
+ ty: Type,
+ },
+
+ /// XMM (scalar) unary op (from xmm to integer reg): movd, movq, cvtts{s,d}2si
+ XmmToGpr {
+ op: SseOpcode,
+ src: Reg,
+ dst: Writable<Reg>,
+ dst_size: OperandSize,
+ },
+
+ /// XMM (scalar) unary op (from integer to float reg): movd, movq, cvtsi2s{s,d}
+ GprToXmm {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ src_size: OperandSize,
+ },
+
+ /// Converts an unsigned int64 to a float32/float64.
+ CvtUint64ToFloatSeq {
+ /// Is the target a 64-bit or a 32-bit register?
+ to_f64: bool,
+ /// A copy of the source register, fed by lowering. It is marked as modified during
+ /// register allocation to make sure that the temporary registers differ from the src
+ /// register, since both registers are live at the same time in the generated code
+ /// sequence.
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr1: Writable<Reg>,
+ tmp_gpr2: Writable<Reg>,
+ },
+
+ /// Converts a scalar xmm to a signed int32/int64.
+ CvtFloatToSintSeq {
+ dst_size: OperandSize,
+ src_size: OperandSize,
+ is_saturating: bool,
+ /// A copy of the source register, fed by lowering. It is marked as modified during
+ /// register allocation to make sure that the temporary xmm register differs from the src
+ /// register, since both registers are live at the same time in the generated code
+ /// sequence.
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ },
+
+ /// Converts a scalar xmm to an unsigned int32/int64.
+ CvtFloatToUintSeq {
+ src_size: OperandSize,
+ dst_size: OperandSize,
+ is_saturating: bool,
+ /// A copy of the source register, fed by lowering, reused as a temporary. It is marked as
+ /// modified during register allocation to make sure that the temporary xmm register
+ /// differs from the src register, since both registers are live at the same time in the
+ /// generated code sequence.
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ },
+
+ /// A sequence to compute min/max with the proper NaN semantics for xmm registers.
+ XmmMinMaxSeq {
+ size: OperandSize,
+ is_min: bool,
+ lhs: Reg,
+ rhs_dst: Writable<Reg>,
+ },
+
+ /// XMM (scalar) conditional move.
+ /// Overwrites the destination register if cc is set.
+ XmmCmove {
+ /// Whether the cmove moves 32 or 64 bits.
+ is_64: bool,
+ cc: CC,
+ src: RegMem,
+ dst: Writable<Reg>,
+ },
+
+ /// Float comparisons/tests: ucomis{s,d} (reg addr) reg; writes %rflags.
+ XmmCmpRmR {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Reg,
+ },
+
+ /// A binary XMM instruction with an 8-bit immediate: e.g. cmp (ps pd) imm (reg addr) reg
+ XmmRmRImm {
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ imm: u8,
+ is64: bool,
+ },
+
+ // =====================================
+ // Control flow instructions.
+ /// Direct call: call simm32.
+ CallKnown {
+ dest: ExternalName,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ },
+
+ /// Indirect call: callq (reg mem).
+ CallUnknown {
+ dest: RegMem,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ },
+
+ /// Return.
+ Ret,
+
+ /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+ /// inserted there.
+ EpiloguePlaceholder,
+
+ /// Jump to a known target: jmp simm32.
+ JmpKnown { dst: MachLabel },
+
+ /// One-way conditional branch: jcond cond target.
+ ///
+ /// This instruction is useful when we have conditional jumps depending on more than two
+ /// conditions, see for instance the lowering of Brz/brnz with Fcmp inputs.
+ ///
+ /// A note of caution: in contexts where the branch target is another block, this has to be the
+ /// same successor as the one specified in the terminator branch of the current block.
+ /// Otherwise, this might confuse register allocation by creating new invisible edges.
+ JmpIf { cc: CC, taken: MachLabel },
+
+ /// Two-way conditional branch: jcond cond target target.
+ /// Emitted as a compound sequence; the MachBuffer will shrink it as appropriate.
+ JmpCond {
+ cc: CC,
+ taken: MachLabel,
+ not_taken: MachLabel,
+ },
+
+ /// Jump-table sequence, as one compound instruction (see note in lower.rs for rationale).
+ /// The generated code sequence is described in the emit's function match arm for this
+ /// instruction.
+ /// See comment in lowering about the temporaries signedness.
+ JmpTableSeq {
+ idx: Reg,
+ tmp1: Writable<Reg>,
+ tmp2: Writable<Reg>,
+ default_target: MachLabel,
+ targets: Vec<MachLabel>,
+ targets_for_term: Vec<MachLabel>,
+ },
+
+ /// Indirect jump: jmpq (reg mem).
+ JmpUnknown { target: RegMem },
+
+ /// Traps if the condition code is set.
+ TrapIf { cc: CC, trap_code: TrapCode },
+
+ /// A debug trap.
+ Hlt,
+
+ /// An instruction that will always trigger the illegal instruction exception.
+ Ud2 { trap_code: TrapCode },
+
+ /// Loads an external symbol into a register, with a relocation: movabsq $name, dst
+ LoadExtName {
+ dst: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i64,
+ },
+
+ // =====================================
+ // Instructions pertaining to atomic memory accesses.
+ /// A standard (native) `lock cmpxchg src, (amode)`, with register conventions:
+ ///
+ /// `dst` (read) address
+ /// `src` (read) replacement value
+ /// %rax (modified) in: expected value, out: value that was actually at `dst`
+ /// %rflags is written. Do not assume anything about it after the instruction.
+ ///
+ /// The instruction "succeeded" iff the lowest `ty` bits of %rax afterwards are the same as
+ /// they were before.
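+ ///
+ /// For reference, the underlying `cmpxchg` behaves roughly as follows (all atomically):
+ ///
+ /// ```text
+ /// if mem[dst] == %rax { mem[dst] = src; ZF = 1 } else { %rax = mem[dst]; ZF = 0 }
+ /// ```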
+ LockCmpxchg {
+ ty: Type, // I8, I16, I32 or I64
+ src: Reg,
+ dst: SyntheticAmode,
+ },
+
+ /// A synthetic instruction, based on a loop around a native `lock cmpxchg` instruction.
+ /// This atomically modifies a value in memory and returns the old value. The sequence
+ /// consists of an initial "normal" load from `dst`, followed by a loop which computes the
+ /// new value and tries to compare-and-swap ("CAS") it into `dst`, using the native
+ /// instruction `lock cmpxchg{b,w,l,q}`. The loop iterates until the CAS is successful.
+ /// If there is no contention, there will be only one pass through the loop body. The
+ /// sequence does *not* perform any explicit memory fence instructions
+ /// (mfence/sfence/lfence).
+ ///
+ /// Note that the transaction is atomic in the sense that, as observed by some other thread,
+ /// `dst` either has the initial or final value, but no other. It isn't atomic in the sense
+ /// of guaranteeing that no other thread writes to `dst` in between the initial load and the
+ /// CAS -- but that would cause the CAS to fail unless the other thread's last write before
+ /// the CAS wrote the same value that was already there. In other words, this
+ /// implementation suffers (unavoidably) from the A-B-A problem.
+ ///
+ /// This instruction sequence has fixed register uses as follows:
+ ///
+ /// %r9 (read) address
+ /// %r10 (read) second operand for `op`
+ /// %r11 (written) scratch reg; value afterwards has no meaning
+ /// %rax (written) the old value at %r9
+ /// %rflags is written. Do not assume anything about it after the instruction.
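+ ///
+ /// As a rough sketch (see the emit code for the authoritative sequence), the I64 `Add`
+ /// case expands to something like:
+ ///
+ /// ```text
+ /// movq (%r9), %rax
+ /// again:
+ /// movq %rax, %r11
+ /// addq %r10, %r11
+ /// lock cmpxchgq %r11, (%r9)
+ /// jnz again
+ /// ```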
+ AtomicRmwSeq {
+ ty: Type, // I8, I16, I32 or I64
+ op: inst_common::AtomicRmwOp,
+ },
+
+ /// A memory fence (mfence, lfence or sfence).
+ Fence { kind: FenceKind },
+
+ // =====================================
+ // Meta-instructions generating no code.
+ /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
+ /// controls how SyntheticAmode::NominalSPOffset args are lowered.
+ VirtualSPOffsetAdj { offset: i64 },
+
+ /// Provides a way to tell the register allocator that the upcoming sequence of instructions
+ /// will overwrite `dst` so it should be considered as a `def`; use this with care.
+ ///
+ /// This is useful when we have a sequence of instructions whose register usages are nominally
+ /// `mod`s, but such that the combination of operations creates a result that is independent of
+ /// the initial register value. It's thus semantically a `def`, not a `mod`, when all the
+ /// instructions are taken together, so we want to ensure the register is defined (its
+ /// live-range starts) prior to the sequence to keep analyses happy.
+ ///
+ /// One alternative would be a compound instruction that somehow encapsulates the others and
+ /// reports its own `def`s/`use`s/`mod`s; this adds complexity (the instruction list is no
+ /// longer flat) and requires knowledge about semantics and initial-value independence anyway.
+ XmmUninitializedValue { dst: Writable<Reg> },
+}
+
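+/// Returns true iff the low 32 bits of `x`, sign-extended back to 64 bits, reproduce the full
+/// value. For example, 0x7fff_ffff and 0xffff_ffff_8000_0000 do (true), whereas
+/// 0x0000_0000_8000_0000 does not (false).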
+pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool {
+ let xs = x as i64;
+ xs == ((xs << 32) >> 32)
+}
+
+impl Inst {
+ fn isa_requirement(&self) -> Option<InstructionSet> {
+ match self {
+            // These instructions are part of the x64 baseline ISA (at most SSE2), which is a
+            // basic requirement in Cranelift, and don't have to be checked.
+ Inst::AluRmiR { .. }
+ | Inst::AtomicRmwSeq { .. }
+ | Inst::CallKnown { .. }
+ | Inst::CallUnknown { .. }
+ | Inst::CheckedDivOrRemSeq { .. }
+ | Inst::Cmove { .. }
+ | Inst::CmpRmiR { .. }
+ | Inst::CvtFloatToSintSeq { .. }
+ | Inst::CvtFloatToUintSeq { .. }
+ | Inst::CvtUint64ToFloatSeq { .. }
+ | Inst::Div { .. }
+ | Inst::EpiloguePlaceholder
+ | Inst::Fence { .. }
+ | Inst::Hlt
+ | Inst::Imm { .. }
+ | Inst::JmpCond { .. }
+ | Inst::JmpIf { .. }
+ | Inst::JmpKnown { .. }
+ | Inst::JmpTableSeq { .. }
+ | Inst::JmpUnknown { .. }
+ | Inst::LoadEffectiveAddress { .. }
+ | Inst::LoadExtName { .. }
+ | Inst::LockCmpxchg { .. }
+ | Inst::Mov64MR { .. }
+ | Inst::MovRM { .. }
+ | Inst::MovRR { .. }
+ | Inst::MovsxRmR { .. }
+ | Inst::MovzxRmR { .. }
+ | Inst::MulHi { .. }
+ | Inst::Neg { .. }
+ | Inst::Not { .. }
+ | Inst::Nop { .. }
+ | Inst::Pop64 { .. }
+ | Inst::Push64 { .. }
+ | Inst::Ret
+ | Inst::Setcc { .. }
+ | Inst::ShiftR { .. }
+ | Inst::SignExtendData { .. }
+ | Inst::TrapIf { .. }
+ | Inst::Ud2 { .. }
+ | Inst::UnaryRmR { .. }
+ | Inst::VirtualSPOffsetAdj { .. }
+ | Inst::XmmCmove { .. }
+ | Inst::XmmCmpRmR { .. }
+ | Inst::XmmLoadConst { .. }
+ | Inst::XmmMinMaxSeq { .. }
+ | Inst::XmmUninitializedValue { .. } => None,
+
+ // These use dynamic SSE opcodes.
+ Inst::GprToXmm { op, .. }
+ | Inst::XmmMovRM { op, .. }
+ | Inst::XmmRmiReg { opcode: op, .. }
+ | Inst::XmmRmR { op, .. }
+ | Inst::XmmRmRImm { op, .. }
+ | Inst::XmmToGpr { op, .. }
+ | Inst::XmmUnaryRmR { op, .. } => Some(op.available_from()),
+ }
+ }
+}
+
+// Handy constructors for Insts.
+
+impl Inst {
+ pub(crate) fn nop(len: u8) -> Self {
+ debug_assert!(len <= 16);
+ Self::Nop { len }
+ }
+
+ pub(crate) fn alu_rmi_r(
+ is_64: bool,
+ op: AluRmiROpcode,
+ src: RegMemImm,
+ dst: Writable<Reg>,
+ ) -> Self {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Self::AluRmiR {
+ is_64,
+ op,
+ src,
+ dst,
+ }
+ }
+
+ pub(crate) fn unary_rm_r(
+ size: u8,
+ op: UnaryRmROpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ ) -> Self {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2);
+ Self::UnaryRmR { size, op, src, dst }
+ }
+
+ pub(crate) fn not(size: u8, src: Writable<Reg>) -> Inst {
+ debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::Not { size, src }
+ }
+
+ pub(crate) fn neg(size: u8, src: Writable<Reg>) -> Inst {
+ debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::Neg { size, src }
+ }
+
+ pub(crate) fn div(size: u8, signed: bool, divisor: RegMem) -> Inst {
+ divisor.assert_regclass_is(RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::Div {
+ size,
+ signed,
+ divisor,
+ }
+ }
+
+ pub(crate) fn mul_hi(size: u8, signed: bool, rhs: RegMem) -> Inst {
+ rhs.assert_regclass_is(RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::MulHi { size, signed, rhs }
+ }
+
+ pub(crate) fn checked_div_or_rem_seq(
+ kind: DivOrRemKind,
+ size: u8,
+ divisor: Writable<Reg>,
+ tmp: Option<Writable<Reg>>,
+ ) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(divisor.to_reg().get_class() == RegClass::I64);
+ debug_assert!(tmp
+ .map(|tmp| tmp.to_reg().get_class() == RegClass::I64)
+ .unwrap_or(true));
+ Inst::CheckedDivOrRemSeq {
+ kind,
+ size,
+ divisor,
+ tmp,
+ }
+ }
+
+ pub(crate) fn sign_extend_data(size: u8) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ Inst::SignExtendData { size }
+ }
+
+ pub(crate) fn imm(size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+        // Try to generate a 32-bit immediate when the upper 32 bits are zero (which matches
+        // the semantics of movl).
+ let dst_is_64 = size == OperandSize::Size64 && simm64 > u32::max_value() as u64;
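+        // For example (illustrative values): simm64 = 0xffff_ffff still fits the movl form (the
+        // destination is zero-extended), whereas simm64 = 0x1_0000_0000 needs the movabsq form.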
+ Inst::Imm {
+ dst_is_64,
+ simm64,
+ dst,
+ }
+ }
+
+ pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
+ debug_assert!(src.get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::MovRR { is_64, src, dst }
+ }
+
+ // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level)
+ pub(crate) fn xmm_mov(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmUnaryRmR { op, src, dst }
+ }
+
+ pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable<Reg>, ty: Type) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ debug_assert!(ty.is_vector() && ty.bits() == 128);
+ Inst::XmmLoadConst { src, dst, ty }
+ }
+
+    /// A convenience helper for unary float operations.
+ pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmUnaryRmR { op, src, dst }
+ }
+
+ pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmRmR { op, src, dst }
+ }
+
+ pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self {
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmUninitializedValue { dst }
+ }
+
+ pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
+ debug_assert!(src.get_class() == RegClass::V128);
+ Inst::XmmMovRM {
+ op,
+ src,
+ dst: dst.into(),
+ }
+ }
+
+ pub(crate) fn xmm_to_gpr(
+ op: SseOpcode,
+ src: Reg,
+ dst: Writable<Reg>,
+ dst_size: OperandSize,
+ ) -> Inst {
+ debug_assert!(src.get_class() == RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::XmmToGpr {
+ op,
+ src,
+ dst,
+ dst_size,
+ }
+ }
+
+ pub(crate) fn gpr_to_xmm(
+ op: SseOpcode,
+ src: RegMem,
+ src_size: OperandSize,
+ dst: Writable<Reg>,
+ ) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::GprToXmm {
+ op,
+ src,
+ dst,
+ src_size,
+ }
+ }
+
+ pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.get_class() == RegClass::V128);
+ Inst::XmmCmpRmR { op, src, dst }
+ }
+
+ pub(crate) fn cvt_u64_to_float_seq(
+ to_f64: bool,
+ src: Writable<Reg>,
+ tmp_gpr1: Writable<Reg>,
+ tmp_gpr2: Writable<Reg>,
+ dst: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(src.to_reg().get_class() == RegClass::I64);
+ debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64);
+ debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::CvtUint64ToFloatSeq {
+ src,
+ dst,
+ tmp_gpr1,
+ tmp_gpr2,
+ to_f64,
+ }
+ }
+
+ pub(crate) fn cvt_float_to_sint_seq(
+ src_size: OperandSize,
+ dst_size: OperandSize,
+ is_saturating: bool,
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(src.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::CvtFloatToSintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ }
+ }
+
+ pub(crate) fn cvt_float_to_uint_seq(
+ src_size: OperandSize,
+ dst_size: OperandSize,
+ is_saturating: bool,
+ src: Writable<Reg>,
+ dst: Writable<Reg>,
+ tmp_gpr: Writable<Reg>,
+ tmp_xmm: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(src.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128);
+ debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::CvtFloatToUintSeq {
+ src_size,
+ dst_size,
+ is_saturating,
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ }
+ }
+
+ pub(crate) fn xmm_min_max_seq(
+ size: OperandSize,
+ is_min: bool,
+ lhs: Reg,
+ rhs_dst: Writable<Reg>,
+ ) -> Inst {
+ debug_assert_eq!(lhs.get_class(), RegClass::V128);
+ debug_assert_eq!(rhs_dst.to_reg().get_class(), RegClass::V128);
+ Inst::XmmMinMaxSeq {
+ size,
+ is_min,
+ lhs,
+ rhs_dst,
+ }
+ }
+
+ pub(crate) fn xmm_rm_r_imm(
+ op: SseOpcode,
+ src: RegMem,
+ dst: Writable<Reg>,
+ imm: u8,
+ is64: bool,
+ ) -> Inst {
+ Inst::XmmRmRImm {
+ op,
+ src,
+ dst,
+ imm,
+ is64,
+ }
+ }
+
+ pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::MovzxRmR { ext_mode, src, dst }
+ }
+
+ pub(crate) fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmRmiReg { opcode, src, dst }
+ }
+
+ pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::MovsxRmR { ext_mode, src, dst }
+ }
+
+ pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Mov64MR {
+ src: src.into(),
+ dst,
+ }
+ }
+
+    /// A convenience function that allows a `RegMem` to be used as the source of a move.
+ pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ match src {
+ RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst),
+ RegMem::Mem { addr } => Self::mov64_m_r(addr, dst),
+ }
+ }
+
+ pub(crate) fn mov_r_m(
+ size: u8, // 1, 2, 4 or 8
+ src: Reg,
+ dst: impl Into<SyntheticAmode>,
+ ) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(src.get_class() == RegClass::I64);
+ Inst::MovRM {
+ size,
+ src,
+ dst: dst.into(),
+ }
+ }
+
+ pub(crate) fn lea(addr: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::LoadEffectiveAddress {
+ addr: addr.into(),
+ dst,
+ }
+ }
+
+ pub(crate) fn shift_r(
+ size: u8,
+ kind: ShiftKind,
+ num_bits: Option<u8>,
+ dst: Writable<Reg>,
+ ) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(if let Some(num_bits) = num_bits {
+ num_bits < size * 8
+ } else {
+ true
+ });
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::ShiftR {
+ size,
+ kind,
+ num_bits,
+ dst,
+ }
+ }
+
+    /// Performs a comparison of `dst - src` for operands of size `size`, as per the machine
+    /// instruction's semantics. Be careful with the order of parameters!
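+    /// For example, `cmp_rmi_r(8, RegMemImm::reg(rhs), lhs)` sets the flags according to
+    /// `lhs - rhs`, i.e. the AT&T form `cmpq %rhs, %lhs`.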
+ pub(crate) fn cmp_rmi_r(
+ size: u8, // 1, 2, 4 or 8
+ src: RegMemImm,
+ dst: Reg,
+ ) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
+ debug_assert!(dst.get_class() == RegClass::I64);
+ Inst::CmpRmiR { size, src, dst }
+ }
+
+ pub(crate) fn trap(trap_code: TrapCode) -> Inst {
+        Inst::Ud2 { trap_code }
+ }
+
+ pub(crate) fn setcc(cc: CC, dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Setcc { cc, dst }
+ }
+
+ pub(crate) fn cmove(size: u8, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
+ debug_assert!(size == 8 || size == 4 || size == 2);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Cmove { size, cc, src, dst }
+ }
+
+ pub(crate) fn xmm_cmove(is_64: bool, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
+ src.assert_regclass_is(RegClass::V128);
+ debug_assert!(dst.to_reg().get_class() == RegClass::V128);
+ Inst::XmmCmove {
+ is_64,
+ cc,
+ src,
+ dst,
+ }
+ }
+
+ pub(crate) fn push64(src: RegMemImm) -> Inst {
+ src.assert_regclass_is(RegClass::I64);
+ Inst::Push64 { src }
+ }
+
+ pub(crate) fn pop64(dst: Writable<Reg>) -> Inst {
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+ Inst::Pop64 { dst }
+ }
+
+ pub(crate) fn call_known(
+ dest: ExternalName,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ ) -> Inst {
+ Inst::CallKnown {
+ dest,
+ uses,
+ defs,
+ opcode,
+ }
+ }
+
+ pub(crate) fn call_unknown(
+ dest: RegMem,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: Opcode,
+ ) -> Inst {
+ dest.assert_regclass_is(RegClass::I64);
+ Inst::CallUnknown {
+ dest,
+ uses,
+ defs,
+ opcode,
+ }
+ }
+
+ pub(crate) fn ret() -> Inst {
+ Inst::Ret
+ }
+
+ pub(crate) fn epilogue_placeholder() -> Inst {
+ Inst::EpiloguePlaceholder
+ }
+
+ pub(crate) fn jmp_known(dst: MachLabel) -> Inst {
+ Inst::JmpKnown { dst }
+ }
+
+ pub(crate) fn jmp_if(cc: CC, taken: MachLabel) -> Inst {
+ Inst::JmpIf { cc, taken }
+ }
+
+ pub(crate) fn jmp_cond(cc: CC, taken: MachLabel, not_taken: MachLabel) -> Inst {
+ Inst::JmpCond {
+ cc,
+ taken,
+ not_taken,
+ }
+ }
+
+ pub(crate) fn jmp_unknown(target: RegMem) -> Inst {
+ target.assert_regclass_is(RegClass::I64);
+ Inst::JmpUnknown { target }
+ }
+
+ pub(crate) fn trap_if(cc: CC, trap_code: TrapCode) -> Inst {
+ Inst::TrapIf { cc, trap_code }
+ }
+
+ /// Choose which instruction to use for loading a register value from memory. For loads smaller
+ /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
+ /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
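+    /// For example, an `I8` load with [ExtKind::ZeroExtend] becomes a `movzbq`, while an `F32`
+    /// load into an XMM register becomes a `movss`.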
+ pub(crate) fn load(
+ ty: Type,
+ from_addr: impl Into<SyntheticAmode>,
+ to_reg: Writable<Reg>,
+ ext_kind: ExtKind,
+ ) -> Inst {
+ let rc = to_reg.to_reg().get_class();
+ match rc {
+ RegClass::I64 => {
+ let ext_mode = match ty.bytes() {
+ 1 => Some(ExtMode::BQ),
+ 2 => Some(ExtMode::WQ),
+ 4 => Some(ExtMode::LQ),
+ 8 => None,
+ _ => unreachable!("the type should never use a scalar load: {}", ty),
+ };
+ if let Some(ext_mode) = ext_mode {
+ // Values smaller than 64 bits must be extended in some way.
+ match ext_kind {
+ ExtKind::SignExtend => {
+ Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
+ }
+ ExtKind::ZeroExtend => {
+ Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg)
+ }
+ ExtKind::None => panic!(
+ "expected an extension kind for extension mode: {:?}",
+ ext_mode
+ ),
+ }
+ } else {
+ // 64-bit values can be moved directly.
+ Inst::mov64_m_r(from_addr, to_reg)
+ }
+ }
+ RegClass::V128 => {
+ let opcode = match ty {
+ types::F32 => SseOpcode::Movss,
+ types::F64 => SseOpcode::Movsd,
+ types::F32X4 => SseOpcode::Movups,
+ types::F64X2 => SseOpcode::Movupd,
+ _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
+ _ => unimplemented!("unable to load type: {}", ty),
+ };
+ Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg)
+ }
+ _ => panic!("unable to generate load for register class: {:?}", rc),
+ }
+ }
+
+ /// Choose which instruction to use for storing a register value to memory.
+ pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into<SyntheticAmode>) -> Inst {
+ let rc = from_reg.get_class();
+ match rc {
+ RegClass::I64 => {
+ // Always store the full register, to ensure that the high bits are properly set
+ // when doing a full reload.
+ Inst::mov_r_m(8 /* bytes */, from_reg, to_addr)
+ }
+ RegClass::V128 => {
+ let opcode = match ty {
+ types::F32 => SseOpcode::Movss,
+ types::F64 => SseOpcode::Movsd,
+ types::F32X4 => SseOpcode::Movups,
+ types::F64X2 => SseOpcode::Movupd,
+ _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
+ _ => unimplemented!("unable to store type: {}", ty),
+ };
+ Inst::xmm_mov_r_m(opcode, from_reg, to_addr)
+ }
+ _ => panic!("unable to generate store for register class: {:?}", rc),
+ }
+ }
+}
+
+// Inst helpers.
+
+impl Inst {
+ /// In certain cases, instructions of this format can act as a definition of an XMM register,
+ /// producing a value that is independent of its initial value.
+ ///
+ /// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
+ /// itself will generate all ones as a result, regardless of its value. From the register
+ /// allocator's point of view, we should (i) record the first register, which is normally a
+ /// mod, as a def instead; and (ii) not record the second register as a use, because it is the
+ /// same as the first register (already handled).
+ fn produces_const(&self) -> bool {
+ match self {
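+            // `xor r, r` and `sub r, r` both produce zero regardless of the initial value of `r`.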
+ Self::AluRmiR { op, src, dst, .. } => {
+ src.to_reg() == Some(dst.to_reg())
+ && (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
+ }
+
+ Self::XmmRmR { op, src, dst, .. } => {
+ src.to_reg() == Some(dst.to_reg())
+ && (*op == SseOpcode::Xorps
+ || *op == SseOpcode::Xorpd
+ || *op == SseOpcode::Pxor
+ || *op == SseOpcode::Pcmpeqb
+ || *op == SseOpcode::Pcmpeqw
+ || *op == SseOpcode::Pcmpeqd
+ || *op == SseOpcode::Pcmpeqq)
+ }
+
+ Self::XmmRmRImm {
+ op, src, dst, imm, ..
+ } => {
+ src.to_reg() == Some(dst.to_reg())
+ && (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
+ && *imm == FcmpImm::Equal.encode()
+ }
+
+ _ => false,
+ }
+ }
+
+ /// Choose which instruction to use for comparing two values for equality.
+ pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to),
+ types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to),
+ types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to),
+ types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to),
+ types::F32X4 => {
+ Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false)
+ }
+ types::F64X2 => {
+ Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false)
+ }
+ _ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise AND on two values.
+ pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pand, from, to),
+ _ => unimplemented!("unimplemented type for Inst::and: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise AND NOT on two values.
+ pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pandn, from, to),
+ _ => unimplemented!("unimplemented type for Inst::and_not: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise OR on two values.
+ pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Por, from, to),
+ _ => unimplemented!("unimplemented type for Inst::or: {}", ty),
+ }
+ }
+
+ /// Choose which instruction to use for computing a bitwise XOR on two values.
+ pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+ match ty {
+ types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to),
+ types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to),
+ _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pxor, from, to),
+ _ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: printing
+
+impl PrettyPrint for Inst {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ fn ljustify(s: String) -> String {
+ let w = 7;
+ if s.len() >= w {
+ s
+ } else {
+ let need = usize::min(w, w - s.len());
+ s + &format!("{nil: <width$}", nil = "", width = need)
+ }
+ }
+
+ fn ljustify2(s1: String, s2: String) -> String {
+ ljustify(s1 + &s2)
+ }
+
+ fn suffix_lq(is_64: bool) -> String {
+ (if is_64 { "q" } else { "l" }).to_string()
+ }
+
+ fn size_lq(is_64: bool) -> u8 {
+ if is_64 {
+ 8
+ } else {
+ 4
+ }
+ }
+
+ fn suffix_bwlq(size: u8) -> String {
+ match size {
+ 1 => "b".to_string(),
+ 2 => "w".to_string(),
+ 4 => "l".to_string(),
+ 8 => "q".to_string(),
+ _ => panic!("Inst(x64).show.suffixBWLQ: size={}", size),
+ }
+ }
+
+ match self {
+ Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
+
+ Inst::AluRmiR {
+ is_64,
+ op,
+ src,
+ dst,
+ } => format!(
+ "{} {}, {}",
+ ljustify2(op.to_string(), suffix_lq(*is_64)),
+ src.show_rru_sized(mb_rru, size_lq(*is_64)),
+ show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64)),
+ ),
+
+ Inst::UnaryRmR { src, dst, op, size } => format!(
+ "{} {}, {}",
+ ljustify2(op.to_string(), suffix_bwlq(*size)),
+ src.show_rru_sized(mb_rru, *size),
+ show_ireg_sized(dst.to_reg(), mb_rru, *size),
+ ),
+
+ Inst::Not { size, src } => format!(
+ "{} {}",
+ ljustify2("not".to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(src.to_reg(), mb_rru, *size)
+ ),
+
+ Inst::Neg { size, src } => format!(
+ "{} {}",
+ ljustify2("neg".to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(src.to_reg(), mb_rru, *size)
+ ),
+
+ Inst::Div {
+ size,
+ signed,
+ divisor,
+ ..
+ } => format!(
+ "{} {}",
+ ljustify(if *signed {
+ "idiv".to_string()
+ } else {
+ "div".into()
+ }),
+ divisor.show_rru_sized(mb_rru, *size)
+ ),
+
+ Inst::MulHi {
+ size, signed, rhs, ..
+ } => format!(
+ "{} {}",
+ ljustify(if *signed {
+ "imul".to_string()
+ } else {
+ "mul".to_string()
+ }),
+ rhs.show_rru_sized(mb_rru, *size)
+ ),
+
+ Inst::CheckedDivOrRemSeq {
+ kind,
+ size,
+ divisor,
+ ..
+ } => format!(
+ "{} $rax:$rdx, {}",
+ match kind {
+ DivOrRemKind::SignedDiv => "sdiv",
+ DivOrRemKind::UnsignedDiv => "udiv",
+ DivOrRemKind::SignedRem => "srem",
+ DivOrRemKind::UnsignedRem => "urem",
+ },
+ show_ireg_sized(divisor.to_reg(), mb_rru, *size),
+ ),
+
+ Inst::SignExtendData { size } => match size {
+ 1 => "cbw",
+ 2 => "cwd",
+ 4 => "cdq",
+ 8 => "cqo",
+ _ => unreachable!(),
+ }
+ .into(),
+
+ Inst::XmmUnaryRmR { op, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, op.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::XmmMovRM { op, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ show_ireg_sized(*src, mb_rru, 8),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::XmmRmR { op, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, 8),
+ show_ireg_sized(dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::XmmMinMaxSeq {
+ lhs,
+ rhs_dst,
+ is_min,
+ size,
+ } => format!(
+ "{} {}, {}",
+ ljustify2(
+ if *is_min {
+ "xmm min seq ".to_string()
+ } else {
+ "xmm max seq ".to_string()
+ },
+ match size {
+ OperandSize::Size32 => "f32",
+ OperandSize::Size64 => "f64",
+ }
+ .into()
+ ),
+ show_ireg_sized(*lhs, mb_rru, 8),
+ show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::XmmRmRImm { op, src, dst, imm, is64, .. } => format!(
+ "{} ${}, {}, {}",
+ ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })),
+ imm,
+ src.show_rru(mb_rru),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::XmmUninitializedValue { dst } => format!(
+ "{} {}",
+ ljustify("uninit".into()),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::XmmLoadConst { src, dst, .. } => {
+ format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),)
+ }
+
+ Inst::XmmToGpr {
+ op,
+ src,
+ dst,
+ dst_size,
+ } => {
+ let dst_size = match dst_size {
+ OperandSize::Size32 => 4,
+ OperandSize::Size64 => 8,
+ };
+ format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru(mb_rru),
+ show_ireg_sized(dst.to_reg(), mb_rru, dst_size),
+ )
+ }
+
+ Inst::GprToXmm {
+ op,
+ src,
+ src_size,
+ dst,
+ } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, src_size.to_bytes()),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::XmmCmpRmR { op, src, dst } => format!(
+ "{} {}, {}",
+ ljustify(op.to_string()),
+ src.show_rru_sized(mb_rru, 8),
+ show_ireg_sized(*dst, mb_rru, 8),
+ ),
+
+ Inst::CvtUint64ToFloatSeq {
+ src, dst, to_f64, ..
+ } => format!(
+ "{} {}, {}",
+ ljustify(format!(
+ "u64_to_{}_seq",
+ if *to_f64 { "f64" } else { "f32" }
+ )),
+ show_ireg_sized(src.to_reg(), mb_rru, 8),
+ dst.show_rru(mb_rru),
+ ),
+
+ Inst::CvtFloatToSintSeq {
+ src,
+ dst,
+ src_size,
+ dst_size,
+ ..
+ } => format!(
+ "{} {}, {}",
+ ljustify(format!(
+ "cvt_float{}_to_sint{}_seq",
+ if *src_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ },
+ if *dst_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ }
+ )),
+ show_ireg_sized(src.to_reg(), mb_rru, 8),
+ show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
+ ),
+
+ Inst::CvtFloatToUintSeq {
+ src,
+ dst,
+ src_size,
+ dst_size,
+ ..
+ } => format!(
+ "{} {}, {}",
+ ljustify(format!(
+ "cvt_float{}_to_uint{}_seq",
+ if *src_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ },
+ if *dst_size == OperandSize::Size64 {
+ "64"
+ } else {
+ "32"
+ }
+ )),
+ show_ireg_sized(src.to_reg(), mb_rru, 8),
+ show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
+ ),
+
+ Inst::Imm {
+ dst_is_64,
+ simm64,
+ dst,
+ } => {
+ if *dst_is_64 {
+ format!(
+ "{} ${}, {}",
+ ljustify("movabsq".to_string()),
+ *simm64 as i64,
+ show_ireg_sized(dst.to_reg(), mb_rru, 8)
+ )
+ } else {
+ format!(
+ "{} ${}, {}",
+ ljustify("movl".to_string()),
+ (*simm64 as u32) as i32,
+ show_ireg_sized(dst.to_reg(), mb_rru, 4)
+ )
+ }
+ }
+
+ Inst::MovRR { is_64, src, dst } => format!(
+ "{} {}, {}",
+ ljustify2("mov".to_string(), suffix_lq(*is_64)),
+ show_ireg_sized(*src, mb_rru, size_lq(*is_64)),
+ show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64))
+ ),
+
+ Inst::MovzxRmR {
+ ext_mode, src, dst, ..
+ } => {
+ if *ext_mode == ExtMode::LQ {
+ format!(
+ "{} {}, {}",
+ ljustify("movl".to_string()),
+ src.show_rru_sized(mb_rru, ext_mode.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, 4)
+ )
+ } else {
+ format!(
+ "{} {}, {}",
+ ljustify2("movz".to_string(), ext_mode.to_string()),
+ src.show_rru_sized(mb_rru, ext_mode.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
+ )
+ }
+ }
+
+ Inst::Mov64MR { src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify("movq".to_string()),
+ src.show_rru(mb_rru),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::LoadEffectiveAddress { addr, dst } => format!(
+ "{} {}, {}",
+ ljustify("lea".to_string()),
+ addr.show_rru(mb_rru),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::MovsxRmR {
+ ext_mode, src, dst, ..
+ } => format!(
+ "{} {}, {}",
+ ljustify2("movs".to_string(), ext_mode.to_string()),
+ src.show_rru_sized(mb_rru, ext_mode.src_size()),
+ show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
+ ),
+
+ Inst::MovRM { size, src, dst, .. } => format!(
+ "{} {}, {}",
+ ljustify2("mov".to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(*src, mb_rru, *size),
+ dst.show_rru(mb_rru)
+ ),
+
+ Inst::ShiftR {
+ size,
+ kind,
+ num_bits,
+ dst,
+ } => match num_bits {
+ None => format!(
+ "{} %cl, {}",
+ ljustify2(kind.to_string(), suffix_bwlq(*size)),
+ show_ireg_sized(dst.to_reg(), mb_rru, *size)
+ ),
+
+ Some(num_bits) => format!(
+ "{} ${}, {}",
+ ljustify2(kind.to_string(), suffix_bwlq(*size)),
+ num_bits,
+ show_ireg_sized(dst.to_reg(), mb_rru, *size)
+ ),
+ },
+
+ Inst::XmmRmiReg { opcode, src, dst } => format!(
+ "{} {}, {}",
+ ljustify(opcode.to_string()),
+ src.show_rru(mb_rru),
+ dst.to_reg().show_rru(mb_rru)
+ ),
+
+ Inst::CmpRmiR { size, src, dst } => format!(
+ "{} {}, {}",
+ ljustify2("cmp".to_string(), suffix_bwlq(*size)),
+ src.show_rru_sized(mb_rru, *size),
+ show_ireg_sized(*dst, mb_rru, *size)
+ ),
+
+ Inst::Setcc { cc, dst } => format!(
+ "{} {}",
+ ljustify2("set".to_string(), cc.to_string()),
+ show_ireg_sized(dst.to_reg(), mb_rru, 1)
+ ),
+
+ Inst::Cmove { size, cc, src, dst } => format!(
+ "{} {}, {}",
+ ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size))),
+ src.show_rru_sized(mb_rru, *size),
+ show_ireg_sized(dst.to_reg(), mb_rru, *size)
+ ),
+
+ Inst::XmmCmove {
+ is_64,
+ cc,
+ src,
+ dst,
+ } => {
+ let size = if *is_64 { 8 } else { 4 };
+ format!(
+ "j{} $next; mov{} {}, {}; $next: ",
+ cc.invert().to_string(),
+ if *is_64 { "sd" } else { "ss" },
+ src.show_rru_sized(mb_rru, size),
+ show_ireg_sized(dst.to_reg(), mb_rru, size)
+ )
+ }
+
+ Inst::Push64 { src } => {
+ format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
+ }
+
+ Inst::Pop64 { dst } => {
+ format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru))
+ }
+
+ Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest),
+
+ Inst::CallUnknown { dest, .. } => format!(
+ "{} *{}",
+ ljustify("call".to_string()),
+ dest.show_rru(mb_rru)
+ ),
+
+ Inst::Ret => "ret".to_string(),
+
+ Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(),
+
+ Inst::JmpKnown { dst } => {
+ format!("{} {}", ljustify("jmp".to_string()), dst.to_string())
+ }
+
+ Inst::JmpIf { cc, taken } => format!(
+ "{} {}",
+ ljustify2("j".to_string(), cc.to_string()),
+ taken.to_string(),
+ ),
+
+ Inst::JmpCond {
+ cc,
+ taken,
+ not_taken,
+ } => format!(
+ "{} {}; j {}",
+ ljustify2("j".to_string(), cc.to_string()),
+ taken.to_string(),
+ not_taken.to_string()
+ ),
+
+ Inst::JmpTableSeq { idx, .. } => {
+ format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
+ }
+
+ Inst::JmpUnknown { target } => format!(
+ "{} *{}",
+ ljustify("jmp".to_string()),
+ target.show_rru(mb_rru)
+ ),
+
+ Inst::TrapIf { cc, trap_code, .. } => {
+ format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
+ }
+
+ Inst::LoadExtName {
+ dst, name, offset, ..
+ } => format!(
+ "{} {}+{}, {}",
+ ljustify("movaps".into()),
+ name,
+ offset,
+ show_ireg_sized(dst.to_reg(), mb_rru, 8),
+ ),
+
+ Inst::LockCmpxchg { ty, src, dst, .. } => {
+ let size = ty.bytes() as u8;
+ format!("lock cmpxchg{} {}, {}",
+ suffix_bwlq(size), show_ireg_sized(*src, mb_rru, size), dst.show_rru(mb_rru))
+ }
+
+ Inst::AtomicRmwSeq { ty, op, .. } => {
+ format!(
+ "atomically {{ {}_bits_at_[%r9]) {:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}",
+ ty.bits(), op)
+ },
+
+ Inst::Fence { kind } => {
+ match kind {
+ FenceKind::MFence => "mfence".to_string(),
+ FenceKind::LFence => "lfence".to_string(),
+ FenceKind::SFence => "sfence".to_string(),
+ }
+ }
+
+ Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset),
+
+ Inst::Hlt => "hlt".into(),
+
+ Inst::Ud2 { trap_code } => format!("ud2 {}", trap_code),
+ }
+ }
+}
+
+// Temp hook for legacy printing machinery
+impl fmt::Debug for Inst {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ // Print the insn without a Universe :-(
+ write!(fmt, "{}", self.show_rru(None))
+ }
+}
+
+fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+ // This is a bit subtle. If some register is in the modified set, then it may not be in either
+ // the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
+ // regalloc.rs will "fix" this for us by removing the the modified set from the use and def
+ // sets.
+ match inst {
+ Inst::AluRmiR { src, dst, .. } => {
+ if inst.produces_const() {
+ // No need to account for src, since src == dst.
+ collector.add_def(*dst);
+ } else {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ }
+ Inst::Not { src, .. } => {
+ collector.add_mod(*src);
+ }
+ Inst::Neg { src, .. } => {
+ collector.add_mod(*src);
+ }
+ Inst::Div { size, divisor, .. } => {
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ if *size == 1 {
+ collector.add_def(Writable::from_reg(regs::rdx()));
+ } else {
+ collector.add_mod(Writable::from_reg(regs::rdx()));
+ }
+ divisor.get_regs_as_uses(collector);
+ }
+ Inst::MulHi { rhs, .. } => {
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ collector.add_def(Writable::from_reg(regs::rdx()));
+ rhs.get_regs_as_uses(collector);
+ }
+ Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
+            // Mark both fixed registers as mods, to avoid an early-clobber problem in codegen
+            // (i.e. the temporary being allocated to one of the fixed registers). This requires
+            // writing the rdx register *before* the instruction, which is not too bad.
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ collector.add_mod(Writable::from_reg(regs::rdx()));
+ collector.add_mod(*divisor);
+ if let Some(tmp) = tmp {
+ collector.add_def(*tmp);
+ }
+ }
+ Inst::SignExtendData { size } => match size {
+ 1 => collector.add_mod(Writable::from_reg(regs::rax())),
+ 2 | 4 | 8 => {
+ collector.add_use(regs::rax());
+ collector.add_def(Writable::from_reg(regs::rdx()));
+ }
+ _ => unreachable!(),
+ },
+ Inst::UnaryRmR { src, dst, .. } | Inst::XmmUnaryRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::XmmRmR { src, dst, .. } => {
+ if inst.produces_const() {
+ // No need to account for src, since src == dst.
+ collector.add_def(*dst);
+ } else {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ }
+ Inst::XmmRmRImm { op, src, dst, .. } => {
+ if inst.produces_const() {
+ // No need to account for src, since src == dst.
+ collector.add_def(*dst);
+ } else if *op == SseOpcode::Pextrb
+ || *op == SseOpcode::Pextrw
+ || *op == SseOpcode::Pextrd
+ || *op == SseOpcode::Pshufd
+ {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ } else {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ }
+ Inst::XmmUninitializedValue { dst } => collector.add_def(*dst),
+ Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst),
+ Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => {
+ collector.add_use(*lhs);
+ collector.add_mod(*rhs_dst);
+ }
+ Inst::XmmRmiReg { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ Inst::XmmMovRM { src, dst, .. } => {
+ collector.add_use(*src);
+ dst.get_regs_as_uses(collector);
+ }
+ Inst::XmmCmpRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_use(*dst);
+ }
+ Inst::Imm { dst, .. } => {
+ collector.add_def(*dst);
+ }
+ Inst::MovRR { src, dst, .. } | Inst::XmmToGpr { src, dst, .. } => {
+ collector.add_use(*src);
+ collector.add_def(*dst);
+ }
+ Inst::GprToXmm { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::CvtUint64ToFloatSeq {
+ src,
+ dst,
+ tmp_gpr1,
+ tmp_gpr2,
+ ..
+ } => {
+ collector.add_mod(*src);
+ collector.add_def(*dst);
+ collector.add_def(*tmp_gpr1);
+ collector.add_def(*tmp_gpr2);
+ }
+ Inst::CvtFloatToSintSeq {
+ src,
+ dst,
+ tmp_xmm,
+ tmp_gpr,
+ ..
+ }
+ | Inst::CvtFloatToUintSeq {
+ src,
+ dst,
+ tmp_gpr,
+ tmp_xmm,
+ ..
+ } => {
+ collector.add_mod(*src);
+ collector.add_def(*dst);
+ collector.add_def(*tmp_gpr);
+ collector.add_def(*tmp_xmm);
+ }
+ Inst::MovzxRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::Mov64MR { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst)
+ }
+ Inst::MovsxRmR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_def(*dst);
+ }
+ Inst::MovRM { src, dst, .. } => {
+ collector.add_use(*src);
+ dst.get_regs_as_uses(collector);
+ }
+ Inst::ShiftR { num_bits, dst, .. } => {
+ if num_bits.is_none() {
+ collector.add_use(regs::rcx());
+ }
+ collector.add_mod(*dst);
+ }
+ Inst::CmpRmiR { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_use(*dst); // yes, really `add_use`
+ }
+ Inst::Setcc { dst, .. } => {
+ collector.add_def(*dst);
+ }
+ Inst::Cmove { src, dst, .. } | Inst::XmmCmove { src, dst, .. } => {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(*dst);
+ }
+ Inst::Push64 { src } => {
+ src.get_regs_as_uses(collector);
+ collector.add_mod(Writable::from_reg(regs::rsp()));
+ }
+ Inst::Pop64 { dst } => {
+ collector.add_def(*dst);
+ }
+
+ Inst::CallKnown {
+ ref uses, ref defs, ..
+ } => {
+ collector.add_uses(uses);
+ collector.add_defs(defs);
+ }
+
+ Inst::CallUnknown {
+ ref uses,
+ ref defs,
+ dest,
+ ..
+ } => {
+ collector.add_uses(uses);
+ collector.add_defs(defs);
+ dest.get_regs_as_uses(collector);
+ }
+
+ Inst::JmpTableSeq {
+ ref idx,
+ ref tmp1,
+ ref tmp2,
+ ..
+ } => {
+ collector.add_use(*idx);
+ collector.add_def(*tmp1);
+ collector.add_def(*tmp2);
+ }
+
+ Inst::JmpUnknown { target } => {
+ target.get_regs_as_uses(collector);
+ }
+
+ Inst::LoadExtName { dst, .. } => {
+ collector.add_def(*dst);
+ }
+
+ Inst::LockCmpxchg { src, dst, .. } => {
+ dst.get_regs_as_uses(collector);
+ collector.add_use(*src);
+ collector.add_mod(Writable::from_reg(regs::rax()));
+ }
+
+ Inst::AtomicRmwSeq { .. } => {
+ collector.add_use(regs::r9());
+ collector.add_use(regs::r10());
+ collector.add_def(Writable::from_reg(regs::r11()));
+ collector.add_def(Writable::from_reg(regs::rax()));
+ }
+
+ Inst::Ret
+ | Inst::EpiloguePlaceholder
+ | Inst::JmpKnown { .. }
+ | Inst::JmpIf { .. }
+ | Inst::JmpCond { .. }
+ | Inst::Nop { .. }
+ | Inst::TrapIf { .. }
+ | Inst::VirtualSPOffsetAdj { .. }
+ | Inst::Hlt
+ | Inst::Ud2 { .. }
+ | Inst::Fence { .. } => {
+ // No registers are used.
+ }
+ }
+}
+
+//=============================================================================
+// Instructions and subcomponents: map_regs
+
+fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) {
+ if let Some(reg) = r.as_virtual_reg() {
+ let new = m.get_use(reg).unwrap().to_reg();
+ *r = new;
+ }
+}
+
+fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if let Some(reg) = r.to_reg().as_virtual_reg() {
+ let new = m.get_def(reg).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+}
+
+fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) {
+ if let Some(reg) = r.to_reg().as_virtual_reg() {
+ let new = m.get_mod(reg).unwrap().to_reg();
+ *r = Writable::from_reg(new);
+ }
+}
+
+impl Amode {
+ fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ Amode::ImmReg { ref mut base, .. } => map_use(map, base),
+ Amode::ImmRegRegShift {
+ ref mut base,
+ ref mut index,
+ ..
+ } => {
+ map_use(map, base);
+ map_use(map, index);
+ }
+ Amode::RipRelative { .. } => {
+ // RIP isn't involved in regalloc.
+ }
+ }
+ }
+}
+
+impl RegMemImm {
+ fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ RegMemImm::Reg { ref mut reg } => map_use(map, reg),
+ RegMemImm::Mem { ref mut addr } => addr.map_uses(map),
+ RegMemImm::Imm { .. } => {}
+ }
+ }
+
+ fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ match self {
+ Self::Reg { reg } => {
+ let mut writable_src = Writable::from_reg(*reg);
+ map_def(mapper, &mut writable_src);
+ *self = Self::reg(writable_src.to_reg());
+ }
+ _ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"),
+ }
+ }
+}
+
+impl RegMem {
+ fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
+ match self {
+ RegMem::Reg { ref mut reg } => map_use(map, reg),
+ RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
+ }
+ }
+
+ fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ match self {
+ Self::Reg { reg } => {
+ let mut writable_src = Writable::from_reg(*reg);
+ map_def(mapper, &mut writable_src);
+ *self = Self::reg(writable_src.to_reg());
+ }
+ _ => panic!("unexpected RegMem kind in map_src_reg_as_def"),
+ }
+ }
+}
+
+fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
+ // Note this must be carefully synchronized with x64_get_regs.
+ let produces_const = inst.produces_const();
+
+ match inst {
+ // ** Nop
+ Inst::AluRmiR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ if produces_const {
+ src.map_as_def(mapper);
+ map_def(mapper, dst);
+ } else {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ }
+ Inst::Not { src, .. } | Inst::Neg { src, .. } => map_mod(mapper, src),
+ Inst::Div { divisor, .. } => divisor.map_uses(mapper),
+ Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
+ Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => {
+ map_mod(mapper, divisor);
+ if let Some(tmp) = tmp {
+ map_def(mapper, tmp)
+ }
+ }
+ Inst::SignExtendData { .. } => {}
+ Inst::XmmUnaryRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ }
+ | Inst::UnaryRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::XmmRmRImm {
+ ref op,
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ if produces_const {
+ src.map_as_def(mapper);
+ map_def(mapper, dst);
+ } else if *op == SseOpcode::Pextrb
+ || *op == SseOpcode::Pextrw
+ || *op == SseOpcode::Pextrd
+ || *op == SseOpcode::Pshufd
+ {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ } else {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ }
+ Inst::XmmRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ if produces_const {
+ src.map_as_def(mapper);
+ map_def(mapper, dst);
+ } else {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ }
+ Inst::XmmRmiReg {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_mod(mapper, dst);
+ }
+ Inst::XmmUninitializedValue { ref mut dst, .. } => {
+ map_def(mapper, dst);
+ }
+ Inst::XmmLoadConst { ref mut dst, .. } => {
+ map_def(mapper, dst);
+ }
+ Inst::XmmMinMaxSeq {
+ ref mut lhs,
+ ref mut rhs_dst,
+ ..
+ } => {
+ map_use(mapper, lhs);
+ map_mod(mapper, rhs_dst);
+ }
+ Inst::XmmMovRM {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+ Inst::XmmCmpRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_use(mapper, dst);
+ }
+ Inst::Imm { ref mut dst, .. } => map_def(mapper, dst),
+ Inst::MovRR {
+ ref mut src,
+ ref mut dst,
+ ..
+ }
+ | Inst::XmmToGpr {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ map_def(mapper, dst);
+ }
+ Inst::GprToXmm {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::CvtUint64ToFloatSeq {
+ ref mut src,
+ ref mut dst,
+ ref mut tmp_gpr1,
+ ref mut tmp_gpr2,
+ ..
+ } => {
+ map_mod(mapper, src);
+ map_def(mapper, dst);
+ map_def(mapper, tmp_gpr1);
+ map_def(mapper, tmp_gpr2);
+ }
+ Inst::CvtFloatToSintSeq {
+ ref mut src,
+ ref mut dst,
+ ref mut tmp_xmm,
+ ref mut tmp_gpr,
+ ..
+ }
+ | Inst::CvtFloatToUintSeq {
+ ref mut src,
+ ref mut dst,
+ ref mut tmp_gpr,
+ ref mut tmp_xmm,
+ ..
+ } => {
+ map_mod(mapper, src);
+ map_def(mapper, dst);
+ map_def(mapper, tmp_gpr);
+ map_def(mapper, tmp_xmm);
+ }
+ Inst::MovzxRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::Mov64MR { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::MovsxRmR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_def(mapper, dst);
+ }
+ Inst::MovRM {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+ Inst::ShiftR { ref mut dst, .. } => {
+ map_mod(mapper, dst);
+ }
+ Inst::CmpRmiR {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_use(mapper, dst);
+ }
+ Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
+ Inst::Cmove {
+ ref mut src,
+ ref mut dst,
+ ..
+ }
+ | Inst::XmmCmove {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ src.map_uses(mapper);
+ map_mod(mapper, dst)
+ }
+ Inst::Push64 { ref mut src } => src.map_uses(mapper),
+ Inst::Pop64 { ref mut dst } => {
+ map_def(mapper, dst);
+ }
+
+ Inst::CallKnown {
+ ref mut uses,
+ ref mut defs,
+ ..
+ } => {
+ for r in uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ }
+
+ Inst::CallUnknown {
+ ref mut uses,
+ ref mut defs,
+ ref mut dest,
+ ..
+ } => {
+ for r in uses.iter_mut() {
+ map_use(mapper, r);
+ }
+ for r in defs.iter_mut() {
+ map_def(mapper, r);
+ }
+ dest.map_uses(mapper);
+ }
+
+ Inst::JmpTableSeq {
+ ref mut idx,
+ ref mut tmp1,
+ ref mut tmp2,
+ ..
+ } => {
+ map_use(mapper, idx);
+ map_def(mapper, tmp1);
+ map_def(mapper, tmp2);
+ }
+
+ Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),
+
+ Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst),
+
+ Inst::LockCmpxchg {
+ ref mut src,
+ ref mut dst,
+ ..
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+
+ Inst::Ret
+ | Inst::EpiloguePlaceholder
+ | Inst::JmpKnown { .. }
+ | Inst::JmpCond { .. }
+ | Inst::JmpIf { .. }
+ | Inst::Nop { .. }
+ | Inst::TrapIf { .. }
+ | Inst::VirtualSPOffsetAdj { .. }
+ | Inst::Ud2 { .. }
+ | Inst::Hlt
+ | Inst::AtomicRmwSeq { .. }
+ | Inst::Fence { .. } => {
+ // Instruction doesn't explicitly mention any regs, so it can't have any virtual
+ // regs that we'd need to remap. Hence no action required.
+ }
+ }
+}
+
+//=============================================================================
+// Instructions: misc functions and external interface
+
+impl MachInst for Inst {
+ fn get_regs(&self, collector: &mut RegUsageCollector) {
+ x64_get_regs(&self, collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
+ x64_map_regs(self, mapper);
+ }
+
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
+ match self {
+ // Note (carefully!) that a 32-bit mov *isn't* a no-op since it zeroes
+ // out the upper 32 bits of the destination. For example, we could
+ // conceivably use `movl %reg, %reg` to zero out the top 32 bits of
+ // %reg.
+ Self::MovRR {
+ is_64, src, dst, ..
+ } if *is_64 => Some((*dst, *src)),
+ // Note as well that MOVS[S|D] when used in the `XmmUnaryRmR` context are pure moves of
+ // scalar floating-point values (and annotate `dst` as `def`s to the register allocator)
+ // whereas the same operation in a packed context, e.g. `XMM_RM_R`, is used to merge a
+ // value into the lowest lane of a vector (not a move).
+ Self::XmmUnaryRmR { op, src, dst, .. }
+ if *op == SseOpcode::Movss
+ || *op == SseOpcode::Movsd
+ || *op == SseOpcode::Movaps
+ || *op == SseOpcode::Movapd
+ || *op == SseOpcode::Movups
+ || *op == SseOpcode::Movupd
+ || *op == SseOpcode::Movdqa
+ || *op == SseOpcode::Movdqu =>
+ {
+ if let RegMem::Reg { reg } = src {
+ Some((*dst, *reg))
+ } else {
+ None
+ }
+ }
+ _ => None,
+ }
+ }
+
+ fn is_epilogue_placeholder(&self) -> bool {
+ if let Self::EpiloguePlaceholder = self {
+ true
+ } else {
+ false
+ }
+ }
+
+ fn is_term<'a>(&'a self) -> MachTerminator<'a> {
+ match self {
+ // Interesting cases.
+ &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
+ &Self::JmpKnown { dst } => MachTerminator::Uncond(dst),
+ &Self::JmpCond {
+ taken, not_taken, ..
+ } => MachTerminator::Cond(taken, not_taken),
+ &Self::JmpTableSeq {
+ ref targets_for_term,
+ ..
+ } => MachTerminator::Indirect(&targets_for_term[..]),
+ // All other cases are boring.
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
+ let rc_dst = dst_reg.to_reg().get_class();
+ let rc_src = src_reg.get_class();
+ // If this isn't true, we have gone way off the rails.
+ debug_assert!(rc_dst == rc_src);
+ match rc_dst {
+ RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
+ RegClass::V128 => {
+ // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
+ // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
+                // those, which may write more lanes than we need, but are specified to have
+ // zero-latency.
+ let opcode = match ty {
+ types::F32 | types::F64 | types::F32X4 => SseOpcode::Movaps,
+ types::F64X2 => SseOpcode::Movapd,
+ _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa,
+ _ => unimplemented!("unable to move type: {}", ty),
+ };
+ Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg)
+ }
+ _ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst),
+ }
+ }
+
+ fn gen_zero_len_nop() -> Inst {
+ Inst::Nop { len: 0 }
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ Inst::nop((preferred_size % 16) as u8)
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
+ match ty {
+ types::I8
+ | types::I16
+ | types::I32
+ | types::I64
+ | types::B1
+ | types::B8
+ | types::B16
+ | types::B32
+ | types::B64
+ | types::R32
+ | types::R64 => Ok(RegClass::I64),
+ types::F32 | types::F64 => Ok(RegClass::V128),
+ _ if ty.bits() == 128 => Ok(RegClass::V128),
+ types::IFLAGS | types::FFLAGS => Ok(RegClass::I64),
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(label: MachLabel) -> Inst {
+ Inst::jmp_known(label)
+ }
+
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ mut alloc_tmp: F,
+ ) -> SmallVec<[Self; 4]> {
+ let mut ret = SmallVec::new();
+ if ty == types::F32 {
+ if value == 0 {
+ ret.push(Inst::xmm_rm_r(
+ SseOpcode::Xorps,
+ RegMem::reg(to_reg.to_reg()),
+ to_reg,
+ ));
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, types::I32);
+ ret.push(Inst::imm(OperandSize::Size32, value, tmp));
+
+ ret.push(Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ RegMem::reg(tmp.to_reg()),
+ OperandSize::Size32,
+ to_reg,
+ ));
+ }
+ } else if ty == types::F64 {
+ if value == 0 {
+ ret.push(Inst::xmm_rm_r(
+ SseOpcode::Xorpd,
+ RegMem::reg(to_reg.to_reg()),
+ to_reg,
+ ));
+ } else {
+ let tmp = alloc_tmp(RegClass::I64, types::I64);
+ ret.push(Inst::imm(OperandSize::Size64, value, tmp));
+
+ ret.push(Inst::gpr_to_xmm(
+ SseOpcode::Movq,
+ RegMem::reg(tmp.to_reg()),
+ OperandSize::Size64,
+ to_reg,
+ ));
+ }
+ } else {
+ // Must be an integer type.
+ debug_assert!(
+ ty == types::B1
+ || ty == types::I8
+ || ty == types::B8
+ || ty == types::I16
+ || ty == types::B16
+ || ty == types::I32
+ || ty == types::B32
+ || ty == types::I64
+ || ty == types::B64
+ || ty == types::R32
+ || ty == types::R64
+ );
+ if value == 0 {
+ ret.push(Inst::alu_rmi_r(
+ ty == types::I64,
+ AluRmiROpcode::Xor,
+ RegMemImm::reg(to_reg.to_reg()),
+ to_reg,
+ ));
+ } else {
+ ret.push(Inst::imm(
+ OperandSize::from_bytes(ty.bytes()),
+ value.into(),
+ to_reg,
+ ));
+ }
+ }
+ ret
+ }
+
+ fn reg_universe(flags: &Flags) -> RealRegUniverse {
+ create_reg_universe_systemv(flags)
+ }
+
+ fn worst_case_size() -> CodeOffset {
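+        // 15 bytes is the architectural maximum length of a single x86-64 instruction.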
+ 15
+ }
+
+ fn ref_type_regclass(_: &settings::Flags) -> RegClass {
+ RegClass::I64
+ }
+
+ type LabelUse = LabelUse;
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+ /// Addend to convert nominal-SP offsets to real-SP offsets at the current
+ /// program point.
+ pub(crate) virtual_sp_offset: i64,
+ /// Offset of FP from nominal-SP.
+ pub(crate) nominal_sp_to_fp: i64,
+ /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+ stack_map: Option<StackMap>,
+ /// Current source location.
+ cur_srcloc: SourceLoc,
+}
+
+/// Constant state used during emissions of a sequence of instructions.
+pub struct EmitInfo {
+ flags: settings::Flags,
+ isa_flags: x64_settings::Flags,
+}
+
+impl EmitInfo {
+ pub(crate) fn new(flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self {
+ Self { flags, isa_flags }
+ }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+ fn flags(&self) -> &Flags {
+ &self.flags
+ }
+}
+
+impl MachInstEmit for Inst {
+ type State = EmitState;
+ type Info = EmitInfo;
+ type UnwindInfo = unwind::X64UnwindInfo;
+
+ fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
+ emit::emit(self, sink, info, state);
+ }
+
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
+ self.show_rru(mb_rru)
+ }
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+ fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+ EmitState {
+ virtual_sp_offset: 0,
+ nominal_sp_to_fp: abi.frame_size() as i64,
+ stack_map: None,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ fn pre_safepoint(&mut self, stack_map: StackMap) {
+ self.stack_map = Some(stack_map);
+ }
+
+ fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+}
+
+impl EmitState {
+ fn take_stack_map(&mut self) -> Option<StackMap> {
+ self.stack_map.take()
+ }
+
+ fn clear_post_insn(&mut self) {
+ self.stack_map = None;
+ }
+
+ fn cur_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+}
+
+/// A label-use (internal relocation) in generated code.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+    /// A 32-bit offset from the location of the relocation itself, added to the existing value
+    /// at that location. Used for control-flow instructions, which interpret the offset as
+    /// relative to the start of the next instruction (so the size of the payload -- 4 bytes --
+    /// is subtracted from the patched-in value).
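+    /// For example (assuming a zero addend already in the buffer), a jump whose rel32 field
+    /// starts at offset 0x10 and whose target label lands at offset 0x40 gets 0x40 - 0x10 - 4 =
+    /// 0x2c patched in.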
+ JmpRel32,
+
+ /// A 32-bit offset from location of relocation itself, added to the existing value at that
+ /// location.
+ PCRel32,
+}
+
+impl MachInstLabelUse for LabelUse {
+ const ALIGN: CodeOffset = 1;
+
+ fn max_pos_range(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x7fff_ffff,
+ }
+ }
+
+ fn max_neg_range(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 0x8000_0000,
+ }
+ }
+
+ fn patch_size(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 4,
+ }
+ }
+
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
+ let pc_rel = (label_offset as i64) - (use_offset as i64);
+ debug_assert!(pc_rel <= self.max_pos_range() as i64);
+ debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
+ let pc_rel = pc_rel as u32;
+ match self {
+ LabelUse::JmpRel32 => {
+ let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let value = pc_rel.wrapping_add(addend).wrapping_sub(4);
+ buffer.copy_from_slice(&value.to_le_bytes()[..]);
+ }
+ LabelUse::PCRel32 => {
+ let addend = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+ let value = pc_rel.wrapping_add(addend);
+ buffer.copy_from_slice(&value.to_le_bytes()[..]);
+ }
+ }
+ }
+
+ fn supports_veneer(self) -> bool {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => false,
+ }
+ }
+
+ fn veneer_size(self) -> CodeOffset {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => 0,
+ }
+ }
+
+ fn generate_veneer(self, _: &mut [u8], _: CodeOffset) -> (CodeOffset, LabelUse) {
+ match self {
+ LabelUse::JmpRel32 | LabelUse::PCRel32 => {
+ panic!("Veneer not supported for JumpRel32 label-use.");
+ }
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs
new file mode 100644
index 0000000000..04bc1f09bf
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/regs.rs
@@ -0,0 +1,289 @@
+//! Registers, the Universe thereof, and printing.
+//!
+//! These are ordered by sequence number, as required in the Universe. The strange ordering is
+//! intended to make callee-save registers available before caller-saved ones. This is a net win
+//! provided that each function makes at least one onward call. It'll be a net loss for leaf
+//! functions, and we should change the ordering in that case, so as to make caller-save regs
+//! available first.
+//!
+//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
+//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
+//! for each function we compile.
+
+use crate::settings;
+use alloc::vec::Vec;
+use regalloc::{
+ PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES,
+};
+use std::string::String;
+
+// Hardware encodings for a few registers.
+
+pub const ENC_RBX: u8 = 3;
+pub const ENC_RSP: u8 = 4;
+pub const ENC_RBP: u8 = 5;
+pub const ENC_R12: u8 = 12;
+pub const ENC_R13: u8 = 13;
+pub const ENC_R14: u8 = 14;
+pub const ENC_R15: u8 = 15;
+
+fn gpr(enc: u8, index: u8) -> Reg {
+ Reg::new_real(RegClass::I64, enc, index)
+}
+
+pub(crate) fn r12() -> Reg {
+ gpr(ENC_R12, 16)
+}
+pub(crate) fn r13() -> Reg {
+ gpr(ENC_R13, 17)
+}
+pub(crate) fn r14() -> Reg {
+ gpr(ENC_R14, 18)
+}
+pub(crate) fn rbx() -> Reg {
+ gpr(ENC_RBX, 19)
+}
+pub(crate) fn rsi() -> Reg {
+ gpr(6, 20)
+}
+pub(crate) fn rdi() -> Reg {
+ gpr(7, 21)
+}
+pub(crate) fn rax() -> Reg {
+ gpr(0, 22)
+}
+pub(crate) fn rcx() -> Reg {
+ gpr(1, 23)
+}
+pub(crate) fn rdx() -> Reg {
+ gpr(2, 24)
+}
+pub(crate) fn r8() -> Reg {
+ gpr(8, 25)
+}
+pub(crate) fn r9() -> Reg {
+ gpr(9, 26)
+}
+pub(crate) fn r10() -> Reg {
+ gpr(10, 27)
+}
+pub(crate) fn r11() -> Reg {
+ gpr(11, 28)
+}
+
+pub(crate) fn r15() -> Reg {
+ // r15 is put aside since this is the pinned register.
+ gpr(ENC_R15, 29)
+}
+
+/// The pinned register on this architecture.
+/// It must be the same as SpiderMonkey's HeapReg, as defined in this file:
+/// https://searchfox.org/mozilla-central/source/js/src/jit/x64/Assembler-x64.h#99
+pub(crate) fn pinned_reg() -> Reg {
+ r15()
+}
+
+fn fpr(enc: u8, index: u8) -> Reg {
+ Reg::new_real(RegClass::V128, enc, index)
+}
+
+pub(crate) fn xmm0() -> Reg {
+ fpr(0, 0)
+}
+pub(crate) fn xmm1() -> Reg {
+ fpr(1, 1)
+}
+pub(crate) fn xmm2() -> Reg {
+ fpr(2, 2)
+}
+pub(crate) fn xmm3() -> Reg {
+ fpr(3, 3)
+}
+pub(crate) fn xmm4() -> Reg {
+ fpr(4, 4)
+}
+pub(crate) fn xmm5() -> Reg {
+ fpr(5, 5)
+}
+pub(crate) fn xmm6() -> Reg {
+ fpr(6, 6)
+}
+pub(crate) fn xmm7() -> Reg {
+ fpr(7, 7)
+}
+pub(crate) fn xmm8() -> Reg {
+ fpr(8, 8)
+}
+pub(crate) fn xmm9() -> Reg {
+ fpr(9, 9)
+}
+pub(crate) fn xmm10() -> Reg {
+ fpr(10, 10)
+}
+pub(crate) fn xmm11() -> Reg {
+ fpr(11, 11)
+}
+pub(crate) fn xmm12() -> Reg {
+ fpr(12, 12)
+}
+pub(crate) fn xmm13() -> Reg {
+ fpr(13, 13)
+}
+pub(crate) fn xmm14() -> Reg {
+ fpr(14, 14)
+}
+pub(crate) fn xmm15() -> Reg {
+ fpr(15, 15)
+}
+
+pub(crate) fn rsp() -> Reg {
+ gpr(ENC_RSP, 30)
+}
+pub(crate) fn rbp() -> Reg {
+ gpr(ENC_RBP, 31)
+}
+
+/// Create the register universe for X64.
+///
+/// The ordering of registers matters, as commented in the file doc comment: assumes the
+/// calling-convention is SystemV, at the moment.
+pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse {
+ let mut regs = Vec::<(RealReg, String)>::new();
+ let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+ let use_pinned_reg = flags.enable_pinned_reg();
+
+ // XMM registers
+ let first_fpr = regs.len();
+ regs.push((xmm0().to_real_reg(), "%xmm0".into()));
+ regs.push((xmm1().to_real_reg(), "%xmm1".into()));
+ regs.push((xmm2().to_real_reg(), "%xmm2".into()));
+ regs.push((xmm3().to_real_reg(), "%xmm3".into()));
+ regs.push((xmm4().to_real_reg(), "%xmm4".into()));
+ regs.push((xmm5().to_real_reg(), "%xmm5".into()));
+ regs.push((xmm6().to_real_reg(), "%xmm6".into()));
+ regs.push((xmm7().to_real_reg(), "%xmm7".into()));
+ regs.push((xmm8().to_real_reg(), "%xmm8".into()));
+ regs.push((xmm9().to_real_reg(), "%xmm9".into()));
+ regs.push((xmm10().to_real_reg(), "%xmm10".into()));
+ regs.push((xmm11().to_real_reg(), "%xmm11".into()));
+ regs.push((xmm12().to_real_reg(), "%xmm12".into()));
+ regs.push((xmm13().to_real_reg(), "%xmm13".into()));
+ regs.push((xmm14().to_real_reg(), "%xmm14".into()));
+ regs.push((xmm15().to_real_reg(), "%xmm15".into()));
+ let last_fpr = regs.len() - 1;
+
+ // Integer regs.
+ let first_gpr = regs.len();
+
+ // Callee-saved, in the SystemV x86_64 ABI.
+ regs.push((r12().to_real_reg(), "%r12".into()));
+ regs.push((r13().to_real_reg(), "%r13".into()));
+ regs.push((r14().to_real_reg(), "%r14".into()));
+
+ regs.push((rbx().to_real_reg(), "%rbx".into()));
+
+ // Caller-saved, in the SystemV x86_64 ABI.
+ regs.push((rsi().to_real_reg(), "%rsi".into()));
+ regs.push((rdi().to_real_reg(), "%rdi".into()));
+ regs.push((rax().to_real_reg(), "%rax".into()));
+ regs.push((rcx().to_real_reg(), "%rcx".into()));
+ regs.push((rdx().to_real_reg(), "%rdx".into()));
+ regs.push((r8().to_real_reg(), "%r8".into()));
+ regs.push((r9().to_real_reg(), "%r9".into()));
+ regs.push((r10().to_real_reg(), "%r10".into()));
+ regs.push((r11().to_real_reg(), "%r11".into()));
+
+ // Other regs, not available to the allocator.
+ debug_assert_eq!(r15(), pinned_reg());
+ let allocable = if use_pinned_reg {
+ // The pinned register is not allocatable in this case, so record the length before adding
+ // it.
+ let len = regs.len();
+ regs.push((r15().to_real_reg(), "%r15/pinned".into()));
+ len
+ } else {
+ regs.push((r15().to_real_reg(), "%r15".into()));
+ regs.len()
+ };
+ let last_gpr = allocable - 1;
+
+ regs.push((rsp().to_real_reg(), "%rsp".into()));
+ regs.push((rbp().to_real_reg(), "%rbp".into()));
+
+ allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+ first: first_gpr,
+ last: last_gpr,
+ suggested_scratch: Some(r12().get_index()),
+ });
+ allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
+ first: first_fpr,
+ last: last_fpr,
+ suggested_scratch: Some(xmm15().get_index()),
+ });
+
+ // Sanity-check: the index passed to the Reg ctor must match the order in the register list.
+ for (i, reg) in regs.iter().enumerate() {
+ assert_eq!(i, reg.0.get_index());
+ }
+
+ RealRegUniverse {
+ regs,
+ allocable,
+ allocable_by_class,
+ }
+}
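+
+// A small, illustrative check of the ordering described in the file doc comment: callee-saved
+// GPRs are handed out before caller-saved ones, and %rsp/%rbp are listed but never allocatable.
+#[cfg(test)]
+#[test]
+fn reg_universe_ordering_example() {
+ let flags = settings::Flags::new(settings::builder());
+ let universe = create_reg_universe_systemv(&flags);
+ // Callee-saved %r12 comes before caller-saved %rax in the universe ordering.
+ assert!(r12().get_index() < rax().get_index());
+ // %rsp and %rbp sit past the allocatable range.
+ assert!(rsp().get_index() >= universe.allocable);
+ assert!(rbp().get_index() >= universe.allocable);
+}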
+
+/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some
+/// smaller size (4, 2 or 1 bytes).
+pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
+ let mut s = reg.show_rru(mb_rru);
+
+ if reg.get_class() != RegClass::I64 || size == 8 {
+ // We can't do any better.
+ return s;
+ }
+
+ if reg.is_real() {
+ // Change (e.g.) "rax" into "eax", "ax" or "al" as appropriate. This is something one could
+ // describe diplomatically as "a kludge", but it's only debug code.
+ let remapper = match s.as_str() {
+ "%rax" => Some(["%eax", "%ax", "%al"]),
+ "%rbx" => Some(["%ebx", "%bx", "%bl"]),
+ "%rcx" => Some(["%ecx", "%cx", "%cl"]),
+ "%rdx" => Some(["%edx", "%dx", "%dl"]),
+ "%rsi" => Some(["%esi", "%si", "%sil"]),
+ "%rdi" => Some(["%edi", "%di", "%dil"]),
+ "%rbp" => Some(["%ebp", "%bp", "%bpl"]),
+ "%rsp" => Some(["%esp", "%sp", "%spl"]),
+ "%r8" => Some(["%r8d", "%r8w", "%r8b"]),
+ "%r9" => Some(["%r9d", "%r9w", "%r9b"]),
+ "%r10" => Some(["%r10d", "%r10w", "%r10b"]),
+ "%r11" => Some(["%r11d", "%r11w", "%r11b"]),
+ "%r12" => Some(["%r12d", "%r12w", "%r12b"]),
+ "%r13" => Some(["%r13d", "%r13w", "%r13b"]),
+ "%r14" => Some(["%r14d", "%r14w", "%r14b"]),
+ "%r15" => Some(["%r15d", "%r15w", "%r15b"]),
+ _ => None,
+ };
+ if let Some(smaller_names) = remapper {
+ match size {
+ 4 => s = smaller_names[0].into(),
+ 2 => s = smaller_names[1].into(),
+ 1 => s = smaller_names[2].into(),
+ _ => panic!("show_ireg_sized: real"),
+ }
+ }
+ } else {
+ // Add an "l", "w" or "b" suffix to RegClass::I64 vregs used at narrower widths.
+ let suffix = match size {
+ 4 => "l",
+ 2 => "w",
+ 1 => "b",
+ _ => panic!("show_ireg_sized: virtual"),
+ };
+ s = s + suffix;
+ }
+
+ s
+}
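+
+// For example (illustrative only, assuming the SystemV universe built above is used for
+// printing): %rax shown at narrower widths becomes %eax, %ax and %al respectively.
+#[cfg(test)]
+#[test]
+fn show_ireg_sized_example() {
+ let flags = settings::Flags::new(settings::builder());
+ let universe = create_reg_universe_systemv(&flags);
+ assert_eq!(show_ireg_sized(rax(), Some(&universe), 8), "%rax");
+ assert_eq!(show_ireg_sized(rax(), Some(&universe), 4), "%eax");
+ assert_eq!(show_ireg_sized(rax(), Some(&universe), 2), "%ax");
+ assert_eq!(show_ireg_sized(rax(), Some(&universe), 1), "%al");
+}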
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs
new file mode 100644
index 0000000000..ffe43930f0
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind.rs
@@ -0,0 +1,125 @@
+use crate::isa::unwind::input::UnwindInfo;
+use crate::isa::x64::inst::{
+ args::{AluRmiROpcode, Amode, RegMemImm, SyntheticAmode},
+ regs, Inst,
+};
+use crate::machinst::{UnwindInfoContext, UnwindInfoGenerator};
+use crate::result::CodegenResult;
+use alloc::vec::Vec;
+use regalloc::Reg;
+
+#[cfg(feature = "unwind")]
+pub(crate) mod systemv;
+
+pub struct X64UnwindInfo;
+
+impl UnwindInfoGenerator<Inst> for X64UnwindInfo {
+ fn create_unwind_info(
+ context: UnwindInfoContext<Inst>,
+ ) -> CodegenResult<Option<UnwindInfo<Reg>>> {
+ use crate::isa::unwind::input::{self, UnwindCode};
+ let mut codes = Vec::new();
+ const WORD_SIZE: u8 = 8;
+
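+ // The prologue scan below recognizes the canonical frame-setup shapes and maps each to a
+ // generic unwind code: `push <reg>` becomes a stack allocation plus a register save,
+ // `mov %rsp, <reg>` sets the frame pointer, `sub/add $imm, %rsp` allocates/deallocates
+ // stack, and `mov <reg>, imm(%rsp)` records a clobber save at that offset.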
+ for i in context.prologue.clone() {
+ let i = i as usize;
+ let inst = &context.insts[i];
+ let offset = context.insts_layout[i];
+
+ match inst {
+ Inst::Push64 {
+ src: RegMemImm::Reg { reg },
+ } => {
+ codes.push((
+ offset,
+ UnwindCode::StackAlloc {
+ size: WORD_SIZE.into(),
+ },
+ ));
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *reg,
+ stack_offset: 0,
+ },
+ ));
+ }
+ Inst::MovRR { src, dst, .. } => {
+ if *src == regs::rsp() {
+ codes.push((offset, UnwindCode::SetFramePointer { reg: dst.to_reg() }));
+ }
+ }
+ Inst::AluRmiR {
+ is_64: true,
+ op: AluRmiROpcode::Sub,
+ src: RegMemImm::Imm { simm32 },
+ dst,
+ ..
+ } if dst.to_reg() == regs::rsp() => {
+ let imm = *simm32;
+ codes.push((offset, UnwindCode::StackAlloc { size: imm }));
+ }
+ Inst::MovRM {
+ src,
+ dst: SyntheticAmode::Real(Amode::ImmReg { simm32, base, .. }),
+ ..
+ } if *base == regs::rsp() => {
+ // `mov reg, imm(rsp)`
+ let imm = *simm32;
+ codes.push((
+ offset,
+ UnwindCode::SaveRegister {
+ reg: *src,
+ stack_offset: imm,
+ },
+ ));
+ }
+ Inst::AluRmiR {
+ is_64: true,
+ op: AluRmiROpcode::Add,
+ src: RegMemImm::Imm { simm32 },
+ dst,
+ ..
+ } if dst.to_reg() == regs::rsp() => {
+ let imm = *simm32;
+ codes.push((offset, UnwindCode::StackDealloc { size: imm }));
+ }
+ _ => {}
+ }
+ }
+
+ let last_epilogue_end = context.len;
+ let epilogues_unwind_codes = context
+ .epilogues
+ .iter()
+ .map(|epilogue| {
+ // TODO add logic to process epilogue instructions instead of
+ // returning an empty array.
+ let end = epilogue.end as usize - 1;
+ let end_offset = context.insts_layout[end];
+ if end_offset == last_epilogue_end {
+ // Do not remember/restore for the very last epilogue.
+ return vec![];
+ }
+
+ let start = epilogue.start as usize;
+ let offset = context.insts_layout[start];
+ vec![
+ (offset, UnwindCode::RememberState),
+ // TODO epilogue instructions
+ (end_offset, UnwindCode::RestoreState),
+ ]
+ })
+ .collect();
+
+ let prologue_size = context.insts_layout[context.prologue.end as usize];
+ Ok(Some(input::UnwindInfo {
+ prologue_size,
+ prologue_unwind_codes: codes,
+ epilogues_unwind_codes,
+ function_size: context.len,
+ word_size: WORD_SIZE,
+ initial_sp_offset: WORD_SIZE,
+ }))
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs
new file mode 100644
index 0000000000..68473a8afb
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/inst/unwind/systemv.rs
@@ -0,0 +1,204 @@
+//! Unwind information for System V ABI (x86-64).
+
+use crate::isa::unwind::input;
+use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo};
+use crate::result::CodegenResult;
+use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
+use regalloc::{Reg, RegClass};
+
+/// Creates a new x86-64 common information entry (CIE).
+pub fn create_cie() -> CommonInformationEntry {
+ use gimli::write::CallFrameInstruction;
+
+ let mut entry = CommonInformationEntry::new(
+ Encoding {
+ address_size: 8,
+ format: Format::Dwarf32,
+ version: 1,
+ },
+ 1, // Code alignment factor
+ -8, // Data alignment factor
+ X86_64::RA,
+ );
+
+ // Every frame will start with the call frame address (CFA) at RSP+8
+ // It is +8 to account for the push of the return address by the call instruction
+ entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8));
+
+ // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP)
+ entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8));
+
+ entry
+}
+
+/// Map Cranelift registers to their corresponding Gimli registers.
+pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
+ // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
+ const X86_GP_REG_MAP: [gimli::Register; 16] = [
+ X86_64::RAX,
+ X86_64::RCX,
+ X86_64::RDX,
+ X86_64::RBX,
+ X86_64::RSP,
+ X86_64::RBP,
+ X86_64::RSI,
+ X86_64::RDI,
+ X86_64::R8,
+ X86_64::R9,
+ X86_64::R10,
+ X86_64::R11,
+ X86_64::R12,
+ X86_64::R13,
+ X86_64::R14,
+ X86_64::R15,
+ ];
+ const X86_XMM_REG_MAP: [gimli::Register; 16] = [
+ X86_64::XMM0,
+ X86_64::XMM1,
+ X86_64::XMM2,
+ X86_64::XMM3,
+ X86_64::XMM4,
+ X86_64::XMM5,
+ X86_64::XMM6,
+ X86_64::XMM7,
+ X86_64::XMM8,
+ X86_64::XMM9,
+ X86_64::XMM10,
+ X86_64::XMM11,
+ X86_64::XMM12,
+ X86_64::XMM13,
+ X86_64::XMM14,
+ X86_64::XMM15,
+ ];
+
+ match reg.get_class() {
+ RegClass::I64 => {
+ // x86 GP registers have a weird mapping to DWARF registers, so we use a
+ // lookup table.
+ Ok(X86_GP_REG_MAP[reg.get_hw_encoding() as usize])
+ }
+ RegClass::V128 => Ok(X86_XMM_REG_MAP[reg.get_hw_encoding() as usize]),
+ _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
+ }
+}
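+
+// A small illustration of the "weird mapping": %rcx has hardware encoding 1 but DWARF register
+// number 2, so the table above cannot simply reuse hardware encodings.
+#[cfg(test)]
+#[test]
+fn map_reg_uses_dwarf_numbering() {
+ use crate::isa::x64::inst::regs;
+ assert_eq!(map_reg(regs::rcx()).ok(), Some(X86_64::RCX));
+ assert_eq!(X86_64::RCX.0, 2);
+}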
+
+pub(crate) fn create_unwind_info(
+ unwind: input::UnwindInfo<Reg>,
+) -> CodegenResult<Option<UnwindInfo>> {
+ struct RegisterMapper;
+ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
+ fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+ Ok(map_reg(reg)?.0)
+ }
+ fn sp(&self) -> u16 {
+ X86_64::RSP.0
+ }
+ }
+ let map = RegisterMapper;
+
+ Ok(Some(UnwindInfo::build(unwind, &map)?))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::{
+ types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
+ StackSlotKind,
+ };
+ use crate::isa::{lookup, CallConv};
+ use crate::settings::{builder, Flags};
+ use crate::Context;
+ use gimli::write::Address;
+ use std::str::FromStr;
+ use target_lexicon::triple;
+
+ #[test]
+ fn test_simple_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_function(
+ CallConv::SystemV,
+ Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+ ));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(1234))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 13, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
+ }
+
+ fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+ let mut func =
+ Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+ let block0 = func.dfg.make_block();
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().return_(&[]);
+
+ if let Some(stack_slot) = stack_slot {
+ func.stack_slots.push(stack_slot);
+ }
+
+ func
+ }
+
+ #[test]
+ fn test_multi_return_func() {
+ let isa = lookup(triple!("x86_64"))
+ .expect("expect x86 ISA")
+ .finish(Flags::new(builder()));
+
+ let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+ context.compile(&*isa).expect("expected compilation");
+
+ let fde = match context
+ .create_unwind_info(isa.as_ref())
+ .expect("can create unwind info")
+ {
+ Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+ info.to_fde(Address::Constant(4321))
+ }
+ _ => panic!("expected unwind information"),
+ };
+
+ assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 23, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6))), (16, RememberState), (18, RestoreState)] }");
+ }
+
+ fn create_multi_return_function(call_conv: CallConv) -> Function {
+ let mut sig = Signature::new(call_conv);
+ sig.params.push(AbiParam::new(types::I32));
+ let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+ let block0 = func.dfg.make_block();
+ let v0 = func.dfg.append_block_param(block0, types::I32);
+ let block1 = func.dfg.make_block();
+ let block2 = func.dfg.make_block();
+
+ let mut pos = FuncCursor::new(&mut func);
+ pos.insert_block(block0);
+ pos.ins().brnz(v0, block2, &[]);
+ pos.ins().jump(block1, &[]);
+
+ pos.insert_block(block1);
+ pos.ins().return_(&[]);
+
+ pos.insert_block(block2);
+ pos.ins().return_(&[]);
+
+ func
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/lower.rs b/third_party/rust/cranelift-codegen/src/isa/x64/lower.rs
new file mode 100644
index 0000000000..0862154360
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/lower.rs
@@ -0,0 +1,3771 @@
+//! Lowering rules for X64.
+
+use crate::data_value::DataValue;
+use crate::ir::{
+ condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName,
+ Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type,
+};
+use crate::isa::x64::abi::*;
+use crate::isa::x64::inst::args::*;
+use crate::isa::x64::inst::*;
+use crate::isa::{x64::X64Backend, CallConv};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::result::CodegenResult;
+use crate::settings::Flags;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use cranelift_codegen_shared::condcodes::CondCode;
+use log::trace;
+use regalloc::{Reg, RegClass, Writable};
+use smallvec::SmallVec;
+use std::convert::TryFrom;
+use target_lexicon::Triple;
+
+/// Context passed to all lowering functions.
+type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
+
+//=============================================================================
+// Helpers for instruction lowering.
+
+fn is_int_or_ref_ty(ty: Type) -> bool {
+ match ty {
+ types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
+ types::R32 => panic!("shouldn't have 32-bit refs on x64"),
+ _ => false,
+ }
+}
+
+fn is_bool_ty(ty: Type) -> bool {
+ match ty {
+ types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
+ types::R32 => panic!("shouldn't have 32-bit refs on x64"),
+ _ => false,
+ }
+}
+
+/// This is target-word-size dependent, and it excludes booleans and reftypes.
+fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
+ match ty {
+ types::I8 | types::I16 | types::I32 | types::I64 => true,
+ _ => false,
+ }
+}
+
+/// If the specified `input` is a result produced by an instruction with opcode `op`, returns that
+/// producing instruction; otherwise returns `None`.
+// TODO investigate failures with checking against the result index.
+fn matches_input<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ op: Opcode,
+) -> Option<IRInst> {
+ let inputs = ctx.get_input(input.insn, input.input);
+ inputs.inst.and_then(|(src_inst, _)| {
+ let data = ctx.data(src_inst);
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ None
+ })
+}
+
+/// If the specified `input` is a result produced by an instruction whose opcode is any of those
+/// specified in `ops`, returns that producing instruction; otherwise returns `None`.
+fn matches_input_any<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ input: InsnInput,
+ ops: &[Opcode],
+) -> Option<IRInst> {
+ let inputs = ctx.get_input(input.insn, input.input);
+ inputs.inst.and_then(|(src_inst, _)| {
+ let data = ctx.data(src_inst);
+ for &op in ops {
+ if data.opcode() == op {
+ return Some(src_inst);
+ }
+ }
+ None
+ })
+}
+
+fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg {
+ ctx.use_input_reg(input);
+ input.reg
+}
+
+/// Put the given input into a register, and mark it as used (side-effect).
+fn put_input_in_reg(ctx: Ctx, spec: InsnInput) -> Reg {
+ let input = ctx.get_input(spec.insn, spec.input);
+
+ if let Some(c) = input.constant {
+ // Generate constants fresh at each use to minimize long-range register pressure.
+ let ty = ctx.input_ty(spec.insn, spec.input);
+ let from_bits = ty_bits(ty);
+ let masked = if from_bits < 64 {
+ c & ((1u64 << from_bits) - 1)
+ } else {
+ c
+ };
+
+ let cst_copy = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty);
+ for inst in Inst::gen_constant(cst_copy, masked, ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ })
+ .into_iter()
+ {
+ ctx.emit(inst);
+ }
+ cst_copy.to_reg()
+ } else {
+ lowerinput_to_reg(ctx, input)
+ }
+}
+
+/// An extension specification for `extend_input_to_reg`.
+#[derive(Clone, Copy)]
+enum ExtSpec {
+ ZeroExtendTo32,
+ ZeroExtendTo64,
+ SignExtendTo32,
+ SignExtendTo64,
+}
+
+/// Put the given input into a register, marking it as used, and do a zero- or sign-extension if
+/// required. (This obviously causes side effects.)
+fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
+ let requested_size = match ext_spec {
+ ExtSpec::ZeroExtendTo32 | ExtSpec::SignExtendTo32 => 32,
+ ExtSpec::ZeroExtendTo64 | ExtSpec::SignExtendTo64 => 64,
+ };
+ let input_size = ctx.input_ty(spec.insn, spec.input).bits();
+
+ let requested_ty = if requested_size == 32 {
+ types::I32
+ } else {
+ types::I64
+ };
+
+ let ext_mode = match (input_size, requested_size) {
+ (a, b) if a == b => return put_input_in_reg(ctx, spec),
+ (1, 8) => return put_input_in_reg(ctx, spec),
+ (a, b) => ExtMode::new(a, b).expect(&format!("invalid extension: {} -> {}", a, b)),
+ };
+
+ let src = input_to_reg_mem(ctx, spec);
+ let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
+ match ext_spec {
+ ExtSpec::ZeroExtendTo32 | ExtSpec::ZeroExtendTo64 => {
+ ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst))
+ }
+ ExtSpec::SignExtendTo32 | ExtSpec::SignExtendTo64 => {
+ ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst))
+ }
+ }
+ dst.to_reg()
+}
+
+fn lowerinput_to_reg_mem(ctx: Ctx, input: LowerInput) -> RegMem {
+ // TODO handle memory.
+ RegMem::reg(lowerinput_to_reg(ctx, input))
+}
+
+/// Put the given input into a register or a memory operand.
+/// Effectful: may mark the given input as used, when returning the register form.
+fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
+ let input = ctx.get_input(spec.insn, spec.input);
+ lowerinput_to_reg_mem(ctx, input)
+}
+
+/// Returns the given input as an immediate if it is one that can be properly sign-extended,
+/// without any possible side effect.
+fn lowerinput_to_sext_imm(input: LowerInput, input_ty: Type) -> Option<u32> {
+ input.constant.and_then(|x| {
+ // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
+ // to 64 bits. For other sizes, it doesn't matter and we can just use the plain
+ // constant.
+ if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) {
+ Some(x as u32)
+ } else {
+ None
+ }
+ })
+}
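+
+// For instance (an illustrative check, not upstream code): an all-ones value survives the 32-bit
+// sign-extension round-trip, while 0xffff_ffff does not, so only the former may be used as a
+// REX.W immediate.
+#[cfg(test)]
+#[test]
+fn sext_imm_example() {
+ assert!(low32_will_sign_extend_to_64(u64::max_value()));
+ assert!(!low32_will_sign_extend_to_64(0xffff_ffff));
+ assert!(low32_will_sign_extend_to_64(0x7fff_ffff));
+}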
+
+fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
+ let input = ctx.get_input(spec.insn, spec.input);
+ let input_ty = ctx.input_ty(spec.insn, spec.input);
+ lowerinput_to_sext_imm(input, input_ty)
+}
+
+fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
+ ctx.get_input(spec.insn, spec.input).constant
+}
+
+/// Put the given input into an immediate, a register or a memory operand.
+/// Effectful: may mark the given input as used, when returning the register form.
+fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
+ let input = ctx.get_input(spec.insn, spec.input);
+ let input_ty = ctx.input_ty(spec.insn, spec.input);
+ match lowerinput_to_sext_imm(input, input_ty) {
+ Some(x) => RegMemImm::imm(x),
+ None => match lowerinput_to_reg_mem(ctx, input) {
+ RegMem::Reg { reg } => RegMemImm::reg(reg),
+ RegMem::Mem { addr } => RegMemImm::mem(addr),
+ },
+ }
+}
+
+/// Emit an instruction to insert a value `src` into a lane of `dst`.
+fn emit_insert_lane<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ src: RegMem,
+ dst: Writable<Reg>,
+ lane: u8,
+ ty: Type,
+) {
+ if !ty.is_float() {
+ let (sse_op, is64) = match ty.lane_bits() {
+ 8 => (SseOpcode::Pinsrb, false),
+ 16 => (SseOpcode::Pinsrw, false),
+ 32 => (SseOpcode::Pinsrd, false),
+ 64 => (SseOpcode::Pinsrd, true),
+ _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
+ };
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64));
+ } else if ty == types::F32 {
+ let sse_op = SseOpcode::Insertps;
+ // Insert 32 bits from the replacement (source lane 00, in immediate bits 7:6) into the
+ // vector (destination lane shifted into immediate bits 5:4).
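+ // For example, inserting into lane 2 yields the immediate 0b0010_0000 (0x20).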
+ let lane = 0b00_00_00_00 | lane << 4;
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false));
+ } else if ty == types::F64 {
+ let sse_op = match lane {
+ // Move the lowest quadword in replacement to vector without changing
+ // the upper bits.
+ 0 => SseOpcode::Movsd,
+ // Move the low 64 bits of replacement vector to the high 64 bits of the
+ // vector.
+ 1 => SseOpcode::Movlhps,
+ _ => unreachable!(),
+ };
+ // Here we use the `xmm_rm_r` encoding because it correctly tells the register
+ // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
+ // encoding formats like `xmm_unary_rm_r` treat it as a `def`.
+ ctx.emit(Inst::xmm_rm_r(sse_op, src, dst));
+ } else {
+ panic!("unable to emit insertlane for type: {}", ty)
+ }
+}
+
+/// Emits an int comparison instruction.
+///
+/// Note: make sure that there are no instructions modifying the flags between a call to this
+/// function and the use of the flags!
+fn emit_cmp(ctx: Ctx, insn: IRInst) {
+ let ty = ctx.input_ty(insn, 0);
+
+ let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+
+ // TODO Try to commute the operands (and invert the condition) if one is an immediate.
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
+
+ // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
+ // us dst - src at the machine instruction level, so invert operands.
+ ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
+}
+
+/// A specification for a fcmp emission.
+enum FcmpSpec {
+ /// Normal flow.
+ Normal,
+
+ /// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that
+ /// happens with `InvertedEqualOrConditions`.
+ ///
+ /// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
+ /// sequence of instructions) that check for an "AND" combination of condition codes; see for
+ /// instance lowering of Select.
+ InvertEqual,
+}
+
+/// This explains how to interpret the results of an fcmp instruction.
+enum FcmpCondResult {
+ /// The given condition code must be set.
+ Condition(CC),
+
+ /// Both condition codes must be set.
+ AndConditions(CC, CC),
+
+ /// Either of the condition codes must be set.
+ OrConditions(CC, CC),
+
+ /// The associated spec was set to `FcmpSpec::InvertEqual` and Equal has been inverted. Either
+ /// of the condition codes must be set, and the user must invert the meaning of the resulting
+ /// condition codes. When the spec is set to `FcmpSpec::Normal`, this case can't be reached.
+ InvertedEqualOrConditions(CC, CC),
+}
+
+/// Emits a float comparison instruction.
+///
+/// Note: make sure that there are no instructions modifying the flags between a call to this
+/// function and the use of the flags!
+fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
+ let (flip_operands, inverted_equal) = match cond_code {
+ FloatCC::LessThan
+ | FloatCC::LessThanOrEqual
+ | FloatCC::UnorderedOrGreaterThan
+ | FloatCC::UnorderedOrGreaterThanOrEqual => {
+ cond_code = cond_code.reverse();
+ (true, false)
+ }
+ FloatCC::Equal => {
+ let inverted_equal = match spec {
+ FcmpSpec::Normal => false,
+ FcmpSpec::InvertEqual => {
+ cond_code = FloatCC::NotEqual; // same as .inverse()
+ true
+ }
+ };
+ (false, inverted_equal)
+ }
+ _ => (false, false),
+ };
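+ // For example, `fcmp lt x, y` is handled as `fcmp gt y, x` above, so only the
+ // "greater-than"-style condition codes need to be materialized after the comparison.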
+
+ // A single UCOMISS/UCOMISD comparison sets all the flags we need; the condition
+ // code(s) to test are selected from `cond_code` below.
+ let op = match ctx.input_ty(insn, 0) {
+ types::F32 => SseOpcode::Ucomiss,
+ types::F64 => SseOpcode::Ucomisd,
+ _ => panic!("Bad input type to Fcmp"),
+ };
+
+ let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
+ let (lhs_input, rhs_input) = if flip_operands {
+ (inputs[1], inputs[0])
+ } else {
+ (inputs[0], inputs[1])
+ };
+ let lhs = put_input_in_reg(ctx, lhs_input);
+ let rhs = input_to_reg_mem(ctx, rhs_input);
+ ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
+
+ let cond_result = match cond_code {
+ FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
+ FloatCC::NotEqual if inverted_equal => {
+ FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ)
+ }
+ FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ),
+ _ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)),
+ };
+
+ cond_result
+}
+
+fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature {
+ let mut sig = Signature::new(call_conv);
+ for i in 0..ctx.num_inputs(insn) {
+ sig.params.push(AbiParam::new(ctx.input_ty(insn, i)));
+ }
+ for i in 0..ctx.num_outputs(insn) {
+ sig.returns.push(AbiParam::new(ctx.output_ty(insn, i)));
+ }
+ if call_conv.extends_baldrdash() {
+ // Adds the special VMContext parameter to the signature.
+ sig.params
+ .push(AbiParam::special(ptr_ty, ArgumentPurpose::VMContext));
+ }
+ sig
+}
+
+fn emit_vm_call<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ flags: &Flags,
+ triple: &Triple,
+ libcall: LibCall,
+ insn: IRInst,
+ inputs: SmallVec<[InsnInput; 4]>,
+ outputs: SmallVec<[InsnOutput; 2]>,
+) -> CodegenResult<()> {
+ let extname = ExternalName::LibCall(libcall);
+
+ let dist = if flags.use_colocated_libcalls() {
+ RelocDistance::Near
+ } else {
+ RelocDistance::Far
+ };
+
+ // TODO avoid recreating signatures for every single Libcall function.
+ let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
+ let sig = make_libcall_sig(ctx, insn, call_conv, types::I64);
+ let caller_conv = ctx.abi().call_conv();
+
+ let mut abi = X64ABICaller::from_func(&sig, &extname, dist, caller_conv)?;
+
+ abi.emit_stack_pre_adjust(ctx);
+
+ let vm_context = if call_conv.extends_baldrdash() { 1 } else { 0 };
+ assert_eq!(inputs.len() + vm_context, abi.num_args());
+
+ for (i, input) in inputs.iter().enumerate() {
+ let arg_reg = put_input_in_reg(ctx, *input);
+ abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
+ }
+ if call_conv.extends_baldrdash() {
+ let vm_context_vreg = ctx
+ .get_vm_context()
+ .expect("should have a VMContext to pass to libcall funcs");
+ abi.emit_copy_reg_to_arg(ctx, inputs.len(), vm_context_vreg);
+ }
+
+ abi.emit_call(ctx);
+ for (i, output) in outputs.iter().enumerate() {
+ let retval_reg = get_output_reg(ctx, *output);
+ abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
+ }
+ abi.emit_stack_post_adjust(ctx);
+
+ Ok(())
+}
+
+/// If the given input is a shift by a constant amount less than or equal to 3, returns the shifted
+/// input and the shift amount.
+/// The goal is to embed it within an address mode.
+fn matches_small_constant_shift<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ spec: InsnInput,
+) -> Option<(InsnInput, u8)> {
+ matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
+ match input_to_imm(
+ ctx,
+ InsnInput {
+ insn: shift,
+ input: 1,
+ },
+ ) {
+ Some(shift_amt) if shift_amt <= 3 => Some((
+ InsnInput {
+ insn: shift,
+ input: 0,
+ },
+ shift_amt as u8,
+ )),
+ _ => None,
+ }
+ })
+}
+
+/// Lowers an instruction to one of the x86 addressing modes.
+///
+/// Note: the 32-bit offset in Cranelift has to be sign-extended, which matches x86's behavior.
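+///
+/// For example (illustrative): an `iadd` of `x` with an `ishl` of `y` by a constant 2, given a
+/// 16-byte offset, can be lowered to `Amode::imm_reg_reg_shift(16, x, y, 2)`, i.e. the x86
+/// address `16(%x, %y, 4)`.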
+fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i32) -> Amode {
+ let flags = ctx
+ .memflags(spec.insn)
+ .expect("Instruction with amode should have memflags");
+
+ // We either have an `iadd` whose operands we can fold into the addressing mode, or some
+ // other input; in both cases we also fold in the final offset.
+ if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
+ debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
+ let add_inputs = &[
+ InsnInput {
+ insn: add,
+ input: 0,
+ },
+ InsnInput {
+ insn: add,
+ input: 1,
+ },
+ ];
+
+ // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
+ // aren't happening in the wasm case. We could do better, given some range analysis.
+ let (base, index, shift) = if let Some((shift_input, shift_amt)) =
+ matches_small_constant_shift(ctx, add_inputs[0])
+ {
+ (
+ put_input_in_reg(ctx, add_inputs[1]),
+ put_input_in_reg(ctx, shift_input),
+ shift_amt,
+ )
+ } else if let Some((shift_input, shift_amt)) =
+ matches_small_constant_shift(ctx, add_inputs[1])
+ {
+ (
+ put_input_in_reg(ctx, add_inputs[0]),
+ put_input_in_reg(ctx, shift_input),
+ shift_amt,
+ )
+ } else {
+ for i in 0..=1 {
+ let input = ctx.get_input(add, i);
+
+ // Try to pierce through uextend.
+ if let Some(uextend) = matches_input(
+ ctx,
+ InsnInput {
+ insn: add,
+ input: i,
+ },
+ Opcode::Uextend,
+ ) {
+ if let Some(cst) = ctx.get_input(uextend, 0).constant {
+ // Zero the upper bits.
+ let input_size = ctx.input_ty(uextend, 0).bits() as u64;
+ let shift: u64 = 64 - input_size;
+ let uext_cst: u64 = (cst << shift) >> shift;
+
+ let final_offset = (offset as i64).wrapping_add(uext_cst as i64);
+ if low32_will_sign_extend_to_64(final_offset as u64) {
+ let base = put_input_in_reg(ctx, add_inputs[1 - i]);
+ return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
+ }
+ }
+ }
+
+ // If it's a constant, add it directly!
+ if let Some(cst) = input.constant {
+ let final_offset = (offset as i64).wrapping_add(cst as i64);
+ if low32_will_sign_extend_to_64(final_offset as u64) {
+ let base = put_input_in_reg(ctx, add_inputs[1 - i]);
+ return Amode::imm_reg(final_offset as u32, base).with_flags(flags);
+ }
+ }
+ }
+
+ (
+ put_input_in_reg(ctx, add_inputs[0]),
+ put_input_in_reg(ctx, add_inputs[1]),
+ 0,
+ )
+ };
+
+ return Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags);
+ }
+
+ let input = put_input_in_reg(ctx, spec);
+ Amode::imm_reg(offset as u32, input).with_flags(flags)
+}
+
+//=============================================================================
+// Top-level instruction lowering entry point, for one instruction.
+
+/// Actually codegen an instruction's results into registers.
+fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
+ ctx: &mut C,
+ insn: IRInst,
+ flags: &Flags,
+ triple: &Triple,
+) -> CodegenResult<()> {
+ let op = ctx.data(insn).opcode();
+
+ let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
+ .map(|i| InsnInput { insn, input: i })
+ .collect();
+ let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
+ .map(|i| InsnOutput { insn, output: i })
+ .collect();
+
+ let ty = if outputs.len() > 0 {
+ Some(ctx.output_ty(insn, 0))
+ } else {
+ None
+ };
+
+ match op {
+ Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
+ let value = ctx
+ .get_constant(insn)
+ .expect("constant value for iconst et al");
+ let dst = get_output_reg(ctx, outputs[0]);
+ for inst in Inst::gen_constant(dst, value, ty.unwrap(), |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::Iadd
+ | Opcode::IaddIfcout
+ | Opcode::SaddSat
+ | Opcode::UaddSat
+ | Opcode::Isub
+ | Opcode::SsubSat
+ | Opcode::UsubSat
+ | Opcode::Imul
+ | Opcode::AvgRound
+ | Opcode::Band
+ | Opcode::Bor
+ | Opcode::Bxor => {
+ let ty = ty.unwrap();
+ if ty.lane_count() > 1 {
+ let sse_op = match op {
+ Opcode::Iadd => match ty {
+ types::I8X16 => SseOpcode::Paddb,
+ types::I16X8 => SseOpcode::Paddw,
+ types::I32X4 => SseOpcode::Paddd,
+ types::I64X2 => SseOpcode::Paddq,
+ _ => panic!("Unsupported type for packed iadd instruction: {}", ty),
+ },
+ Opcode::SaddSat => match ty {
+ types::I8X16 => SseOpcode::Paddsb,
+ types::I16X8 => SseOpcode::Paddsw,
+ _ => panic!("Unsupported type for packed sadd_sat instruction: {}", ty),
+ },
+ Opcode::UaddSat => match ty {
+ types::I8X16 => SseOpcode::Paddusb,
+ types::I16X8 => SseOpcode::Paddusw,
+ _ => panic!("Unsupported type for packed uadd_sat instruction: {}", ty),
+ },
+ Opcode::Isub => match ty {
+ types::I8X16 => SseOpcode::Psubb,
+ types::I16X8 => SseOpcode::Psubw,
+ types::I32X4 => SseOpcode::Psubd,
+ types::I64X2 => SseOpcode::Psubq,
+ _ => panic!("Unsupported type for packed isub instruction: {}", ty),
+ },
+ Opcode::SsubSat => match ty {
+ types::I8X16 => SseOpcode::Psubsb,
+ types::I16X8 => SseOpcode::Psubsw,
+ _ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
+ },
+ Opcode::UsubSat => match ty {
+ types::I8X16 => SseOpcode::Psubusb,
+ types::I16X8 => SseOpcode::Psubusw,
+ _ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
+ },
+ Opcode::Imul => match ty {
+ types::I16X8 => SseOpcode::Pmullw,
+ types::I32X4 => SseOpcode::Pmulld,
+ types::I64X2 => {
+ // Note for I64X2 we describe a lane A as being composed of a
+ // 32-bit upper half "Ah" and a 32-bit lower half "Al".
+ // The 64-bit product can then be written, long-hand in 32-bit pieces, as:
+ // Ah Al
+ // * Bh Bl
+ // -----
+ // Al * Bl
+ // + (Ah * Bl) << 32
+ // + (Al * Bh) << 32
+ //
+ // So for each lane we will compute:
+ // A * B = (Al * Bl) + ((Ah * Bl) + (Al * Bh)) << 32
+ //
+ // Note, the algorithm will use pmuludq, which operates directly on
+ // the lower 32-bit (Al or Bl) of a lane and writes the result
+ // to the full 64-bits of the lane of the destination. For this
+ // reason we don't need shifts to isolate the lower 32-bits, however
+ // we will need to use shifts to isolate the high 32-bits when doing
+ // calculations, i.e. Ah == A >> 32
+ //
+ // The full sequence then is as follows:
+ // A' = A
+ // A' = A' >> 32
+ // A' = Ah' * Bl
+ // B' = B
+ // B' = B' >> 32
+ // B' = Bh' * Al
+ // B' = B' + A'
+ // B' = B' << 32
+ // A' = A
+ // A' = Al' * Bl
+ // A' = A' + B'
+ // dst = A'
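+ //
+ // As a concrete check with illustrative numbers: for A = 2*2^32 + 3 and
+ // B = 5*2^32 + 7, the formula gives (3*7) + ((2*7 + 3*5) << 32), i.e.
+ // 21 + (29 << 32), which is exactly A*B modulo 2^64.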
+
+ // Get inputs rhs=A and lhs=B and the dst register
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // A' = A
+ let rhs_1 = ctx.alloc_tmp(RegClass::V128, types::I64X2);
+ ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
+
+ // A' = A' >> 32
+ // A' = Ah' * Bl
+ ctx.emit(Inst::xmm_rmi_reg(
+ SseOpcode::Psrlq,
+ RegMemImm::imm(32),
+ rhs_1,
+ ));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pmuludq,
+ RegMem::reg(lhs.clone()),
+ rhs_1,
+ ));
+
+ // B' = B
+ let lhs_1 = ctx.alloc_tmp(RegClass::V128, types::I64X2);
+ ctx.emit(Inst::gen_move(lhs_1, lhs, ty));
+
+ // B' = B' >> 32
+ // B' = Bh' * Al
+ ctx.emit(Inst::xmm_rmi_reg(
+ SseOpcode::Psrlq,
+ RegMemImm::imm(32),
+ lhs_1,
+ ));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1));
+
+ // B' = B' + A'
+ // B' = B' << 32
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Paddq,
+ RegMem::reg(rhs_1.to_reg()),
+ lhs_1,
+ ));
+ ctx.emit(Inst::xmm_rmi_reg(
+ SseOpcode::Psllq,
+ RegMemImm::imm(32),
+ lhs_1,
+ ));
+
+ // A' = A
+ // A' = Al' * Bl
+ // A' = A' + B'
+ // dst = A'
+ ctx.emit(Inst::gen_move(rhs_1, rhs, ty));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pmuludq,
+ RegMem::reg(lhs.clone()),
+ rhs_1,
+ ));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Paddq,
+ RegMem::reg(lhs_1.to_reg()),
+ rhs_1,
+ ));
+ ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
+ return Ok(());
+ }
+ _ => panic!("Unsupported type for packed imul instruction: {}", ty),
+ },
+ Opcode::AvgRound => match ty {
+ types::I8X16 => SseOpcode::Pavgb,
+ types::I16X8 => SseOpcode::Pavgw,
+ _ => panic!("Unsupported type for packed avg_round instruction: {}", ty),
+ },
+ Opcode::Band => match ty {
+ types::F32X4 => SseOpcode::Andps,
+ types::F64X2 => SseOpcode::Andpd,
+ _ => SseOpcode::Pand,
+ },
+ Opcode::Bor => match ty {
+ types::F32X4 => SseOpcode::Orps,
+ types::F64X2 => SseOpcode::Orpd,
+ _ => SseOpcode::Por,
+ },
+ Opcode::Bxor => match ty {
+ types::F32X4 => SseOpcode::Xorps,
+ types::F64X2 => SseOpcode::Xorpd,
+ _ => SseOpcode::Pxor,
+ },
+ _ => panic!("Unsupported packed instruction: {}", op),
+ };
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // Move the `lhs` to the same register as `dst`.
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+ ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
+ } else {
+ let is_64 = ty == types::I64;
+ let alu_op = match op {
+ Opcode::Iadd | Opcode::IaddIfcout => AluRmiROpcode::Add,
+ Opcode::Isub => AluRmiROpcode::Sub,
+ Opcode::Imul => AluRmiROpcode::Mul,
+ Opcode::Band => AluRmiROpcode::And,
+ Opcode::Bor => AluRmiROpcode::Or,
+ Opcode::Bxor => AluRmiROpcode::Xor,
+ _ => unreachable!(),
+ };
+
+ let (lhs, rhs) = match op {
+ Opcode::Iadd
+ | Opcode::IaddIfcout
+ | Opcode::Imul
+ | Opcode::Band
+ | Opcode::Bor
+ | Opcode::Bxor => {
+ // For commutative operations, try to commute operands if one is an
+ // immediate.
+ if let Some(imm) = input_to_sext_imm(ctx, inputs[0]) {
+ (put_input_in_reg(ctx, inputs[1]), RegMemImm::imm(imm))
+ } else {
+ (
+ put_input_in_reg(ctx, inputs[0]),
+ input_to_reg_mem_imm(ctx, inputs[1]),
+ )
+ }
+ }
+ Opcode::Isub => (
+ put_input_in_reg(ctx, inputs[0]),
+ input_to_reg_mem_imm(ctx, inputs[1]),
+ ),
+ _ => unreachable!(),
+ };
+
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::mov_r_r(true, lhs, dst));
+ ctx.emit(Inst::alu_rmi_r(is_64, alu_op, rhs, dst));
+ }
+ }
+
+ Opcode::BandNot => {
+ let ty = ty.unwrap();
+ debug_assert!(ty.is_vector() && ty.bytes() == 16);
+ let lhs = input_to_reg_mem(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let sse_op = match ty {
+ types::F32X4 => SseOpcode::Andnps,
+ types::F64X2 => SseOpcode::Andnpd,
+ _ => SseOpcode::Pandn,
+ };
+ // Note the flipping of operands: the `rhs` operand is used as the destination instead
+ // of the `lhs` as in the other bit operations above (e.g. `band`).
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst));
+ }
+
+ Opcode::Iabs => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if ty.is_vector() {
+ let opcode = match ty {
+ types::I8X16 => SseOpcode::Pabsb,
+ types::I16X8 => SseOpcode::Pabsw,
+ types::I32X4 => SseOpcode::Pabsd,
+ _ => panic!("Unsupported type for packed iabs instruction: {}", ty),
+ };
+ ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
+ } else {
+ unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
+ }
+ }
+
+ Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ if ty.is_vector() {
+ let sse_op = match op {
+ Opcode::Imax => match ty {
+ types::I8X16 => SseOpcode::Pmaxsb,
+ types::I16X8 => SseOpcode::Pmaxsw,
+ types::I32X4 => SseOpcode::Pmaxsd,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ Opcode::Umax => match ty {
+ types::I8X16 => SseOpcode::Pmaxub,
+ types::I16X8 => SseOpcode::Pmaxuw,
+ types::I32X4 => SseOpcode::Pmaxud,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ Opcode::Imin => match ty {
+ types::I8X16 => SseOpcode::Pminsb,
+ types::I16X8 => SseOpcode::Pminsw,
+ types::I32X4 => SseOpcode::Pminsd,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ Opcode::Umin => match ty {
+ types::I8X16 => SseOpcode::Pminub,
+ types::I16X8 => SseOpcode::Pminuw,
+ types::I32X4 => SseOpcode::Pminud,
+ _ => panic!("Unsupported type for packed {} instruction: {}", op, ty),
+ },
+ _ => unreachable!("This is a bug: the external and internal `match op` should be over the same opcodes."),
+ };
+
+ // Move the `lhs` to the same register as `dst`.
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+ ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
+ } else {
+ panic!("Unsupported type for {} instruction: {}", op, ty);
+ }
+ }
+
+ Opcode::Bnot => {
+ let ty = ty.unwrap();
+ let size = ty.bytes() as u8;
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, ty));
+
+ if ty.is_vector() {
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::equals(ty, RegMem::from(tmp), tmp));
+ ctx.emit(Inst::xor(ty, RegMem::from(tmp), dst));
+ } else if ty.is_bool() {
+ unimplemented!("bool bnot")
+ } else {
+ ctx.emit(Inst::not(size, dst));
+ }
+ }
+
+ Opcode::Bitselect => {
+ let ty = ty.unwrap();
+ let condition = put_input_in_reg(ctx, inputs[0]);
+ let if_true = put_input_in_reg(ctx, inputs[1]);
+ let if_false = input_to_reg_mem(ctx, inputs[2]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ if ty.is_vector() {
+ let tmp1 = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::gen_move(tmp1, if_true, ty));
+ ctx.emit(Inst::and(ty, RegMem::reg(condition.clone()), tmp1));
+
+ let tmp2 = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::gen_move(tmp2, condition, ty));
+ ctx.emit(Inst::and_not(ty, if_false, tmp2));
+
+ ctx.emit(Inst::gen_move(dst, tmp2.to_reg(), ty));
+ ctx.emit(Inst::or(ty, RegMem::from(tmp1), dst));
+ } else {
+ unimplemented!("scalar bitselect")
+ }
+ }
+
+ Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => {
+ let dst_ty = ctx.output_ty(insn, 0);
+ debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty);
+
+ let (size, lhs) = match dst_ty {
+ types::I8 | types::I16 => match op {
+ Opcode::Ishl => (4, put_input_in_reg(ctx, inputs[0])),
+ Opcode::Ushr => (
+ 4,
+ extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo32),
+ ),
+ Opcode::Sshr => (
+ 4,
+ extend_input_to_reg(ctx, inputs[0], ExtSpec::SignExtendTo32),
+ ),
+ Opcode::Rotl | Opcode::Rotr => {
+ (dst_ty.bytes() as u8, put_input_in_reg(ctx, inputs[0]))
+ }
+ _ => unreachable!(),
+ },
+ types::I32 | types::I64 => (dst_ty.bytes() as u8, put_input_in_reg(ctx, inputs[0])),
+ _ => unreachable!("unhandled output type for shift/rotates: {}", dst_ty),
+ };
+
+ let (count, rhs) = if let Some(cst) = ctx.get_input(insn, 1).constant {
+ // Mask count, according to Cranelift's semantics.
+ let cst = (cst as u8) & (dst_ty.bits() as u8 - 1);
+ (Some(cst), None)
+ } else {
+ (None, Some(put_input_in_reg(ctx, inputs[1])))
+ };
+
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let shift_kind = match op {
+ Opcode::Ishl => ShiftKind::ShiftLeft,
+ Opcode::Ushr => ShiftKind::ShiftRightLogical,
+ Opcode::Sshr => ShiftKind::ShiftRightArithmetic,
+ Opcode::Rotl => ShiftKind::RotateLeft,
+ Opcode::Rotr => ShiftKind::RotateRight,
+ _ => unreachable!(),
+ };
+
+ let w_rcx = Writable::from_reg(regs::rcx());
+ ctx.emit(Inst::mov_r_r(true, lhs, dst));
+ if count.is_none() {
+ ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx));
+ }
+ ctx.emit(Inst::shift_r(size, shift_kind, count, dst));
+ }
+
+ Opcode::Ineg => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ if ty.is_vector() {
+ // Zeroes out a register and then does a packed subtraction
+ // of the input from that register.
+
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
+
+ let subtract_opcode = match ty {
+ types::I8X16 => SseOpcode::Psubb,
+ types::I16X8 => SseOpcode::Psubw,
+ types::I32X4 => SseOpcode::Psubd,
+ types::I64X2 => SseOpcode::Psubq,
+ _ => panic!("Unsupported type for Ineg instruction, found {}", ty),
+ };
+
+ // Note we must zero out a tmp instead of using the destination register, since
+ // the destination could be an alias for the source input register.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pxor,
+ RegMem::reg(tmp.to_reg()),
+ tmp,
+ ));
+ ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp));
+ ctx.emit(Inst::xmm_unary_rm_r(
+ SseOpcode::Movapd,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ } else {
+ let size = ty.bytes() as u8;
+ let src = put_input_in_reg(ctx, inputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ ctx.emit(Inst::neg(size, dst));
+ }
+ }
+
+ Opcode::Clz => {
+ // TODO when the x86 flags have use_lzcnt, we can use LZCNT.
+
+ // General formula using bit-scan reverse (BSR):
+ // mov -1, %dst
+ // bsr %src, %tmp
+ // cmovz %dst, %tmp
+ // mov $(size_bits - 1), %dst
+ // sub %tmp, %dst
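+ //
+ // For example, a 32-bit src of 0x0000_00f0 gives BSR = 7, so dst = 31 - 7 = 24;
+ // for src == 0 the CMOVZ substitutes -1, giving dst = 31 - (-1) = 32.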
+
+ let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
+ types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32),
+ a if a == types::I32 || a == types::I64 => (None, a),
+ _ => unreachable!(),
+ };
+
+ let src = if let Some(ext_spec) = ext_spec {
+ RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
+ } else {
+ input_to_reg_mem(ctx, inputs[0])
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let tmp = ctx.alloc_tmp(RegClass::I64, ty);
+ ctx.emit(Inst::imm(
+ OperandSize::from_bytes(ty.bytes()),
+ u64::max_value(),
+ dst,
+ ));
+
+ ctx.emit(Inst::unary_rm_r(
+ ty.bytes() as u8,
+ UnaryRmROpcode::Bsr,
+ src,
+ tmp,
+ ));
+
+ ctx.emit(Inst::cmove(
+ ty.bytes() as u8,
+ CC::Z,
+ RegMem::reg(dst.to_reg()),
+ tmp,
+ ));
+
+ ctx.emit(Inst::imm(
+ OperandSize::from_bytes(ty.bytes()),
+ ty.bits() as u64 - 1,
+ dst,
+ ));
+
+ ctx.emit(Inst::alu_rmi_r(
+ ty == types::I64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+
+ Opcode::Ctz => {
+ // TODO when the x86 flags have use_bmi1, we can use TZCNT.
+
+ // General formula using bit-scan forward (BSF):
+ // bsf %src, %dst
+ // mov $(size_bits), %tmp
+ // cmovz %tmp, %dst
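+ //
+ // For example, a 32-bit src of 0b1000 gives BSF = 3, which is already the answer;
+ // for src == 0 the CMOVZ substitutes size_bits, i.e. 32.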
+ let ty = ctx.input_ty(insn, 0);
+ let ty = if ty.bits() < 32 { types::I32 } else { ty };
+ debug_assert!(ty == types::I32 || ty == types::I64);
+
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let tmp = ctx.alloc_tmp(RegClass::I64, ty);
+ ctx.emit(Inst::imm(OperandSize::Size32, ty.bits() as u64, tmp));
+
+ ctx.emit(Inst::unary_rm_r(
+ ty.bytes() as u8,
+ UnaryRmROpcode::Bsf,
+ src,
+ dst,
+ ));
+
+ ctx.emit(Inst::cmove(
+ ty.bytes() as u8,
+ CC::Z,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+
+ Opcode::Popcnt => {
+ // TODO when the x86 flags have use_popcnt, we can use the popcnt instruction.
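+ //
+ // Without a native POPCNT we emit the classic SWAR sequence below: three
+ // shift/mask/subtract rounds with the 0x7777... mask leave each nibble holding its
+ // own bit count, a shift-by-4 plus an 0x0F0F... mask folds nibble pairs into bytes,
+ // and a multiply by 0x0101... accumulates all byte counts into the top byte, which
+ // the final shift extracts.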
+
+ let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
+ types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32),
+ a if a == types::I32 || a == types::I64 => (None, a),
+ _ => unreachable!(),
+ };
+
+ let src = if let Some(ext_spec) = ext_spec {
+ RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
+ } else {
+ input_to_reg_mem(ctx, inputs[0])
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ if ty == types::I64 {
+ let is_64 = true;
+
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let cst = ctx.alloc_tmp(RegClass::I64, types::I64);
+
+ // mov src, tmp1
+ ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // mov 0x7777_7777_7777_7777, cst
+ ctx.emit(Inst::imm(OperandSize::Size64, 0x7777777777777777, cst));
+
+ // andq cst, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ tmp1,
+ ));
+
+ // mov src, tmp2
+ ctx.emit(Inst::mov64_rm_r(src, tmp2));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and cst, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and cst, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // mov tmp2, dst
+ ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst));
+
+ // shr $4, dst
+ ctx.emit(Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(4), dst));
+
+ // add tmp2, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp2.to_reg()),
+ dst,
+ ));
+
+ // mov $0x0F0F_0F0F_0F0F_0F0F, cst
+ ctx.emit(Inst::imm(OperandSize::Size64, 0x0F0F0F0F0F0F0F0F, cst));
+
+ // and cst, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::reg(cst.to_reg()),
+ dst,
+ ));
+
+ // mov $0x0101_0101_0101_0101, cst
+ ctx.emit(Inst::imm(OperandSize::Size64, 0x0101010101010101, cst));
+
+ // mul cst, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Mul,
+ RegMemImm::reg(cst.to_reg()),
+ dst,
+ ));
+
+ // shr $56, dst
+ ctx.emit(Inst::shift_r(
+ 8,
+ ShiftKind::ShiftRightLogical,
+ Some(56),
+ dst,
+ ));
+ } else {
+ assert_eq!(ty, types::I32);
+ let is_64 = false;
+
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+
+ // mov src, tmp1
+ ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // andq $0x7777_7777, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x77777777),
+ tmp1,
+ ));
+
+ // mov src, tmp2
+ ctx.emit(Inst::mov64_rm_r(src, tmp2));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and 0x7777_7777, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x77777777),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // shr $1, tmp1
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(1),
+ tmp1,
+ ));
+
+ // and $0x7777_7777, tmp1
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x77777777),
+ tmp1,
+ ));
+
+ // sub tmp1, tmp2
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Sub,
+ RegMemImm::reg(tmp1.to_reg()),
+ tmp2,
+ ));
+
+ // mov tmp2, dst
+ ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst));
+
+ // shr $4, dst
+ ctx.emit(Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(4), dst));
+
+ // add tmp2, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Add,
+ RegMemImm::reg(tmp2.to_reg()),
+ dst,
+ ));
+
+ // and $0x0F0F_0F0F, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::And,
+ RegMemImm::imm(0x0F0F0F0F),
+ dst,
+ ));
+
+ // mul $0x0101_0101, dst
+ ctx.emit(Inst::alu_rmi_r(
+ is_64,
+ AluRmiROpcode::Mul,
+ RegMemImm::imm(0x01010101),
+ dst,
+ ));
+
+ // shr $24, dst
+ ctx.emit(Inst::shift_r(
+ 4,
+ ShiftKind::ShiftRightLogical,
+ Some(24),
+ dst,
+ ));
+ }
+ }
+
+ Opcode::IsNull | Opcode::IsInvalid => {
+ // Null references are represented by the constant value 0; invalid references are
+ // represented by the constant value -1. See `define_reftypes()` in
+ // `meta/src/isa/x86/encodings.rs` to confirm.
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ let imm = match op {
+ Opcode::IsNull => {
+ // TODO could use `test src, src` for IsNull
+ 0
+ }
+ Opcode::IsInvalid => {
+ // We can do a 32-bit comparison even in 64-bit mode, as the constant is then
+ // sign-extended.
+ 0xffffffff
+ }
+ _ => unreachable!(),
+ };
+ ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, RegMemImm::imm(imm), src));
+ ctx.emit(Inst::setcc(CC::Z, dst));
+ }
+
+ Opcode::Uextend
+ | Opcode::Sextend
+ | Opcode::Bint
+ | Opcode::Breduce
+ | Opcode::Bextend
+ | Opcode::Ireduce => {
+ let src_ty = ctx.input_ty(insn, 0);
+ let dst_ty = ctx.output_ty(insn, 0);
+
+ // Sextend requires a sign-extended move, but all the other opcodes are simply a move
+ // from a zero-extended source. Here is why this works, in each case:
+ //
+ // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we merely need to
+ // zero-extend here.
+ //
+ // - Breduce, Bextend: changing width of a boolean. We represent a bool as a 0 or 1, so
+ // again, this is a zero-extend / no-op.
+ //
+ // - Ireduce: changing width of an integer. Smaller ints are stored with undefined
+ // high-order bits, so we can simply do a copy.
+
+ if src_ty == types::I32 && dst_ty == types::I64 && op != Opcode::Sextend {
+ // As a particular x64 pattern-matching opportunity: all the 32-bit ALU opcodes
+ // zero-extend into the upper 32 bits, so we don't even need to generate a
+ // zero-extending move in this case.
+ // TODO add loads and shifts here.
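+ //
+ // For example, `uextend.i64` of an `iadd.i32` can simply reuse the add's result
+ // register, because the 32-bit add already cleared bits 63:32.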
+ if let Some(_) = matches_input_any(
+ ctx,
+ inputs[0],
+ &[
+ Opcode::Iadd,
+ Opcode::IaddIfcout,
+ Opcode::Isub,
+ Opcode::Imul,
+ Opcode::Band,
+ Opcode::Bor,
+ Opcode::Bxor,
+ ],
+ ) {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, types::I64));
+ return Ok(());
+ }
+ }
+
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let ext_mode = ExtMode::new(src_ty.bits(), dst_ty.bits());
+ assert_eq!(
+ src_ty.bits() < dst_ty.bits(),
+ ext_mode.is_some(),
+ "unexpected extension: {} -> {}",
+ src_ty,
+ dst_ty
+ );
+
+ if let Some(ext_mode) = ext_mode {
+ if op == Opcode::Sextend {
+ ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst));
+ } else {
+ ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst));
+ }
+ } else {
+ ctx.emit(Inst::mov64_rm_r(src, dst));
+ }
+ }
+
+ Opcode::Icmp => {
+ let condcode = ctx.data(insn).cond_code().unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.input_ty(insn, 0);
+ if !ty.is_vector() {
+ emit_cmp(ctx, insn);
+ let cc = CC::from_intcc(condcode);
+ ctx.emit(Inst::setcc(cc, dst));
+ } else {
+ assert_eq!(ty.bits(), 128);
+ let eq = |ty| match ty {
+ types::I8X16 => SseOpcode::Pcmpeqb,
+ types::I16X8 => SseOpcode::Pcmpeqw,
+ types::I32X4 => SseOpcode::Pcmpeqd,
+ types::I64X2 => SseOpcode::Pcmpeqq,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let gt = |ty| match ty {
+ types::I8X16 => SseOpcode::Pcmpgtb,
+ types::I16X8 => SseOpcode::Pcmpgtw,
+ types::I32X4 => SseOpcode::Pcmpgtd,
+ types::I64X2 => SseOpcode::Pcmpgtq,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let maxu = |ty| match ty {
+ types::I8X16 => SseOpcode::Pmaxub,
+ types::I16X8 => SseOpcode::Pmaxuw,
+ types::I32X4 => SseOpcode::Pmaxud,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let mins = |ty| match ty {
+ types::I8X16 => SseOpcode::Pminsb,
+ types::I16X8 => SseOpcode::Pminsw,
+ types::I32X4 => SseOpcode::Pminsd,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+ let minu = |ty| match ty {
+ types::I8X16 => SseOpcode::Pminub,
+ types::I16X8 => SseOpcode::Pminuw,
+ types::I32X4 => SseOpcode::Pminud,
+ _ => panic!(
+ "Unable to find an instruction for {} for type: {}",
+ condcode, ty
+ ),
+ };
+
+ // Here we decide which operand to use as the read/write `dst` (ModRM reg field)
+ // and which to use as the read `input` (ModRM r/m field). In the normal case we
+ // use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for
+ // the less-than cases so that we can reuse the greater-than implementation.
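+ // (For example, a signed less-than is computed as a greater-than with the operands
+ // swapped, since SSE provides the PCMPGT* family but no packed less-than instruction.)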
+ let input = match condcode {
+ IntCC::SignedLessThan
+ | IntCC::SignedLessThanOrEqual
+ | IntCC::UnsignedLessThan
+ | IntCC::UnsignedLessThanOrEqual => {
+ let lhs = input_to_reg_mem(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ lhs
+ }
+ _ => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+ rhs
+ }
+ };
+
+ match condcode {
+ IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)),
+ IntCC::NotEqual => {
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
+ // Emit all 1s into the `tmp` register.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
+ // Invert the result of the `PCMPEQ*`.
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
+ }
+ IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
+ ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
+ }
+ IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
+ ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
+ }
+ IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
+ ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));
+ // Emit all 1s into the `tmp` register.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
+ // Invert the result of the `PCMPEQ*`.
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
+ }
+ IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
+ ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst));
+ ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
+ }
+ _ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
+ }
+ }
+ }
+
+ Opcode::Fcmp => {
+ let cond_code = ctx.data(insn).fp_cond_code().unwrap();
+ let input_ty = ctx.input_ty(insn, 0);
+ if !input_ty.is_vector() {
+ // Unordered is returned by setting ZF, PF, CF <- 111
+ // Greater than by ZF, PF, CF <- 000
+ // Less than by ZF, PF, CF <- 001
+ // Equal by ZF, PF, CF <- 100
+ //
+ // Checking the result of comiss is somewhat annoying because you don't have setcc
+ // instructions that explicitly check simultaneously for the condition (i.e. eq, le,
+ // gt, etc) *and* orderedness.
+ //
+ // That can mean more than one setcc check followed by a logical "and" or "or" to
+ // combine them, in some cases. However, since a set parity bit means the result was
+ // considered unordered, and an unordered result also sets both the ZF and CF flag
+ // bits, we can get away with using one setcc for most condition codes.
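+ // For example, after a comis{s,d}, `GreaterThan` holds iff ZF=0 and CF=0, which a
+ // single setcc for CC::NBE captures, and an unordered result (ZF=PF=CF=1) correctly
+ // yields false. Conditions like `Equal`, on the other hand, need ZF=1 *and* PF=0,
+ // hence the AndConditions arm below with two setcc instructions combined by an AND.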
+
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ match emit_fcmp(ctx, insn, cond_code, FcmpSpec::Normal) {
+ FcmpCondResult::Condition(cc) => {
+ ctx.emit(Inst::setcc(cc, dst));
+ }
+ FcmpCondResult::AndConditions(cc1, cc2) => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::setcc(cc1, tmp));
+ ctx.emit(Inst::setcc(cc2, dst));
+ ctx.emit(Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::And,
+ RegMemImm::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+ FcmpCondResult::OrConditions(cc1, cc2) => {
+ let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::setcc(cc1, tmp));
+ ctx.emit(Inst::setcc(cc2, dst));
+ ctx.emit(Inst::alu_rmi_r(
+ false,
+ AluRmiROpcode::Or,
+ RegMemImm::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+ FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+ }
+ } else {
+ let op = match input_ty {
+ types::F32X4 => SseOpcode::Cmpps,
+ types::F64X2 => SseOpcode::Cmppd,
+ _ => panic!("Bad input type to fcmp: {}", input_ty),
+ };
+
+ // Since some packed comparisons are not available, some of the condition codes
+ // must be inverted, with a corresponding `flip` of the operands.
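+ // (CMPPS/CMPPD only encode the eq/lt/le/unord/neq/nlt/nle/ord predicates, so e.g.
+ // `GreaterThan(a, b)` is lowered as `LessThan(b, a)` with the operands swapped.)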
+ let (imm, flip) = match cond_code {
+ FloatCC::GreaterThan => (FcmpImm::LessThan, true),
+ FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true),
+ FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true),
+ FloatCC::UnorderedOrLessThanOrEqual => {
+ (FcmpImm::UnorderedOrGreaterThanOrEqual, true)
+ }
+ FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => {
+ panic!("unsupported float condition code: {}", cond_code)
+ }
+ _ => (FcmpImm::from(cond_code), false),
+ };
+
+ // Determine the operands of the comparison, possibly by flipping them.
+ let (lhs, rhs) = if flip {
+ (
+ put_input_in_reg(ctx, inputs[1]),
+ input_to_reg_mem(ctx, inputs[0]),
+ )
+ } else {
+ (
+ put_input_in_reg(ctx, inputs[0]),
+ input_to_reg_mem(ctx, inputs[1]),
+ )
+ };
+
+ // Move the `lhs` to the same register as `dst`; this may not emit an actual move
+ // but ensures that the registers are the same to match x86's read-write operand
+ // encoding.
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, lhs, input_ty));
+
+ // Emit the comparison.
+ ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false));
+ }
+ }
+
+ Opcode::FallthroughReturn | Opcode::Return => {
+ for i in 0..ctx.num_inputs(insn) {
+ let src_reg = put_input_in_reg(ctx, inputs[i]);
+ let retval_reg = ctx.retval(i);
+ let ty = ctx.input_ty(insn, i);
+ ctx.emit(Inst::gen_move(retval_reg, src_reg, ty));
+ }
+ // N.B.: the Ret itself is generated by the ABI.
+ }
+
+ Opcode::Call | Opcode::CallIndirect => {
+ let caller_conv = ctx.abi().call_conv();
+ let (mut abi, inputs) = match op {
+ Opcode::Call => {
+ let (extname, dist) = ctx.call_target(insn).unwrap();
+ let sig = ctx.call_sig(insn).unwrap();
+ assert_eq!(inputs.len(), sig.params.len());
+ assert_eq!(outputs.len(), sig.returns.len());
+ (
+ X64ABICaller::from_func(sig, &extname, dist, caller_conv)?,
+ &inputs[..],
+ )
+ }
+
+ Opcode::CallIndirect => {
+ let ptr = put_input_in_reg(ctx, inputs[0]);
+ let sig = ctx.call_sig(insn).unwrap();
+ assert_eq!(inputs.len() - 1, sig.params.len());
+ assert_eq!(outputs.len(), sig.returns.len());
+ (
+ X64ABICaller::from_ptr(sig, ptr, op, caller_conv)?,
+ &inputs[1..],
+ )
+ }
+
+ _ => unreachable!(),
+ };
+
+ abi.emit_stack_pre_adjust(ctx);
+ assert_eq!(inputs.len(), abi.num_args());
+ for (i, input) in inputs.iter().enumerate() {
+ let arg_reg = put_input_in_reg(ctx, *input);
+ abi.emit_copy_reg_to_arg(ctx, i, arg_reg);
+ }
+ abi.emit_call(ctx);
+ for (i, output) in outputs.iter().enumerate() {
+ let retval_reg = get_output_reg(ctx, *output);
+ abi.emit_copy_retval_to_reg(ctx, i, retval_reg);
+ }
+ abi.emit_stack_post_adjust(ctx);
+ }
+
+ Opcode::Debugtrap => {
+ ctx.emit(Inst::Hlt);
+ }
+
+ Opcode::Trap | Opcode::ResumableTrap => {
+ let trap_code = ctx.data(insn).trap_code().unwrap();
+ ctx.emit_safepoint(Inst::Ud2 { trap_code });
+ }
+
+ Opcode::Trapif | Opcode::Trapff => {
+ let trap_code = ctx.data(insn).trap_code().unwrap();
+
+ if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
+ let cond_code = ctx.data(insn).cond_code().unwrap();
+ // The flags must not have been clobbered by any other instruction between the
+ // iadd_ifcout and this instruction, as verified by the CLIF validator; so we can
+ // simply use the flags here.
+ let cc = CC::from_intcc(cond_code);
+
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+ } else if op == Opcode::Trapif {
+ let cond_code = ctx.data(insn).cond_code().unwrap();
+ let cc = CC::from_intcc(cond_code);
+
+ // Verification ensures that the input is always a single-def ifcmp.
+ let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
+ emit_cmp(ctx, ifcmp);
+
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc });
+ } else {
+ let cond_code = ctx.data(insn).fp_cond_code().unwrap();
+
+ // Verification ensures that the input is always a single-def ffcmp.
+ let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
+
+ match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
+ FcmpCondResult::Condition(cc) => {
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc })
+ }
+ FcmpCondResult::AndConditions(cc1, cc2) => {
+ // A bit unfortunate, but materialize the flags in their own register, and
+ // check against this.
+ let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::setcc(cc1, tmp));
+ ctx.emit(Inst::setcc(cc2, tmp2));
+ ctx.emit(Inst::alu_rmi_r(
+ false, /* is_64 */
+ AluRmiROpcode::And,
+ RegMemImm::reg(tmp.to_reg()),
+ tmp2,
+ ));
+ ctx.emit_safepoint(Inst::TrapIf {
+ trap_code,
+ cc: CC::NZ,
+ });
+ }
+ FcmpCondResult::OrConditions(cc1, cc2) => {
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 });
+ ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 });
+ }
+ FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+ };
+ };
+ }
+
+ Opcode::F64const => {
+ // TODO use cmpeqpd for all 1s.
+ let value = ctx.get_constant(insn).unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ for inst in Inst::gen_constant(dst, value, types::F64, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::F32const => {
+ // TODO use cmpeqps for all 1s.
+ let value = ctx.get_constant(insn).unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ for inst in Inst::gen_constant(dst, value, types::F32, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ }
+
+ Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ // Move the `lhs` to the same register as `dst`; this may not emit an actual move
+ // but ensures that the registers are the same to match x86's read-write operand
+ // encoding.
+ ctx.emit(Inst::gen_move(dst, lhs, ty));
+
+ // Note: min and max can't be handled here, because of the way Cranelift defines them:
+ // if any operand is a NaN, they must return the NaN operand, while the x86 machine
+ // instruction will return the second operand if either operand is a NaN.
+ let sse_op = match ty {
+ types::F32 => match op {
+ Opcode::Fadd => SseOpcode::Addss,
+ Opcode::Fsub => SseOpcode::Subss,
+ Opcode::Fmul => SseOpcode::Mulss,
+ Opcode::Fdiv => SseOpcode::Divss,
+ _ => unreachable!(),
+ },
+ types::F64 => match op {
+ Opcode::Fadd => SseOpcode::Addsd,
+ Opcode::Fsub => SseOpcode::Subsd,
+ Opcode::Fmul => SseOpcode::Mulsd,
+ Opcode::Fdiv => SseOpcode::Divsd,
+ _ => unreachable!(),
+ },
+ types::F32X4 => match op {
+ Opcode::Fadd => SseOpcode::Addps,
+ Opcode::Fsub => SseOpcode::Subps,
+ Opcode::Fmul => SseOpcode::Mulps,
+ Opcode::Fdiv => SseOpcode::Divps,
+ _ => unreachable!(),
+ },
+ types::F64X2 => match op {
+ Opcode::Fadd => SseOpcode::Addpd,
+ Opcode::Fsub => SseOpcode::Subpd,
+ Opcode::Fmul => SseOpcode::Mulpd,
+ Opcode::Fdiv => SseOpcode::Divpd,
+ _ => unreachable!(),
+ },
+ _ => panic!(
+ "invalid type: expected one of [F32, F64, F32X4, F64X2], found {}",
+ ty
+ ),
+ };
+ ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
+ }
+
+ Opcode::Fmin | Opcode::Fmax => {
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let is_min = op == Opcode::Fmin;
+ let output_ty = ty.unwrap();
+ ctx.emit(Inst::gen_move(dst, rhs, output_ty));
+ if !output_ty.is_vector() {
+ let op_size = match output_ty {
+ types::F32 => OperandSize::Size32,
+ types::F64 => OperandSize::Size64,
+ _ => panic!("unexpected type {:?} for fmin/fmax", output_ty),
+ };
+ ctx.emit(Inst::xmm_min_max_seq(op_size, is_min, lhs, dst));
+ } else {
+ // X64's implementation of floating point min and floating point max does not
+ // propagate NaNs and +0's in a way that is friendly to the SIMD spec. For the
+ // scalar approach we use jumps to handle cases where NaN and +0 propagation is
+ // not consistent with what is needed. However for packed floating point min and
+ // floating point max we implement a different approach to avoid the sequence
+ // of jumps that would be required on a per-lane basis. Because we do not need to
+ // lower labels and jumps, but do need `ctx` for creating temporaries, we implement
+ // the lowering here in lower.rs instead of emit.rs, as is done for the scalar case.
+ // The outline of the approach is as follows:
+ //
+ // First we perform the Min/Max in both directions. This is because, in the
+ // case of an operand's lane containing a NaN, or in the case of the lanes of the
+ // two operands containing 0 but with mismatched signs, x64 will return the second
+ // operand regardless of its contents. So in order to make sure we capture NaNs and
+ // normalize NaNs and 0 values, we perform the operation in both directions and merge
+ // the results. Then we normalize the results through operations that create a mask
+ // for the lanes containing NaNs; we use that mask to adjust NaNs to quiet NaNs and
+ // normalize 0s.
+ //
+ // The following sequence is generated for min:
+ //
+ // movap{s,d} %lhs, %tmp
+ // minp{s,d} %dst, %tmp
+ // minp{s,d} %lhs, %dst
+ // orp{s,d} %dst, %tmp
+ // cmpp{s,d} %tmp, %dst, $3
+ // orp{s,d} %dst, %tmp
+ // psrl{d,q} {$10, $13}, %dst
+ // andnp{s,d} %tmp, %dst
+ //
+ // and for max the sequence is:
+ //
+ // movap{s,d} %lhs, %tmp
+ // maxp{s,d} %dst, %tmp
+ // maxp{s,d} %lhs, %dst
+ // xorp{s,d} %tmp, %dst
+ // orp{s,d} %dst, %tmp
+ // subp{s,d} %dst, %tmp
+ // cmpp{s,d} %tmp, %dst, $3
+ // psrl{d,q} {$10, $13}, %dst
+ // andnp{s,d} %tmp, %dst
+
+ if is_min {
+ let (mov_op, min_op, or_op, cmp_op, shift_op, shift_by, andn_op) =
+ match output_ty {
+ types::F32X4 => (
+ SseOpcode::Movaps,
+ SseOpcode::Minps,
+ SseOpcode::Orps,
+ SseOpcode::Cmpps,
+ SseOpcode::Psrld,
+ 10,
+ SseOpcode::Andnps,
+ ),
+ types::F64X2 => (
+ SseOpcode::Movapd,
+ SseOpcode::Minpd,
+ SseOpcode::Orpd,
+ SseOpcode::Cmppd,
+ SseOpcode::Psrlq,
+ 13,
+ SseOpcode::Andnpd,
+ ),
+ _ => unimplemented!("unsupported op type {:?}", output_ty),
+ };
+
+ // Copy lhs into tmp
+ let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, output_ty);
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1));
+
+ // Perform min in reverse direction
+ ctx.emit(Inst::xmm_rm_r(min_op, RegMem::from(dst), tmp_xmm1));
+
+ // Perform min in original direction
+ ctx.emit(Inst::xmm_rm_r(min_op, RegMem::reg(lhs), dst));
+
+ // X64 handles propagation of -0's and NaNs differently between left and right
+ // operands. After doing the min in both directions, this OR will
+ // guarantee capture of -0's and NaNs in our tmp register.
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::from(dst), tmp_xmm1));
+
+ // Compare unordered to create mask for lanes containing NaNs and then use
+ // that mask to saturate the NaN containing lanes in the tmp register with 1s.
+ // TODO: Would a check for NaN and then a jump be better here in the
+ // common case than continuing on to normalize NaNs that might not exist?
+ let cond = FcmpImm::from(FloatCC::Unordered);
+ ctx.emit(Inst::xmm_rm_r_imm(
+ cmp_op,
+ RegMem::reg(tmp_xmm1.to_reg()),
+ dst,
+ cond.encode(),
+ false,
+ ));
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // The dst register holds a mask for lanes containing NaNs.
+ // We take that mask and shift in preparation for creating a different mask
+ // to normalize NaNs (create a quiet NaN) by zeroing out the appropriate
+ // number of least significant bits. We shift right each lane by 10 bits
+ // (1 sign + 8 exp. + 1 MSB sig.) for F32X4 and by 13 bits (1 sign +
+ // 11 exp. + 1 MSB sig.) for F64X2.
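+ // (Illustration for F32X4: a NaN lane's mask is all 1s, so it becomes 0x003F_FFFF
+ // after the shift; the ANDN below then leaves 0xFFC0_0000 in that lane, a quiet NaN,
+ // while a non-NaN lane's mask is 0, so tmp passes through unchanged there.)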
+ ctx.emit(Inst::xmm_rmi_reg(shift_op, RegMemImm::imm(shift_by), dst));
+
+ // Finally we do an and-not (ANDN) with the tmp register to produce the final
+ // results in the dst.
+ ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+ } else {
+ let (
+ mov_op,
+ max_op,
+ xor_op,
+ or_op,
+ sub_op,
+ cmp_op,
+ shift_op,
+ shift_by,
+ andn_op,
+ ) = match output_ty {
+ types::F32X4 => (
+ SseOpcode::Movaps,
+ SseOpcode::Maxps,
+ SseOpcode::Xorps,
+ SseOpcode::Orps,
+ SseOpcode::Subps,
+ SseOpcode::Cmpps,
+ SseOpcode::Psrld,
+ 10,
+ SseOpcode::Andnps,
+ ),
+ types::F64X2 => (
+ SseOpcode::Movapd,
+ SseOpcode::Maxpd,
+ SseOpcode::Xorpd,
+ SseOpcode::Orpd,
+ SseOpcode::Subpd,
+ SseOpcode::Cmppd,
+ SseOpcode::Psrlq,
+ 13,
+ SseOpcode::Andnpd,
+ ),
+ _ => unimplemented!("unsupported op type {:?}", output_ty),
+ };
+
+ // Copy lhs into tmp.
+ let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, types::F32);
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1));
+
+ // Perform max in reverse direction.
+ ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // Perform max in original direction.
+ ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(lhs), dst));
+
+ // Get the difference between the two results and store in tmp.
+ // Max uses a different approach than min to account for potential
+ // discrepancies with plus/minus 0.
+ ctx.emit(Inst::xmm_rm_r(xor_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+
+ // X64 handles propagation of -0's and NaNs differently between left and right
+ // operands. After doing the max in both directions, this OR will
+ // guarantee capture of 0's and NaNs in our tmp register.
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // Capture NaNs and sign discrepancies.
+ ctx.emit(Inst::xmm_rm_r(sub_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
+
+ // Compare unordered to create mask for lanes containing NaNs and then use
+ // that mask to saturate the NaN containing lanes in the tmp register with 1s.
+ let cond = FcmpImm::from(FloatCC::Unordered);
+ ctx.emit(Inst::xmm_rm_r_imm(
+ cmp_op,
+ RegMem::reg(tmp_xmm1.to_reg()),
+ dst,
+ cond.encode(),
+ false,
+ ));
+
+ // The dst register holds a mask for lanes containing NaNs.
+ // We take that mask and shift in preparation for creating a different mask
+ // to normalize NaNs (create a quiet NaN) by zeroing out the appropriate
+ // number of least significant bits. We shift right each lane by 10 bits
+ // (1 sign + 8 exp. + 1 MSB sig.) for F32X4 and by 13 bits (1 sign +
+ // 11 exp. + 1 MSB sig.) for F64X2.
+ ctx.emit(Inst::xmm_rmi_reg(shift_op, RegMemImm::imm(shift_by), dst));
+
+ // Finally we do an and-not (ANDN) with the tmp register to produce the final
+ // results in the dst.
+ ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+ }
+ }
+ }
+
+ Opcode::FminPseudo | Opcode::FmaxPseudo => {
+ let lhs = input_to_reg_mem(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ let sse_opcode = match (ty, op) {
+ (types::F32X4, Opcode::FminPseudo) => SseOpcode::Minps,
+ (types::F32X4, Opcode::FmaxPseudo) => SseOpcode::Maxps,
+ (types::F64X2, Opcode::FminPseudo) => SseOpcode::Minpd,
+ (types::F64X2, Opcode::FmaxPseudo) => SseOpcode::Maxpd,
+ _ => unimplemented!("unsupported type {} for {}", ty, op),
+ };
+ ctx.emit(Inst::xmm_rm_r(sse_opcode, lhs, dst));
+ }
+
+ Opcode::Sqrt => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ let sse_op = match ty {
+ types::F32 => SseOpcode::Sqrtss,
+ types::F64 => SseOpcode::Sqrtsd,
+ types::F32X4 => SseOpcode::Sqrtps,
+ types::F64X2 => SseOpcode::Sqrtpd,
+ _ => panic!(
+ "invalid type: expected one of [F32, F64, F32X4, F64X2], found {}",
+ ty
+ ),
+ };
+
+ ctx.emit(Inst::xmm_unary_rm_r(sse_op, src, dst));
+ }
+
+ Opcode::Fpromote => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtss2sd, src, dst));
+ }
+
+ Opcode::Fdemote => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst));
+ }
+
+ Opcode::FcvtFromSint => {
+ let output_ty = ty.unwrap();
+ if !output_ty.is_vector() {
+ let (ext_spec, src_size) = match ctx.input_ty(insn, 0) {
+ types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32),
+ types::I32 => (None, OperandSize::Size32),
+ types::I64 => (None, OperandSize::Size64),
+ _ => unreachable!(),
+ };
+
+ let src = match ext_spec {
+ Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)),
+ None => input_to_reg_mem(ctx, inputs[0]),
+ };
+
+ let opcode = if output_ty == types::F32 {
+ SseOpcode::Cvtsi2ss
+ } else {
+ assert_eq!(output_ty, types::F64);
+ SseOpcode::Cvtsi2sd
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst));
+ } else {
+ let ty = ty.unwrap();
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let opcode = match ctx.input_ty(insn, 0) {
+ types::I32X4 => SseOpcode::Cvtdq2ps,
+ _ => {
+ unimplemented!("unable to use type {} for op {}", ctx.input_ty(insn, 0), op)
+ }
+ };
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
+ }
+ }
+
+ Opcode::FcvtFromUint => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+
+ let input_ty = ctx.input_ty(insn, 0);
+ if !ty.is_vector() {
+ match input_ty {
+ types::I8 | types::I16 | types::I32 => {
+ // Conversion from an unsigned int smaller than 64-bit is easy: zero-extend +
+ // do a signed conversion (which won't overflow).
+ let opcode = if ty == types::F32 {
+ SseOpcode::Cvtsi2ss
+ } else {
+ assert_eq!(ty, types::F64);
+ SseOpcode::Cvtsi2sd
+ };
+
+ let src = RegMem::reg(extend_input_to_reg(
+ ctx,
+ inputs[0],
+ ExtSpec::ZeroExtendTo64,
+ ));
+ ctx.emit(Inst::gpr_to_xmm(opcode, src, OperandSize::Size64, dst));
+ }
+
+ types::I64 => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+
+ let src_copy = ctx.alloc_tmp(RegClass::I64, types::I64);
+ ctx.emit(Inst::gen_move(src_copy, src, types::I64));
+
+ let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ let tmp_gpr2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ ctx.emit(Inst::cvt_u64_to_float_seq(
+ ty == types::F64,
+ src_copy,
+ tmp_gpr1,
+ tmp_gpr2,
+ dst,
+ ));
+ }
+ _ => panic!("unexpected input type for FcvtFromUint: {:?}", input_ty),
+ };
+ } else {
+ // Converting packed unsigned integers to packed floats requires a few steps.
+ // There is no single-instruction lowering for converting packed unsigned integers,
+ // but there is one for converting packed signed integers to float (cvtdq2ps). In the
+ // steps below we isolate the upper half (16 bits) and lower half (16 bits) of each
+ // lane and then convert each half separately using cvtdq2ps, which is meant for
+ // signed integers. In order for this to work for the upper half, we must first shift
+ // those bits right by 1 (divide by 2) so that the most significant bit is 0 and the
+ // value is not treated as signed; after the conversion we double the value. Finally
+ // we add the two converted values, where the addition performs the correct rounding.
+ //
+ // Sequence:
+ // -> A = 0xffffffff
+ // -> Ah = 0xffff0000
+ // -> Al = 0x0000ffff
+ // -> Convert(Al) // Convert int to float
+ // -> Ah = Ah >> 1 // Shift right 1 to ensure Ah's conversion isn't treated as signed
+ // -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
+ // -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion.
+ // -> dst = Ah + Al // Add the two floats together
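+ // For example, a lane holding 0x8000_0001 splits into Ah = 0x8000_0000 and
+ // Al = 0x0000_0001; Ah >> 1 = 0x4000_0000 converts exactly to 2^30, doubling it
+ // restores 2^31, and adding Al's converted 1.0 gives 2^31 + 1, with the final
+ // addition applying the correct float rounding.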
+
+ assert_eq!(ctx.input_ty(insn, 0), types::I32X4);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // Create a temporary register
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
+ ctx.emit(Inst::xmm_unary_rm_r(
+ SseOpcode::Movapd,
+ RegMem::reg(src),
+ tmp,
+ ));
+ ctx.emit(Inst::gen_move(dst, src, ty));
+
+ // Get the low 16 bits
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(16), tmp));
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(16), tmp));
+
+ // Get the high 16 bits
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst));
+
+ // Convert the low 16 bits
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(tmp), tmp));
+
+ // Shift the high bits by 1, convert, and double to get the correct value.
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(dst), dst));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Addps,
+ RegMem::reg(dst.to_reg()),
+ dst,
+ ));
+
+ // Add together the two converted values.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Addps,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ }
+ }
+
+ Opcode::FcvtToUint | Opcode::FcvtToUintSat | Opcode::FcvtToSint | Opcode::FcvtToSintSat => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let input_ty = ctx.input_ty(insn, 0);
+ if !input_ty.is_vector() {
+ let src_size = if input_ty == types::F32 {
+ OperandSize::Size32
+ } else {
+ assert_eq!(input_ty, types::F64);
+ OperandSize::Size64
+ };
+
+ let output_ty = ty.unwrap();
+ let dst_size = if output_ty == types::I32 {
+ OperandSize::Size32
+ } else {
+ assert_eq!(output_ty, types::I64);
+ OperandSize::Size64
+ };
+
+ let to_signed = op == Opcode::FcvtToSint || op == Opcode::FcvtToSintSat;
+ let is_sat = op == Opcode::FcvtToUintSat || op == Opcode::FcvtToSintSat;
+
+ let src_copy = ctx.alloc_tmp(RegClass::V128, input_ty);
+ ctx.emit(Inst::gen_move(src_copy, src, input_ty));
+
+ let tmp_xmm = ctx.alloc_tmp(RegClass::V128, input_ty);
+ let tmp_gpr = ctx.alloc_tmp(RegClass::I64, output_ty);
+
+ if to_signed {
+ ctx.emit(Inst::cvt_float_to_sint_seq(
+ src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm,
+ ));
+ } else {
+ ctx.emit(Inst::cvt_float_to_uint_seq(
+ src_size, dst_size, is_sat, src_copy, dst, tmp_gpr, tmp_xmm,
+ ));
+ }
+ } else {
+ if op == Opcode::FcvtToSintSat {
+ // Sets destination to zero if float is NaN
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I32X4);
+ ctx.emit(Inst::xmm_unary_rm_r(
+ SseOpcode::Movapd,
+ RegMem::reg(src),
+ tmp,
+ ));
+ ctx.emit(Inst::gen_move(dst, src, input_ty));
+ let cond = FcmpImm::from(FloatCC::Equal);
+ ctx.emit(Inst::xmm_rm_r_imm(
+ SseOpcode::Cmpps,
+ RegMem::reg(tmp.to_reg()),
+ tmp,
+ cond.encode(),
+ false,
+ ));
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Andps,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+
+ // Sets top bit of tmp if float is positive
+ // Setting up to set top bit on negative float values
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pxor,
+ RegMem::reg(dst.to_reg()),
+ tmp,
+ ));
+
+ // Convert the packed float to packed doubleword.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Cvttps2dq,
+ RegMem::reg(dst.to_reg()),
+ dst,
+ ));
+
+ // Set top bit only if < 0
+ // Saturate lane with sign (top) bit.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pand,
+ RegMem::reg(dst.to_reg()),
+ tmp,
+ ));
+ ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrad, RegMemImm::imm(31), tmp));
+
+ // On overflow 0x80000000 is returned to a lane.
+ // Below sets positive overflow lanes to 0x7FFFFFFF
+ // Keeps negative overflow lanes as is.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pxor,
+ RegMem::reg(tmp.to_reg()),
+ dst,
+ ));
+ } else if op == Opcode::FcvtToUintSat {
+ unimplemented!("f32x4.convert_i32x4_u");
+ } else {
+ // Since this branch is also guarded by a check for vector types,
+ // neither Opcode::FcvtToUint nor Opcode::FcvtToSint can reach here,
+ // because vector variants of those opcodes do not exist. The first two
+ // branches cover all reachable cases.
+ unreachable!();
+ }
+ }
+ }
+
+ Opcode::Bitcast => {
+ let input_ty = ctx.input_ty(insn, 0);
+ let output_ty = ctx.output_ty(insn, 0);
+ match (input_ty, output_ty) {
+ (types::F32, types::I32) => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_to_gpr(
+ SseOpcode::Movd,
+ src,
+ dst,
+ OperandSize::Size32,
+ ));
+ }
+ (types::I32, types::F32) => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gpr_to_xmm(
+ SseOpcode::Movd,
+ src,
+ OperandSize::Size32,
+ dst,
+ ));
+ }
+ (types::F64, types::I64) => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::xmm_to_gpr(
+ SseOpcode::Movq,
+ src,
+ dst,
+ OperandSize::Size64,
+ ));
+ }
+ (types::I64, types::F64) => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gpr_to_xmm(
+ SseOpcode::Movq,
+ src,
+ OperandSize::Size64,
+ dst,
+ ));
+ }
+ _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
+ }
+ }
+
+ Opcode::Fabs | Opcode::Fneg => {
+ let src = input_to_reg_mem(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // In both cases, generate a constant and apply a single binary instruction:
+ // - to compute the absolute value, set all bits to 1 but the MSB to 0, and bit-AND the
+ // src with it.
+ // - to compute the negated value, set all bits to 0 but the MSB to 1, and bit-XOR the
+ // src with it.
+ let output_ty = ty.unwrap();
+ if !output_ty.is_vector() {
+ let (val, opcode) = match output_ty {
+ types::F32 => match op {
+ Opcode::Fabs => (0x7fffffff, SseOpcode::Andps),
+ Opcode::Fneg => (0x80000000, SseOpcode::Xorps),
+ _ => unreachable!(),
+ },
+ types::F64 => match op {
+ Opcode::Fabs => (0x7fffffffffffffff, SseOpcode::Andpd),
+ Opcode::Fneg => (0x8000000000000000, SseOpcode::Xorpd),
+ _ => unreachable!(),
+ },
+ _ => panic!("unexpected type {:?} for Fabs", output_ty),
+ };
+
+ for inst in Inst::gen_constant(dst, val, output_ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+
+ ctx.emit(Inst::xmm_rm_r(opcode, src, dst));
+ } else {
+ // Eventually vector constants should be available in `gen_constant` and this block
+ // can be merged with the one above (TODO).
+ if output_ty.bits() == 128 {
+ // Move the `src` to the same register as `dst`; this may not emit an actual move
+ // but ensures that the registers are the same to match x86's read-write operand
+ // encoding.
+ let src = put_input_in_reg(ctx, inputs[0]);
+ ctx.emit(Inst::gen_move(dst, src, output_ty));
+
+ // Generate an all 1s constant in an XMM register. This uses CMPPS but could
+ // have used CMPPD with the same effect.
+ let tmp = ctx.alloc_tmp(RegClass::V128, output_ty);
+ let cond = FcmpImm::from(FloatCC::Equal);
+ let cmpps = Inst::xmm_rm_r_imm(
+ SseOpcode::Cmpps,
+ RegMem::reg(tmp.to_reg()),
+ tmp,
+ cond.encode(),
+ false,
+ );
+ ctx.emit(cmpps);
+
+ // Shift the all 1s constant to generate the mask.
+ let lane_bits = output_ty.lane_bits();
+ let (shift_opcode, opcode, shift_by) = match (op, lane_bits) {
+ (Opcode::Fabs, 32) => (SseOpcode::Psrld, SseOpcode::Andps, 1),
+ (Opcode::Fabs, 64) => (SseOpcode::Psrlq, SseOpcode::Andpd, 1),
+ (Opcode::Fneg, 32) => (SseOpcode::Pslld, SseOpcode::Xorps, 31),
+ (Opcode::Fneg, 64) => (SseOpcode::Psllq, SseOpcode::Xorpd, 63),
+ _ => unreachable!(
+ "unexpected opcode and lane size: {:?}, {} bits",
+ op, lane_bits
+ ),
+ };
+ let shift = Inst::xmm_rmi_reg(shift_opcode, RegMemImm::imm(shift_by), tmp);
+ ctx.emit(shift);
+
+ // Apply shifted mask (XOR or AND).
+ let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst);
+ ctx.emit(mask);
+ } else {
+ panic!("unexpected type {:?} for Fabs", output_ty);
+ }
+ }
+ }
+
+ Opcode::Fcopysign => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+
+ let ty = ty.unwrap();
+
+ // We're going to generate the following sequence:
+ //
+ // movabs $INT_MIN, tmp_gpr1
+ // mov{d,q} tmp_gpr1, tmp_xmm1
+ // movap{s,d} tmp_xmm1, dst
+ // andnp{s,d} src_1, dst
+ // movap{s,d} src_2, tmp_xmm2
+ // andp{s,d} tmp_xmm1, tmp_xmm2
+ // orp{s,d} tmp_xmm2, dst
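+ // In effect this computes dst = (lhs & !SIGN_MASK) | (rhs & SIGN_MASK): the ANDN
+ // keeps the magnitude of the first operand while the AND/OR splice in the sign bit
+ // of the second.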
+
+ let tmp_xmm1 = ctx.alloc_tmp(RegClass::V128, types::F32);
+ let tmp_xmm2 = ctx.alloc_tmp(RegClass::V128, types::F32);
+
+ let (sign_bit_cst, mov_op, and_not_op, and_op, or_op) = match ty {
+ types::F32 => (
+ 0x8000_0000,
+ SseOpcode::Movaps,
+ SseOpcode::Andnps,
+ SseOpcode::Andps,
+ SseOpcode::Orps,
+ ),
+ types::F64 => (
+ 0x8000_0000_0000_0000,
+ SseOpcode::Movapd,
+ SseOpcode::Andnpd,
+ SseOpcode::Andpd,
+ SseOpcode::Orpd,
+ ),
+ _ => {
+ panic!("unexpected type {:?} for copysign", ty);
+ }
+ };
+
+ for inst in Inst::gen_constant(tmp_xmm1, sign_bit_cst, ty, |reg_class, ty| {
+ ctx.alloc_tmp(reg_class, ty)
+ }) {
+ ctx.emit(inst);
+ }
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(tmp_xmm1.to_reg()), dst));
+ ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst));
+ ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2));
+ ctx.emit(Inst::xmm_rm_r(
+ and_op,
+ RegMem::reg(tmp_xmm1.to_reg()),
+ tmp_xmm2,
+ ));
+ ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst));
+ }
+
+ Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {
+ // TODO use ROUNDSS/ROUNDSD after sse4.1.
+
+ // Lower to VM calls when there's no access to SSE4.1.
+ let ty = ty.unwrap();
+ let libcall = match (ty, op) {
+ (types::F32, Opcode::Ceil) => LibCall::CeilF32,
+ (types::F64, Opcode::Ceil) => LibCall::CeilF64,
+ (types::F32, Opcode::Floor) => LibCall::FloorF32,
+ (types::F64, Opcode::Floor) => LibCall::FloorF64,
+ (types::F32, Opcode::Nearest) => LibCall::NearestF32,
+ (types::F64, Opcode::Nearest) => LibCall::NearestF64,
+ (types::F32, Opcode::Trunc) => LibCall::TruncF32,
+ (types::F64, Opcode::Trunc) => LibCall::TruncF64,
+ _ => panic!(
+ "unexpected type/opcode {:?}/{:?} in Ceil/Floor/Nearest/Trunc",
+ ty, op
+ ),
+ };
+
+ emit_vm_call(ctx, flags, triple, libcall, insn, inputs, outputs)?;
+ }
+
+ Opcode::Load
+ | Opcode::Uload8
+ | Opcode::Sload8
+ | Opcode::Uload16
+ | Opcode::Sload16
+ | Opcode::Uload32
+ | Opcode::Sload32
+ | Opcode::LoadComplex
+ | Opcode::Uload8Complex
+ | Opcode::Sload8Complex
+ | Opcode::Uload16Complex
+ | Opcode::Sload16Complex
+ | Opcode::Uload32Complex
+ | Opcode::Sload32Complex => {
+ let offset = ctx.data(insn).load_store_offset().unwrap();
+
+ let elem_ty = match op {
+ Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
+ types::I8
+ }
+ Opcode::Sload16
+ | Opcode::Uload16
+ | Opcode::Sload16Complex
+ | Opcode::Uload16Complex => types::I16,
+ Opcode::Sload32
+ | Opcode::Uload32
+ | Opcode::Sload32Complex
+ | Opcode::Uload32Complex => types::I32,
+ Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
+ _ => unimplemented!(),
+ };
+
+ let ext_mode = ExtMode::new(elem_ty.bits(), 64);
+
+ let sign_extend = match op {
+ Opcode::Sload8
+ | Opcode::Sload8Complex
+ | Opcode::Sload16
+ | Opcode::Sload16Complex
+ | Opcode::Sload32
+ | Opcode::Sload32Complex => true,
+ _ => false,
+ };
+
+ let amode = match op {
+ Opcode::Load
+ | Opcode::Uload8
+ | Opcode::Sload8
+ | Opcode::Uload16
+ | Opcode::Sload16
+ | Opcode::Uload32
+ | Opcode::Sload32 => {
+ assert_eq!(inputs.len(), 1, "only one input for load operands");
+ lower_to_amode(ctx, inputs[0], offset)
+ }
+
+ Opcode::LoadComplex
+ | Opcode::Uload8Complex
+ | Opcode::Sload8Complex
+ | Opcode::Uload16Complex
+ | Opcode::Sload16Complex
+ | Opcode::Uload32Complex
+ | Opcode::Sload32Complex => {
+ assert_eq!(
+ inputs.len(),
+ 2,
+ "can't handle more than two inputs in complex load"
+ );
+ let base = put_input_in_reg(ctx, inputs[0]);
+ let index = put_input_in_reg(ctx, inputs[1]);
+ let shift = 0;
+ let flags = ctx.memflags(insn).expect("load should have memflags");
+ Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
+ }
+
+ _ => unreachable!(),
+ };
+
+ let dst = get_output_reg(ctx, outputs[0]);
+ let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
+ match (sign_extend, is_xmm) {
+ (true, false) => {
+ // The load is sign-extended only when the output size is lower than 64 bits,
+ // so ext-mode is defined in this case.
+ ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(amode), dst));
+ }
+ (false, false) => {
+ if elem_ty.bytes() == 8 {
+ // Use a plain load.
+ ctx.emit(Inst::mov64_m_r(amode, dst))
+ } else {
+ // Use a zero-extended load.
+ ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(amode), dst))
+ }
+ }
+ (_, true) => {
+ ctx.emit(match elem_ty {
+ types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst),
+ types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst),
+ _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
+ Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst)
+ } // TODO Specialize for different types: MOVUPD, MOVDQU
+ _ => unreachable!("unexpected type for load: {:?}", elem_ty),
+ });
+ }
+ }
+ }
+
+ Opcode::Store
+ | Opcode::Istore8
+ | Opcode::Istore16
+ | Opcode::Istore32
+ | Opcode::StoreComplex
+ | Opcode::Istore8Complex
+ | Opcode::Istore16Complex
+ | Opcode::Istore32Complex => {
+ let offset = ctx.data(insn).load_store_offset().unwrap();
+
+ let elem_ty = match op {
+ Opcode::Istore8 | Opcode::Istore8Complex => types::I8,
+ Opcode::Istore16 | Opcode::Istore16Complex => types::I16,
+ Opcode::Istore32 | Opcode::Istore32Complex => types::I32,
+ Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
+ _ => unreachable!(),
+ };
+
+ let addr = match op {
+ Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
+ assert_eq!(inputs.len(), 2, "only one input for store memory operands");
+ lower_to_amode(ctx, inputs[1], offset)
+ }
+
+ Opcode::StoreComplex
+ | Opcode::Istore8Complex
+ | Opcode::Istore16Complex
+ | Opcode::Istore32Complex => {
+ assert_eq!(
+ inputs.len(),
+ 3,
+ "can't handle more than two inputs in complex store"
+ );
+ let base = put_input_in_reg(ctx, inputs[1]);
+ let index = put_input_in_reg(ctx, inputs[2]);
+ let shift = 0;
+ let flags = ctx.memflags(insn).expect("store should have memflags");
+ Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
+ }
+
+ _ => unreachable!(),
+ };
+
+ let src = put_input_in_reg(ctx, inputs[0]);
+
+ ctx.emit(match elem_ty {
+ types::F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
+ types::F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr),
+ _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
+ // TODO Specialize for different types: MOVUPD, MOVDQU, etc.
+ Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr)
+ }
+ _ => Inst::mov_r_m(elem_ty.bytes() as u8, src, addr),
+ });
+ }
+
+ Opcode::AtomicRmw => {
+ // This is a simple, general-case atomic update, based on a loop involving
+ // `cmpxchg`. Note that we could do much better than this in the case where the old
+ // value at the location (that is to say, the SSA `Value` computed by this CLIF
+ // instruction) is not required. In that case, we could instead implement this
+ // using a single `lock`-prefixed x64 read-modify-write instruction. Also, even in
+ // the case where the old value is required, for the `add` and `sub` cases, we can
+ // use the single instruction `lock xadd`. However, those improvements have been
+ // left for another day.
+ // TODO: filed as https://github.com/bytecodealliance/wasmtime/issues/2153
+ let dst = get_output_reg(ctx, outputs[0]);
+ let mut addr = put_input_in_reg(ctx, inputs[0]);
+ let mut arg2 = put_input_in_reg(ctx, inputs[1]);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ // Make sure that both args are in virtual regs, since in effect we have to do a
+ // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not
+ // guaranteed safe if either is in a real reg.
+ addr = ctx.ensure_in_vreg(addr, types::I64);
+ arg2 = ctx.ensure_in_vreg(arg2, types::I64);
+
+ // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq`
+ // operates at whatever width is specified by `ty`, so there's no need to
+ // zero-extend `arg2` in the case of `ty` being I8/I16/I32.
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::r9()),
+ addr,
+ types::I64,
+ ));
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::r10()),
+ arg2,
+ types::I64,
+ ));
+
+ // Now the AtomicRmwSeq (pseudo-) instruction itself
+ let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
+ ctx.emit(Inst::AtomicRmwSeq { ty: ty_access, op });
+
+ // And finally, copy the preordained AtomicRmwSeq output reg to its destination.
+ ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
+ }
+
+ Opcode::AtomicCas => {
+ // This is very similar to, but not identical to, the `AtomicRmw` case. As with
+ // `AtomicRmw`, there's no need to zero-extend narrow values here.
+ let dst = get_output_reg(ctx, outputs[0]);
+ let addr = lower_to_amode(ctx, inputs[0], 0);
+ let expected = put_input_in_reg(ctx, inputs[1]);
+ let replacement = put_input_in_reg(ctx, inputs[2]);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ // Move the expected value into %rax. Because there's only one fixed register on
+ // the input side, we don't have to use `ensure_in_vreg`, as is necessary in the
+ // `AtomicRmw` case.
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::rax()),
+ expected,
+ types::I64,
+ ));
+ ctx.emit(Inst::LockCmpxchg {
+ ty: ty_access,
+ src: replacement,
+ dst: addr.into(),
+ });
+ // And finally, copy the old value at the location to its destination reg.
+ ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
+ }
+
+ Opcode::AtomicLoad => {
+ // This is a normal load. The x86-TSO memory model provides sufficient sequencing
+ // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
+ // need for any fence instructions.
+ let data = get_output_reg(ctx, outputs[0]);
+ let addr = lower_to_amode(ctx, inputs[0], 0);
+ let ty_access = ty.unwrap();
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ let rm = RegMem::mem(addr);
+ if ty_access == types::I64 {
+ ctx.emit(Inst::mov64_rm_r(rm, data));
+ } else {
+ let ext_mode = ExtMode::new(ty_access.bits(), 64).expect(&format!(
+ "invalid extension during AtomicLoad: {} -> {}",
+ ty_access.bits(),
+ 64
+ ));
+ ctx.emit(Inst::movzx_rm_r(ext_mode, rm, data));
+ }
+ }
+
+ Opcode::AtomicStore => {
+ // This is a normal store, followed by an `mfence` instruction.
+ let data = put_input_in_reg(ctx, inputs[0]);
+ let addr = lower_to_amode(ctx, inputs[1], 0);
+ let ty_access = ctx.input_ty(insn, 0);
+ assert!(is_valid_atomic_transaction_ty(ty_access));
+
+ ctx.emit(Inst::mov_r_m(ty_access.bytes() as u8, data, addr));
+ ctx.emit(Inst::Fence {
+ kind: FenceKind::MFence,
+ });
+ }
+
+ Opcode::Fence => {
+ ctx.emit(Inst::Fence {
+ kind: FenceKind::MFence,
+ });
+ }
+
+ Opcode::FuncAddr => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let (extname, _) = ctx.call_target(insn).unwrap();
+ let extname = extname.clone();
+ ctx.emit(Inst::LoadExtName {
+ dst,
+ name: Box::new(extname),
+ offset: 0,
+ });
+ }
+
+ Opcode::SymbolValue => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let (extname, _, offset) = ctx.symbol_value(insn).unwrap();
+ let extname = extname.clone();
+ ctx.emit(Inst::LoadExtName {
+ dst,
+ name: Box::new(extname),
+ offset,
+ });
+ }
+
+ Opcode::StackAddr => {
+ let (stack_slot, offset) = match *ctx.data(insn) {
+ InstructionData::StackLoad {
+ opcode: Opcode::StackAddr,
+ stack_slot,
+ offset,
+ } => (stack_slot, offset),
+ _ => unreachable!(),
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+ let offset: i32 = offset.into();
+ let inst = ctx
+ .abi()
+ .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
+ ctx.emit(inst);
+ }
+
+ Opcode::Select => {
+ let flag_input = inputs[0];
+ if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
+ let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
+
+ // For equal, we flip the operands, because we can't test a conjunction of
+ // CPU flags with a single cmove; see InvertedEqualOrConditions doc comment.
+ let (lhs_input, rhs_input) = match cond_code {
+ FloatCC::Equal => (inputs[2], inputs[1]),
+ _ => (inputs[1], inputs[2]),
+ };
+
+ let ty = ctx.output_ty(insn, 0);
+ let rhs = put_input_in_reg(ctx, rhs_input);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let lhs = if is_int_or_ref_ty(ty) && ty.bytes() < 4 {
+ // Special case: since the higher bits are undefined per CLIF semantics, we
+ // can just apply a 32-bit cmove here. Force inputs into registers, to
+ // avoid partial spilling out-of-bounds with memory accesses, though.
+ // Sign-extend operands to 32, then do a cmove of size 4.
+ RegMem::reg(put_input_in_reg(ctx, lhs_input))
+ } else {
+ input_to_reg_mem(ctx, lhs_input)
+ };
+
+ // We request inversion of Equal to NotEqual here: taking LHS if equal would mean
+ // take it if both CC::NP and CC::Z are set, the conjunction of which can't be
+ // modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
+ // select operation, and invert the equal to a not-equal here.
+ let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
+
+ if let FcmpCondResult::InvertedEqualOrConditions(_, _) = &fcmp_results {
+ // Keep this sync'd with the lowering of the select inputs above.
+ assert_eq!(cond_code, FloatCC::Equal);
+ }
+
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+
+ match fcmp_results {
+ FcmpCondResult::Condition(cc) => {
+ if is_int_or_ref_ty(ty) {
+ let size = u8::max(ty.bytes() as u8, 4);
+ ctx.emit(Inst::cmove(size, cc, lhs, dst));
+ } else {
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
+ }
+ }
+ FcmpCondResult::AndConditions(_, _) => {
+ unreachable!(
+ "can't AND with select; see above comment about inverting equal"
+ );
+ }
+ FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
+ | FcmpCondResult::OrConditions(cc1, cc2) => {
+ if is_int_or_ref_ty(ty) {
+ let size = u8::max(ty.bytes() as u8, 4);
+ ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst));
+ ctx.emit(Inst::cmove(size, cc2, lhs, dst));
+ } else {
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc1, lhs.clone(), dst));
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc2, lhs, dst));
+ }
+ }
+ }
+ } else {
+ let ty = ty.unwrap();
+
+ let mut size = ty.bytes() as u8;
+ let lhs = if is_int_or_ref_ty(ty) {
+ if size < 4 {
+ // Special case: since the higher bits are undefined per CLIF semantics, we
+ // can just apply a 32-bit cmove here. Force inputs into registers, to
+ // avoid partial spilling out-of-bounds with memory accesses, though.
+ size = 4;
+ RegMem::reg(put_input_in_reg(ctx, inputs[1]))
+ } else {
+ input_to_reg_mem(ctx, inputs[1])
+ }
+ } else {
+ input_to_reg_mem(ctx, inputs[1])
+ };
+
+ let rhs = put_input_in_reg(ctx, inputs[2]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
+ emit_cmp(ctx, icmp);
+ let cond_code = ctx.data(icmp).cond_code().unwrap();
+ CC::from_intcc(cond_code)
+ } else {
+ // The input is a boolean value; compare it against zero.
+ let size = ctx.input_ty(insn, 0).bytes() as u8;
+ let test = put_input_in_reg(ctx, flag_input);
+ ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
+ CC::NZ
+ };
+
+ // This doesn't affect the flags.
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+
+ if is_int_or_ref_ty(ty) {
+ ctx.emit(Inst::cmove(size, cc, lhs, dst));
+ } else {
+ debug_assert!(ty == types::F32 || ty == types::F64);
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
+ }
+ }
+ }
+
+ Opcode::Selectif | Opcode::SelectifSpectreGuard => {
+ let lhs = input_to_reg_mem(ctx, inputs[1]);
+ let rhs = put_input_in_reg(ctx, inputs[2]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ctx.output_ty(insn, 0);
+
+ // Verification ensures that the input is always a single-def ifcmp.
+ let cmp_insn = ctx
+ .get_input(inputs[0].insn, inputs[0].input)
+ .inst
+ .unwrap()
+ .0;
+ debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
+ emit_cmp(ctx, cmp_insn);
+
+ let cc = CC::from_intcc(ctx.data(insn).cond_code().unwrap());
+
+ if is_int_or_ref_ty(ty) {
+ let size = ty.bytes() as u8;
+ if size == 1 {
+ // Sign-extend operands to 32, then do a cmove of size 4.
+ let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32);
+ ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se));
+ ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst));
+ ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
+ } else {
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ ctx.emit(Inst::cmove(size, cc, lhs, dst));
+ }
+ } else {
+ debug_assert!(ty == types::F32 || ty == types::F64);
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
+ }
+ }
+
+ Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
+ let kind = match op {
+ Opcode::Udiv => DivOrRemKind::UnsignedDiv,
+ Opcode::Sdiv => DivOrRemKind::SignedDiv,
+ Opcode::Urem => DivOrRemKind::UnsignedRem,
+ Opcode::Srem => DivOrRemKind::SignedRem,
+ _ => unreachable!(),
+ };
+ let is_div = kind.is_div();
+
+ let input_ty = ctx.input_ty(insn, 0);
+ let size = input_ty.bytes() as u8;
+
+ let dividend = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::rax()),
+ dividend,
+ input_ty,
+ ));
+
+ if flags.avoid_div_traps() {
+ // A vcode meta-instruction is used to lower the inline checks, since they embed
+ // pc-relative offsets that must not change, thus requiring regalloc to not
+ // interfere by introducing spills and reloads.
+ //
+ // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
+ // regalloc is aware of the coalescing opportunity between rax/rdx and the
+ // destination register.
+ let divisor = put_input_in_reg(ctx, inputs[1]);
+
+ let divisor_copy = ctx.alloc_tmp(RegClass::I64, types::I64);
+ ctx.emit(Inst::gen_move(divisor_copy, divisor, types::I64));
+
+ let tmp = if op == Opcode::Sdiv && size == 8 {
+ Some(ctx.alloc_tmp(RegClass::I64, types::I64))
+ } else {
+ None
+ };
+ // TODO use xor
+ ctx.emit(Inst::imm(
+ OperandSize::Size32,
+ 0,
+ Writable::from_reg(regs::rdx()),
+ ));
+ ctx.emit(Inst::checked_div_or_rem_seq(kind, size, divisor_copy, tmp));
+ } else {
+ let divisor = input_to_reg_mem(ctx, inputs[1]);
+
+ // Fill in the high parts:
+ if kind.is_signed() {
+ // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
+ // signed opcodes.
+ ctx.emit(Inst::sign_extend_data(size));
+ } else if input_ty == types::I8 {
+ ctx.emit(Inst::movzx_rm_r(
+ ExtMode::BL,
+ RegMem::reg(regs::rax()),
+ Writable::from_reg(regs::rax()),
+ ));
+ } else {
+ // zero for unsigned opcodes.
+ ctx.emit(Inst::imm(
+ OperandSize::Size64,
+ 0,
+ Writable::from_reg(regs::rdx()),
+ ));
+ }
+
+ // Emit the actual idiv.
+ ctx.emit(Inst::div(size, kind.is_signed(), divisor));
+ }
+
+ // Move the result back into the destination reg.
+ if is_div {
+ // The quotient is in rax.
+ ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
+ } else {
+ // The remainder is in rdx.
+ ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
+ }
+ }
+
+ Opcode::Umulhi | Opcode::Smulhi => {
+ let input_ty = ctx.input_ty(insn, 0);
+ let size = input_ty.bytes() as u8;
+
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = input_to_reg_mem(ctx, inputs[1]);
+ let dst = get_output_reg(ctx, outputs[0]);
+
+ // Move lhs in %rax.
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::rax()),
+ lhs,
+ input_ty,
+ ));
+
+ // Emit the actual mul or imul.
+ let signed = op == Opcode::Smulhi;
+ ctx.emit(Inst::mul_hi(size, signed, rhs));
+
+ // Read the result from the high part (stored in %rdx).
+ ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
+ }
+
+ Opcode::GetPinnedReg => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64));
+ }
+
+ Opcode::SetPinnedReg => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ ctx.emit(Inst::gen_move(
+ Writable::from_reg(regs::pinned_reg()),
+ src,
+ types::I64,
+ ));
+ }
+
+ Opcode::Vconst => {
+ let used_constant = if let &InstructionData::UnaryConst {
+ constant_handle, ..
+ } = ctx.data(insn)
+ {
+ ctx.use_constant(VCodeConstantData::Pool(
+ constant_handle,
+ ctx.get_constant_data(constant_handle).clone(),
+ ))
+ } else {
+ unreachable!("vconst should always have unary_const format")
+ };
+ // TODO use Inst::gen_constant() instead.
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::xmm_load_const(used_constant, dst, ty));
+ }
+
+ Opcode::RawBitcast => {
+ // A raw_bitcast is just a mechanism for correcting the type of V128 values (see
+ // https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
+ // instruction should emit no machine code but a move is necessary to give the register
+ // allocator a definition for the output virtual register.
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let dst = get_output_reg(ctx, outputs[0]);
+ let ty = ty.unwrap();
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ }
+
+ Opcode::Shuffle => {
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let lhs_ty = ctx.input_ty(insn, 0);
+ let lhs = put_input_in_reg(ctx, inputs[0]);
+ let rhs = put_input_in_reg(ctx, inputs[1]);
+ let mask = match ctx.get_immediate(insn) {
+ Some(DataValue::V128(bytes)) => bytes.to_vec(),
+ _ => unreachable!("shuffle should always have a 16-byte immediate"),
+ };
+
+ // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a
+ // 1 in the most significant position zeroes the lane.
+ let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
+
+ ctx.emit(Inst::gen_move(dst, rhs, ty));
+ if rhs == lhs {
+ // If `lhs` and `rhs` are the same we can use a single PSHUFB to shuffle the XMM
+ // register. We statically build `constructed_mask` to zero out any unknown lane
+ // indices (this may not be completely necessary: verification could reject incorrect
+ // mask values) and fix the indexes to all point to the `dst` vector.
+ let constructed_mask = mask
+ .iter()
+ // If the mask is greater than 15 it still may be referring to a lane in b.
+ .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
+ .map(zero_unknown_lane_index)
+ .collect();
+ let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
+ let tmp = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ ctx.emit(Inst::xmm_load_const(constant, tmp, ty));
+ // After loading the constructed mask in a temporary register, we use this to
+ // shuffle the `dst` register (remember that, in this case, it is the same as
+ // `src` so we disregard this register).
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst));
+ } else {
+ // If `lhs` and `rhs` are different, we must shuffle each separately and then OR
+ // them together. This is necessary due to PSHUFB semantics. As in the case above,
+ // we build the `constructed_mask` for each case statically.
+
+ // PSHUFB the `lhs` argument into `tmp0`, placing zeroes for unused lanes.
+ let tmp0 = ctx.alloc_tmp(RegClass::V128, lhs_ty);
+ ctx.emit(Inst::gen_move(tmp0, lhs, lhs_ty));
+ let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
+ let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
+ let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ ctx.emit(Inst::xmm_load_const(constant, tmp1, ty));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp1), tmp0));
+
+ // PSHUFB the second argument, placing zeroes for unused lanes.
+ let constructed_mask = mask
+ .iter()
+ .map(|b| b.wrapping_sub(16))
+ .map(zero_unknown_lane_index)
+ .collect();
+ let constant = ctx.use_constant(VCodeConstantData::Generated(constructed_mask));
+ let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ ctx.emit(Inst::xmm_load_const(constant, tmp2, ty));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp2), dst));
+
+ // OR the shuffled registers (the mechanism and lane-size for OR-ing the registers
+ // is not important).
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Orps, RegMem::from(tmp0), dst));
+
+ // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
+ }
+ }
+
+ Opcode::Swizzle => {
+            // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec
+            // requiring mask indexes greater than 15 to produce a zero in the corresponding
+            // output lane. For the spec discussion, see
+            // https://github.com/WebAssembly/simd/issues/93. The CLIF semantics match the Wasm
+            // SIMD semantics for this instruction.
+            // The instruction format maps to variables like: %dst = swizzle %src, %mask
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let swizzle_mask = put_input_in_reg(ctx, inputs[1]);
+
+ // Inform the register allocator that `src` and `dst` should be in the same register.
+ ctx.emit(Inst::gen_move(dst, src, ty));
+
+ // Create a mask for zeroing out-of-bounds lanes of the swizzle mask.
+ let zero_mask = ctx.alloc_tmp(RegClass::V128, types::I8X16);
+ static ZERO_MASK_VALUE: [u8; 16] = [
+ 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
+ 0x70, 0x70,
+ ];
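+            // PADDUSB (unsigned saturating add) of 0x70 to each mask byte maps valid indices
+            // 0..=15 into 0x70..=0x7F, leaving the top bit clear and the low nibble intact so
+            // PSHUFB still selects the intended lane, while any out-of-range index (>= 16) ends
+            // up with the top bit set (saturating at 0xFF), which PSHUFB treats as "zero this lane".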
+ let constant = ctx.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE));
+ ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
+
+ // Use the `zero_mask` on a writable `swizzle_mask`.
+ let swizzle_mask = Writable::from_reg(swizzle_mask);
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Paddusb,
+ RegMem::from(zero_mask),
+ swizzle_mask,
+ ));
+
+ // Shuffle `dst` using the fixed-up `swizzle_mask`.
+ ctx.emit(Inst::xmm_rm_r(
+ SseOpcode::Pshufb,
+ RegMem::from(swizzle_mask),
+ dst,
+ ));
+ }
+
+ Opcode::Insertlane => {
+ // The instruction format maps to variables like: %dst = insertlane %in_vec, %src, %lane
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let in_vec = put_input_in_reg(ctx, inputs[0]);
+ let src_ty = ctx.input_ty(insn, 1);
+ debug_assert!(!src_ty.is_vector());
+ let src = input_to_reg_mem(ctx, inputs[1]);
+ let lane = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
+ *imm
+ } else {
+ unreachable!();
+ };
+ debug_assert!(lane < ty.lane_count() as u8);
+
+ ctx.emit(Inst::gen_move(dst, in_vec, ty));
+ emit_insert_lane(ctx, src, dst, lane, ty.lane_type());
+ }
+
+ Opcode::Extractlane => {
+ // The instruction format maps to variables like: %dst = extractlane %src, %lane
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert_eq!(src_ty.bits(), 128);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let lane = if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(insn) {
+ *imm
+ } else {
+ unreachable!();
+ };
+ debug_assert!(lane < src_ty.lane_count() as u8);
+
+ if !ty.is_float() {
+ let (sse_op, w_bit) = match ty.lane_bits() {
+ 8 => (SseOpcode::Pextrb, false),
+ 16 => (SseOpcode::Pextrw, false),
+ 32 => (SseOpcode::Pextrd, false),
+ 64 => (SseOpcode::Pextrd, true),
+ _ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()),
+ };
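+                // Setting `w_bit` sets REX.W on the encoding, turning the 32-bit PEXTRD into its
+                // 64-bit form, PEXTRQ.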
+ let src = RegMem::reg(src);
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit));
+ } else {
+ if lane == 0 {
+                    // Lane 0 needs no shuffle: the float is already in the low lane, so a plain
+                    // move suffices. The upper bits will remain unchanged; for correctness, this
+                    // relies on Cranelift type checking to avoid using those bits.
+ ctx.emit(Inst::gen_move(dst, src, ty));
+ } else {
+ // Otherwise, shuffle the bits in `lane` to the lowest lane.
+ let sse_op = SseOpcode::Pshufd;
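+                    // PSHUFD's immediate holds four 2-bit fields; field i selects which 32-bit
+                    // source lane is copied into destination lane i.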
+ let mask = match src_ty {
+ // Move the value at `lane` to lane 0, copying existing value at lane 0 to
+ // other lanes. Again, this relies on Cranelift type checking to avoid
+ // using those bits.
+ types::F32X4 => 0b00_00_00_00 | lane,
+                        // Move the value at `lane` (which must be 1, given the `if` statement
+                        // above) to lane 0 and leave lane 1 unchanged. The Cranelift type checking
+                        // assumption also applies here.
+ types::F64X2 => 0b11_10_11_10,
+ _ => unreachable!(),
+ };
+ let src = RegMem::reg(src);
+ ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false));
+ }
+ }
+ }
+
+ Opcode::Splat | Opcode::LoadSplat => {
+ let ty = ty.unwrap();
+ assert_eq!(ty.bits(), 128);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert!(src_ty.bits() < 128);
+
+ let src = match op {
+ Opcode::Splat => input_to_reg_mem(ctx, inputs[0]),
+ Opcode::LoadSplat => {
+ let offset = ctx.data(insn).load_store_offset().unwrap();
+ let amode = lower_to_amode(ctx, inputs[0], offset);
+ RegMem::mem(amode)
+ }
+ _ => unreachable!(),
+ };
+ let dst = get_output_reg(ctx, outputs[0]);
+
+            // Splat will overwrite every lane of `dst`, but it takes several instructions to do
+            // so; with a multi-instruction sequence there is no good way to declare `dst` a `def`
+            // except with the following pseudo-instruction.
+ ctx.emit(Inst::xmm_uninit_value(dst));
+
+ // TODO: eventually many of these sequences could be optimized with AVX's VBROADCAST*
+ // and VPBROADCAST*.
+ match ty.lane_bits() {
+ 8 => {
+ emit_insert_lane(ctx, src, dst, 0, ty.lane_type());
+ // Initialize a register with all 0s.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
+ // Shuffle the lowest byte lane to all other lanes.
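+                    // (A PSHUFB mask of all zeroes selects byte 0 of the source for every
+                    // destination lane, broadcasting the inserted byte across the vector.)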
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst))
+ }
+ 16 => {
+ emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type());
+ emit_insert_lane(ctx, src, dst, 1, ty.lane_type());
+ // Shuffle the lowest two lanes to all other lanes.
+ ctx.emit(Inst::xmm_rm_r_imm(
+ SseOpcode::Pshufd,
+ RegMem::from(dst),
+ dst,
+ 0,
+ false,
+ ))
+ }
+ 32 => {
+ emit_insert_lane(ctx, src, dst, 0, ty.lane_type());
+ // Shuffle the lowest lane to all other lanes.
+ ctx.emit(Inst::xmm_rm_r_imm(
+ SseOpcode::Pshufd,
+ RegMem::from(dst),
+ dst,
+ 0,
+ false,
+ ))
+ }
+ 64 => {
+ emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type());
+ emit_insert_lane(ctx, src, dst, 1, ty.lane_type());
+ }
+ _ => panic!("Invalid type to splat: {}", ty),
+ }
+ }
+
+ Opcode::VanyTrue => {
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert_eq!(src_ty.bits(), 128);
+ let src = put_input_in_reg(ctx, inputs[0]);
+ // Set the ZF if the result is all zeroes.
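+            // (PTEST ANDs its two operands and sets ZF when the result is zero; with the same
+            // register for both operands, ZF is set exactly when the whole vector is zero.)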
+ ctx.emit(Inst::xmm_cmp_rm_r(SseOpcode::Ptest, RegMem::reg(src), src));
+ // If the ZF is not set, place a 1 in `dst`.
+ ctx.emit(Inst::setcc(CC::NZ, dst));
+ }
+
+ Opcode::VallTrue => {
+ let ty = ty.unwrap();
+ let dst = get_output_reg(ctx, outputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ assert_eq!(src_ty.bits(), 128);
+ let src = input_to_reg_mem(ctx, inputs[0]);
+
+ let eq = |ty: Type| match ty.lane_bits() {
+ 8 => SseOpcode::Pcmpeqb,
+ 16 => SseOpcode::Pcmpeqw,
+ 32 => SseOpcode::Pcmpeqd,
+ 64 => SseOpcode::Pcmpeqq,
+ _ => panic!("Unable to find an instruction for {} for type: {}", op, ty),
+ };
+
+ // Initialize a register with all 0s.
+ let tmp = ctx.alloc_tmp(RegClass::V128, ty);
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
+            // Compare `src` against the zeroed register: lanes of `tmp` become all 1s exactly
+            // where the corresponding lane of `src` is zero.
+ ctx.emit(Inst::xmm_rm_r(eq(src_ty), src, tmp));
+ // Set the ZF if the result is all zeroes.
+ ctx.emit(Inst::xmm_cmp_rm_r(
+ SseOpcode::Ptest,
+ RegMem::from(tmp),
+ tmp.to_reg(),
+ ));
+ // If the ZF is set, place a 1 in `dst`.
+ ctx.emit(Inst::setcc(CC::Z, dst));
+ }
+
+ Opcode::VhighBits => {
+ let src = put_input_in_reg(ctx, inputs[0]);
+ let src_ty = ctx.input_ty(insn, 0);
+ debug_assert!(src_ty.is_vector() && src_ty.bits() == 128);
+ let dst = get_output_reg(ctx, outputs[0]);
+ debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+
+ // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for
+ // the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode,
+ // the instruction can access additional registers when used with a REX.R prefix. The
+ // default operand size is 64-bit in 64-bit mode" (PMOVMSKB in IA Software Development
+ // Manual, vol. 2). This being the case, we will always clear REX.W since its use is
+ // unnecessary (`OperandSize` is used for setting/clearing REX.W).
+ let size = OperandSize::Size32;
+
+ match src_ty {
+ types::I8X16 | types::B8X16 => {
+ ctx.emit(Inst::xmm_to_gpr(SseOpcode::Pmovmskb, src, dst, size))
+ }
+ types::I32X4 | types::B32X4 | types::F32X4 => {
+ ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movmskps, src, dst, size))
+ }
+ types::I64X2 | types::B64X2 | types::F64X2 => {
+ ctx.emit(Inst::xmm_to_gpr(SseOpcode::Movmskpd, src, dst, size))
+ }
+ types::I16X8 | types::B16X8 => {
+ // There is no x86 instruction for extracting the high bit of 16-bit lanes so
+ // here we:
+ // - duplicate the 16-bit lanes of `src` into 8-bit lanes:
+ // PACKSSWB([x1, x2, ...], [x1, x2, ...]) = [x1', x2', ..., x1', x2', ...]
+ // - use PMOVMSKB to gather the high bits; now we have duplicates, though
+ // - shift away the bottom 8 high bits to remove the duplicates.
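+                    // Signed saturation preserves each word's sign bit, so PMOVMSKB on the
+                    // packed result yields the 8 desired bits twice; the shift below discards
+                    // the redundant copy.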
+ let tmp = ctx.alloc_tmp(RegClass::V128, src_ty);
+ ctx.emit(Inst::gen_move(tmp, src, src_ty));
+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(src), tmp));
+ ctx.emit(Inst::xmm_to_gpr(
+ SseOpcode::Pmovmskb,
+ tmp.to_reg(),
+ dst,
+ size,
+ ));
+ ctx.emit(Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(8), dst));
+ }
+ _ => unimplemented!("unknown input type {} for {}", src_ty, op),
+ }
+ }
+
+ Opcode::IaddImm
+ | Opcode::ImulImm
+ | Opcode::UdivImm
+ | Opcode::SdivImm
+ | Opcode::UremImm
+ | Opcode::SremImm
+ | Opcode::IrsubImm
+ | Opcode::IaddCin
+ | Opcode::IaddIfcin
+ | Opcode::IaddCout
+ | Opcode::IaddCarry
+ | Opcode::IaddIfcarry
+ | Opcode::IsubBin
+ | Opcode::IsubIfbin
+ | Opcode::IsubBout
+ | Opcode::IsubIfbout
+ | Opcode::IsubBorrow
+ | Opcode::IsubIfborrow
+ | Opcode::BandImm
+ | Opcode::BorImm
+ | Opcode::BxorImm
+ | Opcode::RotlImm
+ | Opcode::RotrImm
+ | Opcode::IshlImm
+ | Opcode::UshrImm
+ | Opcode::SshrImm => {
+ panic!("ALU+imm and ALU+carry ops should not appear here!");
+ }
+ _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
+ }
+
+ Ok(())
+}
+
+//=============================================================================
+// Lowering-backend trait implementation.
+
+impl LowerBackend for X64Backend {
+ type MInst = Inst;
+
+ fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
+ lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.triple)
+ }
+
+ fn lower_branch_group<C: LowerCtx<I = Inst>>(
+ &self,
+ ctx: &mut C,
+ branches: &[IRInst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+ ) -> CodegenResult<()> {
+ // A block should end with at most two branches. The first may be a
+ // conditional branch; a conditional branch can be followed only by an
+ // unconditional branch or fallthrough. Otherwise, if only one branch,
+ // it may be an unconditional branch, a fallthrough, a return, or a
+ // trap. These conditions are verified by `is_ebb_basic()` during the
+ // verifier pass.
+ assert!(branches.len() <= 2);
+
+ if branches.len() == 2 {
+ // Must be a conditional branch followed by an unconditional branch.
+ let op0 = ctx.data(branches[0]).opcode();
+ let op1 = ctx.data(branches[1]).opcode();
+
+ trace!(
+ "lowering two-branch group: opcodes are {:?} and {:?}",
+ op0,
+ op1
+ );
+ assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
+
+ let taken = targets[0];
+ let not_taken = match op1 {
+ Opcode::Jump => targets[1],
+ Opcode::Fallthrough => fallthrough.unwrap(),
+ _ => unreachable!(), // assert above.
+ };
+
+ match op0 {
+ Opcode::Brz | Opcode::Brnz => {
+ let flag_input = InsnInput {
+ insn: branches[0],
+ input: 0,
+ };
+
+ let src_ty = ctx.input_ty(branches[0], 0);
+
+ if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
+ emit_cmp(ctx, icmp);
+
+ let cond_code = ctx.data(icmp).cond_code().unwrap();
+ let cond_code = if op0 == Opcode::Brz {
+ cond_code.inverse()
+ } else {
+ cond_code
+ };
+
+ let cc = CC::from_intcc(cond_code);
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ } else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
+ let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
+ let cond_code = if op0 == Opcode::Brz {
+ cond_code.inverse()
+ } else {
+ cond_code
+ };
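+                            // `emit_fcmp` may decompose the comparison into two condition codes:
+                            // `AndConditions` means the branch is taken only when both hold, so
+                            // jump to `not_taken` as soon as either inverted condition holds;
+                            // `OrConditions` means it is taken when either one holds.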
+ match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) {
+ FcmpCondResult::Condition(cc) => {
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ }
+ FcmpCondResult::AndConditions(cc1, cc2) => {
+ ctx.emit(Inst::jmp_if(cc1.invert(), not_taken));
+ ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken));
+ }
+ FcmpCondResult::OrConditions(cc1, cc2) => {
+ ctx.emit(Inst::jmp_if(cc1, taken));
+ ctx.emit(Inst::jmp_cond(cc2, taken, not_taken));
+ }
+ FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+ }
+ } else if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
+ let src = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ );
+ let cc = match op0 {
+ Opcode::Brz => CC::Z,
+ Opcode::Brnz => CC::NZ,
+ _ => unreachable!(),
+ };
+ let size_bytes = src_ty.bytes() as u8;
+ ctx.emit(Inst::cmp_rmi_r(size_bytes, RegMemImm::imm(0), src));
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ } else {
+ unimplemented!("brz/brnz with non-int type {:?}", src_ty);
+ }
+ }
+
+ Opcode::BrIcmp => {
+ let src_ty = ctx.input_ty(branches[0], 0);
+ if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
+ let lhs = put_input_in_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ );
+ let rhs = input_to_reg_mem_imm(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 1,
+ },
+ );
+ let cc = CC::from_intcc(ctx.data(branches[0]).cond_code().unwrap());
+ let byte_size = src_ty.bytes() as u8;
+ // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
+ // us dst - src at the machine instruction level, so invert operands.
+ ctx.emit(Inst::cmp_rmi_r(byte_size, rhs, lhs));
+ ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
+ } else {
+ unimplemented!("bricmp with non-int type {:?}", src_ty);
+ }
+ }
+
+ _ => panic!("unexpected branch opcode: {:?}", op0),
+ }
+ } else {
+ assert_eq!(branches.len(), 1);
+
+ // Must be an unconditional branch or trap.
+ let op = ctx.data(branches[0]).opcode();
+ match op {
+ Opcode::Jump | Opcode::Fallthrough => {
+ ctx.emit(Inst::jmp_known(targets[0]));
+ }
+
+ Opcode::BrTable => {
+ let jt_size = targets.len() - 1;
+ assert!(jt_size <= u32::max_value() as usize);
+ let jt_size = jt_size as u32;
+
+ let idx = extend_input_to_reg(
+ ctx,
+ InsnInput {
+ insn: branches[0],
+ input: 0,
+ },
+ ExtSpec::ZeroExtendTo32,
+ );
+
+ // Bounds-check (compute flags from idx - jt_size) and branch to default.
+ ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx));
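+                    // The flags computed here are consumed by the `JmpTableSeq` pseudo-instruction
+                    // below, which branches to `default_target` when the index is out of range.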
+
+ // Emit the compound instruction that does:
+ //
+ // lea $jt, %rA
+ // movsbl [%rA, %rIndex, 2], %rB
+ // add %rB, %rA
+ // j *%rA
+ // [jt entries]
+ //
+ // This must be *one* instruction in the vcode because we cannot allow regalloc
+ // to insert any spills/fills in the middle of the sequence; otherwise, the
+ // lea PC-rel offset to the jumptable would be incorrect. (The alternative
+ // is to introduce a relocation pass for inlined jumptables, which is much
+ // worse.)
+
+ // This temporary is used as a signed integer of 64-bits (to hold addresses).
+ let tmp1 = ctx.alloc_tmp(RegClass::I64, types::I64);
+ // This temporary is used as a signed integer of 32-bits (for the wasm-table
+ // index) and then 64-bits (address addend). The small lie about the I64 type
+ // is benign, since the temporary is dead after this instruction (and its
+ // Cranelift type is thus unused).
+ let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I64);
+
+ let targets_for_term: Vec<MachLabel> = targets.to_vec();
+ let default_target = targets[0];
+
+ let jt_targets: Vec<MachLabel> = targets.iter().skip(1).cloned().collect();
+
+ ctx.emit(Inst::JmpTableSeq {
+ idx,
+ tmp1,
+ tmp2,
+ default_target,
+ targets: jt_targets,
+ targets_for_term,
+ });
+ }
+
+ _ => panic!("Unknown branch type {:?}", op),
+ }
+ }
+
+ Ok(())
+ }
+
+ fn maybe_pinned_reg(&self) -> Option<Reg> {
+ Some(regs::pinned_reg())
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/mod.rs b/third_party/rust/cranelift-codegen/src/isa/x64/mod.rs
new file mode 100644
index 0000000000..fd4444498d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/mod.rs
@@ -0,0 +1,149 @@
+//! X86_64 Instruction Set Architecture.
+
+use self::inst::EmitInfo;
+
+use super::TargetIsa;
+use crate::ir::{condcodes::IntCC, Function};
+use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
+use crate::isa::Builder as IsaBuilder;
+use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
+use crate::result::CodegenResult;
+use crate::settings::{self as shared_settings, Flags};
+use alloc::boxed::Box;
+use regalloc::{PrettyPrint, RealRegUniverse};
+use target_lexicon::Triple;
+
+mod abi;
+mod inst;
+mod lower;
+mod settings;
+
+/// An X64 backend.
+pub(crate) struct X64Backend {
+ triple: Triple,
+ flags: Flags,
+ x64_flags: x64_settings::Flags,
+ reg_universe: RealRegUniverse,
+}
+
+impl X64Backend {
+ /// Create a new X64 backend with the given (shared) flags.
+ fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self {
+ let reg_universe = create_reg_universe_systemv(&flags);
+ Self {
+ triple,
+ flags,
+ x64_flags,
+ reg_universe,
+ }
+ }
+
+ fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult<VCode<inst::Inst>> {
+ // This performs lowering to VCode, register-allocates the code, computes
+ // block layout and finalizes branches. The result is ready for binary emission.
+ let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone());
+ let abi = Box::new(abi::X64ABICallee::new(&func, flags)?);
+ compile::compile::<Self>(&func, self, abi, emit_info)
+ }
+}
+
+impl MachBackend for X64Backend {
+ fn compile_function(
+ &self,
+ func: &Function,
+ want_disasm: bool,
+ ) -> CodegenResult<MachCompileResult> {
+ let flags = self.flags();
+ let vcode = self.compile_vcode(func, flags.clone())?;
+
+ let buffer = vcode.emit();
+ let buffer = buffer.finish();
+ let frame_size = vcode.frame_size();
+ let unwind_info = vcode.unwind_info()?;
+
+ let disasm = if want_disasm {
+ Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags))))
+ } else {
+ None
+ };
+
+ Ok(MachCompileResult {
+ buffer,
+ frame_size,
+ disasm,
+ unwind_info,
+ })
+ }
+
+ fn flags(&self) -> &Flags {
+ &self.flags
+ }
+
+ fn name(&self) -> &'static str {
+ "x64"
+ }
+
+ fn triple(&self) -> Triple {
+ self.triple.clone()
+ }
+
+ fn reg_universe(&self) -> &RealRegUniverse {
+ &self.reg_universe
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> IntCC {
+ // Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
+ // overflow of an add.
+ IntCC::UnsignedGreaterThanOrEqual
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> IntCC {
+        // Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on
+ // underflow of a subtract (carry is borrow for subtract).
+ IntCC::UnsignedGreaterThanOrEqual
+ }
+
+ #[cfg(feature = "unwind")]
+ fn emit_unwind_info(
+ &self,
+ result: &MachCompileResult,
+ kind: crate::machinst::UnwindInfoKind,
+ ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+ use crate::isa::unwind::UnwindInfo;
+ use crate::machinst::UnwindInfoKind;
+ Ok(match (result.unwind_info.as_ref(), kind) {
+ (Some(info), UnwindInfoKind::SystemV) => {
+ inst::unwind::systemv::create_unwind_info(info.clone())?.map(UnwindInfo::SystemV)
+ }
+ (Some(_info), UnwindInfoKind::Windows) => {
+ //TODO inst::unwind::winx64::create_unwind_info(info.clone())?.map(|u| UnwindInfo::WindowsX64(u))
+ None
+ }
+ _ => None,
+ })
+ }
+
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ Some(inst::unwind::systemv::create_cie())
+ }
+}
+
+/// Create a new `isa::Builder`.
+pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
+ IsaBuilder {
+ triple,
+ setup: x64_settings::builder(),
+ constructor: isa_constructor,
+ }
+}
+
+fn isa_constructor(
+ triple: Triple,
+ shared_flags: Flags,
+ builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+ let isa_flags = x64_settings::Flags::new(&shared_flags, builder);
+ let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
+ Box::new(TargetIsaAdapter::new(backend))
+}
diff --git a/third_party/rust/cranelift-codegen/src/isa/x64/settings.rs b/third_party/rust/cranelift-codegen/src/isa/x64/settings.rs
new file mode 100644
index 0000000000..c5371bb132
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/isa/x64/settings.rs
@@ -0,0 +1,9 @@
+//! x86 Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
+include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));