Diffstat (limited to 'third_party/rust/cranelift-codegen/src/isa/aarch64/inst')
8 files changed, 14022 insertions, 0 deletions
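Among other things, the args.rs hunk below defines BranchTarget, whose as_offset19_or_zero and as_offset26_or_zero helpers turn a resolved byte offset into the 19- or 26-bit signed word-offset field of a branch encoding. As a minimal standalone sketch of that scheme (the encode_pc_rel name and the main harness are illustrative, not part of the crate's API), the byte offset is divided by 4, range-checked against the signed field width, and masked to the field:

    // Standalone sketch, not part of the patch: mirrors the word-offset
    // encoding in BranchTarget::as_offset19_or_zero / as_offset26_or_zero.
    fn encode_pc_rel(offset_bytes: i32, field_bits: u32) -> u32 {
        // AArch64 instructions are 4 bytes, so branch fields hold word offsets.
        let words = offset_bytes >> 2;
        // Signed range check: the field holds values in [-2^(n-1), 2^(n-1) - 1].
        let max = 1i32 << (field_bits - 1);
        assert!(
            words < max && words >= -max,
            "offset out of range for {}-bit field",
            field_bits
        );
        // Mask to the field width (two's-complement for negative offsets).
        (words as u32) & ((1u32 << field_bits) - 1)
    }

    fn main() {
        // A branch 8 bytes forward encodes as a word offset of 2.
        assert_eq!(encode_pc_rel(8, 19), 2);
        // A branch 4 bytes backward becomes the all-ones 26-bit pattern.
        assert_eq!(encode_pc_rel(-4, 26), 0x3ff_ffff);
        println!("ok");
    }

In the emit.rs hunk, helpers such as enc_jump26 and enc_cmpbr then assert that the already-masked field fits its width and splice it into the instruction word.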
diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs new file mode 100644 index 0000000000..7bd181c86b --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/args.rs @@ -0,0 +1,728 @@ +//! AArch64 ISA definitions: instruction arguments. + +// Some variants are never constructed, but we still want them as options in the future. +#![allow(dead_code)] + +use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8}; +use crate::ir::Type; +use crate::isa::aarch64::inst::*; +use crate::machinst::{ty_bits, MachLabel}; + +use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable}; + +use core::convert::Into; +use std::string::String; + +//============================================================================= +// Instruction sub-components: shift and extend descriptors + +/// A shift operator for a register or immediate. +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum ShiftOp { + LSL = 0b00, + LSR = 0b01, + ASR = 0b10, + ROR = 0b11, +} + +impl ShiftOp { + /// Get the encoding of this shift op. + pub fn bits(self) -> u8 { + self as u8 + } +} + +/// A shift operator amount. +#[derive(Clone, Copy, Debug)] +pub struct ShiftOpShiftImm(u8); + +impl ShiftOpShiftImm { + /// Maximum shift for shifted-register operands. + pub const MAX_SHIFT: u64 = 63; + + /// Create a new shiftop shift amount, if possible. + pub fn maybe_from_shift(shift: u64) -> Option<ShiftOpShiftImm> { + if shift <= Self::MAX_SHIFT { + Some(ShiftOpShiftImm(shift as u8)) + } else { + None + } + } + + /// Return the shift amount. + pub fn value(self) -> u8 { + self.0 + } + + /// Mask down to a given number of bits. + pub fn mask(self, bits: u8) -> ShiftOpShiftImm { + ShiftOpShiftImm(self.0 & (bits - 1)) + } +} + +/// A shift operator with an amount, guaranteed to be within range. +#[derive(Clone, Debug)] +pub struct ShiftOpAndAmt { + op: ShiftOp, + shift: ShiftOpShiftImm, +} + +impl ShiftOpAndAmt { + pub fn new(op: ShiftOp, shift: ShiftOpShiftImm) -> ShiftOpAndAmt { + ShiftOpAndAmt { op, shift } + } + + /// Get the shift op. + pub fn op(&self) -> ShiftOp { + self.op + } + + /// Get the shift amount. + pub fn amt(&self) -> ShiftOpShiftImm { + self.shift + } +} + +/// An extend operator for a register. +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +pub enum ExtendOp { + UXTB = 0b000, + UXTH = 0b001, + UXTW = 0b010, + UXTX = 0b011, + SXTB = 0b100, + SXTH = 0b101, + SXTW = 0b110, + SXTX = 0b111, +} + +impl ExtendOp { + /// Encoding of this op. + pub fn bits(self) -> u8 { + self as u8 + } +} + +//============================================================================= +// Instruction sub-components (memory addresses): definitions + +/// A reference to some memory address. +#[derive(Clone, Debug)] +pub enum MemLabel { + /// An address in the code, a constant pool or jumptable, with relative + /// offset from this instruction. This form must be used at emission time; + /// see `memlabel_finalize()` for how other forms are lowered to this one. + PCRel(i32), +} + +/// An addressing mode specified for a load/store operation. +#[derive(Clone, Debug)] +pub enum AMode { + // + // Real ARM64 addressing modes: + // + /// "post-indexed" mode as per AArch64 docs: postincrement reg after address computation. + PostIndexed(Writable<Reg>, SImm9), + /// "pre-indexed" mode as per AArch64 docs: preincrement reg before address computation. 
+ PreIndexed(Writable<Reg>, SImm9), + + // N.B.: RegReg, RegScaled, and RegScaledExtended all correspond to + // what the ISA calls the "register offset" addressing mode. We split out + // several options here for more ergonomic codegen. + /// Register plus register offset. + RegReg(Reg, Reg), + + /// Register plus register offset, scaled by type's size. + RegScaled(Reg, Reg, Type), + + /// Register plus register offset, scaled by type's size, with index sign- or zero-extended + /// first. + RegScaledExtended(Reg, Reg, Type, ExtendOp), + + /// Register plus register offset, with index sign- or zero-extended first. + RegExtended(Reg, Reg, ExtendOp), + + /// Unscaled signed 9-bit immediate offset from reg. + Unscaled(Reg, SImm9), + + /// Scaled (by size of a type) unsigned 12-bit immediate offset from reg. + UnsignedOffset(Reg, UImm12Scaled), + + // + // virtual addressing modes that are lowered at emission time: + // + /// Reference to a "label": e.g., a symbol. + Label(MemLabel), + + /// Arbitrary offset from a register. Converted to generation of large + /// offsets with multiple instructions as necessary during code emission. + RegOffset(Reg, i64, Type), + + /// Offset from the stack pointer. + SPOffset(i64, Type), + + /// Offset from the frame pointer. + FPOffset(i64, Type), + + /// Offset from the "nominal stack pointer", which is where the real SP is + /// just after stack and spill slots are allocated in the function prologue. + /// At emission time, this is converted to `SPOffset` with a fixup added to + /// the offset constant. The fixup is a running value that is tracked as + /// emission iterates through instructions in linear order, and can be + /// adjusted up and down with [Inst::VirtualSPOffsetAdj]. + /// + /// The standard ABI is in charge of handling this (by emitting the + /// adjustment meta-instructions). It maintains the invariant that "nominal + /// SP" is where the actual SP is after the function prologue and before + /// clobber pushes. See the diagram in the documentation for + /// [crate::isa::aarch64::abi](the ABI module) for more details. + NominalSPOffset(i64, Type), +} + +impl AMode { + /// Memory reference using an address in a register. + pub fn reg(reg: Reg) -> AMode { + // Use UnsignedOffset rather than Unscaled to use ldr rather than ldur. + // This also does not use PostIndexed / PreIndexed as they update the register. + AMode::UnsignedOffset(reg, UImm12Scaled::zero(I64)) + } + + /// Memory reference using the sum of two registers as an address. + pub fn reg_plus_reg(reg1: Reg, reg2: Reg) -> AMode { + AMode::RegReg(reg1, reg2) + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address. + pub fn reg_plus_reg_scaled(reg1: Reg, reg2: Reg, ty: Type) -> AMode { + AMode::RegScaled(reg1, reg2, ty) + } + + /// Memory reference using `reg1 + sizeof(ty) * reg2` as an address, with `reg2` sign- or + /// zero-extended as per `op`. + pub fn reg_plus_reg_scaled_extended(reg1: Reg, reg2: Reg, ty: Type, op: ExtendOp) -> AMode { + AMode::RegScaledExtended(reg1, reg2, ty, op) + } + + /// Memory reference to a label: a global function or value, or data in the constant pool. + pub fn label(label: MemLabel) -> AMode { + AMode::Label(label) + } +} + +/// A memory argument to a load/store-pair. 
+#[derive(Clone, Debug)] +pub enum PairAMode { + SignedOffset(Reg, SImm7Scaled), + PreIndexed(Writable<Reg>, SImm7Scaled), + PostIndexed(Writable<Reg>, SImm7Scaled), +} + +//============================================================================= +// Instruction sub-components (conditions, branches and branch targets): +// definitions + +/// Condition for conditional branches. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum Cond { + Eq = 0, + Ne = 1, + Hs = 2, + Lo = 3, + Mi = 4, + Pl = 5, + Vs = 6, + Vc = 7, + Hi = 8, + Ls = 9, + Ge = 10, + Lt = 11, + Gt = 12, + Le = 13, + Al = 14, + Nv = 15, +} + +impl Cond { + /// Return the inverted condition. + pub fn invert(self) -> Cond { + match self { + Cond::Eq => Cond::Ne, + Cond::Ne => Cond::Eq, + + Cond::Hs => Cond::Lo, + Cond::Lo => Cond::Hs, + + Cond::Mi => Cond::Pl, + Cond::Pl => Cond::Mi, + + Cond::Vs => Cond::Vc, + Cond::Vc => Cond::Vs, + + Cond::Hi => Cond::Ls, + Cond::Ls => Cond::Hi, + + Cond::Ge => Cond::Lt, + Cond::Lt => Cond::Ge, + + Cond::Gt => Cond::Le, + Cond::Le => Cond::Gt, + + Cond::Al => Cond::Nv, + Cond::Nv => Cond::Al, + } + } + + /// Return the machine encoding of this condition. + pub fn bits(self) -> u32 { + self as u32 + } +} + +/// The kind of conditional branch: the common-case-optimized "reg-is-zero" / +/// "reg-is-nonzero" variants, or the generic one that tests the machine +/// condition codes. +#[derive(Clone, Copy, Debug)] +pub enum CondBrKind { + /// Condition: given register is zero. + Zero(Reg), + /// Condition: given register is nonzero. + NotZero(Reg), + /// Condition: the given condition-code test is true. + Cond(Cond), +} + +impl CondBrKind { + /// Return the inverted branch condition. + pub fn invert(self) -> CondBrKind { + match self { + CondBrKind::Zero(reg) => CondBrKind::NotZero(reg), + CondBrKind::NotZero(reg) => CondBrKind::Zero(reg), + CondBrKind::Cond(c) => CondBrKind::Cond(c.invert()), + } + } +} + +/// A branch target. Either unresolved (basic-block index) or resolved (offset +/// from end of current instruction). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum BranchTarget { + /// An unresolved reference to a Label, as passed into + /// `lower_branch_group()`. + Label(MachLabel), + /// A fixed PC offset. + ResolvedOffset(i32), +} + +impl BranchTarget { + /// Return the target's label, if it is a label-based target. + pub fn as_label(self) -> Option<MachLabel> { + match self { + BranchTarget::Label(l) => Some(l), + _ => None, + } + } + + /// Return the target's offset, if specified, or zero if label-based. + pub fn as_offset19_or_zero(self) -> u32 { + let off = match self { + BranchTarget::ResolvedOffset(off) => off >> 2, + _ => 0, + }; + assert!(off <= 0x3ffff); + assert!(off >= -0x40000); + (off as u32) & 0x7ffff + } + + /// Return the target's offset, if specified, or zero if label-based. 
+ pub fn as_offset26_or_zero(self) -> u32 { + let off = match self { + BranchTarget::ResolvedOffset(off) => off >> 2, + _ => 0, + }; + assert!(off <= 0x1ffffff); + assert!(off >= -0x2000000); + (off as u32) & 0x3ffffff + } +} + +impl PrettyPrint for ShiftOpAndAmt { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{:?} {}", self.op(), self.amt().value()) + } +} + +impl PrettyPrint for ExtendOp { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{:?}", self) + } +} + +impl PrettyPrint for MemLabel { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &MemLabel::PCRel(off) => format!("pc+{}", off), + } + } +} + +fn shift_for_type(ty: Type) -> usize { + match ty.bytes() { + 1 => 0, + 2 => 1, + 4 => 2, + 8 => 3, + 16 => 4, + _ => panic!("unknown type: {}", ty), + } +} + +impl PrettyPrint for AMode { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &AMode::Unscaled(reg, simm9) => { + if simm9.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &AMode::UnsignedOffset(reg, uimm12) => { + if uimm12.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &AMode::RegReg(r1, r2) => { + format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),) + } + &AMode::RegScaled(r1, r2, ty) => { + let shift = shift_for_type(ty); + format!( + "[{}, {}, LSL #{}]", + r1.show_rru(mb_rru), + r2.show_rru(mb_rru), + shift, + ) + } + &AMode::RegScaledExtended(r1, r2, ty, op) => { + let shift = shift_for_type(ty); + let size = match op { + ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, + _ => OperandSize::Size64, + }; + let op = op.show_rru(mb_rru); + format!( + "[{}, {}, {} #{}]", + r1.show_rru(mb_rru), + show_ireg_sized(r2, mb_rru, size), + op, + shift + ) + } + &AMode::RegExtended(r1, r2, op) => { + let size = match op { + ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, + _ => OperandSize::Size64, + }; + let op = op.show_rru(mb_rru); + format!( + "[{}, {}, {}]", + r1.show_rru(mb_rru), + show_ireg_sized(r2, mb_rru, size), + op, + ) + } + &AMode::Label(ref label) => label.show_rru(mb_rru), + &AMode::PreIndexed(r, simm9) => format!( + "[{}, {}]!", + r.to_reg().show_rru(mb_rru), + simm9.show_rru(mb_rru) + ), + &AMode::PostIndexed(r, simm9) => format!( + "[{}], {}", + r.to_reg().show_rru(mb_rru), + simm9.show_rru(mb_rru) + ), + // Eliminated by `mem_finalize()`. + &AMode::SPOffset(..) + | &AMode::FPOffset(..) + | &AMode::NominalSPOffset(..) + | &AMode::RegOffset(..) 
=> { + panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!") + } + } + } +} + +impl PrettyPrint for PairAMode { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &PairAMode::SignedOffset(reg, simm7) => { + if simm7.value != 0 { + format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru)) + } else { + format!("[{}]", reg.show_rru(mb_rru)) + } + } + &PairAMode::PreIndexed(reg, simm7) => format!( + "[{}, {}]!", + reg.to_reg().show_rru(mb_rru), + simm7.show_rru(mb_rru) + ), + &PairAMode::PostIndexed(reg, simm7) => format!( + "[{}], {}", + reg.to_reg().show_rru(mb_rru), + simm7.show_rru(mb_rru) + ), + } + } +} + +impl PrettyPrint for Cond { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let mut s = format!("{:?}", self); + s.make_ascii_lowercase(); + s + } +} + +impl PrettyPrint for BranchTarget { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + match self { + &BranchTarget::Label(label) => format!("label{:?}", label.get()), + &BranchTarget::ResolvedOffset(off) => format!("{}", off), + } + } +} + +/// Type used to communicate the operand size of a machine instruction, as AArch64 has 32- and +/// 64-bit variants of many instructions (and integer registers). +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OperandSize { + Size32, + Size64, +} + +impl OperandSize { + /// 32-bit case? + pub fn is32(self) -> bool { + self == OperandSize::Size32 + } + /// 64-bit case? + pub fn is64(self) -> bool { + self == OperandSize::Size64 + } + /// Convert from an `is32` boolean flag to an `OperandSize`. + pub fn from_is32(is32: bool) -> OperandSize { + if is32 { + OperandSize::Size32 + } else { + OperandSize::Size64 + } + } + /// Convert from a needed width to the smallest size that fits. + pub fn from_bits<I: Into<usize>>(bits: I) -> OperandSize { + let bits: usize = bits.into(); + assert!(bits <= 64); + if bits <= 32 { + OperandSize::Size32 + } else { + OperandSize::Size64 + } + } + + /// Convert from an integer type into the smallest size that fits. + pub fn from_ty(ty: Type) -> OperandSize { + Self::from_bits(ty_bits(ty)) + } + + /// Convert to I32, I64, or I128. + pub fn to_ty(self) -> Type { + match self { + OperandSize::Size32 => I32, + OperandSize::Size64 => I64, + } + } + + pub fn sf_bit(&self) -> u32 { + match self { + OperandSize::Size32 => 0, + OperandSize::Size64 => 1, + } + } +} + +/// Type used to communicate the size of a scalar SIMD & FP operand. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ScalarSize { + Size8, + Size16, + Size32, + Size64, + Size128, +} + +impl ScalarSize { + /// Convert from a needed width to the smallest size that fits. + pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize { + match bits.into().next_power_of_two() { + 8 => ScalarSize::Size8, + 16 => ScalarSize::Size16, + 32 => ScalarSize::Size32, + 64 => ScalarSize::Size64, + 128 => ScalarSize::Size128, + w => panic!("Unexpected type width: {}", w), + } + } + + /// Convert to an integer operand size. + pub fn operand_size(&self) -> OperandSize { + match self { + ScalarSize::Size32 => OperandSize::Size32, + ScalarSize::Size64 => OperandSize::Size64, + _ => panic!("Unexpected operand_size request for: {:?}", self), + } + } + + /// Convert from a type into the smallest size that fits. + pub fn from_ty(ty: Type) -> ScalarSize { + Self::from_bits(ty_bits(ty)) + } + + /// Return the encoding bits that are used by some scalar FP instructions + /// for a particular operand size. 
+ pub fn ftype(&self) -> u32 { + match self { + ScalarSize::Size16 => 0b11, + ScalarSize::Size32 => 0b00, + ScalarSize::Size64 => 0b01, + _ => panic!("Unexpected scalar FP operand size: {:?}", self), + } + } +} + +/// Type used to communicate the size of a vector operand. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum VectorSize { + Size8x8, + Size8x16, + Size16x4, + Size16x8, + Size32x2, + Size32x4, + Size64x2, +} + +impl VectorSize { + /// Get the vector operand size with the given scalar size as lane size. + pub fn from_lane_size(size: ScalarSize, is_128bit: bool) -> VectorSize { + match (size, is_128bit) { + (ScalarSize::Size8, false) => VectorSize::Size8x8, + (ScalarSize::Size8, true) => VectorSize::Size8x16, + (ScalarSize::Size16, false) => VectorSize::Size16x4, + (ScalarSize::Size16, true) => VectorSize::Size16x8, + (ScalarSize::Size32, false) => VectorSize::Size32x2, + (ScalarSize::Size32, true) => VectorSize::Size32x4, + (ScalarSize::Size64, true) => VectorSize::Size64x2, + _ => panic!("Unexpected scalar FP operand size: {:?}", size), + } + } + + /// Convert from a type into a vector operand size. + pub fn from_ty(ty: Type) -> VectorSize { + match ty { + B8X16 => VectorSize::Size8x16, + B16X8 => VectorSize::Size16x8, + B32X4 => VectorSize::Size32x4, + B64X2 => VectorSize::Size64x2, + F32X2 => VectorSize::Size32x2, + F32X4 => VectorSize::Size32x4, + F64X2 => VectorSize::Size64x2, + I8X8 => VectorSize::Size8x8, + I8X16 => VectorSize::Size8x16, + I16X4 => VectorSize::Size16x4, + I16X8 => VectorSize::Size16x8, + I32X2 => VectorSize::Size32x2, + I32X4 => VectorSize::Size32x4, + I64X2 => VectorSize::Size64x2, + _ => unimplemented!("Unsupported type: {}", ty), + } + } + + /// Get the integer operand size that corresponds to a lane of a vector with a certain size. + pub fn operand_size(&self) -> OperandSize { + match self { + VectorSize::Size64x2 => OperandSize::Size64, + _ => OperandSize::Size32, + } + } + + /// Get the scalar operand size that corresponds to a lane of a vector with a certain size. + pub fn lane_size(&self) -> ScalarSize { + match self { + VectorSize::Size8x8 => ScalarSize::Size8, + VectorSize::Size8x16 => ScalarSize::Size8, + VectorSize::Size16x4 => ScalarSize::Size16, + VectorSize::Size16x8 => ScalarSize::Size16, + VectorSize::Size32x2 => ScalarSize::Size32, + VectorSize::Size32x4 => ScalarSize::Size32, + VectorSize::Size64x2 => ScalarSize::Size64, + } + } + + pub fn is_128bits(&self) -> bool { + match self { + VectorSize::Size8x8 => false, + VectorSize::Size8x16 => true, + VectorSize::Size16x4 => false, + VectorSize::Size16x8 => true, + VectorSize::Size32x2 => false, + VectorSize::Size32x4 => true, + VectorSize::Size64x2 => true, + } + } + + /// Produces a `VectorSize` with lanes twice as wide. Note that if the resulting + /// size would exceed 128 bits, then the number of lanes is also halved, so as to + /// ensure that the result size is at most 128 bits. + pub fn widen(&self) -> VectorSize { + match self { + VectorSize::Size8x8 => VectorSize::Size16x8, + VectorSize::Size8x16 => VectorSize::Size16x8, + VectorSize::Size16x4 => VectorSize::Size32x4, + VectorSize::Size16x8 => VectorSize::Size32x4, + VectorSize::Size32x2 => VectorSize::Size64x2, + VectorSize::Size32x4 => VectorSize::Size64x2, + VectorSize::Size64x2 => unreachable!(), + } + } + + /// Produces a `VectorSize` that has the same lane width, but half as many lanes. 
+ pub fn halve(&self) -> VectorSize { + match self { + VectorSize::Size8x16 => VectorSize::Size8x8, + VectorSize::Size16x8 => VectorSize::Size16x4, + VectorSize::Size32x4 => VectorSize::Size32x2, + _ => *self, + } + } + + /// Return the encoding bits that are used by some SIMD instructions + /// for a particular operand size. + pub fn enc_size(&self) -> (u32, u32) { + let q = self.is_128bits() as u32; + let size = match self.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + ScalarSize::Size64 => 0b11, + _ => unreachable!(), + }; + + (q, size) + } +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs new file mode 100644 index 0000000000..5d0270dade --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit.rs @@ -0,0 +1,2359 @@ +//! AArch64 ISA: binary code emission. + +use crate::binemit::{CodeOffset, Reloc, StackMap}; +use crate::ir::constant::ConstantData; +use crate::ir::types::*; +use crate::ir::{MemFlags, TrapCode}; +use crate::isa::aarch64::inst::*; +use crate::machinst::ty_bits; + +use regalloc::{Reg, RegClass, Writable}; + +use core::convert::TryFrom; +use log::debug; + +/// Memory label/reference finalization: convert a MemLabel to a PC-relative +/// offset, possibly emitting relocation(s) as necessary. +pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { + match label { + &MemLabel::PCRel(rel) => rel, + } +} + +/// Memory addressing mode finalization: convert "special" modes (e.g., +/// generic arbitrary stack offset) into real addressing modes, possibly by +/// emitting some helper instructions that come immediately before the use +/// of this amode. +pub fn mem_finalize( + insn_off: CodeOffset, + mem: &AMode, + state: &EmitState, +) -> (SmallVec<[Inst; 4]>, AMode) { + match mem { + &AMode::RegOffset(_, off, ty) + | &AMode::SPOffset(off, ty) + | &AMode::FPOffset(off, ty) + | &AMode::NominalSPOffset(off, ty) => { + let basereg = match mem { + &AMode::RegOffset(reg, _, _) => reg, + &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => stack_reg(), + &AMode::FPOffset(..) => fp_reg(), + _ => unreachable!(), + }; + let adj = match mem { + &AMode::NominalSPOffset(..) => { + debug!( + "mem_finalize: nominal SP offset {} + adj {} -> {}", + off, + state.virtual_sp_offset, + off + state.virtual_sp_offset + ); + state.virtual_sp_offset + } + _ => 0, + }; + let off = off + adj; + + if let Some(simm9) = SImm9::maybe_from_i64(off) { + let mem = AMode::Unscaled(basereg, simm9); + (smallvec![], mem) + } else if let Some(uimm12s) = UImm12Scaled::maybe_from_i64(off, ty) { + let mem = AMode::UnsignedOffset(basereg, uimm12s); + (smallvec![], mem) + } else { + let tmp = writable_spilltmp_reg(); + let mut const_insts = Inst::load_constant(tmp, off as u64); + // N.B.: we must use AluRRRExtend because AluRRR uses the "shifted register" form + // (AluRRRShift) instead, which interprets register 31 as the zero reg, not SP. SP + // is a valid base (for SPOffset) which we must handle here. + // Also, SP needs to be the first arg, not second. 
+ let add_inst = Inst::AluRRRExtend { + alu_op: ALUOp::Add64, + rd: tmp, + rn: basereg, + rm: tmp.to_reg(), + extendop: ExtendOp::UXTX, + }; + const_insts.push(add_inst); + (const_insts, AMode::reg(tmp.to_reg())) + } + } + + &AMode::Label(ref label) => { + let off = memlabel_finalize(insn_off, label); + (smallvec![], AMode::Label(MemLabel::PCRel(off))) + } + + _ => (smallvec![], mem.clone()), + } +} + +/// Helper: get a ConstantData from a u64. +pub fn u64_constant(bits: u64) -> ConstantData { + let data = bits.to_le_bytes(); + ConstantData::from(&data[..]) +} + +//============================================================================= +// Instructions and subcomponents: emission + +fn machreg_to_gpr(m: Reg) -> u32 { + assert_eq!(m.get_class(), RegClass::I64); + u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() +} + +fn machreg_to_vec(m: Reg) -> u32 { + assert_eq!(m.get_class(), RegClass::V128); + u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() +} + +fn machreg_to_gpr_or_vec(m: Reg) -> u32 { + u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() +} + +fn enc_arith_rrr(bits_31_21: u32, bits_15_10: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 { + (bits_31_21 << 21) + | (bits_15_10 << 10) + | machreg_to_gpr(rd.to_reg()) + | (machreg_to_gpr(rn) << 5) + | (machreg_to_gpr(rm) << 16) +} + +fn enc_arith_rr_imm12( + bits_31_24: u32, + immshift: u32, + imm12: u32, + rn: Reg, + rd: Writable<Reg>, +) -> u32 { + (bits_31_24 << 24) + | (immshift << 22) + | (imm12 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rr_imml(bits_31_23: u32, imm_bits: u32, rn: Reg, rd: Writable<Reg>) -> u32 { + (bits_31_23 << 23) | (imm_bits << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_arith_rrrr(top11: u32, rm: Reg, bit15: u32, ra: Reg, rn: Reg, rd: Writable<Reg>) -> u32 { + (top11 << 21) + | (machreg_to_gpr(rm) << 16) + | (bit15 << 15) + | (machreg_to_gpr(ra) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_jump26(op_31_26: u32, off_26_0: u32) -> u32 { + assert!(off_26_0 < (1 << 26)); + (op_31_26 << 26) | off_26_0 +} + +fn enc_cmpbr(op_31_24: u32, off_18_0: u32, reg: Reg) -> u32 { + assert!(off_18_0 < (1 << 19)); + (op_31_24 << 24) | (off_18_0 << 5) | machreg_to_gpr(reg) +} + +fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { + assert!(off_18_0 < (1 << 19)); + assert!(cond < (1 << 4)); + (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond +} + +fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 { + match kind { + CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg), + CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg), + CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()), + } +} + +const MOVE_WIDE_FIXED: u32 = 0x12800000; + +#[repr(u32)] +enum MoveWideOpcode { + MOVN = 0b00, + MOVZ = 0b10, + MOVK = 0b11, +} + +fn enc_move_wide( + op: MoveWideOpcode, + rd: Writable<Reg>, + imm: MoveWideConst, + size: OperandSize, +) -> u32 { + assert!(imm.shift <= 0b11); + MOVE_WIDE_FIXED + | size.sf_bit() << 31 + | (op as u32) << 29 + | u32::from(imm.shift) << 21 + | u32::from(imm.bits) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_ldst_pair(op_31_22: u32, simm7: SImm7Scaled, rn: Reg, rt: Reg, rt2: Reg) -> u32 { + (op_31_22 << 22) + | (simm7.bits() << 15) + | (machreg_to_gpr(rt2) << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt) +} + +fn 
enc_ldst_simm9(op_31_22: u32, simm9: SImm9, op_11_10: u32, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (simm9.bits() << 12) + | (op_11_10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_uimm12(op_31_22: u32, uimm12: UImm12Scaled, rn: Reg, rd: Reg) -> u32 { + (op_31_22 << 22) + | (0b1 << 24) + | (uimm12.bits() << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_reg( + op_31_22: u32, + rn: Reg, + rm: Reg, + s_bit: bool, + extendop: Option<ExtendOp>, + rd: Reg, +) -> u32 { + let s_bit = if s_bit { 1 } else { 0 }; + let extend_bits = match extendop { + Some(ExtendOp::UXTW) => 0b010, + Some(ExtendOp::SXTW) => 0b110, + Some(ExtendOp::SXTX) => 0b111, + None => 0b011, // LSL + _ => panic!("bad extend mode for ld/st AMode"), + }; + (op_31_22 << 22) + | (1 << 21) + | (machreg_to_gpr(rm) << 16) + | (extend_bits << 13) + | (s_bit << 12) + | (0b10 << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 { + (op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd) +} + +fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 { + debug_assert_eq!(q & 0b1, q); + debug_assert_eq!(size & 0b11, size); + 0b0_0_0011010_10_00000_110_0_00_00000_00000 + | q << 30 + | size << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_vec(rt.to_reg()) +} + +fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 { + (top11 << 21) + | (machreg_to_vec(rm) << 16) + | (bit15_10 << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_bit_rr(size: u32, opcode2: u32, opcode1: u32, rn: Reg, rd: Writable<Reg>) -> u32 { + (0b01011010110 << 21) + | size << 31 + | opcode2 << 16 + | opcode1 << 10 + | machreg_to_gpr(rn) << 5 + | machreg_to_gpr(rd.to_reg()) +} + +fn enc_br(rn: Reg) -> u32 { + 0b1101011_0000_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5) +} + +fn enc_adr(off: i32, rd: Writable<Reg>) -> u32 { + let off = u32::try_from(off).unwrap(); + let immlo = off & 3; + let immhi = (off >> 2) & ((1 << 19) - 1); + (0b00010000 << 24) | (immlo << 29) | (immhi << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_csel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond) -> u32 { + 0b100_11010100_00000_0000_00_00000_00000 + | (machreg_to_gpr(rm) << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()) + | (cond.bits() << 12) +} + +fn enc_fcsel(rd: Writable<Reg>, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) -> u32 { + 0b000_11110_00_1_00000_0000_11_00000_00000 + | (size.ftype() << 22) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) + | (cond.bits() << 12) +} + +fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 { + 0b100_11010100_11111_0000_01_11111_00000 + | machreg_to_gpr(rd.to_reg()) + | (cond.invert().bits() << 12) +} + +fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 { + 0b0_1_1_11010010_00000_0000_10_00000_0_0000 + | size.sf_bit() << 31 + | imm.bits() << 16 + | cond.bits() << 12 + | machreg_to_gpr(rn) << 5 + | nzcv.bits() +} + +fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 { + 0b00001110_101_00000_00011_1_00000_00000 + | ((is_16b as u32) << 30) + | machreg_to_vec(rd.to_reg()) + | (machreg_to_vec(rn) << 16) + | (machreg_to_vec(rn) << 5) +} + +fn enc_fpurr(top22: u32, 
rd: Writable<Reg>, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_fpurrr(top22: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 { + (top22 << 10) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_fpurrrr(top17: u32, rd: Writable<Reg>, rn: Reg, rm: Reg, ra: Reg) -> u32 { + (top17 << 15) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(ra) << 10) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_fcmp(size: ScalarSize, rn: Reg, rm: Reg) -> u32 { + 0b000_11110_00_1_00000_00_1000_00000_00000 + | (size.ftype() << 22) + | (machreg_to_vec(rm) << 16) + | (machreg_to_vec(rn) << 5) +} + +fn enc_fputoint(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { + (top16 << 16) | (machreg_to_vec(rn) << 5) | machreg_to_gpr(rd.to_reg()) +} + +fn enc_inttofpu(top16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { + (top16 << 16) | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 { + (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) +} + +fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 { + debug_assert_eq!(qu & 0b11, qu); + debug_assert_eq!(size & 0b11, size); + debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16); + let bits = 0b0_00_01110_00_10000_00000_10_00000_00000; + bits | qu << 29 + | size << 22 + | bits_12_16 << 12 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()) +} + +fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 { + debug_assert_eq!(q & 0b1, q); + debug_assert_eq!(u & 0b1, u); + debug_assert_eq!(size & 0b11, size); + debug_assert_eq!(opcode & 0b11111, opcode); + 0b0_0_0_01110_00_11000_0_0000_10_00000_00000 + | q << 30 + | u << 29 + | size << 22 + | opcode << 12 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()) +} + +fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 { + debug_assert_eq!(len & 0b11, len); + 0b0_1_001110_000_00000_0_00_0_00_00000_00000 + | (machreg_to_vec(rm) << 16) + | len << 13 + | (is_extension as u32) << 12 + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()) +} + +fn enc_dmb_ish() -> u32 { + 0xD5033BBF +} + +fn enc_ldxr(ty: Type, rt: Writable<Reg>, rn: Reg) -> u32 { + let sz = match ty { + I64 => 0b11, + I32 => 0b10, + I16 => 0b01, + I8 => 0b00, + _ => unreachable!(), + }; + 0b00001000_01011111_01111100_00000000 + | (sz << 30) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt.to_reg()) +} + +fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 { + let sz = match ty { + I64 => 0b11, + I32 => 0b10, + I16 => 0b01, + I8 => 0b00, + _ => unreachable!(), + }; + 0b00001000_00000000_01111100_00000000 + | (sz << 30) + | (machreg_to_gpr(rs.to_reg()) << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt) +} + +fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 { + let abc = (imm >> 5) as u32; + let defgh = (imm & 0b11111) as u32; + + debug_assert_eq!(cmode & 0b1111, cmode); + debug_assert_eq!(q_op & 0b11, q_op); + + 0b0_0_0_0111100000_000_0000_01_00000_00000 + | (q_op << 29) + | (abc << 16) + | (cmode << 12) + | (defgh << 5) + | machreg_to_vec(rd.to_reg()) +} + +/// State carried between emissions of a sequence of instructions. 
+#[derive(Default, Clone, Debug)] +pub struct EmitState { + /// Addend to convert nominal-SP offsets to real-SP offsets at the current + /// program point. + pub(crate) virtual_sp_offset: i64, + /// Offset of FP from nominal-SP. + pub(crate) nominal_sp_to_fp: i64, + /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`. + stack_map: Option<StackMap>, + /// Current source-code location corresponding to instruction to be emitted. + cur_srcloc: SourceLoc, +} + +impl MachInstEmitState<Inst> for EmitState { + fn new(abi: &dyn ABICallee<I = Inst>) -> Self { + EmitState { + virtual_sp_offset: 0, + nominal_sp_to_fp: abi.frame_size() as i64, + stack_map: None, + cur_srcloc: SourceLoc::default(), + } + } + + fn pre_safepoint(&mut self, stack_map: StackMap) { + self.stack_map = Some(stack_map); + } + + fn pre_sourceloc(&mut self, srcloc: SourceLoc) { + self.cur_srcloc = srcloc; + } +} + +impl EmitState { + fn take_stack_map(&mut self) -> Option<StackMap> { + self.stack_map.take() + } + + fn clear_post_insn(&mut self) { + self.stack_map = None; + } + + fn cur_srcloc(&self) -> SourceLoc { + self.cur_srcloc + } +} + +/// Constant state used during function compilation. +pub struct EmitInfo(settings::Flags); + +impl EmitInfo { + pub(crate) fn new(flags: settings::Flags) -> Self { + Self(flags) + } +} + +impl MachInstEmitInfo for EmitInfo { + fn flags(&self) -> &settings::Flags { + &self.0 + } +} + +impl MachInstEmit for Inst { + type State = EmitState; + type Info = EmitInfo; + type UnwindInfo = super::unwind::AArch64UnwindInfo; + + fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) { + // N.B.: we *must* not exceed the "worst-case size" used to compute + // where to insert islands, except when islands are explicitly triggered + // (with an `EmitIsland`). We check this in debug builds. This is `mut` + // to allow disabling the check for `JTSequence`, which is always + // emitted following an `EmitIsland`. 
+ let mut start_off = sink.cur_offset(); + + match self { + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let top11 = match alu_op { + ALUOp::Add32 => 0b00001011_000, + ALUOp::Add64 => 0b10001011_000, + ALUOp::Sub32 => 0b01001011_000, + ALUOp::Sub64 => 0b11001011_000, + ALUOp::Orr32 => 0b00101010_000, + ALUOp::Orr64 => 0b10101010_000, + ALUOp::And32 => 0b00001010_000, + ALUOp::And64 => 0b10001010_000, + ALUOp::Eor32 => 0b01001010_000, + ALUOp::Eor64 => 0b11001010_000, + ALUOp::OrrNot32 => 0b00101010_001, + ALUOp::OrrNot64 => 0b10101010_001, + ALUOp::AndNot32 => 0b00001010_001, + ALUOp::AndNot64 => 0b10001010_001, + ALUOp::EorNot32 => 0b01001010_001, + ALUOp::EorNot64 => 0b11001010_001, + ALUOp::AddS32 => 0b00101011_000, + ALUOp::AddS64 => 0b10101011_000, + ALUOp::SubS32 => 0b01101011_000, + ALUOp::SubS64 => 0b11101011_000, + ALUOp::SDiv64 => 0b10011010_110, + ALUOp::UDiv64 => 0b10011010_110, + ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110, + ALUOp::RotR64 | ALUOp::Lsr64 | ALUOp::Asr64 | ALUOp::Lsl64 => 0b10011010_110, + ALUOp::SMulH => 0b10011011_010, + ALUOp::UMulH => 0b10011011_110, + }; + let bit15_10 = match alu_op { + ALUOp::SDiv64 => 0b000011, + ALUOp::UDiv64 => 0b000010, + ALUOp::RotR32 | ALUOp::RotR64 => 0b001011, + ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001, + ALUOp::Asr32 | ALUOp::Asr64 => 0b001010, + ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000, + ALUOp::SMulH | ALUOp::UMulH => 0b011111, + _ => 0b000000, + }; + debug_assert_ne!(writable_stack_reg(), rd); + // The stack pointer is the zero register in this context, so this might be an + // indication that something is wrong. + debug_assert_ne!(stack_reg(), rn); + debug_assert_ne!(stack_reg(), rm); + sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm)); + } + &Inst::AluRRRR { + alu_op, + rd, + rm, + rn, + ra, + } => { + let (top11, bit15) = match alu_op { + ALUOp3::MAdd32 => (0b0_00_11011_000, 0), + ALUOp3::MSub32 => (0b0_00_11011_000, 1), + ALUOp3::MAdd64 => (0b1_00_11011_000, 0), + ALUOp3::MSub64 => (0b1_00_11011_000, 1), + }; + sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd)); + } + &Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => { + let top8 = match alu_op { + ALUOp::Add32 => 0b000_10001, + ALUOp::Add64 => 0b100_10001, + ALUOp::Sub32 => 0b010_10001, + ALUOp::Sub64 => 0b110_10001, + ALUOp::AddS32 => 0b001_10001, + ALUOp::AddS64 => 0b101_10001, + ALUOp::SubS32 => 0b011_10001, + ALUOp::SubS64 => 0b111_10001, + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4(enc_arith_rr_imm12( + top8, + imm12.shift_bits(), + imm12.imm_bits(), + rn, + rd, + )); + } + &Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => { + let (top9, inv) = match alu_op { + ALUOp::Orr32 => (0b001_100100, false), + ALUOp::Orr64 => (0b101_100100, false), + ALUOp::And32 => (0b000_100100, false), + ALUOp::And64 => (0b100_100100, false), + ALUOp::Eor32 => (0b010_100100, false), + ALUOp::Eor64 => (0b110_100100, false), + ALUOp::OrrNot32 => (0b001_100100, true), + ALUOp::OrrNot64 => (0b101_100100, true), + ALUOp::AndNot32 => (0b000_100100, true), + ALUOp::AndNot64 => (0b100_100100, true), + ALUOp::EorNot32 => (0b010_100100, true), + ALUOp::EorNot64 => (0b110_100100, true), + _ => unimplemented!("{:?}", alu_op), + }; + let imml = if inv { imml.invert() } else { imml.clone() }; + sink.put4(enc_arith_rr_imml(top9, imml.enc_bits(), rn, rd)); + } + + &Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => { + let amt = immshift.value(); + let (top10, immr, imms) = match alu_op { + ALUOp::RotR32 => 
(0b0001001110, machreg_to_gpr(rn), u32::from(amt)), + ALUOp::RotR64 => (0b1001001111, machreg_to_gpr(rn), u32::from(amt)), + ALUOp::Lsr32 => (0b0101001100, u32::from(amt), 0b011111), + ALUOp::Lsr64 => (0b1101001101, u32::from(amt), 0b111111), + ALUOp::Asr32 => (0b0001001100, u32::from(amt), 0b011111), + ALUOp::Asr64 => (0b1001001101, u32::from(amt), 0b111111), + ALUOp::Lsl32 => ( + 0b0101001100, + u32::from((32 - amt) % 32), + u32::from(31 - amt), + ), + ALUOp::Lsl64 => ( + 0b1101001101, + u32::from((64 - amt) % 64), + u32::from(63 - amt), + ), + _ => unimplemented!("{:?}", alu_op), + }; + sink.put4( + (top10 << 22) + | (immr << 16) + | (imms << 10) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + + &Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => { + let top11: u32 = match alu_op { + ALUOp::Add32 => 0b000_01011000, + ALUOp::Add64 => 0b100_01011000, + ALUOp::AddS32 => 0b001_01011000, + ALUOp::AddS64 => 0b101_01011000, + ALUOp::Sub32 => 0b010_01011000, + ALUOp::Sub64 => 0b110_01011000, + ALUOp::SubS32 => 0b011_01011000, + ALUOp::SubS64 => 0b111_01011000, + ALUOp::Orr32 => 0b001_01010000, + ALUOp::Orr64 => 0b101_01010000, + ALUOp::And32 => 0b000_01010000, + ALUOp::And64 => 0b100_01010000, + ALUOp::Eor32 => 0b010_01010000, + ALUOp::Eor64 => 0b110_01010000, + ALUOp::OrrNot32 => 0b001_01010001, + ALUOp::OrrNot64 => 0b101_01010001, + ALUOp::EorNot32 => 0b010_01010001, + ALUOp::EorNot64 => 0b110_01010001, + ALUOp::AndNot32 => 0b000_01010001, + ALUOp::AndNot64 => 0b100_01010001, + _ => unimplemented!("{:?}", alu_op), + }; + let top11 = top11 | (u32::from(shiftop.op().bits()) << 1); + let bits_15_10 = u32::from(shiftop.amt().value()); + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + extendop, + } => { + let top11: u32 = match alu_op { + ALUOp::Add32 => 0b00001011001, + ALUOp::Add64 => 0b10001011001, + ALUOp::Sub32 => 0b01001011001, + ALUOp::Sub64 => 0b11001011001, + ALUOp::AddS32 => 0b00101011001, + ALUOp::AddS64 => 0b10101011001, + ALUOp::SubS32 => 0b01101011001, + ALUOp::SubS64 => 0b11101011001, + _ => unimplemented!("{:?}", alu_op), + }; + let bits_15_10 = u32::from(extendop.bits()) << 3; + sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm)); + } + + &Inst::BitRR { op, rd, rn, .. } => { + let size = if op.operand_size().is32() { 0b0 } else { 0b1 }; + let (op1, op2) = match op { + BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000), + BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100), + BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101), + }; + sink.put4(enc_bit_rr(size, op1, op2, rn, rd)) + } + + &Inst::ULoad8 { rd, ref mem, flags } + | &Inst::SLoad8 { rd, ref mem, flags } + | &Inst::ULoad16 { rd, ref mem, flags } + | &Inst::SLoad16 { rd, ref mem, flags } + | &Inst::ULoad32 { rd, ref mem, flags } + | &Inst::SLoad32 { rd, ref mem, flags } + | &Inst::ULoad64 { + rd, ref mem, flags, .. + } + | &Inst::FpuLoad32 { rd, ref mem, flags } + | &Inst::FpuLoad64 { rd, ref mem, flags } + | &Inst::FpuLoad128 { rd, ref mem, flags } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + // ldst encoding helpers take Reg, not Writable<Reg>. + let rd = rd.to_reg(); + + // This is the base opcode (top 10 bits) for the "unscaled + // immediate" form (Unscaled). Other addressing modes will OR in + // other values for bits 24/25 (bits 1/2 of this constant). 
+ let (op, bits) = match self { + &Inst::ULoad8 { .. } => (0b0011100001, 8), + &Inst::SLoad8 { .. } => (0b0011100010, 8), + &Inst::ULoad16 { .. } => (0b0111100001, 16), + &Inst::SLoad16 { .. } => (0b0111100010, 16), + &Inst::ULoad32 { .. } => (0b1011100001, 32), + &Inst::SLoad32 { .. } => (0b1011100010, 32), + &Inst::ULoad64 { .. } => (0b1111100001, 64), + &Inst::FpuLoad32 { .. } => (0b1011110001, 32), + &Inst::FpuLoad64 { .. } => (0b1111110001, 64), + &Inst::FpuLoad128 { .. } => (0b0011110011, 128), + _ => unreachable!(), + }; + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + match &mem { + &AMode::Unscaled(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &AMode::UnsignedOffset(reg, uimm12scaled) => { + if uimm12scaled.value() != 0 { + assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); + } + sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); + } + &AMode::RegReg(r1, r2) => { + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => { + assert_eq!(bits, ty_bits(ty)); + let extendop = match &mem { + &AMode::RegScaled(..) => None, + &AMode::RegScaledExtended(_, _, _, op) => Some(op), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &AMode::RegExtended(r1, r2, extendop) => { + sink.put4(enc_ldst_reg( + op, + r1, + r2, + /* scaled = */ false, + Some(extendop), + rd, + )); + } + &AMode::Label(ref label) => { + let offset = match label { + // cast i32 to u32 (two's-complement) + &MemLabel::PCRel(off) => off as u32, + } / 4; + assert!(offset < (1 << 19)); + match self { + &Inst::ULoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011000, offset, rd)); + } + &Inst::SLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b10011000, offset, rd)); + } + &Inst::FpuLoad32 { .. } => { + sink.put4(enc_ldst_imm19(0b00011100, offset, rd)); + } + &Inst::ULoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011000, offset, rd)); + } + &Inst::FpuLoad64 { .. } => { + sink.put4(enc_ldst_imm19(0b01011100, offset, rd)); + } + &Inst::FpuLoad128 { .. } => { + sink.put4(enc_ldst_imm19(0b10011100, offset, rd)); + } + _ => panic!("Unspported size for LDR from constant pool!"), + } + } + &AMode::PreIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + } + &AMode::PostIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + } + // Eliminated by `mem_finalize()` above. + &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { + panic!("Should not see stack-offset here!") + } + &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), + } + } + + &Inst::Store8 { rd, ref mem, flags } + | &Inst::Store16 { rd, ref mem, flags } + | &Inst::Store32 { rd, ref mem, flags } + | &Inst::Store64 { rd, ref mem, flags } + | &Inst::FpuStore32 { rd, ref mem, flags } + | &Inst::FpuStore64 { rd, ref mem, flags } + | &Inst::FpuStore128 { rd, ref mem, flags } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let (op, bits) = match self { + &Inst::Store8 { .. } => (0b0011100000, 8), + &Inst::Store16 { .. } => (0b0111100000, 16), + &Inst::Store32 { .. 
} => (0b1011100000, 32), + &Inst::Store64 { .. } => (0b1111100000, 64), + &Inst::FpuStore32 { .. } => (0b1011110000, 32), + &Inst::FpuStore64 { .. } => (0b1111110000, 64), + &Inst::FpuStore128 { .. } => (0b0011110010, 128), + _ => unreachable!(), + }; + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + match &mem { + &AMode::Unscaled(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); + } + &AMode::UnsignedOffset(reg, uimm12scaled) => { + if uimm12scaled.value() != 0 { + assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); + } + sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); + } + &AMode::RegReg(r1, r2) => { + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, + )); + } + &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => { + let extendop = match &mem { + &AMode::RegScaled(..) => None, + &AMode::RegScaledExtended(_, _, _, op) => Some(op), + _ => unreachable!(), + }; + sink.put4(enc_ldst_reg( + op, r1, r2, /* scaled = */ true, extendop, rd, + )); + } + &AMode::RegExtended(r1, r2, extendop) => { + sink.put4(enc_ldst_reg( + op, + r1, + r2, + /* scaled = */ false, + Some(extendop), + rd, + )); + } + &AMode::Label(..) => { + panic!("Store to a MemLabel not implemented!"); + } + &AMode::PreIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + } + &AMode::PostIndexed(reg, simm9) => { + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + } + // Eliminated by `mem_finalize()` above. + &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { + panic!("Should not see stack-offset here!") + } + &AMode::RegOffset(..) => panic!("SHould not see generic reg-offset here!"), + } + } + + &Inst::StoreP64 { + rt, + rt2, + ref mem, + flags, + } => { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + match mem { + &PairAMode::SignedOffset(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); + } + &PairAMode::PreIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2)); + } + &PairAMode::PostIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2)); + } + } + } + &Inst::LoadP64 { + rt, + rt2, + ref mem, + flags, + } => { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && !flags.notrap() { + // Register the offset at which the actual load instruction starts. 
+ sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + match mem { + &PairAMode::SignedOffset(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); + } + &PairAMode::PreIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2)); + } + &PairAMode::PostIndexed(reg, simm7) => { + assert_eq!(simm7.scale_ty, I64); + sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2)); + } + } + } + &Inst::Mov64 { rd, rm } => { + assert!(rd.to_reg().get_class() == rm.get_class()); + assert!(rm.get_class() == RegClass::I64); + + // MOV to SP is interpreted as MOV to XZR instead. And our codegen + // should never MOV to XZR. + assert!(rd.to_reg() != stack_reg()); + + if rm == stack_reg() { + // We can't use ORR here, so use an `add rd, sp, #0` instead. + let imm12 = Imm12::maybe_from_u64(0).unwrap(); + sink.put4(enc_arith_rr_imm12( + 0b100_10001, + imm12.shift_bits(), + imm12.imm_bits(), + rm, + rd, + )); + } else { + // Encoded as ORR rd, rm, zero. + sink.put4(enc_arith_rrr(0b10101010_000, 0b000_000, rd, zero_reg(), rm)); + } + } + &Inst::Mov32 { rd, rm } => { + // MOV to SP is interpreted as MOV to XZR instead. And our codegen + // should never MOV to XZR. + assert!(machreg_to_gpr(rd.to_reg()) != 31); + // Encoded as ORR rd, rm, zero. + sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); + } + &Inst::MovZ { rd, imm, size } => { + sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size)) + } + &Inst::MovN { rd, imm, size } => { + sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size)) + } + &Inst::MovK { rd, imm, size } => { + sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size)) + } + &Inst::CSel { rd, rn, rm, cond } => { + sink.put4(enc_csel(rd, rn, rm, cond)); + } + &Inst::CSet { rd, cond } => { + sink.put4(enc_cset(rd, cond)); + } + &Inst::CCmpImm { + size, + rn, + imm, + nzcv, + cond, + } => { + sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); + } + &Inst::AtomicRMW { ty, op } => { + /* Emit this: + dmb ish + again: + ldxr{,b,h} x/w27, [x25] + op x28, x27, x26 // op is add,sub,and,orr,eor + stxr{,b,h} w24, x/w28, [x25] + cbnz x24, again + dmb ish + + Operand conventions: + IN: x25 (addr), x26 (2nd arg for op) + OUT: x27 (old value), x24 (trashed), x28 (trashed) + + It is unfortunate that, per the ARM documentation, x28 cannot be used for + both the store-data and success-flag operands of stxr. This causes the + instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24 + instead for the success-flag. + + In the case where the operation is 'xchg', the second insn is instead + mov x28, x26 + so that we simply write in the destination, the "2nd arg for op". 
+ */ + let xzr = zero_reg(); + let x24 = xreg(24); + let x25 = xreg(25); + let x26 = xreg(26); + let x27 = xreg(27); + let x28 = xreg(28); + let x24wr = writable_xreg(24); + let x27wr = writable_xreg(27); + let x28wr = writable_xreg(28); + let again_label = sink.get_label(); + + sink.put4(enc_dmb_ish()); // dmb ish + + // again: + sink.bind_label(again_label); + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25] + + if op == inst_common::AtomicRmwOp::Xchg { + // mov x28, x26 + sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26)) + } else { + // add/sub/and/orr/eor x28, x27, x26 + let bits_31_21 = match op { + inst_common::AtomicRmwOp::Add => 0b100_01011_00_0, + inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0, + inst_common::AtomicRmwOp::And => 0b100_01010_00_0, + inst_common::AtomicRmwOp::Or => 0b101_01010_00_0, + inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0, + inst_common::AtomicRmwOp::Xchg => unreachable!(), + }; + sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26)); + } + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25] + + // cbnz w24, again + // Note, we're actually testing x24, and relying on the default zero-high-half + // rule in the assignment that `stxr` does. + let br_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(again_label), + CondBrKind::NotZero(x24), + )); + sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); + + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::AtomicCAS { ty } => { + /* Emit this: + dmb ish + again: + ldxr{,b,h} x/w27, [x25] + and x24, x26, MASK (= 2^size_bits - 1) + cmp x27, x24 + b.ne out + stxr{,b,h} w24, x/w28, [x25] + cbnz x24, again + out: + dmb ish + + Operand conventions: + IN: x25 (addr), x26 (expected value), x28 (replacement value) + OUT: x27 (old value), x24 (trashed) + */ + let xzr = zero_reg(); + let x24 = xreg(24); + let x25 = xreg(25); + let x26 = xreg(26); + let x27 = xreg(27); + let x28 = xreg(28); + let xzrwr = writable_zero_reg(); + let x24wr = writable_xreg(24); + let x27wr = writable_xreg(27); + let again_label = sink.get_label(); + let out_label = sink.get_label(); + + sink.put4(enc_dmb_ish()); // dmb ish + + // again: + sink.bind_label(again_label); + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25] + + if ty == I64 { + // mov x24, x26 + sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26)) + } else { + // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF + let (mask, s) = match ty { + I8 => (0xFF, 7), + I16 => (0xFFFF, 15), + I32 => (0xFFFFFFFF, 31), + _ => unreachable!(), + }; + sink.put4(enc_arith_rr_imml( + 0b100_100100, + ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(), + x26, + x24wr, + )) + } + + // cmp x27, x24 (== subs xzr, x27, x24) + sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24)); + + // b.ne out + let br_out_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(out_label), + CondBrKind::Cond(Cond::Ne), + )); + sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); + + let srcloc = state.cur_srcloc(); + if srcloc != 
SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25] + + // cbnz w24, again. + // Note, we're actually testing x24, and relying on the default zero-high-half + // rule in the assignment that `stxr` does. + let br_again_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(again_label), + CondBrKind::NotZero(x24), + )); + sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19); + + // out: + sink.bind_label(out_label); + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::AtomicLoad { ty, r_data, r_addr } => { + let op = match ty { + I8 => 0b0011100001, + I16 => 0b0111100001, + I32 => 0b1011100001, + I64 => 0b1111100001, + _ => unreachable!(), + }; + sink.put4(enc_dmb_ish()); // dmb ish + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/); + sink.put4(enc_ldst_uimm12( + op, + uimm12scaled_zero, + r_addr, + r_data.to_reg(), + )); + } + &Inst::AtomicStore { ty, r_data, r_addr } => { + let op = match ty { + I8 => 0b0011100000, + I16 => 0b0111100000, + I32 => 0b1011100000, + I64 => 0b1111100000, + _ => unreachable!(), + }; + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/); + sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data)); + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::Fence {} => { + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::FpuMove64 { rd, rn } => { + sink.put4(enc_vecmov(/* 16b = */ false, rd, rn)); + } + &Inst::FpuMove128 { rd, rn } => { + sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); + } + &Inst::FpuMoveFromVec { rd, rn, idx, size } => { + let (imm5, shift, mask) = match size.lane_size() { + ScalarSize::Size32 => (0b00100, 3, 0b011), + ScalarSize::Size64 => (0b01000, 4, 0b001), + _ => unimplemented!(), + }; + debug_assert_eq!(idx & mask, idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b010_11110000_00000_000001_00000_00000 + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let top22 = match fpu_op { + FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, + FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000, + FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000, + FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000, + FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000, + FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000, + FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000, + FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000, + }; + sink.put4(enc_fpurr(top22, rd, rn)); + } + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let top22 = match fpu_op { + FPUOp2::Add32 => 0b000_11110_00_1_00000_001010, + FPUOp2::Add64 => 0b000_11110_01_1_00000_001010, + FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110, + FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110, + FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010, + FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010, + FPUOp2::Div32 => 0b000_11110_00_1_00000_000110, + FPUOp2::Div64 => 0b000_11110_01_1_00000_000110, + FPUOp2::Max32 => 0b000_11110_00_1_00000_010010, + FPUOp2::Max64 => 0b000_11110_01_1_00000_010010, + FPUOp2::Min32 => 0b000_11110_00_1_00000_010110, + FPUOp2::Min64 => 0b000_11110_01_1_00000_010110, + FPUOp2::Sqadd64 => 
0b010_11110_11_1_00000_000011, + FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011, + FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011, + FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011, + }; + sink.put4(enc_fpurrr(top22, rd, rn, rm)); + } + &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op { + FPUOpRI::UShr32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::UShr64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::Sli64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::Sli32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + }, + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let top17 = match fpu_op { + FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0, + FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0, + }; + sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); + } + &Inst::VecMisc { op, rd, rn, size } => { + let (q, enc_size) = size.enc_size(); + let (u, bits_12_16, size) = match op { + VecMisc2::Not => (0b1, 0b00101, 0b00), + VecMisc2::Neg => (0b1, 0b01011, enc_size), + VecMisc2::Abs => (0b0, 0b01011, enc_size), + VecMisc2::Fabs => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b01111, enc_size) + } + VecMisc2::Fneg => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b01111, enc_size) + } + VecMisc2::Fsqrt => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11111, enc_size) + } + VecMisc2::Rev64 => { + debug_assert_ne!(VectorSize::Size64x2, size); + (0b0, 0b00000, enc_size) + } + VecMisc2::Shll => { + debug_assert_ne!(VectorSize::Size64x2, size); + debug_assert!(!size.is_128bits()); + (0b1, 0b10011, enc_size) + } + VecMisc2::Fcvtzs => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11011, enc_size) + } + VecMisc2::Fcvtzu => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11011, enc_size) + } + VecMisc2::Scvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11101, enc_size & 0b1) + } + VecMisc2::Ucvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11101, enc_size & 0b1) + } + VecMisc2::Frintn => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11000, enc_size & 0b01) + } + VecMisc2::Frintz => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11001, enc_size | 0b10) + } + VecMisc2::Frintm => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11001, enc_size & 0b01) + } + VecMisc2::Frintp => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11000, enc_size | 0b10) + } + }; + sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); + } + &Inst::VecLanes { op, rd, rn, size } => { + 
let (q, size) = match size { + VectorSize::Size8x16 => (0b1, 0b00), + VectorSize::Size16x8 => (0b1, 0b01), + VectorSize::Size32x4 => (0b1, 0b10), + _ => unreachable!(), + }; + let (u, opcode) = match op { + VecLanesOp::Uminv => (0b1, 0b11010), + VecLanesOp::Addv => (0b0, 0b11011), + }; + sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn)); + } + &Inst::VecShiftImm { + op, + rd, + rn, + size, + imm, + } => { + let (is_shr, template) = match op { + VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32), + VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32), + VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32), + }; + let imm = imm as u32; + // Deal with the somewhat strange encoding scheme for, and limits on, + // the shift amount. + let immh_immb = match (size, is_shr) { + (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => { + 0b_1000_000_u32 | (64 - imm) + } + (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => { + 0b_0100_000_u32 | (32 - imm) + } + (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => { + 0b_0010_000_u32 | (16 - imm) + } + (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => { + 0b_0001_000_u32 | (8 - imm) + } + (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm, + (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm, + (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm, + (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm, + _ => panic!( + "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}", + op, size, imm + ), + }; + let rn_enc = machreg_to_vec(rn); + let rd_enc = machreg_to_vec(rd.to_reg()); + sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); + } + &Inst::VecExtract { rd, rn, rm, imm4 } => { + if imm4 < 16 { + let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32; + let rm_enc = machreg_to_vec(rm); + let rn_enc = machreg_to_vec(rn); + let rd_enc = machreg_to_vec(rd.to_reg()); + sink.put4( + template | (rm_enc << 16) | ((imm4 as u32) << 11) | (rn_enc << 5) | rd_enc, + ); + } else { + panic!( + "aarch64: Inst::VecExtract: emit: invalid extract index {}", + imm4 + ); + } + } + &Inst::VecTbl { + rd, + rn, + rm, + is_extension, + } => { + sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm)); + } + &Inst::VecTbl2 { + rd, + rn, + rn2, + rm, + is_extension, + } => { + assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); + sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm)); + } + &Inst::FpuCmp32 { rn, rm } => { + sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm)); + } + &Inst::FpuCmp64 { rn, rm } => { + sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm)); + } + &Inst::FpuToInt { op, rd, rn } => { + let top16 = match op { + // FCVTZS (32/32-bit) + FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, + // FCVTZU (32/32-bit) + FpuToIntOp::F32ToU32 => 0b000_11110_00_1_11_001, + // FCVTZS (32/64-bit) + FpuToIntOp::F32ToI64 => 0b100_11110_00_1_11_000, + // FCVTZU (32/64-bit) + FpuToIntOp::F32ToU64 => 0b100_11110_00_1_11_001, + // FCVTZS (64/32-bit) + FpuToIntOp::F64ToI32 => 0b000_11110_01_1_11_000, + // FCVTZU (64/32-bit) + FpuToIntOp::F64ToU32 => 0b000_11110_01_1_11_001, + // FCVTZS (64/64-bit) + FpuToIntOp::F64ToI64 => 0b100_11110_01_1_11_000, + // FCVTZU (64/64-bit) + FpuToIntOp::F64ToU64 => 0b100_11110_01_1_11_001, + }; + sink.put4(enc_fputoint(top16, rd, rn)); + } + &Inst::IntToFpu { op, rd, rn } => { + let top16 = match op { + // SCVTF (32/32-bit) + IntToFpuOp::I32ToF32 => 
0b000_11110_00_1_00_010, + // UCVTF (32/32-bit) + IntToFpuOp::U32ToF32 => 0b000_11110_00_1_00_011, + // SCVTF (64/32-bit) + IntToFpuOp::I64ToF32 => 0b100_11110_00_1_00_010, + // UCVTF (64/32-bit) + IntToFpuOp::U64ToF32 => 0b100_11110_00_1_00_011, + // SCVTF (32/64-bit) + IntToFpuOp::I32ToF64 => 0b000_11110_01_1_00_010, + // UCVTF (32/64-bit) + IntToFpuOp::U32ToF64 => 0b000_11110_01_1_00_011, + // SCVTF (64/64-bit) + IntToFpuOp::I64ToF64 => 0b100_11110_01_1_00_010, + // UCVTF (64/64-bit) + IntToFpuOp::U64ToF64 => 0b100_11110_01_1_00_011, + }; + sink.put4(enc_inttofpu(top16, rd, rn)); + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let inst = Inst::FpuLoad64 { + rd, + mem: AMode::Label(MemLabel::PCRel(8)), + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink, emit_info, state); + sink.put8(const_data); + } + &Inst::LoadFpuConst128 { rd, const_data } => { + let inst = Inst::FpuLoad128 { + rd, + mem: AMode::Label(MemLabel::PCRel(8)), + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(20), + }; + inst.emit(sink, emit_info, state); + + for i in const_data.to_le_bytes().iter() { + sink.put1(*i); + } + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32)); + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64)); + } + &Inst::FpuRound { op, rd, rn } => { + let top22 = match op { + FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, + FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, + FpuRoundMode::Plus32 => 0b000_11110_00_1_001_001_10000, + FpuRoundMode::Plus64 => 0b000_11110_01_1_001_001_10000, + FpuRoundMode::Zero32 => 0b000_11110_00_1_001_011_10000, + FpuRoundMode::Zero64 => 0b000_11110_01_1_001_011_10000, + FpuRoundMode::Nearest32 => 0b000_11110_00_1_001_000_10000, + FpuRoundMode::Nearest64 => 0b000_11110_01_1_001_000_10000, + }; + sink.put4(enc_fround(top22, rd, rn)); + } + &Inst::MovToFpu { rd, rn, size } => { + let template = match size { + ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000, + ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000, + _ => unreachable!(), + }; + sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())); + } + &Inst::MovToVec { rd, rn, idx, size } => { + let (imm5, shift) = match size.lane_size() { + ScalarSize::Size8 => (0b00001, 1), + ScalarSize::Size16 => (0b00010, 2), + ScalarSize::Size32 => (0b00100, 3), + ScalarSize::Size64 => (0b01000, 4), + _ => unreachable!(), + }; + debug_assert_eq!(idx & (0b11111 >> shift), idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b010_01110000_00000_0_0011_1_00000_00000 + | (imm5 << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::MovFromVec { rd, rn, idx, size } => { + let (q, imm5, shift, mask) = match size { + VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111), + VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111), + VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011), + VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001), + _ => unreachable!(), + }; + debug_assert_eq!(idx & mask, idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b000_01110000_00000_0_0111_1_00000_00000 + | (q << 30) + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::MovFromVecSigned { + 
rd, + rn, + idx, + size, + scalar_size, + } => { + let (imm5, shift, half) = match size { + VectorSize::Size8x8 => (0b00001, 1, true), + VectorSize::Size8x16 => (0b00001, 1, false), + VectorSize::Size16x4 => (0b00010, 2, true), + VectorSize::Size16x8 => (0b00010, 2, false), + VectorSize::Size32x2 => { + debug_assert_ne!(scalar_size, OperandSize::Size32); + (0b00100, 3, true) + } + VectorSize::Size32x4 => { + debug_assert_ne!(scalar_size, OperandSize::Size32); + (0b00100, 3, false) + } + _ => panic!("Unexpected vector operand size"), + }; + debug_assert_eq!(idx & (0b11111 >> (half as u32 + shift)), idx); + let imm5 = imm5 | ((idx as u32) << shift); + sink.put4( + 0b000_01110000_00000_0_0101_1_00000_00000 + | (scalar_size.is64() as u32) << 30 + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_gpr(rd.to_reg()), + ); + } + &Inst::VecDup { rd, rn, size } => { + let imm5 = match size { + VectorSize::Size8x16 => 0b00001, + VectorSize::Size16x8 => 0b00010, + VectorSize::Size32x4 => 0b00100, + VectorSize::Size64x2 => 0b01000, + _ => unimplemented!(), + }; + sink.put4( + 0b010_01110000_00000_000011_00000_00000 + | (imm5 << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecDupFromFpu { rd, rn, size } => { + let imm5 = match size { + VectorSize::Size32x4 => 0b00100, + VectorSize::Size64x2 => 0b01000, + _ => unimplemented!(), + }; + sink.put4( + 0b010_01110000_00000_000001_00000_00000 + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecDupImm { + rd, + imm, + invert, + size, + } => { + let (imm, shift, shift_ones) = imm.value(); + let (op, cmode) = match size.lane_size() { + ScalarSize::Size8 => { + assert!(!invert); + assert_eq!(shift, 0); + + (0, 0b1110) + } + ScalarSize::Size16 => { + let s = shift & 8; + + assert!(!shift_ones); + assert_eq!(s, shift); + + (invert as u32, 0b1000 | (s >> 2)) + } + ScalarSize::Size32 => { + if shift_ones { + assert!(shift == 8 || shift == 16); + + (invert as u32, 0b1100 | (shift >> 4)) + } else { + let s = shift & 24; + + assert_eq!(s, shift); + + (invert as u32, 0b0000 | (s >> 2)) + } + } + ScalarSize::Size64 => { + assert!(!invert); + assert_eq!(shift, 0); + + (1, 0b1110) + } + _ => unreachable!(), + }; + let q_op = op | ((size.is_128bits() as u32) << 1); + + sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm)); + } + &Inst::VecExtend { + t, + rd, + rn, + high_half, + } => { + let (u, immh) = match t { + VecExtendOp::Sxtl8 => (0b0, 0b001), + VecExtendOp::Sxtl16 => (0b0, 0b010), + VecExtendOp::Sxtl32 => (0b0, 0b100), + VecExtendOp::Uxtl8 => (0b1, 0b001), + VecExtendOp::Uxtl16 => (0b1, 0b010), + VecExtendOp::Uxtl32 => (0b1, 0b100), + }; + sink.put4( + 0b000_011110_0000_000_101001_00000_00000 + | ((high_half as u32) << 30) + | (u << 29) + | (immh << 19) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecMiscNarrow { + op, + rd, + rn, + size, + high_half, + } => { + let size = match size.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + _ => panic!("Unexpected vector operand lane size!"), + }; + let (u, bits_12_16) = match op { + VecMiscNarrowOp::Xtn => (0b0, 0b10010), + VecMiscNarrowOp::Sqxtn => (0b0, 0b10100), + VecMiscNarrowOp::Sqxtun => (0b1, 0b10010), + }; + sink.put4(enc_vec_rr_misc( + ((high_half as u32) << 1) | u, + size, + bits_12_16, + rd, + rn, + )); + } + &Inst::VecMovElement { + rd, + rn, + dest_idx, + src_idx, + size, + } => { + let (imm5, shift) = match size.lane_size() { + 
ScalarSize::Size8 => (0b00001, 1), + ScalarSize::Size16 => (0b00010, 2), + ScalarSize::Size32 => (0b00100, 3), + ScalarSize::Size64 => (0b01000, 4), + _ => unreachable!(), + }; + let mask = 0b11111 >> shift; + debug_assert_eq!(dest_idx & mask, dest_idx); + debug_assert_eq!(src_idx & mask, src_idx); + let imm4 = (src_idx as u32) << (shift - 1); + let imm5 = imm5 | ((dest_idx as u32) << shift); + sink.put4( + 0b011_01110000_00000_0_0000_1_00000_00000 + | (imm5 << 16) + | (imm4 << 11) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecRRR { + rd, + rn, + rm, + alu_op, + size, + } => { + let (q, enc_size) = size.enc_size(); + let is_float = match alu_op { + VecALUOp::Fcmeq + | VecALUOp::Fcmgt + | VecALUOp::Fcmge + | VecALUOp::Fadd + | VecALUOp::Fsub + | VecALUOp::Fdiv + | VecALUOp::Fmax + | VecALUOp::Fmin + | VecALUOp::Fmul => true, + _ => false, + }; + let enc_float_size = match (is_float, size) { + (true, VectorSize::Size32x2) => 0b0, + (true, VectorSize::Size32x4) => 0b0, + (true, VectorSize::Size64x2) => 0b1, + (true, _) => unimplemented!(), + _ => 0, + }; + + let (top11, bit15_10) = match alu_op { + VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011), + VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011), + VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011), + VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011), + VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011), + VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111), + VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101), + VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101), + VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111), + VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001), + VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001), + VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001), + // The following logical instructions operate on bytes, so are not encoded differently + // for the different vector types. 
+ VecALUOp::And => (0b000_01110_00_1, 0b000111), + VecALUOp::Bic => (0b000_01110_01_1, 0b000111), + VecALUOp::Orr => (0b000_01110_10_1, 0b000111), + VecALUOp::Eor => (0b001_01110_00_1, 0b000111), + VecALUOp::Bsl => (0b001_01110_01_1, 0b000111), + VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001), + VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001), + VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001), + VecALUOp::Mul => { + debug_assert_ne!(size, VectorSize::Size64x2); + (0b000_01110_00_1 | enc_size << 1, 0b100111) + } + VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001), + VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001), + VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011), + VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011), + VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001), + VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001), + VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101), + VecALUOp::Fadd => (0b000_01110_00_1, 0b110101), + VecALUOp::Fsub => (0b000_01110_10_1, 0b110101), + VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111), + VecALUOp::Fmax => (0b000_01110_00_1, 0b111101), + VecALUOp::Fmin => (0b000_01110_10_1, 0b111101), + VecALUOp::Fmul => (0b001_01110_00_1, 0b110111), + VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111), + VecALUOp::Umlal => { + debug_assert!(!size.is_128bits()); + (0b001_01110_00_1 | enc_size << 1, 0b100000) + } + VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110), + VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000), + VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000), + }; + let top11 = match alu_op { + VecALUOp::Smull | VecALUOp::Smull2 => top11, + _ if is_float => top11 | (q << 9) | enc_float_size << 1, + _ => top11 | (q << 9), + }; + sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd)); + } + &Inst::VecLoadReplicate { rd, rn, size } => { + let (q, size) = size.enc_size(); + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + // Register the offset at which the actual load instruction starts. + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + sink.put4(enc_ldst_vec(q, size, rn, rd)); + } + &Inst::VecCSel { rd, rn, rm, cond } => { + /* Emit this: + b.cond else + mov rd, rm + b out + else: + mov rd, rn + out: + + Note, we could do better in the cases where rd == rn or rd == rm. 
+ */ + let else_label = sink.get_label(); + let out_label = sink.get_label(); + + // b.cond else + let br_else_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(else_label), + CondBrKind::Cond(cond), + )); + sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19); + + // mov rd, rm + sink.put4(enc_vecmov(/* 16b = */ true, rd, rm)); + + // b out + let b_out_offset = sink.cur_offset(); + sink.use_label_at_offset(b_out_offset, out_label, LabelUse::Branch26); + sink.add_uncond_branch(b_out_offset, b_out_offset + 4, out_label); + sink.put4(enc_jump26(0b000101, 0 /* will be fixed up later */)); + + // else: + sink.bind_label(else_label); + + // mov rd, rn + sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); + + // out: + sink.bind_label(out_label); + } + &Inst::MovToNZCV { rn } => { + sink.put4(0xd51b4200 | machreg_to_gpr(rn)); + } + &Inst::MovFromNZCV { rd } => { + sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits >= 8 => { + let top22 = match (signed, from_bits, to_bits) { + (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32) + (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32) + (true, 8, 32) => 0b000_100110_0_000000_000111, // SXTB (32) + (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32) + // The 64-bit unsigned variants are the same as the 32-bit ones, + // because writes to Wn zero out the top 32 bits of Xn + (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64) + (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64) + (true, 8, 64) => 0b100_100110_1_000000_000111, // SXTB (64) + (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64) + // 32-to-64: the unsigned case is a 'mov' (special-cased below). + (false, 32, 64) => 0, // MOV + (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64) + _ => panic!( + "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}", + signed, from_bits, to_bits + ), + }; + if top22 != 0 { + sink.put4(enc_extend(top22, rd, rn)); + } else { + Inst::mov32(rd, rn).emit(sink, emit_info, state); + } + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && signed => { + assert!(to_bits <= 64); + // Reduce sign-extend-from-1-bit to: + // - and rd, rn, #1 + // - sub rd, zr, rd + + // We don't have ImmLogic yet, so we just hardcode this. FIXME. + sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())); + let sub_inst = Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd, + rn: zero_reg(), + rm: rd.to_reg(), + }; + sub_inst.emit(sink, emit_info, state); + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && !signed => { + assert!(to_bits <= 64); + // Reduce zero-extend-from-1-bit to: + // - and rd, rn, #1 + + // We don't have ImmLogic yet, so we just hardcode this. FIXME. + sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())); + } + &Inst::Extend { .. } => { + panic!("Unsupported extend variant"); + } + &Inst::Jump { ref dest } => { + let off = sink.cur_offset(); + // Indicate that the jump uses a label, if so, so that a fixup can occur later. + if let Some(l) = dest.as_label() { + sink.use_label_at_offset(off, l, LabelUse::Branch26); + sink.add_uncond_branch(off, off + 4, l); + } + // Emit the jump itself. 
+ sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero())); + } + &Inst::Ret => { + sink.put4(0xd65f03c0); + } + &Inst::EpiloguePlaceholder => { + // Noop; this is just a placeholder for epilogues. + } + &Inst::Call { ref info } => { + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + let loc = state.cur_srcloc(); + sink.add_reloc(loc, Reloc::Arm64Call, &info.dest, 0); + sink.put4(enc_jump26(0b100101, 0)); + if info.opcode.is_call() { + sink.add_call_site(loc, info.opcode); + } + } + &Inst::CallInd { ref info } => { + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); + let loc = state.cur_srcloc(); + if info.opcode.is_call() { + sink.add_call_site(loc, info.opcode); + } + } + &Inst::CondBr { + taken, + not_taken, + kind, + } => { + // Conditional part first. + let cond_off = sink.cur_offset(); + if let Some(l) = taken.as_label() { + sink.use_label_at_offset(cond_off, l, LabelUse::Branch19); + let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes(); + sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); + } + sink.put4(enc_conditional_br(taken, kind)); + + // Unconditional part next. + let uncond_off = sink.cur_offset(); + if let Some(l) = not_taken.as_label() { + sink.use_label_at_offset(uncond_off, l, LabelUse::Branch26); + sink.add_uncond_branch(uncond_off, uncond_off + 4, l); + } + sink.put4(enc_jump26(0b000101, not_taken.as_offset26_or_zero())); + } + &Inst::TrapIf { kind, trap_code } => { + // condbr KIND, LABEL + let off = sink.cur_offset(); + let label = sink.get_label(); + sink.put4(enc_conditional_br( + BranchTarget::Label(label), + kind.invert(), + )); + sink.use_label_at_offset(off, label, LabelUse::Branch19); + // udf + let trap = Inst::Udf { trap_code }; + trap.emit(sink, emit_info, state); + // LABEL: + sink.bind_label(label); + } + &Inst::IndirectBr { rn, .. } => { + sink.put4(enc_br(rn)); + } + &Inst::Nop0 => {} + &Inst::Nop4 => { + sink.put4(0xd503201f); + } + &Inst::Brk => { + sink.put4(0xd4200000); + } + &Inst::Udf { trap_code } => { + let srcloc = state.cur_srcloc(); + sink.add_trap(srcloc, trap_code); + if let Some(s) = state.take_stack_map() { + sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); + } + sink.put4(0xd4a00000); + } + &Inst::Adr { rd, off } => { + assert!(off > -(1 << 20)); + assert!(off < (1 << 20)); + sink.put4(enc_adr(off, rd)); + } + &Inst::Word4 { data } => { + sink.put4(data); + } + &Inst::Word8 { data } => { + sink.put8(data); + } + &Inst::JTSequence { + ridx, + rtmp1, + rtmp2, + ref info, + .. + } => { + // This sequence is *one* instruction in the vcode, and is expanded only here at + // emission time, because we cannot allow the regalloc to insert spills/reloads in + // the middle; we depend on hardcoded PC-rel addressing below. + + // Branch to default when condition code from prior comparison indicates. + let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs)); + // No need to inform the sink's branch folding logic about this branch, because it + // will not be merged with any other branch, flipped, or elided (it is not preceded + // or succeeded by any other branch). Just emit it with the label use. 
+ let default_br_offset = sink.cur_offset(); + if let BranchTarget::Label(l) = info.default_target { + sink.use_label_at_offset(default_br_offset, l, LabelUse::Branch19); + } + sink.put4(br); + + // Save index in a tmp (the live range of ridx only goes to start of this + // sequence; rtmp1 or rtmp2 may overwrite it). + let inst = Inst::gen_move(rtmp2, ridx, I64); + inst.emit(sink, emit_info, state); + // Load address of jump table + let inst = Inst::Adr { rd: rtmp1, off: 16 }; + inst.emit(sink, emit_info, state); + // Load value out of jump table + let inst = Inst::SLoad32 { + rd: rtmp2, + mem: AMode::reg_plus_reg_scaled_extended( + rtmp1.to_reg(), + rtmp2.to_reg(), + I32, + ExtendOp::UXTW, + ), + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + // Add base of jump table to jump-table-sourced block offset + let inst = Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: rtmp1, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + }; + inst.emit(sink, emit_info, state); + // Branch to computed address. (`targets` here is only used for successor queries + // and is not needed for emission.) + let inst = Inst::IndirectBr { + rn: rtmp1.to_reg(), + targets: vec![], + }; + inst.emit(sink, emit_info, state); + // Emit jump table (table of 32-bit offsets). + let jt_off = sink.cur_offset(); + for &target in info.targets.iter() { + let word_off = sink.cur_offset(); + // off_into_table is an addend here embedded in the label to be later patched + // at the end of codegen. The offset is initially relative to this jump table + // entry; with the extra addend, it'll be relative to the jump table's start, + // after patching. + let off_into_table = word_off - jt_off; + sink.use_label_at_offset( + word_off, + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + sink.put4(off_into_table); + } + + // Lowering produces an EmitIsland before using a JTSequence, so we can safely + // disable the worst-case-size check in this case. 
+ start_off = sink.cur_offset(); + } + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + let inst = Inst::ULoad64 { + rd, + mem: AMode::Label(MemLabel::PCRel(8)), + flags: MemFlags::trusted(), + }; + inst.emit(sink, emit_info, state); + let inst = Inst::Jump { + dest: BranchTarget::ResolvedOffset(12), + }; + inst.emit(sink, emit_info, state); + let srcloc = state.cur_srcloc(); + sink.add_reloc(srcloc, Reloc::Abs8, name, offset); + if emit_info.flags().emit_all_ones_funcaddrs() { + sink.put8(u64::max_value()); + } else { + sink.put8(0); + } + } + &Inst::LoadAddr { rd, ref mem } => { + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let (reg, index_reg, offset) = match mem { + AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0), + AMode::Unscaled(r, simm9) => (r, None, simm9.value()), + AMode::UnsignedOffset(r, uimm12scaled) => { + (r, None, uimm12scaled.value() as i32) + } + _ => panic!("Unsupported case for LoadAddr: {:?}", mem), + }; + let abs_offset = if offset < 0 { + -offset as u64 + } else { + offset as u64 + }; + let alu_op = if offset < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + + if let Some((idx, extendop)) = index_reg { + let add = Inst::AluRRRExtend { + alu_op: ALUOp::Add64, + rd, + rn: reg, + rm: idx, + extendop, + }; + + add.emit(sink, emit_info, state); + } else if offset == 0 { + if reg != rd.to_reg() { + let mov = Inst::mov(rd, reg); + + mov.emit(sink, emit_info, state); + } + } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { + let add = Inst::AluRRImm12 { + alu_op, + rd, + rn: reg, + imm12, + }; + add.emit(sink, emit_info, state); + } else { + // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction + // was initially an `SPOffset`. Assert that `tmp2` is truly free to use. Note + // that no other instructions will be inserted here (we're emitting directly), + // and a live range of `tmp2` should not span this instruction, so this use + // should otherwise be correct. 
+ debug_assert!(rd.to_reg() != tmp2_reg()); + debug_assert!(reg != tmp2_reg()); + let tmp = writable_tmp2_reg(); + for insn in Inst::load_constant(tmp, abs_offset).into_iter() { + insn.emit(sink, emit_info, state); + } + let add = Inst::AluRRR { + alu_op, + rd, + rn: reg, + rm: tmp.to_reg(), + }; + add.emit(sink, emit_info, state); + } + } + &Inst::VirtualSPOffsetAdj { offset } => { + debug!( + "virtual sp offset adjusted by {} -> {}", + offset, + state.virtual_sp_offset + offset, + ); + state.virtual_sp_offset += offset; + } + &Inst::EmitIsland { needed_space } => { + if sink.island_needed(needed_space + 4) { + let jump_around_label = sink.get_label(); + let jmp = Inst::Jump { + dest: BranchTarget::Label(jump_around_label), + }; + jmp.emit(sink, emit_info, state); + sink.emit_island(); + sink.bind_label(jump_around_label); + } + } + } + + let end_off = sink.cur_offset(); + debug_assert!((end_off - start_off) <= Inst::worst_case_size()); + + state.clear_post_insn(); + } + + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + self.print_with_state(mb_rru, state) + } +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs new file mode 100644 index 0000000000..eb31963b5d --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/emit_tests.rs @@ -0,0 +1,5143 @@ +use crate::ir::types::*; +use crate::isa::aarch64::inst::*; +use crate::isa::test_utils; +use crate::isa::CallConv; +use crate::settings; + +use alloc::boxed::Box; +use alloc::vec::Vec; + +#[test] +fn test_aarch64_binemit() { + let mut insns = Vec::<(Inst, &str, &str)>::new(); + + // N.B.: the architecture is little-endian, so when transcribing the 32-bit + // hex instructions from e.g. objdump disassembly, one must swap the bytes + // seen below. (E.g., a `ret` is normally written as the u32 `D65F03C0`, + // but we write it here as C0035FD6.) 
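+ // (For another worked example of this byte-swapping convention, see the first `add` test + // case below: `add w1, w2, w3` assembles to the 32-bit word 0x0B030041, which is + // transcribed in the expected-encoding string as "4100030B".)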
+ + // Useful helper script to produce the encodings from the text: + // + // #!/bin/sh + // tmp=`mktemp /tmp/XXXXXXXX.o` + // aarch64-linux-gnu-as /dev/stdin -o $tmp + // aarch64-linux-gnu-objdump -d $tmp + // rm -f $tmp + // + // Then: + // + // $ echo "mov x1, x2" | aarch64inst.sh + insns.push((Inst::Ret, "C0035FD6", "ret")); + insns.push((Inst::Nop0, "", "nop-zero-len")); + insns.push((Inst::Nop4, "1F2003D5", "nop")); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100030B", + "add w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400068B", + "add x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100034B", + "sub w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006CB", + "sub x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100032A", + "orr w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006AA", + "orr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100030A", + "and w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400068A", + "and x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS32, + rd: writable_zero_reg(), + rn: xreg(2), + rm: xreg(3), + }, + "5F00036B", + // TODO: Display as cmp + "subs wzr, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100036B", + "subs w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubS64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006EB", + "subs x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddS32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "4100032B", + "adds w1, w2, w3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddS64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006AB", + "adds x4, x5, x6", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::AddS64, + rd: writable_zero_reg(), + rn: xreg(5), + imm12: Imm12::maybe_from_u64(1).unwrap(), + }, + "BF0400B1", + // TODO: Display as cmn. 
+ "adds xzr, x5, #1", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SDiv64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40CC69A", + "sdiv x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::UDiv64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A408C69A", + "udiv x4, x5, x6", + )); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Eor32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400064A", + "eor w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Eor64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40006CA", + "eor x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400260A", + "bic w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400268A", + "bic x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400262A", + "orn w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40026AA", + "orn x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::EorNot32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A400264A", + "eon w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::EorNot64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A40026CA", + "eon x4, x5, x6", + )); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::RotR32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A42CC61A", + "ror w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::RotR64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A42CC69A", + "ror x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsr32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A424C61A", + "lsr w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A424C69A", + "lsr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Asr32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A428C61A", + "asr w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Asr64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A428C69A", + "asr x4, x5, x6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A420C61A", + "lsl w4, w5, w6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(4), + rn: xreg(5), + rm: xreg(6), + }, + "A420C69A", + "lsl x4, x5, x6", + )); + + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0411", + "add w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: true, + }, + }, + "078D4411", + "add w7, w8, #1191936", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0491", + "add x7, x8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Sub32, + rd: writable_xreg(7), + rn: xreg(8), + 
imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0451", + "sub w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::Sub64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D04D1", + "sub x7, x8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::SubS32, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D0471", + "subs w7, w8, #291", + )); + insns.push(( + Inst::AluRRImm12 { + alu_op: ALUOp::SubS64, + rd: writable_xreg(7), + rn: xreg(8), + imm12: Imm12 { + bits: 0x123, + shift12: false, + }, + }, + "078D04F1", + "subs x7, x8, #291", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Add32, + rd: writable_xreg(7), + rn: xreg(8), + rm: xreg(9), + extendop: ExtendOp::SXTB, + }, + "0781290B", + "add w7, w8, w9, SXTB", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Add64, + rd: writable_xreg(15), + rn: xreg(16), + rm: xreg(17), + extendop: ExtendOp::UXTB, + }, + "0F02318B", + "add x15, x16, x17, UXTB", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Sub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + extendop: ExtendOp::SXTH, + }, + "41A0234B", + "sub w1, w2, w3, SXTH", + )); + + insns.push(( + Inst::AluRRRExtend { + alu_op: ALUOp::Sub64, + rd: writable_xreg(20), + rn: xreg(21), + rm: xreg(22), + extendop: ExtendOp::UXTW, + }, + "B44236CB", + "sub x20, x21, x22, UXTW", + )); + + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Add32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(20).unwrap(), + ), + }, + "6A510C0B", + "add w10, w11, w12, LSL 20", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Add64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::ASR, + ShiftOpShiftImm::maybe_from_shift(42).unwrap(), + ), + }, + "6AA98C8B", + "add x10, x11, x12, ASR 42", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Sub32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C4B", + "sub w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Sub64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CCB", + "sub x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Orr32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C2A", + "orr w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Orr64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CAA", + "orr x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::And32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C0A", + "and w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::And64, + rd: writable_xreg(10), + rn: xreg(11), + rm: 
xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C8A", + "and x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Eor32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C4A", + "eor w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::Eor64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CCA", + "eor x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::OrrNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C2A", + "orn w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::OrrNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2CAA", + "orn x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AndNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C0A", + "bic w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AndNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C8A", + "bic x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::EorNot32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2C4A", + "eon w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::EorNot64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D2CCA", + "eon x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AddS32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C2B", + "adds w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::AddS64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CAB", + "adds x10, x11, x12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::SubS32, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0C6B", + "subs w10, w11, w12, LSL 23", + )); + insns.push(( + Inst::AluRRRShift { + alu_op: ALUOp::SubS64, + rd: writable_xreg(10), + rn: xreg(11), + rm: xreg(12), + shiftop: ShiftOpAndAmt::new( + ShiftOp::LSL, + ShiftOpShiftImm::maybe_from_shift(23).unwrap(), + ), + }, + "6A5D0CEB", + "subs x10, x11, x12, LSL 23", + )); + + insns.push(( + Inst::AluRRRExtend { + 
alu_op: ALUOp::SubS64, + rd: writable_zero_reg(), + rn: stack_reg(), + rm: xreg(12), + extendop: ExtendOp::UXTX, + }, + "FF632CEB", + "subs xzr, sp, x12, UXTX", + )); + + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp3::MAdd32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110031B", + "madd w1, w2, w3, w4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp3::MAdd64, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110039B", + "madd x1, x2, x3, x4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp3::MSub32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4190031B", + "msub w1, w2, w3, w4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp3::MSub64, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4190039B", + "msub x1, x2, x3, x4", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SMulH, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "417C439B", + "smulh x1, x2, x3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::UMulH, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + }, + "417CC39B", + "umulh x1, x2, x3", + )); + + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::RotR32, + rd: writable_xreg(20), + rn: xreg(21), + immshift: ImmShift::maybe_from_u64(19).unwrap(), + }, + "B44E9513", + "ror w20, w21, #19", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::RotR64, + rd: writable_xreg(20), + rn: xreg(21), + immshift: ImmShift::maybe_from_u64(42).unwrap(), + }, + "B4AAD593", + "ror x20, x21, #42", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsr32, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(13).unwrap(), + }, + "6A7D0D53", + "lsr w10, w11, #13", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsr64, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(57).unwrap(), + }, + "6AFD79D3", + "lsr x10, x11, #57", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Asr32, + rd: writable_xreg(4), + rn: xreg(5), + immshift: ImmShift::maybe_from_u64(7).unwrap(), + }, + "A47C0713", + "asr w4, w5, #7", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Asr64, + rd: writable_xreg(4), + rn: xreg(5), + immshift: ImmShift::maybe_from_u64(35).unwrap(), + }, + "A4FC6393", + "asr x4, x5, #35", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(8), + rn: xreg(9), + immshift: ImmShift::maybe_from_u64(24).unwrap(), + }, + "281D0853", + "lsl w8, w9, #24", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(8), + rn: xreg(9), + immshift: ImmShift::maybe_from_u64(63).unwrap(), + }, + "280141D3", + "lsl x8, x9, #63", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl32, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(0).unwrap(), + }, + "6A7D0053", + "lsl w10, w11, #0", + )); + insns.push(( + Inst::AluRRImmShift { + alu_op: ALUOp::Lsl64, + rd: writable_xreg(10), + rn: xreg(11), + immshift: ImmShift::maybe_from_u64(0).unwrap(), + }, + "6AFD40D3", + "lsl x10, x11, #0", + )); + + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::And32, + rd: writable_xreg(21), + rn: xreg(27), + imml: ImmLogic::maybe_from_u64(0x80003fff, I32).unwrap(), + }, + "753B0112", + "and w21, w27, #2147500031", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::And64, + rd: writable_xreg(7), + rn: xreg(6), + imml: 
ImmLogic::maybe_from_u64(0x3fff80003fff800, I64).unwrap(), + }, + "C7381592", + "and x7, x6, #288221580125796352", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Orr32, + rd: writable_xreg(1), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x100000, I32).unwrap(), + }, + "A1000C32", + "orr w1, w5, #1048576", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Orr64, + rd: writable_xreg(4), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(), + }, + "A4C401B2", + "orr x4, x5, #9331882296111890817", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Eor32, + rd: writable_xreg(1), + rn: xreg(5), + imml: ImmLogic::maybe_from_u64(0x00007fff, I32).unwrap(), + }, + "A1380052", + "eor w1, w5, #32767", + )); + insns.push(( + Inst::AluRRImmLogic { + alu_op: ALUOp::Eor64, + rd: writable_xreg(10), + rn: xreg(8), + imml: ImmLogic::maybe_from_u64(0x8181818181818181, I64).unwrap(), + }, + "0AC501D2", + "eor x10, x8, #9331882296111890817", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::RBit32, + rd: writable_xreg(1), + rn: xreg(10), + }, + "4101C05A", + "rbit w1, w10", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::RBit64, + rd: writable_xreg(1), + rn: xreg(10), + }, + "4101C0DA", + "rbit x1, x10", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Clz32, + rd: writable_xreg(15), + rn: xreg(3), + }, + "6F10C05A", + "clz w15, w3", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Clz64, + rd: writable_xreg(15), + rn: xreg(3), + }, + "6F10C0DA", + "clz x15, x3", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Cls32, + rd: writable_xreg(21), + rn: xreg(16), + }, + "1516C05A", + "cls w21, w16", + )); + + insns.push(( + Inst::BitRR { + op: BitOp::Cls64, + rd: writable_xreg(21), + rn: xreg(16), + }, + "1516C0DA", + "cls x21, x16", + )); + + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "41004038", + "ldurb w1, [x2]", + )); + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::zero(I8)), + flags: MemFlags::trusted(), + }, + "41004039", + "ldrb w1, [x2]", + )); + insns.push(( + Inst::ULoad8 { + rd: writable_xreg(1), + mem: AMode::RegReg(xreg(2), xreg(5)), + flags: MemFlags::trusted(), + }, + "41686538", + "ldrb w1, [x2, x5]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "41008038", + "ldursb x1, [x2]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(63, I8).unwrap()), + flags: MemFlags::trusted(), + }, + "41FC8039", + "ldrsb x1, [x2, #63]", + )); + insns.push(( + Inst::SLoad8 { + rd: writable_xreg(1), + mem: AMode::RegReg(xreg(2), xreg(5)), + flags: MemFlags::trusted(), + }, + "4168A538", + "ldrsb x1, [x2, x5]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(5).unwrap()), + flags: MemFlags::trusted(), + }, + "41504078", + "ldurh w1, [x2, #5]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8, I16).unwrap()), + flags: MemFlags::trusted(), + }, + "41104079", + "ldrh w1, [x2, #8]", + )); + insns.push(( + Inst::ULoad16 { + rd: writable_xreg(1), + mem: AMode::RegScaled(xreg(2), xreg(3), I16), + flags: MemFlags::trusted(), + }, + "41786378", + "ldrh w1, [x2, x3, 
LSL #1]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "41008078", + "ldursh x1, [x2]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(28), + mem: AMode::UnsignedOffset(xreg(20), UImm12Scaled::maybe_from_i64(24, I16).unwrap()), + flags: MemFlags::trusted(), + }, + "9C328079", + "ldrsh x28, [x20, #24]", + )); + insns.push(( + Inst::SLoad16 { + rd: writable_xreg(28), + mem: AMode::RegScaled(xreg(20), xreg(20), I16), + flags: MemFlags::trusted(), + }, + "9C7AB478", + "ldrsh x28, [x20, x20, LSL #1]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "410040B8", + "ldur w1, [x2]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(12), + mem: AMode::UnsignedOffset(xreg(0), UImm12Scaled::maybe_from_i64(204, I32).unwrap()), + flags: MemFlags::trusted(), + }, + "0CCC40B9", + "ldr w12, [x0, #204]", + )); + insns.push(( + Inst::ULoad32 { + rd: writable_xreg(1), + mem: AMode::RegScaled(xreg(2), xreg(12), I32), + flags: MemFlags::trusted(), + }, + "41786CB8", + "ldr w1, [x2, x12, LSL #2]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "410080B8", + "ldursw x1, [x2]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(12), + mem: AMode::UnsignedOffset(xreg(1), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()), + flags: MemFlags::trusted(), + }, + "2CFCBFB9", + "ldrsw x12, [x1, #16380]", + )); + insns.push(( + Inst::SLoad32 { + rd: writable_xreg(1), + mem: AMode::RegScaled(xreg(5), xreg(1), I32), + flags: MemFlags::trusted(), + }, + "A178A1B8", + "ldrsw x1, [x5, x1, LSL #2]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "410040F8", + "ldur x1, [x2]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(-256).unwrap()), + flags: MemFlags::trusted(), + }, + "410050F8", + "ldur x1, [x2, #-256]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::maybe_from_i64(255).unwrap()), + flags: MemFlags::trusted(), + }, + "41F04FF8", + "ldur x1, [x2, #255]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "41FC7FF9", + "ldr x1, [x2, #32760]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegReg(xreg(2), xreg(3)), + flags: MemFlags::trusted(), + }, + "416863F8", + "ldr x1, [x2, x3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegScaled(xreg(2), xreg(3), I64), + flags: MemFlags::trusted(), + }, + "417863F8", + "ldr x1, [x2, x3, LSL #3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::SXTW), + flags: MemFlags::trusted(), + }, + "41D863F8", + "ldr x1, [x2, w3, SXTW #3]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::SXTW), + flags: MemFlags::trusted(), + }, + "41C863F8", + "ldr x1, [x2, w3, SXTW]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::Label(MemLabel::PCRel(64)), + flags: 
MemFlags::trusted(), + }, + "01020058", + "ldr x1, pc+64", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + flags: MemFlags::trusted(), + }, + "410C41F8", + "ldr x1, [x2, #16]!", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + flags: MemFlags::trusted(), + }, + "410441F8", + "ldr x1, [x2], #16", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::FPOffset(32768, I8), + flags: MemFlags::trusted(), + }, + "100090D2B063308B010240F9", + "movz x16, #32768 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::FPOffset(-32768, I8), + flags: MemFlags::trusted(), + }, + "F0FF8F92B063308B010240F9", + "movn x16, #32767 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::FPOffset(1048576, I8), // 2^20 + flags: MemFlags::trusted(), + }, + "1002A0D2B063308B010240F9", + "movz x16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", + )); + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::FPOffset(1048576 + 1, I8), // 2^20 + 1 + flags: MemFlags::trusted(), + }, + "300080521002A072B063308B010240F9", + "movz w16, #1 ; movk w16, #16, LSL #16 ; add x16, fp, x16, UXTX ; ldr x1, [x16]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegOffset(xreg(7), 8, I64), + flags: MemFlags::trusted(), + }, + "E18040F8", + "ldur x1, [x7, #8]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegOffset(xreg(7), 1024, I64), + flags: MemFlags::trusted(), + }, + "E10042F9", + "ldr x1, [x7, #1024]", + )); + + insns.push(( + Inst::ULoad64 { + rd: writable_xreg(1), + mem: AMode::RegOffset(xreg(7), 1048576, I64), + flags: MemFlags::trusted(), + }, + "1002A0D2F060308B010240F9", + "movz x16, #16, LSL #16 ; add x16, x7, x16, UXTX ; ldr x1, [x16]", + )); + + insns.push(( + Inst::Store8 { + rd: xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "41000038", + "sturb w1, [x2]", + )); + insns.push(( + Inst::Store8 { + rd: xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(4095, I8).unwrap()), + flags: MemFlags::trusted(), + }, + "41FC3F39", + "strb w1, [x2, #4095]", + )); + insns.push(( + Inst::Store16 { + rd: xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "41000078", + "sturh w1, [x2]", + )); + insns.push(( + Inst::Store16 { + rd: xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(8190, I16).unwrap()), + flags: MemFlags::trusted(), + }, + "41FC3F79", + "strh w1, [x2, #8190]", + )); + insns.push(( + Inst::Store32 { + rd: xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "410000B8", + "stur w1, [x2]", + )); + insns.push(( + Inst::Store32 { + rd: xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(16380, I32).unwrap()), + flags: MemFlags::trusted(), + }, + "41FC3FB9", + "str w1, [x2, #16380]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::Unscaled(xreg(2), SImm9::zero()), + flags: MemFlags::trusted(), + }, + "410000F8", + "stur x1, [x2]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::UnsignedOffset(xreg(2), UImm12Scaled::maybe_from_i64(32760, I64).unwrap()), + 
flags: MemFlags::trusted(), + }, + "41FC3FF9", + "str x1, [x2, #32760]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::RegReg(xreg(2), xreg(3)), + flags: MemFlags::trusted(), + }, + "416823F8", + "str x1, [x2, x3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::RegScaled(xreg(2), xreg(3), I64), + flags: MemFlags::trusted(), + }, + "417823F8", + "str x1, [x2, x3, LSL #3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::RegScaledExtended(xreg(2), xreg(3), I64, ExtendOp::UXTW), + flags: MemFlags::trusted(), + }, + "415823F8", + "str x1, [x2, w3, UXTW #3]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::RegExtended(xreg(2), xreg(3), ExtendOp::UXTW), + flags: MemFlags::trusted(), + }, + "414823F8", + "str x1, [x2, w3, UXTW]", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::PreIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + flags: MemFlags::trusted(), + }, + "410C01F8", + "str x1, [x2, #16]!", + )); + insns.push(( + Inst::Store64 { + rd: xreg(1), + mem: AMode::PostIndexed(writable_xreg(2), SImm9::maybe_from_i64(16).unwrap()), + flags: MemFlags::trusted(), + }, + "410401F8", + "str x1, [x2], #16", + )); + + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + flags: MemFlags::trusted(), + }, + "482500A9", + "stp x8, x9, [x10]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "48A51FA9", + "stp x8, x9, [x10, #504]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "48253CA9", + "stp x8, x9, [x10, #-64]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(21), + rt2: xreg(28), + mem: PairAMode::SignedOffset(xreg(1), SImm7Scaled::maybe_from_i64(-512, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "357020A9", + "stp x21, x28, [x1, #-512]", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(8), + rt2: xreg(9), + mem: PairAMode::PreIndexed( + writable_xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "4825BCA9", + "stp x8, x9, [x10, #-64]!", + )); + insns.push(( + Inst::StoreP64 { + rt: xreg(15), + rt2: xreg(16), + mem: PairAMode::PostIndexed( + writable_xreg(20), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "8FC29FA8", + "stp x15, x16, [x20], #504", + )); + + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::zero(I64)), + flags: MemFlags::trusted(), + }, + "482540A9", + "ldp x8, x9, [x10]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(504, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "48A55FA9", + "ldp x8, x9, [x10, #504]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairAMode::SignedOffset(xreg(10), SImm7Scaled::maybe_from_i64(-64, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "48257CA9", + "ldp x8, x9, [x10, #-64]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairAMode::SignedOffset(xreg(10), 
SImm7Scaled::maybe_from_i64(-512, I64).unwrap()), + flags: MemFlags::trusted(), + }, + "482560A9", + "ldp x8, x9, [x10, #-512]", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(9), + mem: PairAMode::PreIndexed( + writable_xreg(10), + SImm7Scaled::maybe_from_i64(-64, I64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "4825FCA9", + "ldp x8, x9, [x10, #-64]!", + )); + insns.push(( + Inst::LoadP64 { + rt: writable_xreg(8), + rt2: writable_xreg(25), + mem: PairAMode::PostIndexed( + writable_xreg(12), + SImm7Scaled::maybe_from_i64(504, I64).unwrap(), + ), + flags: MemFlags::trusted(), + }, + "88E5DFA8", + "ldp x8, x25, [x12], #504", + )); + + insns.push(( + Inst::Mov64 { + rd: writable_xreg(8), + rm: xreg(9), + }, + "E80309AA", + "mov x8, x9", + )); + insns.push(( + Inst::Mov32 { + rd: writable_xreg(8), + rm: xreg(9), + }, + "E803092A", + "mov w8, w9", + )); + + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size64, + }, + "E8FF9FD2", + "movz x8, #65535", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFBFD2", + "movz x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFDFD2", + "movz x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFFFD2", + "movz x8, #65535, LSL #48", + )); + insns.push(( + Inst::MovZ { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size32, + }, + "E8FFBF52", + "movz w8, #65535, LSL #16", + )); + + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size64, + }, + "E8FF9F92", + "movn x8, #65535", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFBF92", + "movn x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFDF92", + "movn x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFFF92", + "movn x8, #65535, LSL #48", + )); + insns.push(( + Inst::MovN { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size32, + }, + "E8FF9F12", + "movn w8, #65535", + )); + + insns.push(( + Inst::MovK { + rd: writable_xreg(12), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "0C0080F2", + "movk x12, #0", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(19), + imm: MoveWideConst::maybe_with_shift(0x0000, 16).unwrap(), + size: OperandSize::Size64, + }, + "1300A0F2", + "movk x19, #0, LSL #16", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(3), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + size: OperandSize::Size64, + }, + "E3FF9FF2", + "movk x3, #65535", + 
)); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFBFF2", + "movk x8, #65535, LSL #16", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFDFF2", + "movk x8, #65535, LSL #32", + )); + insns.push(( + Inst::MovK { + rd: writable_xreg(8), + imm: MoveWideConst::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + size: OperandSize::Size64, + }, + "E8FFFFF2", + "movk x8, #65535, LSL #48", + )); + + insns.push(( + Inst::CSel { + rd: writable_xreg(10), + rn: xreg(12), + rm: xreg(14), + cond: Cond::Hs, + }, + "8A218E9A", + "csel x10, x12, x14, hs", + )); + insns.push(( + Inst::CSet { + rd: writable_xreg(15), + cond: Cond::Ge, + }, + "EFB79F9A", + "cset x15, ge", + )); + insns.push(( + Inst::CCmpImm { + size: OperandSize::Size64, + rn: xreg(22), + imm: UImm5::maybe_from_u8(5).unwrap(), + nzcv: NZCV::new(false, false, true, true), + cond: Cond::Eq, + }, + "C30A45FA", + "ccmp x22, #5, #nzCV, eq", + )); + insns.push(( + Inst::CCmpImm { + size: OperandSize::Size32, + rn: xreg(3), + imm: UImm5::maybe_from_u8(30).unwrap(), + nzcv: NZCV::new(true, true, true, true), + cond: Cond::Gt, + }, + "6FC85E7A", + "ccmp w3, #30, #NZCV, gt", + )); + insns.push(( + Inst::MovToFpu { + rd: writable_vreg(31), + rn: xreg(0), + size: ScalarSize::Size64, + }, + "1F00679E", + "fmov d31, x0", + )); + insns.push(( + Inst::MovToFpu { + rd: writable_vreg(1), + rn: xreg(28), + size: ScalarSize::Size32, + }, + "8103271E", + "fmov s1, w28", + )); + insns.push(( + Inst::MovToVec { + rd: writable_vreg(0), + rn: xreg(0), + idx: 7, + size: VectorSize::Size8x8, + }, + "001C0F4E", + "mov v0.b[7], w0", + )); + insns.push(( + Inst::MovToVec { + rd: writable_vreg(20), + rn: xreg(21), + idx: 0, + size: VectorSize::Size64x2, + }, + "B41E084E", + "mov v20.d[0], x21", + )); + insns.push(( + Inst::MovFromVec { + rd: writable_xreg(3), + rn: vreg(27), + idx: 14, + size: VectorSize::Size8x16, + }, + "633F1D0E", + "umov w3, v27.b[14]", + )); + insns.push(( + Inst::MovFromVec { + rd: writable_xreg(24), + rn: vreg(5), + idx: 3, + size: VectorSize::Size16x8, + }, + "B83C0E0E", + "umov w24, v5.h[3]", + )); + insns.push(( + Inst::MovFromVec { + rd: writable_xreg(12), + rn: vreg(17), + idx: 1, + size: VectorSize::Size32x4, + }, + "2C3E0C0E", + "mov w12, v17.s[1]", + )); + insns.push(( + Inst::MovFromVec { + rd: writable_xreg(21), + rn: vreg(20), + idx: 0, + size: VectorSize::Size64x2, + }, + "953E084E", + "mov x21, v20.d[0]", + )); + insns.push(( + Inst::MovFromVecSigned { + rd: writable_xreg(0), + rn: vreg(0), + idx: 15, + size: VectorSize::Size8x16, + scalar_size: OperandSize::Size32, + }, + "002C1F0E", + "smov w0, v0.b[15]", + )); + insns.push(( + Inst::MovFromVecSigned { + rd: writable_xreg(12), + rn: vreg(13), + idx: 7, + size: VectorSize::Size8x8, + scalar_size: OperandSize::Size64, + }, + "AC2D0F4E", + "smov x12, v13.b[7]", + )); + insns.push(( + Inst::MovFromVecSigned { + rd: writable_xreg(23), + rn: vreg(31), + idx: 7, + size: VectorSize::Size16x8, + scalar_size: OperandSize::Size32, + }, + "F72F1E0E", + "smov w23, v31.h[7]", + )); + insns.push(( + Inst::MovFromVecSigned { + rd: writable_xreg(24), + rn: vreg(5), + idx: 1, + size: VectorSize::Size32x2, + scalar_size: OperandSize::Size64, + }, + "B82C0C4E", + "smov x24, v5.s[1]", + )); + insns.push(( + Inst::MovToNZCV { rn: xreg(13) }, + "0D421BD5", + "msr nzcv, 
x13", + )); + insns.push(( + Inst::MovFromNZCV { + rd: writable_xreg(27), + }, + "1B423BD5", + "mrs x27, nzcv", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(25), + rn: xreg(7), + size: VectorSize::Size8x16, + }, + "F90C014E", + "dup v25.16b, w7", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(2), + rn: xreg(23), + size: VectorSize::Size16x8, + }, + "E20E024E", + "dup v2.8h, w23", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(0), + rn: xreg(28), + size: VectorSize::Size32x4, + }, + "800F044E", + "dup v0.4s, w28", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(31), + rn: xreg(5), + size: VectorSize::Size64x2, + }, + "BF0C084E", + "dup v31.2d, x5", + )); + insns.push(( + Inst::VecDupFromFpu { + rd: writable_vreg(14), + rn: vreg(19), + size: VectorSize::Size32x4, + }, + "6E06044E", + "dup v14.4s, v19.s[0]", + )); + insns.push(( + Inst::VecDupFromFpu { + rd: writable_vreg(18), + rn: vreg(10), + size: VectorSize::Size64x2, + }, + "5205084E", + "dup v18.2d, v10.d[0]", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(31), + imm: ASIMDMovModImm::maybe_from_u64(255, ScalarSize::Size8).unwrap(), + invert: false, + size: VectorSize::Size8x16, + }, + "FFE7074F", + "movi v31.16b, #255", + )); + insns.push(( + Inst::VecDupImm { + rd: writable_vreg(0), + imm: ASIMDMovModImm::zero(), + invert: true, + size: VectorSize::Size16x4, + }, + "0084002F", + "mvni v0.4h, #0", + )); + insns.push(( + Inst::VecExtend { + t: VecExtendOp::Sxtl8, + rd: writable_vreg(4), + rn: vreg(27), + high_half: false, + }, + "64A7080F", + "sxtl v4.8h, v27.8b", + )); + insns.push(( + Inst::VecExtend { + t: VecExtendOp::Sxtl16, + rd: writable_vreg(17), + rn: vreg(19), + high_half: true, + }, + "71A6104F", + "sxtl2 v17.4s, v19.8h", + )); + insns.push(( + Inst::VecExtend { + t: VecExtendOp::Sxtl32, + rd: writable_vreg(30), + rn: vreg(6), + high_half: false, + }, + "DEA4200F", + "sxtl v30.2d, v6.2s", + )); + insns.push(( + Inst::VecExtend { + t: VecExtendOp::Uxtl8, + rd: writable_vreg(3), + rn: vreg(29), + high_half: true, + }, + "A3A7086F", + "uxtl2 v3.8h, v29.16b", + )); + insns.push(( + Inst::VecExtend { + t: VecExtendOp::Uxtl16, + rd: writable_vreg(15), + rn: vreg(12), + high_half: false, + }, + "8FA5102F", + "uxtl v15.4s, v12.4h", + )); + insns.push(( + Inst::VecExtend { + t: VecExtendOp::Uxtl32, + rd: writable_vreg(28), + rn: vreg(2), + high_half: true, + }, + "5CA4206F", + "uxtl2 v28.2d, v2.4s", + )); + + insns.push(( + Inst::VecMovElement { + rd: writable_vreg(0), + rn: vreg(31), + dest_idx: 7, + src_idx: 7, + size: VectorSize::Size16x8, + }, + "E0771E6E", + "mov v0.h[7], v31.h[7]", + )); + + insns.push(( + Inst::VecMovElement { + rd: writable_vreg(31), + rn: vreg(16), + dest_idx: 1, + src_idx: 0, + size: VectorSize::Size32x2, + }, + "1F060C6E", + "mov v31.s[1], v16.s[0]", + )); + + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Xtn, + rd: writable_vreg(22), + rn: vreg(8), + size: VectorSize::Size32x2, + high_half: false, + }, + "1629A10E", + "xtn v22.2s, v8.2d", + )); + + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Sqxtn, + rd: writable_vreg(31), + rn: vreg(0), + size: VectorSize::Size16x8, + high_half: true, + }, + "1F48614E", + "sqxtn2 v31.8h, v0.4s", + )); + + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Sqxtun, + rd: writable_vreg(16), + rn: vreg(23), + size: VectorSize::Size8x16, + high_half: false, + }, + "F02A212E", + "sqxtun v16.8b, v23.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqadd, + rd: 
writable_vreg(1), + rn: vreg(2), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "410C284E", + "sqadd v1.16b, v2.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqadd, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(28), + size: VectorSize::Size16x8, + }, + "810D7C4E", + "sqadd v1.8h, v12.8h, v28.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqadd, + rd: writable_vreg(12), + rn: vreg(2), + rm: vreg(6), + size: VectorSize::Size32x4, + }, + "4C0CA64E", + "sqadd v12.4s, v2.4s, v6.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqadd, + rd: writable_vreg(20), + rn: vreg(7), + rm: vreg(13), + size: VectorSize::Size64x2, + }, + "F40CED4E", + "sqadd v20.2d, v7.2d, v13.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqsub, + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "412C284E", + "sqsub v1.16b, v2.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqsub, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(28), + size: VectorSize::Size16x8, + }, + "812D7C4E", + "sqsub v1.8h, v12.8h, v28.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqsub, + rd: writable_vreg(12), + rn: vreg(2), + rm: vreg(6), + size: VectorSize::Size32x4, + }, + "4C2CA64E", + "sqsub v12.4s, v2.4s, v6.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sqsub, + rd: writable_vreg(20), + rn: vreg(7), + rm: vreg(13), + size: VectorSize::Size64x2, + }, + "F42CED4E", + "sqsub v20.2d, v7.2d, v13.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqadd, + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "410C286E", + "uqadd v1.16b, v2.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqadd, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(28), + size: VectorSize::Size16x8, + }, + "810D7C6E", + "uqadd v1.8h, v12.8h, v28.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqadd, + rd: writable_vreg(12), + rn: vreg(2), + rm: vreg(6), + size: VectorSize::Size32x4, + }, + "4C0CA66E", + "uqadd v12.4s, v2.4s, v6.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqadd, + rd: writable_vreg(20), + rn: vreg(7), + rm: vreg(13), + size: VectorSize::Size64x2, + }, + "F40CED6E", + "uqadd v20.2d, v7.2d, v13.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqsub, + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "412C286E", + "uqsub v1.16b, v2.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqsub, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(28), + size: VectorSize::Size16x8, + }, + "812D7C6E", + "uqsub v1.8h, v12.8h, v28.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqsub, + rd: writable_vreg(12), + rn: vreg(2), + rm: vreg(6), + size: VectorSize::Size32x4, + }, + "4C2CA66E", + "uqsub v12.4s, v2.4s, v6.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Uqsub, + rd: writable_vreg(20), + rn: vreg(7), + rm: vreg(13), + size: VectorSize::Size64x2, + }, + "F42CED6E", + "uqsub v20.2d, v7.2d, v13.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmeq, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + size: VectorSize::Size8x16, + }, + "E38E386E", + "cmeq v3.16b, v23.16b, v24.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmgt, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + size: VectorSize::Size8x16, + 
}, + "E336384E", + "cmgt v3.16b, v23.16b, v24.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmge, + rd: writable_vreg(23), + rn: vreg(9), + rm: vreg(12), + size: VectorSize::Size8x16, + }, + "373D2C4E", + "cmge v23.16b, v9.16b, v12.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhi, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "2534216E", + "cmhi v5.16b, v1.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhs, + rd: writable_vreg(8), + rn: vreg(2), + rm: vreg(15), + size: VectorSize::Size8x16, + }, + "483C2F6E", + "cmhs v8.16b, v2.16b, v15.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmeq, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + size: VectorSize::Size16x8, + }, + "E38E786E", + "cmeq v3.8h, v23.8h, v24.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmgt, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + size: VectorSize::Size16x8, + }, + "E336784E", + "cmgt v3.8h, v23.8h, v24.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmge, + rd: writable_vreg(23), + rn: vreg(9), + rm: vreg(12), + size: VectorSize::Size16x8, + }, + "373D6C4E", + "cmge v23.8h, v9.8h, v12.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhi, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + size: VectorSize::Size16x8, + }, + "2534616E", + "cmhi v5.8h, v1.8h, v1.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhs, + rd: writable_vreg(8), + rn: vreg(2), + rm: vreg(15), + size: VectorSize::Size16x8, + }, + "483C6F6E", + "cmhs v8.8h, v2.8h, v15.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmeq, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + size: VectorSize::Size32x4, + }, + "E38EB86E", + "cmeq v3.4s, v23.4s, v24.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmgt, + rd: writable_vreg(3), + rn: vreg(23), + rm: vreg(24), + size: VectorSize::Size32x4, + }, + "E336B84E", + "cmgt v3.4s, v23.4s, v24.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmge, + rd: writable_vreg(23), + rn: vreg(9), + rm: vreg(12), + size: VectorSize::Size32x4, + }, + "373DAC4E", + "cmge v23.4s, v9.4s, v12.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhi, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + size: VectorSize::Size32x4, + }, + "2534A16E", + "cmhi v5.4s, v1.4s, v1.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Cmhs, + rd: writable_vreg(8), + rn: vreg(2), + rm: vreg(15), + size: VectorSize::Size32x4, + }, + "483CAF6E", + "cmhs v8.4s, v2.4s, v15.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fcmeq, + rd: writable_vreg(28), + rn: vreg(12), + rm: vreg(4), + size: VectorSize::Size32x2, + }, + "9CE5240E", + "fcmeq v28.2s, v12.2s, v4.2s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fcmgt, + rd: writable_vreg(3), + rn: vreg(16), + rm: vreg(31), + size: VectorSize::Size64x2, + }, + "03E6FF6E", + "fcmgt v3.2d, v16.2d, v31.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fcmge, + rd: writable_vreg(18), + rn: vreg(23), + rm: vreg(0), + size: VectorSize::Size64x2, + }, + "F2E6606E", + "fcmge v18.2d, v23.2d, v0.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::And, + rd: writable_vreg(20), + rn: vreg(19), + rm: vreg(18), + size: VectorSize::Size32x4, + }, + "741E324E", + "and v20.16b, v19.16b, v18.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: 
VecALUOp::Bic, + rd: writable_vreg(8), + rn: vreg(11), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "681D614E", + "bic v8.16b, v11.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Orr, + rd: writable_vreg(15), + rn: vreg(2), + rm: vreg(12), + size: VectorSize::Size16x8, + }, + "4F1CAC4E", + "orr v15.16b, v2.16b, v12.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Eor, + rd: writable_vreg(18), + rn: vreg(3), + rm: vreg(22), + size: VectorSize::Size8x16, + }, + "721C366E", + "eor v18.16b, v3.16b, v22.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Bsl, + rd: writable_vreg(8), + rn: vreg(9), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "281D616E", + "bsl v8.16b, v9.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umaxp, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "88A5216E", + "umaxp v8.16b, v12.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umaxp, + rd: writable_vreg(1), + rn: vreg(6), + rm: vreg(1), + size: VectorSize::Size16x8, + }, + "C1A4616E", + "umaxp v1.8h, v6.8h, v1.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umaxp, + rd: writable_vreg(1), + rn: vreg(20), + rm: vreg(16), + size: VectorSize::Size32x4, + }, + "81A6B06E", + "umaxp v1.4s, v20.4s, v16.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Add, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "2584214E", + "add v5.16b, v1.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Add, + rd: writable_vreg(7), + rn: vreg(13), + rm: vreg(2), + size: VectorSize::Size16x8, + }, + "A785624E", + "add v7.8h, v13.8h, v2.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Add, + rd: writable_vreg(18), + rn: vreg(9), + rm: vreg(6), + size: VectorSize::Size32x4, + }, + "3285A64E", + "add v18.4s, v9.4s, v6.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Add, + rd: writable_vreg(1), + rn: vreg(3), + rm: vreg(2), + size: VectorSize::Size64x2, + }, + "6184E24E", + "add v1.2d, v3.2d, v2.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sub, + rd: writable_vreg(5), + rn: vreg(1), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "2584216E", + "sub v5.16b, v1.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sub, + rd: writable_vreg(7), + rn: vreg(13), + rm: vreg(2), + size: VectorSize::Size16x8, + }, + "A785626E", + "sub v7.8h, v13.8h, v2.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sub, + rd: writable_vreg(18), + rn: vreg(9), + rm: vreg(6), + size: VectorSize::Size32x4, + }, + "3285A66E", + "sub v18.4s, v9.4s, v6.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sub, + rd: writable_vreg(18), + rn: vreg(0), + rm: vreg(8), + size: VectorSize::Size64x2, + }, + "1284E86E", + "sub v18.2d, v0.2d, v8.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Mul, + rd: writable_vreg(25), + rn: vreg(9), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "399D284E", + "mul v25.16b, v9.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Mul, + rd: writable_vreg(30), + rn: vreg(30), + rm: vreg(12), + size: VectorSize::Size16x8, + }, + "DE9F6C4E", + "mul v30.8h, v30.8h, v12.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Mul, + rd: writable_vreg(18), + rn: vreg(18), + rm: vreg(18), + size: VectorSize::Size32x4, + }, + "529EB24E", + "mul v18.4s, 
v18.4s, v18.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Ushl, + rd: writable_vreg(18), + rn: vreg(18), + rm: vreg(18), + size: VectorSize::Size8x16, + }, + "5246326E", + "ushl v18.16b, v18.16b, v18.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Ushl, + rd: writable_vreg(18), + rn: vreg(18), + rm: vreg(18), + size: VectorSize::Size16x8, + }, + "5246726E", + "ushl v18.8h, v18.8h, v18.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Ushl, + rd: writable_vreg(18), + rn: vreg(1), + rm: vreg(21), + size: VectorSize::Size32x4, + }, + "3244B56E", + "ushl v18.4s, v1.4s, v21.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Ushl, + rd: writable_vreg(5), + rn: vreg(7), + rm: vreg(19), + size: VectorSize::Size64x2, + }, + "E544F36E", + "ushl v5.2d, v7.2d, v19.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sshl, + rd: writable_vreg(18), + rn: vreg(18), + rm: vreg(18), + size: VectorSize::Size8x16, + }, + "5246324E", + "sshl v18.16b, v18.16b, v18.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sshl, + rd: writable_vreg(30), + rn: vreg(1), + rm: vreg(29), + size: VectorSize::Size16x8, + }, + "3E447D4E", + "sshl v30.8h, v1.8h, v29.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sshl, + rd: writable_vreg(8), + rn: vreg(22), + rm: vreg(21), + size: VectorSize::Size32x4, + }, + "C846B54E", + "sshl v8.4s, v22.4s, v21.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Sshl, + rd: writable_vreg(8), + rn: vreg(22), + rm: vreg(2), + size: VectorSize::Size64x2, + }, + "C846E24E", + "sshl v8.2d, v22.2d, v2.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umin, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(3), + size: VectorSize::Size8x16, + }, + "816D236E", + "umin v1.16b, v12.16b, v3.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umin, + rd: writable_vreg(30), + rn: vreg(20), + rm: vreg(10), + size: VectorSize::Size16x8, + }, + "9E6E6A6E", + "umin v30.8h, v20.8h, v10.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umin, + rd: writable_vreg(8), + rn: vreg(22), + rm: vreg(21), + size: VectorSize::Size32x4, + }, + "C86EB56E", + "umin v8.4s, v22.4s, v21.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smin, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(3), + size: VectorSize::Size8x16, + }, + "816D234E", + "smin v1.16b, v12.16b, v3.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smin, + rd: writable_vreg(30), + rn: vreg(20), + rm: vreg(10), + size: VectorSize::Size16x8, + }, + "9E6E6A4E", + "smin v30.8h, v20.8h, v10.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smin, + rd: writable_vreg(8), + rn: vreg(22), + rm: vreg(21), + size: VectorSize::Size32x4, + }, + "C86EB54E", + "smin v8.4s, v22.4s, v21.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umax, + rd: writable_vreg(6), + rn: vreg(9), + rm: vreg(8), + size: VectorSize::Size8x8, + }, + "2665282E", + "umax v6.8b, v9.8b, v8.8b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umax, + rd: writable_vreg(11), + rn: vreg(13), + rm: vreg(2), + size: VectorSize::Size16x8, + }, + "AB65626E", + "umax v11.8h, v13.8h, v2.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umax, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "8865AE6E", + "umax v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smax, + rd: 
writable_vreg(6), + rn: vreg(9), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "2665284E", + "smax v6.16b, v9.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smax, + rd: writable_vreg(11), + rn: vreg(13), + rm: vreg(2), + size: VectorSize::Size16x8, + }, + "AB65624E", + "smax v11.8h, v13.8h, v2.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smax, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "8865AE4E", + "smax v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd: writable_vreg(8), + rn: vreg(1), + rm: vreg(3), + size: VectorSize::Size8x16, + }, + "2814236E", + "urhadd v8.16b, v1.16b, v3.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd: writable_vreg(2), + rn: vreg(13), + rm: vreg(6), + size: VectorSize::Size16x8, + }, + "A215666E", + "urhadd v2.8h, v13.8h, v6.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "8815AE6E", + "urhadd v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fadd, + rd: writable_vreg(31), + rn: vreg(0), + rm: vreg(16), + size: VectorSize::Size32x4, + }, + "1FD4304E", + "fadd v31.4s, v0.4s, v16.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fsub, + rd: writable_vreg(8), + rn: vreg(7), + rm: vreg(15), + size: VectorSize::Size64x2, + }, + "E8D4EF4E", + "fsub v8.2d, v7.2d, v15.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fdiv, + rd: writable_vreg(1), + rn: vreg(3), + rm: vreg(4), + size: VectorSize::Size32x4, + }, + "61FC246E", + "fdiv v1.4s, v3.4s, v4.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fmax, + rd: writable_vreg(31), + rn: vreg(16), + rm: vreg(0), + size: VectorSize::Size64x2, + }, + "1FF6604E", + "fmax v31.2d, v16.2d, v0.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fmin, + rd: writable_vreg(5), + rn: vreg(19), + rm: vreg(26), + size: VectorSize::Size32x4, + }, + "65F6BA4E", + "fmin v5.4s, v19.4s, v26.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Fmul, + rd: writable_vreg(2), + rn: vreg(0), + rm: vreg(5), + size: VectorSize::Size64x2, + }, + "02DC656E", + "fmul v2.2d, v0.2d, v5.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: writable_vreg(16), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "90BD214E", + "addp v16.16b, v12.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Addp, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "88BDAE4E", + "addp v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umlal, + rd: writable_vreg(9), + rn: vreg(20), + rm: vreg(17), + size: VectorSize::Size32x2, + }, + "8982B12E", + "umlal v9.2d, v20.2s, v17.2s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Zip1, + rd: writable_vreg(16), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "9039014E", + "zip1 v16.16b, v12.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Zip1, + rd: writable_vreg(2), + rn: vreg(13), + rm: vreg(6), + size: VectorSize::Size16x8, + }, + "A239464E", + "zip1 v2.8h, v13.8h, v6.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Zip1, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "88398E4E", 
+ "zip1 v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Zip1, + rd: writable_vreg(9), + rn: vreg(20), + rm: vreg(17), + size: VectorSize::Size64x2, + }, + "893AD14E", + "zip1 v9.2d, v20.2d, v17.2d", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smull, + rd: writable_vreg(16), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "90C1210E", + "smull v16.8h, v12.8b, v1.8b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smull, + rd: writable_vreg(2), + rn: vreg(13), + rm: vreg(6), + size: VectorSize::Size16x8, + }, + "A2C1660E", + "smull v2.4s, v13.4h, v6.4h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smull, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "88C1AE0E", + "smull v8.2d, v12.2s, v14.2s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smull2, + rd: writable_vreg(16), + rn: vreg(12), + rm: vreg(1), + size: VectorSize::Size8x16, + }, + "90C1214E", + "smull2 v16.8h, v12.16b, v1.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smull2, + rd: writable_vreg(2), + rn: vreg(13), + rm: vreg(6), + size: VectorSize::Size16x8, + }, + "A2C1664E", + "smull2 v2.4s, v13.8h, v6.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smull2, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "88C1AE4E", + "smull2 v8.2d, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Not, + rd: writable_vreg(20), + rn: vreg(17), + size: VectorSize::Size8x8, + }, + "345A202E", + "mvn v20.8b, v17.8b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Not, + rd: writable_vreg(2), + rn: vreg(1), + size: VectorSize::Size32x4, + }, + "2258206E", + "mvn v2.16b, v1.16b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Neg, + rd: writable_vreg(3), + rn: vreg(7), + size: VectorSize::Size8x8, + }, + "E3B8202E", + "neg v3.8b, v7.8b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Neg, + rd: writable_vreg(8), + rn: vreg(12), + size: VectorSize::Size8x16, + }, + "88B9206E", + "neg v8.16b, v12.16b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Neg, + rd: writable_vreg(0), + rn: vreg(31), + size: VectorSize::Size16x8, + }, + "E0BB606E", + "neg v0.8h, v31.8h", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Neg, + rd: writable_vreg(2), + rn: vreg(3), + size: VectorSize::Size32x4, + }, + "62B8A06E", + "neg v2.4s, v3.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Neg, + rd: writable_vreg(10), + rn: vreg(8), + size: VectorSize::Size64x2, + }, + "0AB9E06E", + "neg v10.2d, v8.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(3), + rn: vreg(1), + size: VectorSize::Size8x8, + }, + "23B8200E", + "abs v3.8b, v1.8b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(1), + rn: vreg(1), + size: VectorSize::Size8x16, + }, + "21B8204E", + "abs v1.16b, v1.16b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(29), + rn: vreg(28), + size: VectorSize::Size16x8, + }, + "9DBB604E", + "abs v29.8h, v28.8h", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(7), + rn: vreg(8), + size: VectorSize::Size32x4, + }, + "07B9A04E", + "abs v7.4s, v8.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(1), + rn: vreg(10), + size: VectorSize::Size64x2, + }, + "41B9E04E", + "abs v1.2d, v10.2d", + 
)); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fabs, + rd: writable_vreg(15), + rn: vreg(16), + size: VectorSize::Size32x4, + }, + "0FFAA04E", + "fabs v15.4s, v16.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fneg, + rd: writable_vreg(31), + rn: vreg(0), + size: VectorSize::Size32x4, + }, + "1FF8A06E", + "fneg v31.4s, v0.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fsqrt, + rd: writable_vreg(7), + rn: vreg(18), + size: VectorSize::Size64x2, + }, + "47FAE16E", + "fsqrt v7.2d, v18.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Rev64, + rd: writable_vreg(1), + rn: vreg(10), + size: VectorSize::Size32x4, + }, + "4109A04E", + "rev64 v1.4s, v10.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Shll, + rd: writable_vreg(12), + rn: vreg(5), + size: VectorSize::Size8x8, + }, + "AC38212E", + "shll v12.8h, v5.8b, #8", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Shll, + rd: writable_vreg(9), + rn: vreg(1), + size: VectorSize::Size16x4, + }, + "2938612E", + "shll v9.4s, v1.4h, #16", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Shll, + rd: writable_vreg(1), + rn: vreg(10), + size: VectorSize::Size32x2, + }, + "4139A12E", + "shll v1.2d, v10.2s, #32", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fcvtzs, + rd: writable_vreg(4), + rn: vreg(22), + size: VectorSize::Size32x4, + }, + "C4BAA14E", + "fcvtzs v4.4s, v22.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fcvtzu, + rd: writable_vreg(29), + rn: vreg(15), + size: VectorSize::Size64x2, + }, + "FDB9E16E", + "fcvtzu v29.2d, v15.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Scvtf, + rd: writable_vreg(20), + rn: vreg(8), + size: VectorSize::Size32x4, + }, + "14D9214E", + "scvtf v20.4s, v8.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Ucvtf, + rd: writable_vreg(10), + rn: vreg(19), + size: VectorSize::Size64x2, + }, + "6ADA616E", + "ucvtf v10.2d, v19.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintn, + rd: writable_vreg(11), + rn: vreg(18), + size: VectorSize::Size32x4, + }, + "4B8A214E", + "frintn v11.4s, v18.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintn, + rd: writable_vreg(12), + rn: vreg(17), + size: VectorSize::Size64x2, + }, + "2C8A614E", + "frintn v12.2d, v17.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintz, + rd: writable_vreg(11), + rn: vreg(18), + size: VectorSize::Size32x4, + }, + "4B9AA14E", + "frintz v11.4s, v18.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintz, + rd: writable_vreg(12), + rn: vreg(17), + size: VectorSize::Size64x2, + }, + "2C9AE14E", + "frintz v12.2d, v17.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintm, + rd: writable_vreg(11), + rn: vreg(18), + size: VectorSize::Size32x4, + }, + "4B9A214E", + "frintm v11.4s, v18.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintm, + rd: writable_vreg(12), + rn: vreg(17), + size: VectorSize::Size64x2, + }, + "2C9A614E", + "frintm v12.2d, v17.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintp, + rd: writable_vreg(11), + rn: vreg(18), + size: VectorSize::Size32x4, + }, + "4B8AA14E", + "frintp v11.4s, v18.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Frintp, + rd: writable_vreg(12), + rn: vreg(17), + size: VectorSize::Size64x2, + }, + "2C8AE14E", + "frintp v12.2d, v17.2d", + )); + + insns.push(( + Inst::VecLanes { + op: VecLanesOp::Uminv, + rd: writable_vreg(2), + rn: vreg(1), + size: 
VectorSize::Size8x16, + }, + "22A8316E", + "uminv b2, v1.16b", + )); + + insns.push(( + Inst::VecLanes { + op: VecLanesOp::Uminv, + rd: writable_vreg(3), + rn: vreg(11), + size: VectorSize::Size16x8, + }, + "63A9716E", + "uminv h3, v11.8h", + )); + + insns.push(( + Inst::VecLanes { + op: VecLanesOp::Uminv, + rd: writable_vreg(18), + rn: vreg(4), + size: VectorSize::Size32x4, + }, + "92A8B16E", + "uminv s18, v4.4s", + )); + + insns.push(( + Inst::VecLanes { + op: VecLanesOp::Addv, + rd: writable_vreg(2), + rn: vreg(29), + size: VectorSize::Size8x16, + }, + "A2BB314E", + "addv b2, v29.16b", + )); + + insns.push(( + Inst::VecLanes { + op: VecLanesOp::Addv, + rd: writable_vreg(3), + rn: vreg(21), + size: VectorSize::Size16x8, + }, + "A3BA714E", + "addv h3, v21.8h", + )); + + insns.push(( + Inst::VecLanes { + op: VecLanesOp::Addv, + rd: writable_vreg(18), + rn: vreg(5), + size: VectorSize::Size32x4, + }, + "B2B8B14E", + "addv s18, v5.4s", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Shl, + rd: writable_vreg(27), + rn: vreg(5), + imm: 7, + size: VectorSize::Size8x16, + }, + "BB540F4F", + "shl v27.16b, v5.16b, #7", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Shl, + rd: writable_vreg(1), + rn: vreg(30), + imm: 0, + size: VectorSize::Size8x16, + }, + "C157084F", + "shl v1.16b, v30.16b, #0", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Sshr, + rd: writable_vreg(26), + rn: vreg(6), + imm: 16, + size: VectorSize::Size16x8, + }, + "DA04104F", + "sshr v26.8h, v6.8h, #16", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Sshr, + rd: writable_vreg(3), + rn: vreg(19), + imm: 1, + size: VectorSize::Size16x8, + }, + "63061F4F", + "sshr v3.8h, v19.8h, #1", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Ushr, + rd: writable_vreg(25), + rn: vreg(6), + imm: 32, + size: VectorSize::Size32x4, + }, + "D904206F", + "ushr v25.4s, v6.4s, #32", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Ushr, + rd: writable_vreg(5), + rn: vreg(21), + imm: 1, + size: VectorSize::Size32x4, + }, + "A5063F6F", + "ushr v5.4s, v21.4s, #1", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Shl, + rd: writable_vreg(22), + rn: vreg(13), + imm: 63, + size: VectorSize::Size64x2, + }, + "B6557F4F", + "shl v22.2d, v13.2d, #63", + )); + + insns.push(( + Inst::VecShiftImm { + op: VecShiftImmOp::Shl, + rd: writable_vreg(23), + rn: vreg(9), + imm: 0, + size: VectorSize::Size64x2, + }, + "3755404F", + "shl v23.2d, v9.2d, #0", + )); + + insns.push(( + Inst::VecExtract { + rd: writable_vreg(1), + rn: vreg(30), + rm: vreg(17), + imm4: 0, + }, + "C103116E", + "ext v1.16b, v30.16b, v17.16b, #0", + )); + + insns.push(( + Inst::VecExtract { + rd: writable_vreg(1), + rn: vreg(30), + rm: vreg(17), + imm4: 8, + }, + "C143116E", + "ext v1.16b, v30.16b, v17.16b, #8", + )); + + insns.push(( + Inst::VecExtract { + rd: writable_vreg(1), + rn: vreg(30), + rm: vreg(17), + imm4: 15, + }, + "C17B116E", + "ext v1.16b, v30.16b, v17.16b, #15", + )); + + insns.push(( + Inst::VecTbl { + rd: writable_vreg(0), + rn: vreg(31), + rm: vreg(16), + is_extension: false, + }, + "E003104E", + "tbl v0.16b, { v31.16b }, v16.16b", + )); + + insns.push(( + Inst::VecTbl { + rd: writable_vreg(4), + rn: vreg(12), + rm: vreg(23), + is_extension: true, + }, + "8411174E", + "tbx v4.16b, { v12.16b }, v23.16b", + )); + + insns.push(( + Inst::VecTbl2 { + rd: writable_vreg(16), + rn: vreg(31), + rn2: vreg(0), + rm: vreg(26), + is_extension: false, + }, + 
"F0231A4E", + "tbl v16.16b, { v31.16b, v0.16b }, v26.16b", + )); + + insns.push(( + Inst::VecTbl2 { + rd: writable_vreg(3), + rn: vreg(11), + rn2: vreg(12), + rm: vreg(19), + is_extension: true, + }, + "6331134E", + "tbx v3.16b, { v11.16b, v12.16b }, v19.16b", + )); + + insns.push(( + Inst::VecLoadReplicate { + rd: writable_vreg(31), + rn: xreg(0), + + size: VectorSize::Size64x2, + }, + "1FCC404D", + "ld1r { v31.2d }, [x0]", + )); + + insns.push(( + Inst::VecLoadReplicate { + rd: writable_vreg(0), + rn: xreg(25), + + size: VectorSize::Size8x8, + }, + "20C3400D", + "ld1r { v0.8b }, [x25]", + )); + + insns.push(( + Inst::VecCSel { + rd: writable_vreg(5), + rn: vreg(10), + rm: vreg(19), + cond: Cond::Gt, + }, + "6C000054651EB34E02000014451DAA4E", + "vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)", + )); + + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 8, + to_bits: 32, + }, + "411C0053", + "uxtb w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 8, + to_bits: 32, + }, + "411C0013", + "sxtb w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 16, + to_bits: 32, + }, + "413C0053", + "uxth w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 16, + to_bits: 32, + }, + "413C0013", + "sxth w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 8, + to_bits: 64, + }, + "411C0053", + "uxtb x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 8, + to_bits: 64, + }, + "411C4093", + "sxtb x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 16, + to_bits: 64, + }, + "413C0053", + "uxth x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 16, + to_bits: 64, + }, + "413C4093", + "sxth x1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: false, + from_bits: 32, + to_bits: 64, + }, + "E103022A", + "mov w1, w2", + )); + insns.push(( + Inst::Extend { + rd: writable_xreg(1), + rn: xreg(2), + signed: true, + from_bits: 32, + to_bits: 64, + }, + "417C4093", + "sxtw x1, w2", + )); + + insns.push(( + Inst::Jump { + dest: BranchTarget::ResolvedOffset(64), + }, + "10000014", + "b 64", + )); + + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::NotZero(xreg(8)), + }, + "480000B40000A0D4", + "cbz x8, 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Zero(xreg(8)), + }, + "480000B50000A0D4", + "cbnz x8, 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Ne), + }, + "400000540000A0D4", + "b.eq 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Eq), + }, + "410000540000A0D4", + "b.ne 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Lo), + }, + "420000540000A0D4", + "b.hs 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Hs), + }, + "430000540000A0D4", + "b.lo 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: 
CondBrKind::Cond(Cond::Pl), + }, + "440000540000A0D4", + "b.mi 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Mi), + }, + "450000540000A0D4", + "b.pl 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Vc), + }, + "460000540000A0D4", + "b.vs 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Vs), + }, + "470000540000A0D4", + "b.vc 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Ls), + }, + "480000540000A0D4", + "b.hi 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Hi), + }, + "490000540000A0D4", + "b.ls 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Lt), + }, + "4A0000540000A0D4", + "b.ge 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Ge), + }, + "4B0000540000A0D4", + "b.lt 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Le), + }, + "4C0000540000A0D4", + "b.gt 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Gt), + }, + "4D0000540000A0D4", + "b.le 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Nv), + }, + "4E0000540000A0D4", + "b.al 8 ; udf", + )); + insns.push(( + Inst::TrapIf { + trap_code: TrapCode::Interrupt, + kind: CondBrKind::Cond(Cond::Al), + }, + "4F0000540000A0D4", + "b.nv 8 ; udf", + )); + + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + kind: CondBrKind::Cond(Cond::Le), + }, + "0D02005420000014", + "b.le 64 ; b 128", + )); + + insns.push(( + Inst::Call { + info: Box::new(CallInfo { + dest: ExternalName::testcase("test0"), + uses: Vec::new(), + defs: Vec::new(), + opcode: Opcode::Call, + caller_callconv: CallConv::SystemV, + callee_callconv: CallConv::SystemV, + }), + }, + "00000094", + "bl 0", + )); + + insns.push(( + Inst::CallInd { + info: Box::new(CallIndInfo { + rn: xreg(10), + uses: Vec::new(), + defs: Vec::new(), + opcode: Opcode::CallIndirect, + caller_callconv: CallConv::SystemV, + callee_callconv: CallConv::SystemV, + }), + }, + "40013FD6", + "blr x10", + )); + + insns.push(( + Inst::IndirectBr { + rn: xreg(3), + targets: vec![], + }, + "60001FD6", + "br x3", + )); + + insns.push((Inst::Brk, "000020D4", "brk #0")); + + insns.push(( + Inst::Adr { + rd: writable_xreg(15), + off: (1 << 20) - 4, + }, + "EFFF7F10", + "adr x15, pc+1048572", + )); + + insns.push(( + Inst::FpuMove64 { + rd: writable_vreg(8), + rn: vreg(4), + }, + "881CA40E", + "mov v8.8b, v4.8b", + )); + + insns.push(( + Inst::FpuMove128 { + rd: writable_vreg(17), + rn: vreg(26), + }, + "511FBA4E", + "mov v17.16b, v26.16b", + )); + + insns.push(( + Inst::FpuMoveFromVec { + rd: writable_vreg(1), + rn: vreg(30), + idx: 2, + size: VectorSize::Size32x4, + }, + "C107145E", + "mov s1, v30.s[2]", + )); + + insns.push(( + Inst::FpuMoveFromVec { + rd: writable_vreg(23), + rn: vreg(11), + idx: 0, + size: VectorSize::Size64x2, + }, + "7705085E", + "mov d23, v11.d[0]", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3201E", + "fabs s15, s30", + 
)); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3601E", + "fabs d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43211E", + "fneg s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43611E", + "fneg d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3211E", + "fsqrt s15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3611E", + "fsqrt d15, d30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt32To64, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CFC3221E", + "fcvt d15, s30", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt64To32, + rd: writable_vreg(15), + rn: vreg(30), + }, + "CF43621E", + "fcvt s15, d30", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF2B3F1E", + "fadd s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF2B7F1E", + "fadd d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF3B3F1E", + "fsub s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF3B7F1E", + "fsub d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF0B3F1E", + "fmul s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF0B7F1E", + "fmul d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF1B3F1E", + "fdiv s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF1B7F1E", + "fdiv d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Max32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF4B3F1E", + "fmax s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Max64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF4B7F1E", + "fmax d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Min32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF5B3F1E", + "fmin s15, s30, s31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Min64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + }, + "CF5B7F1E", + "fmin d15, d30, d31", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Uqadd64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D50EF77E", + "uqadd d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sqadd64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D50EF75E", + "sqadd d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Uqsub64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D52EF77E", + "uqsub d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: 
FPUOp2::Sqsub64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D52EF75E", + "sqsub d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + ra: vreg(1), + }, + "CF071F1F", + "fmadd s15, s30, s31, s1", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_vreg(15), + rn: vreg(30), + rm: vreg(31), + ra: vreg(1), + }, + "CF075F1F", + "fmadd d15, d30, d31, d1", + )); + + insns.push(( + Inst::FpuRRI { + fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()), + rd: writable_vreg(2), + rn: vreg(5), + }, + "A204202F", + "ushr v2.2s, v5.2s, #32", + )); + + insns.push(( + Inst::FpuRRI { + fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()), + rd: writable_vreg(2), + rn: vreg(5), + }, + "A204417F", + "ushr d2, d5, #63", + )); + + insns.push(( + Inst::FpuRRI { + fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()), + rd: writable_vreg(4), + rn: vreg(10), + }, + "44553F2F", + "sli v4.2s, v10.2s, #31", + )); + + insns.push(( + Inst::FpuRRI { + fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()), + rd: writable_vreg(4), + rn: vreg(10), + }, + "44557F7F", + "sli d4, d10, #63", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100391E", + "fcvtzu w1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100399E", + "fcvtzu x1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100381E", + "fcvtzs w1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100389E", + "fcvtzs x1, s4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100791E", + "fcvtzu w1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100799E", + "fcvtzu x1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI32, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100781E", + "fcvtzs w1, d4", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI64, + rd: writable_xreg(1), + rn: vreg(4), + }, + "8100789E", + "fcvtzs x1, d4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100231E", + "ucvtf s1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100221E", + "scvtf s1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100631E", + "ucvtf d1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100621E", + "scvtf d1, w4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100239E", + "ucvtf s1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF32, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100229E", + "scvtf s1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100639E", + "ucvtf d1, x4", + )); + + insns.push(( + Inst::IntToFpu { + op: 
IntToFpuOp::I64ToF64, + rd: writable_vreg(1), + rn: xreg(4), + }, + "8100629E", + "scvtf d1, x4", + )); + + insns.push(( + Inst::FpuCmp32 { + rn: vreg(23), + rm: vreg(24), + }, + "E022381E", + "fcmp s23, s24", + )); + + insns.push(( + Inst::FpuCmp64 { + rn: vreg(23), + rm: vreg(24), + }, + "E022781E", + "fcmp d23, d24", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_vreg(16), + mem: AMode::RegScaled(xreg(8), xreg(9), F32), + flags: MemFlags::trusted(), + }, + "107969BC", + "ldr s16, [x8, x9, LSL #2]", + )); + + insns.push(( + Inst::FpuLoad64 { + rd: writable_vreg(16), + mem: AMode::RegScaled(xreg(8), xreg(9), F64), + flags: MemFlags::trusted(), + }, + "107969FC", + "ldr d16, [x8, x9, LSL #3]", + )); + + insns.push(( + Inst::FpuLoad128 { + rd: writable_vreg(16), + mem: AMode::RegScaled(xreg(8), xreg(9), I128), + flags: MemFlags::trusted(), + }, + "1079E93C", + "ldr q16, [x8, x9, LSL #4]", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_vreg(16), + mem: AMode::Label(MemLabel::PCRel(8)), + flags: MemFlags::trusted(), + }, + "5000001C", + "ldr s16, pc+8", + )); + + insns.push(( + Inst::FpuLoad64 { + rd: writable_vreg(16), + mem: AMode::Label(MemLabel::PCRel(8)), + flags: MemFlags::trusted(), + }, + "5000005C", + "ldr d16, pc+8", + )); + + insns.push(( + Inst::FpuLoad128 { + rd: writable_vreg(16), + mem: AMode::Label(MemLabel::PCRel(8)), + flags: MemFlags::trusted(), + }, + "5000009C", + "ldr q16, pc+8", + )); + + insns.push(( + Inst::FpuStore32 { + rd: vreg(16), + mem: AMode::RegScaled(xreg(8), xreg(9), F32), + flags: MemFlags::trusted(), + }, + "107929BC", + "str s16, [x8, x9, LSL #2]", + )); + + insns.push(( + Inst::FpuStore64 { + rd: vreg(16), + mem: AMode::RegScaled(xreg(8), xreg(9), F64), + flags: MemFlags::trusted(), + }, + "107929FC", + "str d16, [x8, x9, LSL #3]", + )); + + insns.push(( + Inst::FpuStore128 { + rd: vreg(16), + mem: AMode::RegScaled(xreg(8), xreg(9), I128), + flags: MemFlags::trusted(), + }, + "1079A93C", + "str q16, [x8, x9, LSL #4]", + )); + + insns.push(( + Inst::LoadFpuConst64 { + rd: writable_vreg(16), + const_data: 1.0_f64.to_bits(), + }, + "5000005C03000014000000000000F03F", + "ldr d16, pc+8 ; b 12 ; data.f64 1", + )); + + insns.push(( + Inst::LoadFpuConst128 { + rd: writable_vreg(5), + const_data: 0x0f0e0d0c0b0a09080706050403020100, + }, + "4500009C05000014000102030405060708090A0B0C0D0E0F", + "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100", + )); + + insns.push(( + Inst::FpuCSel32 { + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(3), + cond: Cond::Hi, + }, + "418C231E", + "fcsel s1, s2, s3, hi", + )); + + insns.push(( + Inst::FpuCSel64 { + rd: writable_vreg(1), + rn: vreg(2), + rm: vreg(3), + cond: Cond::Eq, + }, + "410C631E", + "fcsel d1, d2, d3, eq", + )); + + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Minus32, + }, + "1743251E", + "frintm s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Minus64, + }, + "1743651E", + "frintm d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Plus32, + }, + "17C3241E", + "frintp s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Plus64, + }, + "17C3641E", + "frintp d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Zero32, + }, + "17C3251E", + "frintz s23, s24", + )); + insns.push(( + 
Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Zero64, + }, + "17C3651E", + "frintz d23, d24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Nearest32, + }, + "1743241E", + "frintn s23, s24", + )); + insns.push(( + Inst::FpuRound { + rd: writable_vreg(23), + rn: vreg(24), + op: FpuRoundMode::Nearest64, + }, + "1743641E", + "frintn d23, d24", + )); + + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: inst_common::AtomicRmwOp::Xor, + }, + "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5", + "atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }", + )); + + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: inst_common::AtomicRmwOp::Xchg, + }, + "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5", + "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }", + )); + + insns.push(( + Inst::AtomicCAS { + ty: I8, + }, + "BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5", + "atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + )); + + insns.push(( + Inst::AtomicCAS { + ty: I64, + }, + "BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5", + "atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + )); + + insns.push(( + Inst::AtomicLoad { + ty: I8, + r_data: writable_xreg(7), + r_addr: xreg(28), + }, + "BF3B03D587034039", + "atomically { x7 = zero_extend_8_bits_at[x28] }", + )); + + insns.push(( + Inst::AtomicLoad { + ty: I64, + r_data: writable_xreg(28), + r_addr: xreg(7), + }, + "BF3B03D5FC0040F9", + "atomically { x28 = zero_extend_64_bits_at[x7] }", + )); + + insns.push(( + Inst::AtomicStore { + ty: I16, + r_data: xreg(17), + r_addr: xreg(8), + }, + "11010079BF3B03D5", + "atomically { 16_bits_at[x8] = x17 }", + )); + + insns.push(( + Inst::AtomicStore { + ty: I32, + r_data: xreg(18), + r_addr: xreg(7), + }, + "F20000B9BF3B03D5", + "atomically { 32_bits_at[x7] = x18 }", + )); + + insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish")); + + let flags = settings::Flags::new(settings::builder()); + let rru = create_reg_universe(&flags); + let emit_info = EmitInfo::new(flags); + for (insn, expected_encoding, expected_printing) in insns { + println!( + "AArch64: {:?}, {}, {}", + insn, expected_encoding, expected_printing + ); + + // Check the printed text is as expected. + let actual_printing = insn.show_rru(Some(&rru)); + assert_eq!(expected_printing, actual_printing); + + let mut sink = test_utils::TestCodeSink::new(); + let mut buffer = MachBuffer::new(); + insn.emit(&mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(); + buffer.emit(&mut sink); + let actual_encoding = &sink.stringify(); + assert_eq!(expected_encoding, actual_encoding); + } +} + +#[test] +fn test_cond_invert() { + for cond in vec![ + Cond::Eq, + Cond::Ne, + Cond::Hs, + Cond::Lo, + Cond::Mi, + Cond::Pl, + Cond::Vs, + Cond::Vc, + Cond::Hi, + Cond::Ls, + Cond::Ge, + Cond::Lt, + Cond::Gt, + Cond::Le, + Cond::Al, + Cond::Nv, + ] + .into_iter() + { + assert_eq!(cond.invert().invert(), cond); + } +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs new file mode 100644 index 0000000000..b6da0402bc --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/imms.rs @@ -0,0 +1,1025 @@ +//! AArch64 ISA definitions: immediate constants. 
+ +// Some variants are never constructed, but we still want them as options in the future. +#[allow(dead_code)] +use crate::ir::types::*; +use crate::ir::Type; +use crate::isa::aarch64::inst::{OperandSize, ScalarSize}; + +use regalloc::{PrettyPrint, RealRegUniverse}; + +use core::convert::TryFrom; +use std::string::String; + +/// An immediate that represents the NZCV flags. +#[derive(Clone, Copy, Debug)] +pub struct NZCV { + /// The negative condition flag. + n: bool, + /// The zero condition flag. + z: bool, + /// The carry condition flag. + c: bool, + /// The overflow condition flag. + v: bool, +} + +impl NZCV { + pub fn new(n: bool, z: bool, c: bool, v: bool) -> NZCV { + NZCV { n, z, c, v } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + (u32::from(self.n) << 3) + | (u32::from(self.z) << 2) + | (u32::from(self.c) << 1) + | u32::from(self.v) + } +} + +/// An unsigned 5-bit immediate. +#[derive(Clone, Copy, Debug)] +pub struct UImm5 { + /// The value. + value: u8, +} + +impl UImm5 { + pub fn maybe_from_u8(value: u8) -> Option<UImm5> { + if value < 32 { + Some(UImm5 { value }) + } else { + None + } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +/// A signed, scaled 7-bit offset. +#[derive(Clone, Copy, Debug)] +pub struct SImm7Scaled { + /// The value. + pub value: i16, + /// multiplied by the size of this type + pub scale_ty: Type, +} + +impl SImm7Scaled { + /// Create a SImm7Scaled from a raw offset and the known scale type, if + /// possible. + pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<SImm7Scaled> { + assert!(scale_ty == I64 || scale_ty == I32); + let scale = scale_ty.bytes(); + assert!(scale.is_power_of_two()); + let scale = i64::from(scale); + let upper_limit = 63 * scale; + let lower_limit = -(64 * scale); + if value >= lower_limit && value <= upper_limit && (value & (scale - 1)) == 0 { + Some(SImm7Scaled { + value: i16::try_from(value).unwrap(), + scale_ty, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero(scale_ty: Type) -> SImm7Scaled { + SImm7Scaled { value: 0, scale_ty } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + let ty_bytes: i16 = self.scale_ty.bytes() as i16; + let scaled: i16 = self.value / ty_bytes; + assert!(scaled <= 63 && scaled >= -64); + let scaled: i8 = scaled as i8; + let encoded: u32 = scaled as u32; + encoded & 0x7f + } +} + +#[derive(Clone, Copy, Debug)] +pub struct FPULeftShiftImm { + pub amount: u8, + pub lane_size_in_bits: u8, +} + +impl FPULeftShiftImm { + pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> { + debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64); + if amount < lane_size_in_bits { + Some(Self { + amount, + lane_size_in_bits, + }) + } else { + None + } + } + + pub fn enc(&self) -> u32 { + debug_assert!(self.lane_size_in_bits.is_power_of_two()); + debug_assert!(self.lane_size_in_bits > self.amount); + // The encoding of the immediate follows the table below, + // where xs encode the shift amount. + // + // | lane_size_in_bits | encoding | + // +------------------------------+ + // | 8 | 0001xxx | + // | 16 | 001xxxx | + // | 32 | 01xxxxx | + // | 64 | 1xxxxxx | + // + // The highest one bit is represented by `lane_size_in_bits`. Since + // `lane_size_in_bits` is a power of 2 and `amount` is less + // than `lane_size_in_bits`, they can be ORed + // together to produced the encoded value. 
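        // Worked example (annotation, not part of the vendored patch): for a
        // 32-bit lane the size marker is 0b010_0000 (= 32), so
        // `maybe_from_u8(31, 32)` encodes as 32 | 31 = 0b011_1111; for a 64-bit
        // lane, `maybe_from_u8(63, 64)` encodes as 64 | 63 = 0b111_1111,
        // matching the `sli` emit tests earlier in this patch.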
+ u32::from(self.lane_size_in_bits | self.amount) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct FPURightShiftImm { + pub amount: u8, + pub lane_size_in_bits: u8, +} + +impl FPURightShiftImm { + pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> { + debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64); + if amount > 0 && amount <= lane_size_in_bits { + Some(Self { + amount, + lane_size_in_bits, + }) + } else { + None + } + } + + pub fn enc(&self) -> u32 { + debug_assert_ne!(0, self.amount); + // The encoding of the immediate follows the table below, + // where xs encodes the negated shift amount. + // + // | lane_size_in_bits | encoding | + // +------------------------------+ + // | 8 | 0001xxx | + // | 16 | 001xxxx | + // | 32 | 01xxxxx | + // | 64 | 1xxxxxx | + // + // The shift amount is negated such that a shift ammount + // of 1 (in 64-bit) is encoded as 0b111111 and a shift + // amount of 64 is encoded as 0b000000, + // in the bottom 6 bits. + u32::from((self.lane_size_in_bits * 2) - self.amount) + } +} + +/// a 9-bit signed offset. +#[derive(Clone, Copy, Debug)] +pub struct SImm9 { + /// The value. + pub value: i16, +} + +impl SImm9 { + /// Create a signed 9-bit offset from a full-range value, if possible. + pub fn maybe_from_i64(value: i64) -> Option<SImm9> { + if value >= -256 && value <= 255 { + Some(SImm9 { + value: value as i16, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> SImm9 { + SImm9 { value: 0 } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + (self.value as u32) & 0x1ff + } + + /// Signed value of immediate. + pub fn value(&self) -> i32 { + self.value as i32 + } +} + +/// An unsigned, scaled 12-bit offset. +#[derive(Clone, Copy, Debug)] +pub struct UImm12Scaled { + /// The value. + pub value: u16, + /// multiplied by the size of this type + pub scale_ty: Type, +} + +impl UImm12Scaled { + /// Create a UImm12Scaled from a raw offset and the known scale type, if + /// possible. + pub fn maybe_from_i64(value: i64, scale_ty: Type) -> Option<UImm12Scaled> { + // Ensure the type is at least one byte. + let scale_ty = if scale_ty == B1 { B8 } else { scale_ty }; + + let scale = scale_ty.bytes(); + assert!(scale.is_power_of_two()); + let scale = scale as i64; + let limit = 4095 * scale; + if value >= 0 && value <= limit && (value & (scale - 1)) == 0 { + Some(UImm12Scaled { + value: value as u16, + scale_ty, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero(scale_ty: Type) -> UImm12Scaled { + UImm12Scaled { value: 0, scale_ty } + } + + /// Encoded bits. + pub fn bits(&self) -> u32 { + (self.value as u32 / self.scale_ty.bytes()) & 0xfff + } + + /// Value after scaling. + pub fn value(&self) -> u32 { + self.value as u32 + } + + /// The value type which is the scaling base. + pub fn scale_ty(&self) -> Type { + self.scale_ty + } +} + +/// A shifted immediate value in 'imm12' format: supports 12 bits, shifted +/// left by 0 or 12 places. +#[derive(Clone, Debug)] +pub struct Imm12 { + /// The immediate bits. + pub bits: u16, + /// Whether the immediate bits are shifted left by 12 or not. + pub shift12: bool, +} + +impl Imm12 { + /// Compute a Imm12 from raw bits, if possible. 
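    // Worked examples (annotation, not part of the vendored patch), assuming
    // the constructor below: 0x123 becomes { bits: 0x123, shift12: false },
    // 0x45_6000 becomes { bits: 0x456, shift12: true }, and 0x12_3456 is
    // rejected (None) because it has significant bits at both shift positions.
    // Note that the `val < 0xfff` test below also rejects 0xfff itself.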
+ pub fn maybe_from_u64(val: u64) -> Option<Imm12> { + if val == 0 { + Some(Imm12 { + bits: 0, + shift12: false, + }) + } else if val < 0xfff { + Some(Imm12 { + bits: val as u16, + shift12: false, + }) + } else if val < 0xfff_000 && (val & 0xfff == 0) { + Some(Imm12 { + bits: (val >> 12) as u16, + shift12: true, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> Self { + Imm12 { + bits: 0, + shift12: false, + } + } + + /// Bits for 2-bit "shift" field in e.g. AddI. + pub fn shift_bits(&self) -> u32 { + if self.shift12 { + 0b01 + } else { + 0b00 + } + } + + /// Bits for 12-bit "imm" field in e.g. AddI. + pub fn imm_bits(&self) -> u32 { + self.bits as u32 + } +} + +/// An immediate for logical instructions. +#[derive(Clone, Debug, PartialEq)] +pub struct ImmLogic { + /// The actual value. + value: u64, + /// `N` flag. + pub n: bool, + /// `S` field: element size and element bits. + pub r: u8, + /// `R` field: rotate amount. + pub s: u8, + /// Was this constructed for a 32-bit or 64-bit instruction? + pub size: OperandSize, +} + +impl ImmLogic { + /// Compute an ImmLogic from raw bits, if possible. + pub fn maybe_from_u64(value: u64, ty: Type) -> Option<ImmLogic> { + // Note: This function is a port of VIXL's Assembler::IsImmLogical. + + if ty != I64 && ty != I32 { + return None; + } + let operand_size = OperandSize::from_ty(ty); + + let original_value = value; + + let value = if ty == I32 { + // To handle 32-bit logical immediates, the very easiest thing is to repeat + // the input value twice to make a 64-bit word. The correct encoding of that + // as a logical immediate will also be the correct encoding of the 32-bit + // value. + + // Avoid making the assumption that the most-significant 32 bits are zero by + // shifting the value left and duplicating it. + let value = value << 32; + value | value >> 32 + } else { + value + }; + + // Logical immediates are encoded using parameters n, imm_s and imm_r using + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 bits + // are set. The pattern is rotated right by R, and repeated across a 32 or + // 64-bit value, depending on destination register width. + // + // Put another way: the basic format of a logical immediate is a single + // contiguous stretch of 1 bits, repeated across the whole word at intervals + // given by a power of 2. To identify them quickly, we first locate the + // lowest stretch of 1 bits, then the next 1 bit above that; that combination + // is different for every logical immediate, so it gives us all the + // information we need to identify the only logical immediate that our input + // could be, and then we simply check if that's the value we actually have. + // + // (The rotation parameter does give the possibility of the stretch of 1 bits + // going 'round the end' of the word. To deal with that, we observe that in + // any situation where that happens the bitwise NOT of the value is also a + // valid logical immediate. So we simply invert the input whenever its low bit + // is set, and then we know that the rotated case can't arise.) 
+ let (value, inverted) = if value & 1 == 1 { + (!value, true) + } else { + (value, false) + }; + + if value == 0 { + return None; + } + + // The basic analysis idea: imagine our input word looks like this. + // + // 0011111000111110001111100011111000111110001111100011111000111110 + // c b a + // |<--d-->| + // + // We find the lowest set bit (as an actual power-of-2 value, not its index) + // and call it a. Then we add a to our original number, which wipes out the + // bottommost stretch of set bits and replaces it with a 1 carried into the + // next zero bit. Then we look for the new lowest set bit, which is in + // position b, and subtract it, so now our number is just like the original + // but with the lowest stretch of set bits completely gone. Now we find the + // lowest set bit again, which is position c in the diagram above. Then we'll + // measure the distance d between bit positions a and c (using CLZ), and that + // tells us that the only valid logical immediate that could possibly be equal + // to this number is the one in which a stretch of bits running from a to just + // below b is replicated every d bits. + fn lowest_set_bit(value: u64) -> u64 { + let bit = value.trailing_zeros(); + 1u64.checked_shl(bit).unwrap_or(0) + } + let a = lowest_set_bit(value); + assert_ne!(0, a); + let value_plus_a = value.wrapping_add(a); + let b = lowest_set_bit(value_plus_a); + let value_plus_a_minus_b = value_plus_a - b; + let c = lowest_set_bit(value_plus_a_minus_b); + + let (d, clz_a, out_n, mask) = if c != 0 { + // The general case, in which there is more than one stretch of set bits. + // Compute the repeat distance d, and set up a bitmask covering the basic + // unit of repetition (i.e. a word with the bottom d bits set). Also, in all + // of these cases the N bit of the output will be zero. + let clz_a = a.leading_zeros(); + let clz_c = c.leading_zeros(); + let d = clz_a - clz_c; + let mask = (1 << d) - 1; + (d, clz_a, 0, mask) + } else { + (64, a.leading_zeros(), 1, u64::max_value()) + }; + + // If the repeat period d is not a power of two, it can't be encoded. + if !d.is_power_of_two() { + return None; + } + + if ((b.wrapping_sub(a)) & !mask) != 0 { + // If the bit stretch (b - a) does not fit within the mask derived from the + // repeat period, then fail. + return None; + } + + // The only possible option is b - a repeated every d bits. Now we're going to + // actually construct the valid logical immediate derived from that + // specification, and see if it equals our original input. + // + // To repeat a value every d bits, we multiply it by a number of the form + // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can + // be derived using a table lookup on CLZ(d). + const MULTIPLIERS: [u64; 6] = [ + 0x0000000000000001, + 0x0000000100000001, + 0x0001000100010001, + 0x0101010101010101, + 0x1111111111111111, + 0x5555555555555555, + ]; + let multiplier = MULTIPLIERS[(u64::from(d).leading_zeros() - 57) as usize]; + let candidate = b.wrapping_sub(a) * multiplier; + + if value != candidate { + // The candidate pattern doesn't match our input value, so fail. + return None; + } + + // We have a match! This is a valid logical immediate, so now we have to + // construct the bits and pieces of the instruction encoding that generates + // it. + + // Count the set bits in our basic stretch. The special case of clz(0) == -1 + // makes the answer come out right for stretches that reach the very top of + // the word (e.g. numbers like 0xffffc00000000000). 
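        // Worked example (annotation, not part of the vendored patch), traced
        // by hand through this routine for value = 0x0ff0_0ff0_0ff0_0ff0 (I64):
        // the lowest set bit a is 1 << 4, b is 1 << 12 and c is 1 << 20, giving
        // a repeat distance d = 16 and a run of b - a = 0x0ff0, i.e. eight
        // consecutive ones per 16-bit element. The code below then produces
        // N = 0, imms = 0b10_0111 (element size 16, S = 7) and immr = 12, which
        // the encoding table above decodes back to 0x00ff rotated right by 12
        // within each 16-bit element, i.e. the original value.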
+ let clz_b = if b == 0 { + u32::max_value() // -1 + } else { + b.leading_zeros() + }; + let s = clz_a.wrapping_sub(clz_b); + + // Decide how many bits to rotate right by, to put the low bit of that basic + // stretch in position a. + let (s, r) = if inverted { + // If we inverted the input right at the start of this function, here's + // where we compensate: the number of set bits becomes the number of clear + // bits, and the rotation count is based on position b rather than position + // a (since b is the location of the 'lowest' 1 bit after inversion). + // Need wrapping for when clz_b is max_value() (for when b == 0). + (d - s, clz_b.wrapping_add(1) & (d - 1)) + } else { + (s, (clz_a + 1) & (d - 1)) + }; + + // Now we're done, except for having to encode the S output in such a way that + // it gives both the number of set bits and the length of the repeated + // segment. The s field is encoded like this: + // + // imms size S + // ssssss 64 UInt(ssssss) + // 0sssss 32 UInt(sssss) + // 10ssss 16 UInt(ssss) + // 110sss 8 UInt(sss) + // 1110ss 4 UInt(ss) + // 11110s 2 UInt(s) + // + // So we 'or' (2 * -d) with our computed s to form imms. + let s = ((d * 2).wrapping_neg() | (s - 1)) & 0x3f; + debug_assert!(u8::try_from(r).is_ok()); + debug_assert!(u8::try_from(s).is_ok()); + Some(ImmLogic { + value: original_value, + n: out_n != 0, + r: r as u8, + s: s as u8, + size: operand_size, + }) + } + + /// Returns bits ready for encoding: (N:1, R:6, S:6) + pub fn enc_bits(&self) -> u32 { + ((self.n as u32) << 12) | ((self.r as u32) << 6) | (self.s as u32) + } + + /// Returns the value that this immediate represents. + pub fn value(&self) -> u64 { + self.value + } + + /// Return an immediate for the bitwise-inverted value. + pub fn invert(&self) -> ImmLogic { + // For every ImmLogical immediate, the inverse can also be encoded. + Self::maybe_from_u64(!self.value, self.size.to_ty()).unwrap() + } + + /// This provides a safe(ish) way to avoid the costs of `maybe_from_u64` when we want to + /// encode a constant that we know at compiler-build time. It constructs an `ImmLogic` from + /// the fields `n`, `r`, `s` and `size`, but in a debug build, checks that `value_to_check` + /// corresponds to those four fields. The intention is that, in a non-debug build, this + /// reduces to something small enough that it will be a candidate for inlining. + pub fn from_n_r_s(value_to_check: u64, n: bool, r: u8, s: u8, size: OperandSize) -> Self { + // Construct it from the components we got given. + let imml = Self { + value: value_to_check, + n, + r, + s, + size, + }; + + // In debug mode, check that `n`/`r`/`s` are correct, given `value` and `size`. + debug_assert!(match ImmLogic::maybe_from_u64( + value_to_check, + if size == OperandSize::Size64 { + I64 + } else { + I32 + } + ) { + None => false, // fail: `value` is unrepresentable + Some(imml_check) => imml_check == imml, + }); + + imml + } +} + +/// An immediate for shift instructions. +#[derive(Clone, Debug)] +pub struct ImmShift { + /// 6-bit shift amount. + pub imm: u8, +} + +impl ImmShift { + /// Create an ImmShift from raw bits, if possible. + pub fn maybe_from_u64(val: u64) -> Option<ImmShift> { + if val < 64 { + Some(ImmShift { imm: val as u8 }) + } else { + None + } + } + + /// Get the immediate value. + pub fn value(&self) -> u8 { + self.imm + } +} + +/// A 16-bit immediate for a MOVZ instruction, with a {0,16,32,48}-bit shift. +#[derive(Clone, Copy, Debug)] +pub struct MoveWideConst { + /// The value. 
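    // Worked examples (annotation, not part of the vendored patch), assuming
    // the `maybe_from_u64` constructor below: 0xffff stays as
    // { bits: 0xffff, shift: 0 }, 0x1234_0000_0000 becomes
    // { bits: 0x1234, shift: 2 } (i.e. MOVZ ..., LSL #32), and 0x1_0000_ffff
    // has two non-zero half-words, so it is rejected (None) and
    // `Inst::load_constant` later in this patch falls back to a MOVZ/MOVK
    // sequence instead.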
+ pub bits: u16, + /// Result is `bits` shifted 16*shift bits to the left. + pub shift: u8, +} + +impl MoveWideConst { + /// Construct a MoveWideConst from an arbitrary 64-bit constant if possible. + pub fn maybe_from_u64(value: u64) -> Option<MoveWideConst> { + let mask0 = 0x0000_0000_0000_ffffu64; + let mask1 = 0x0000_0000_ffff_0000u64; + let mask2 = 0x0000_ffff_0000_0000u64; + let mask3 = 0xffff_0000_0000_0000u64; + + if value == (value & mask0) { + return Some(MoveWideConst { + bits: (value & mask0) as u16, + shift: 0, + }); + } + if value == (value & mask1) { + return Some(MoveWideConst { + bits: ((value >> 16) & mask0) as u16, + shift: 1, + }); + } + if value == (value & mask2) { + return Some(MoveWideConst { + bits: ((value >> 32) & mask0) as u16, + shift: 2, + }); + } + if value == (value & mask3) { + return Some(MoveWideConst { + bits: ((value >> 48) & mask0) as u16, + shift: 3, + }); + } + None + } + + pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<MoveWideConst> { + let shift_enc = shift / 16; + if shift_enc > 3 { + None + } else { + Some(MoveWideConst { + bits: imm, + shift: shift_enc, + }) + } + } + + /// Returns the value that this constant represents. + pub fn value(&self) -> u64 { + (self.bits as u64) << (16 * self.shift) + } +} + +/// Advanced SIMD modified immediate as used by MOVI/MVNI. +#[derive(Clone, Copy, Debug)] +pub struct ASIMDMovModImm { + imm: u8, + shift: u8, + shift_ones: bool, +} + +impl ASIMDMovModImm { + pub fn maybe_from_u64(value: u64, size: ScalarSize) -> Option<ASIMDMovModImm> { + match size { + ScalarSize::Size8 => Some(ASIMDMovModImm { + imm: value as u8, + shift: 0, + shift_ones: false, + }), + _ => None, + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> Self { + ASIMDMovModImm { + imm: 0, + shift: 0, + shift_ones: false, + } + } + + pub fn value(&self) -> (u8, u32, bool) { + (self.imm, self.shift as u32, self.shift_ones) + } +} + +impl PrettyPrint for NZCV { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c }; + format!( + "#{}{}{}{}", + fmt('n', self.n), + fmt('z', self.z), + fmt('c', self.c), + fmt('v', self.v) + ) + } +} + +impl PrettyPrint for UImm5 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl PrettyPrint for Imm12 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + let shift = if self.shift12 { 12 } else { 0 }; + let value = u32::from(self.bits) << shift; + format!("#{}", value) + } +} + +impl PrettyPrint for SImm7Scaled { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl PrettyPrint for FPULeftShiftImm { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.amount) + } +} + +impl PrettyPrint for FPURightShiftImm { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.amount) + } +} + +impl PrettyPrint for SImm9 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl PrettyPrint for UImm12Scaled { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value) + } +} + +impl PrettyPrint for ImmLogic { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("#{}", self.value()) + } +} + +impl PrettyPrint for ImmShift { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + 
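        // Annotation (not part of the vendored patch): these `PrettyPrint`
        // impls produce the immediate-operand text that the emit tests compare
        // against via `show_rru`; an `ImmShift` of 63, for instance, is
        // rendered as "#63", and a shifted `MoveWideConst` is rendered below as
        // "#bits, LSL #16/32/48".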
format!("#{}", self.imm) + } +} + +impl PrettyPrint for MoveWideConst { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + if self.shift == 0 { + format!("#{}", self.bits) + } else { + format!("#{}, LSL #{}", self.bits, self.shift * 16) + } + } +} + +impl PrettyPrint for ASIMDMovModImm { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + if self.shift == 0 { + format!("#{}", self.imm) + } else { + let shift_type = if self.shift_ones { "MSL" } else { "LSL" }; + format!("#{}, {} #{}", self.imm, shift_type, self.shift) + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn imm_logical_test() { + assert_eq!(None, ImmLogic::maybe_from_u64(0, I64)); + assert_eq!(None, ImmLogic::maybe_from_u64(u64::max_value(), I64)); + + assert_eq!( + Some(ImmLogic { + value: 1, + n: true, + r: 0, + s: 0, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(1, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 2, + n: true, + r: 63, + s: 0, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(2, I64) + ); + + assert_eq!(None, ImmLogic::maybe_from_u64(5, I64)); + + assert_eq!(None, ImmLogic::maybe_from_u64(11, I64)); + + assert_eq!( + Some(ImmLogic { + value: 248, + n: true, + r: 61, + s: 4, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(248, I64) + ); + + assert_eq!(None, ImmLogic::maybe_from_u64(249, I64)); + + assert_eq!( + Some(ImmLogic { + value: 1920, + n: true, + r: 57, + s: 3, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(1920, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x7ffe, + n: true, + r: 63, + s: 13, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0x7ffe, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x30000, + n: true, + r: 48, + s: 1, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0x30000, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x100000, + n: true, + r: 44, + s: 0, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0x100000, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: u64::max_value() - 1, + n: true, + r: 63, + s: 62, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(u64::max_value() - 1, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0xaaaaaaaaaaaaaaaa, + n: false, + r: 1, + s: 60, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0xaaaaaaaaaaaaaaaa, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x8181818181818181, + n: false, + r: 1, + s: 49, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0x8181818181818181, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0xffc3ffc3ffc3ffc3, + n: false, + r: 10, + s: 43, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0xffc3ffc3ffc3ffc3, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x100000001, + n: false, + r: 0, + s: 0, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0x100000001, I64) + ); + + assert_eq!( + Some(ImmLogic { + value: 0x1111111111111111, + n: false, + r: 0, + s: 56, + size: OperandSize::Size64, + }), + ImmLogic::maybe_from_u64(0x1111111111111111, I64) + ); + + for n in 0..2 { + let types = if n == 0 { vec![I64, I32] } else { vec![I64] }; + for s in 0..64 { + for r in 0..64 { + let imm = get_logical_imm(n, s, r); + for &ty in &types { + match ImmLogic::maybe_from_u64(imm, ty) { + Some(ImmLogic { value, .. 
}) => { + assert_eq!(imm, value); + ImmLogic::maybe_from_u64(!value, ty).unwrap(); + } + None => assert_eq!(0, imm), + }; + } + } + } + } + } + + // Repeat a value that has `width` bits, across a 64-bit value. + fn repeat(value: u64, width: u64) -> u64 { + let mut result = value & ((1 << width) - 1); + let mut i = width; + while i < 64 { + result |= result << i; + i *= 2; + } + result + } + + // Get the logical immediate, from the encoding N/R/S bits. + fn get_logical_imm(n: u32, s: u32, r: u32) -> u64 { + // An integer is constructed from the n, imm_s and imm_r bits according to + // the following table: + // + // N imms immr size S R + // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr) + // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr) + // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr) + // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr) + // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr) + // 0 11110s xxxxxr 2 UInt(s) UInt(r) + // (s bits must not be all set) + // + // A pattern is constructed of size bits, where the least significant S+1 + // bits are set. The pattern is rotated right by R, and repeated across a + // 64-bit value. + + if n == 1 { + if s == 0x3f { + return 0; + } + let bits = (1u64 << (s + 1)) - 1; + bits.rotate_right(r) + } else { + if (s >> 1) == 0x1f { + return 0; + } + let mut width = 0x20; + while width >= 0x2 { + if (s & width) == 0 { + let mask = width - 1; + if (s & mask) == mask { + return 0; + } + let bits = (1u64 << ((s & mask) + 1)) - 1; + return repeat(bits.rotate_right(r & mask), width.into()); + } + width >>= 1; + } + unreachable!(); + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs new file mode 100644 index 0000000000..278302018e --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/mod.rs @@ -0,0 +1,4057 @@ +//! This module defines aarch64-specific machine instruction types. + +// Some variants are not constructed, but we still want them as options in the future. +#![allow(dead_code)] + +use crate::binemit::CodeOffset; +use crate::ir::types::{ + B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8, + I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64, +}; +use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, TrapCode, Type}; +use crate::isa::CallConv; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use regalloc::{RegUsageCollector, RegUsageMapper}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::convert::TryFrom; +use smallvec::{smallvec, SmallVec}; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod unwind; + +#[cfg(test)] +mod emit_tests; + +//============================================================================= +// Instructions (top level): definition + +/// An ALU operation. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. 
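// Annotation (not part of the vendored patch): the same `ALUOp` is reused by
// several `Inst` formats defined later in this file. For example, `ALUOp::Add64`
// can appear as `Inst::AluRRR` (register + register), `Inst::AluRRImm12`
// (register + 12-bit immediate), `Inst::AluRRRShift` (shifted register) or
// `Inst::AluRRRExtend` (extended register), matching the A64 ADD forms.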
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ALUOp { + Add32, + Add64, + Sub32, + Sub64, + Orr32, + Orr64, + OrrNot32, + OrrNot64, + And32, + And64, + AndNot32, + AndNot64, + /// XOR (AArch64 calls this "EOR") + Eor32, + /// XOR (AArch64 calls this "EOR") + Eor64, + /// XNOR (AArch64 calls this "EOR-NOT") + EorNot32, + /// XNOR (AArch64 calls this "EOR-NOT") + EorNot64, + /// Add, setting flags + AddS32, + /// Add, setting flags + AddS64, + /// Sub, setting flags + SubS32, + /// Sub, setting flags + SubS64, + /// Signed multiply, high-word result + SMulH, + /// Unsigned multiply, high-word result + UMulH, + SDiv64, + UDiv64, + RotR32, + RotR64, + Lsr32, + Lsr64, + Asr32, + Asr64, + Lsl32, + Lsl64, +} + +/// An ALU operation with three arguments. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ALUOp3 { + /// Multiply-add + MAdd32, + /// Multiply-add + MAdd64, + /// Multiply-sub + MSub32, + /// Multiply-sub + MSub64, +} + +/// A floating-point unit (FPU) operation with one arg. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp1 { + Abs32, + Abs64, + Neg32, + Neg64, + Sqrt32, + Sqrt64, + Cvt32To64, + Cvt64To32, +} + +/// A floating-point unit (FPU) operation with two args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp2 { + Add32, + Add64, + Sub32, + Sub64, + Mul32, + Mul64, + Div32, + Div64, + Max32, + Max64, + Min32, + Min64, + /// Signed saturating add + Sqadd64, + /// Unsigned saturating add + Uqadd64, + /// Signed saturating subtract + Sqsub64, + /// Unsigned saturating subtract + Uqsub64, +} + +/// A floating-point unit (FPU) operation with two args, a register and an immediate. +#[derive(Copy, Clone, Debug)] +pub enum FPUOpRI { + /// Unsigned right shift. Rd = Rn << #imm + UShr32(FPURightShiftImm), + /// Unsigned right shift. Rd = Rn << #imm + UShr64(FPURightShiftImm), + /// Shift left and insert. Rd |= Rn << #imm + Sli32(FPULeftShiftImm), + /// Shift left and insert. Rd |= Rn << #imm + Sli64(FPULeftShiftImm), +} + +/// A floating-point unit (FPU) operation with three args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp3 { + MAdd32, + MAdd64, +} + +/// A conversion from an FP to an integer value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuToIntOp { + F32ToU32, + F32ToI32, + F32ToU64, + F32ToI64, + F64ToU32, + F64ToI32, + F64ToU64, + F64ToI64, +} + +/// A conversion from an integer to an FP value. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum IntToFpuOp { + U32ToF32, + I32ToF32, + U32ToF64, + I32ToF64, + U64ToF32, + I64ToF32, + U64ToF64, + I64ToF64, +} + +/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to +/// nearest, and for 32- or 64-bit FP values. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FpuRoundMode { + Minus32, + Minus64, + Plus32, + Plus64, + Zero32, + Zero64, + Nearest32, + Nearest64, +} + +/// Type of vector element extensions. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecExtendOp { + /// Signed extension of 8-bit elements + Sxtl8, + /// Signed extension of 16-bit elements + Sxtl16, + /// Signed extension of 32-bit elements + Sxtl32, + /// Unsigned extension of 8-bit elements + Uxtl8, + /// Unsigned extension of 16-bit elements + Uxtl16, + /// Unsigned extension of 32-bit elements + Uxtl32, +} + +/// A vector ALU operation. 
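// Annotation (not part of the vendored patch): `FpuRoundMode` corresponds
// directly to the A64 round-to-integral instructions exercised by the emit
// tests earlier in this patch: Minus* -> frintm (toward -infinity),
// Plus* -> frintp (toward +infinity), Zero* -> frintz (toward zero) and
// Nearest* -> frintn (to nearest, ties to even), each in a 32- and 64-bit
// variant.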
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecALUOp { + /// Signed saturating add + Sqadd, + /// Unsigned saturating add + Uqadd, + /// Signed saturating subtract + Sqsub, + /// Unsigned saturating subtract + Uqsub, + /// Compare bitwise equal + Cmeq, + /// Compare signed greater than or equal + Cmge, + /// Compare signed greater than + Cmgt, + /// Compare unsigned higher + Cmhs, + /// Compare unsigned higher or same + Cmhi, + /// Floating-point compare equal + Fcmeq, + /// Floating-point compare greater than + Fcmgt, + /// Floating-point compare greater than or equal + Fcmge, + /// Bitwise and + And, + /// Bitwise bit clear + Bic, + /// Bitwise inclusive or + Orr, + /// Bitwise exclusive or + Eor, + /// Bitwise select + Bsl, + /// Unsigned maximum pairwise + Umaxp, + /// Add + Add, + /// Subtract + Sub, + /// Multiply + Mul, + /// Signed shift left + Sshl, + /// Unsigned shift left + Ushl, + /// Unsigned minimum + Umin, + /// Signed minimum + Smin, + /// Unsigned maximum + Umax, + /// Signed maximum + Smax, + /// Unsigned rounding halving add + Urhadd, + /// Floating-point add + Fadd, + /// Floating-point subtract + Fsub, + /// Floating-point divide + Fdiv, + /// Floating-point maximum + Fmax, + /// Floating-point minimum + Fmin, + /// Floating-point multiply + Fmul, + /// Add pairwise + Addp, + /// Unsigned multiply add long + Umlal, + /// Zip vectors (primary) [meaning, high halves] + Zip1, + /// Signed multiply long (low halves) + Smull, + /// Signed multiply long (high halves) + Smull2, +} + +/// A Vector miscellaneous operation with two registers. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecMisc2 { + /// Bitwise NOT + Not, + /// Negate + Neg, + /// Absolute value + Abs, + /// Floating-point absolute value + Fabs, + /// Floating-point negate + Fneg, + /// Floating-point square root + Fsqrt, + /// Reverse elements in 64-bit doublewords + Rev64, + /// Shift left long (by element size) + Shll, + /// Floating-point convert to signed integer, rounding toward zero + Fcvtzs, + /// Floating-point convert to unsigned integer, rounding toward zero + Fcvtzu, + /// Signed integer convert to floating-point + Scvtf, + /// Unsigned integer convert to floating-point + Ucvtf, + /// Floating point round to integral, rounding towards nearest + Frintn, + /// Floating point round to integral, rounding towards zero + Frintz, + /// Floating point round to integral, rounding towards minus infinity + Frintm, + /// Floating point round to integral, rounding towards plus infinity + Frintp, +} + +/// A Vector narrowing operation with two registers. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecMiscNarrowOp { + /// Extract Narrow + Xtn, + /// Signed saturating extract narrow + Sqxtn, + /// Signed saturating extract unsigned narrow + Sqxtun, +} + +/// An operation across the lanes of vectors. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecLanesOp { + /// Integer addition across a vector + Addv, + /// Unsigned minimum across a vector + Uminv, +} + +/// A shift-by-immediate operation on each lane of a vector. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum VecShiftImmOp { + // Unsigned shift left + Shl, + // Unsigned shift right + Ushr, + // Signed shift right + Sshr, +} + +/// An operation on the bits of a register. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. 
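// Annotation (not part of the vendored patch): per the `impl` below,
// `BitOp::from((Opcode::Clz, I64))` yields `BitOp::Clz64`, whose mnemonic
// (`op_str`) is "clz" and whose `operand_size` is `OperandSize::Size64`; it is
// emitted through the `Inst::BitRR` format defined later in this file.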
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum BitOp { + /// Bit reverse + RBit32, + /// Bit reverse + RBit64, + Clz32, + Clz64, + Cls32, + Cls64, +} + +impl BitOp { + /// What is the opcode's native width? + pub fn operand_size(&self) -> OperandSize { + match self { + BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32, + _ => OperandSize::Size64, + } + } + + /// Get the assembly mnemonic for this opcode. + pub fn op_str(&self) -> &'static str { + match self { + BitOp::RBit32 | BitOp::RBit64 => "rbit", + BitOp::Clz32 | BitOp::Clz64 => "clz", + BitOp::Cls32 | BitOp::Cls64 => "cls", + } + } +} + +impl From<(Opcode, Type)> for BitOp { + /// Get the BitOp from the IR opcode. + fn from(op_ty: (Opcode, Type)) -> BitOp { + match op_ty { + (Opcode::Bitrev, I32) => BitOp::RBit32, + (Opcode::Bitrev, I64) => BitOp::RBit64, + (Opcode::Clz, I32) => BitOp::Clz32, + (Opcode::Clz, I64) => BitOp::Clz64, + (Opcode::Cls, I32) => BitOp::Cls32, + (Opcode::Cls, I64) => BitOp::Cls64, + _ => unreachable!("Called with non-bit op!: {:?}", op_ty), + } + } +} + +/// Additional information for (direct) Call instructions, left out of line to lower the size of +/// the Inst enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub dest: ExternalName, + pub uses: Vec<Reg>, + pub defs: Vec<Writable<Reg>>, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, +} + +/// Additional information for CallInd instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct CallIndInfo { + pub rn: Reg, + pub uses: Vec<Reg>, + pub defs: Vec<Writable<Reg>>, + pub opcode: Opcode, + pub caller_callconv: CallConv, + pub callee_callconv: CallConv, +} + +/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst +/// enum. +#[derive(Clone, Debug)] +pub struct JTSequenceInfo { + pub targets: Vec<BranchTarget>, + pub default_target: BranchTarget, + pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator. +} + +/// Instruction formats. +#[derive(Clone, Debug)] +pub enum Inst { + /// A no-op of zero size. + Nop0, + + /// A no-op that is one instruction large. + Nop4, + + /// An ALU operation with two register sources and a register destination. + AluRRR { + alu_op: ALUOp, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + }, + /// An ALU operation with three register sources and a register destination. + AluRRRR { + alu_op: ALUOp3, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + ra: Reg, + }, + /// An ALU operation with a register source and an immediate-12 source, and a register + /// destination. + AluRRImm12 { + alu_op: ALUOp, + rd: Writable<Reg>, + rn: Reg, + imm12: Imm12, + }, + /// An ALU operation with a register source and an immediate-logic source, and a register destination. + AluRRImmLogic { + alu_op: ALUOp, + rd: Writable<Reg>, + rn: Reg, + imml: ImmLogic, + }, + /// An ALU operation with a register source and an immediate-shiftamt source, and a register destination. + AluRRImmShift { + alu_op: ALUOp, + rd: Writable<Reg>, + rn: Reg, + immshift: ImmShift, + }, + /// An ALU operation with two register sources, one of which can be shifted, and a register + /// destination. + AluRRRShift { + alu_op: ALUOp, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + shiftop: ShiftOpAndAmt, + }, + /// An ALU operation with two register sources, one of which can be {zero,sign}-extended and + /// shifted, and a register destination. 
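    // Annotation (not part of the vendored patch): as a sketch, an A64 add with
    // an extended register operand (roughly `add x0, x1, w2, uxtw`) would be
    // represented by the variant below as
    // `Inst::AluRRRExtend { alu_op: ALUOp::Add64, rd, rn, rm, extendop: ExtendOp::UXTW }`,
    // with `rd`/`rn`/`rm` being the appropriate `Writable<Reg>`/`Reg` values.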
+ AluRRRExtend { + alu_op: ALUOp, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + extendop: ExtendOp, + }, + + /// A bit op instruction with a single register source. + BitRR { + op: BitOp, + rd: Writable<Reg>, + rn: Reg, + }, + + /// An unsigned (zero-extending) 8-bit load. + ULoad8 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// A signed (sign-extending) 8-bit load. + SLoad8 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// An unsigned (zero-extending) 16-bit load. + ULoad16 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// A signed (sign-extending) 16-bit load. + SLoad16 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// An unsigned (zero-extending) 32-bit load. + ULoad32 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// A signed (sign-extending) 32-bit load. + SLoad32 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// A 64-bit load. + ULoad64 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + + /// An 8-bit store. + Store8 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + /// A 16-bit store. + Store16 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + /// A 32-bit store. + Store32 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + /// A 64-bit store. + Store64 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + + /// A store of a pair of registers. + StoreP64 { + rt: Reg, + rt2: Reg, + mem: PairAMode, + flags: MemFlags, + }, + /// A load of a pair of registers. + LoadP64 { + rt: Writable<Reg>, + rt2: Writable<Reg>, + mem: PairAMode, + flags: MemFlags, + }, + + /// A MOV instruction. These are encoded as ORR's (AluRRR form) but we + /// keep them separate at the `Inst` level for better pretty-printing + /// and faster `is_move()` logic. + Mov64 { + rd: Writable<Reg>, + rm: Reg, + }, + + /// A 32-bit MOV. Zeroes the top 32 bits of the destination. This is + /// effectively an alias for an unsigned 32-to-64-bit extension. + Mov32 { + rd: Writable<Reg>, + rm: Reg, + }, + + /// A MOVZ with a 16-bit immediate. + MovZ { + rd: Writable<Reg>, + imm: MoveWideConst, + size: OperandSize, + }, + + /// A MOVN with a 16-bit immediate. + MovN { + rd: Writable<Reg>, + imm: MoveWideConst, + size: OperandSize, + }, + + /// A MOVK with a 16-bit immediate. + MovK { + rd: Writable<Reg>, + imm: MoveWideConst, + size: OperandSize, + }, + + /// A sign- or zero-extend operation. + Extend { + rd: Writable<Reg>, + rn: Reg, + signed: bool, + from_bits: u8, + to_bits: u8, + }, + + /// A conditional-select operation. + CSel { + rd: Writable<Reg>, + cond: Cond, + rn: Reg, + rm: Reg, + }, + + /// A conditional-set operation. + CSet { + rd: Writable<Reg>, + cond: Cond, + }, + + /// A conditional comparison with an immediate. + CCmpImm { + size: OperandSize, + rn: Reg, + imm: UImm5, + nzcv: NZCV, + cond: Cond, + }, + + /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall + /// effect of atomically modifying a memory location in a particular way. Because we have + /// no way to explain to the regalloc about earlyclobber registers, this instruction has + /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies + /// in the surrounding code to the extent it can. The sequence is both preceded and + /// followed by a fence which is at least as comprehensive as that of the `Fence` + /// instruction below. This instruction is sequentially consistent. 
The operand + /// conventions are: + /// + /// x25 (rd) address + /// x26 (rd) second operand for `op` + /// x27 (wr) old value + /// x24 (wr) scratch reg; value afterwards has no meaning + /// x28 (wr) scratch reg; value afterwards has no meaning + AtomicRMW { + ty: Type, // I8, I16, I32 or I64 + op: inst_common::AtomicRmwOp, + }, + + /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked + /// store-conditional loop. (Although we could possibly implement it more directly using + /// CAS insns that are available in some revisions of AArch64 above 8.0). The sequence is + /// both preceded and followed by a fence which is at least as comprehensive as that of the + /// `Fence` instruction below. This instruction is sequentially consistent. Note that the + /// operand conventions, although very similar to AtomicRMW, are different: + /// + /// x25 (rd) address + /// x26 (rd) expected value + /// x28 (rd) replacement value + /// x27 (wr) old value + /// x24 (wr) scratch reg; value afterwards has no meaning + AtomicCAS { + ty: Type, // I8, I16, I32 or I64 + }, + + /// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it + /// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as + /// that of the `Fence` instruction below. This instruction is sequentially consistent. + AtomicLoad { + ty: Type, // I8, I16, I32 or I64 + r_data: Writable<Reg>, + r_addr: Reg, + }, + + /// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence + /// instruction following the store. The fence is at least as comprehensive as that of the + /// `Fence` instruction below. This instruction is sequentially consistent. + AtomicStore { + ty: Type, // I8, I16, I32 or I64 + r_data: Reg, + r_addr: Reg, + }, + + /// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + /// nor stores may move forwards or backwards across the fence. Currently emitted as "dmb + /// ish". This instruction is sequentially consistent. + Fence, + + /// FPU move. Note that this is distinct from a vector-register + /// move; moving just 64 bits seems to be significantly faster. + FpuMove64 { + rd: Writable<Reg>, + rn: Reg, + }, + + /// Vector register move. + FpuMove128 { + rd: Writable<Reg>, + rn: Reg, + }, + + /// Move to scalar from a vector element. + FpuMoveFromVec { + rd: Writable<Reg>, + rn: Reg, + idx: u8, + size: VectorSize, + }, + + /// 1-op FPU instruction. + FpuRR { + fpu_op: FPUOp1, + rd: Writable<Reg>, + rn: Reg, + }, + + /// 2-op FPU instruction. + FpuRRR { + fpu_op: FPUOp2, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + }, + + FpuRRI { + fpu_op: FPUOpRI, + rd: Writable<Reg>, + rn: Reg, + }, + + /// 3-op FPU instruction. + FpuRRRR { + fpu_op: FPUOp3, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + ra: Reg, + }, + + /// FPU comparison, single-precision (32 bit). + FpuCmp32 { + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, double-precision (64 bit). + FpuCmp64 { + rn: Reg, + rm: Reg, + }, + + /// Floating-point load, single-precision (32 bit). + FpuLoad32 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// Floating-point store, single-precision (32 bit). + FpuStore32 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + /// Floating-point load, double-precision (64 bit). + FpuLoad64 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// Floating-point store, double-precision (64 bit). 
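    // Annotation (not part of the vendored patch): the fixed-register
    // convention of the AtomicRMW/AtomicCAS variants above (x25 = address,
    // x26 = second operand/expected value, x28 = replacement for CAS,
    // x27 = old value, x24 and x28 = scratch) is exactly what the emit tests
    // earlier in this patch print, e.g.
    // "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }".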
+ FpuStore64 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + /// Floating-point/vector load, 128 bit. + FpuLoad128 { + rd: Writable<Reg>, + mem: AMode, + flags: MemFlags, + }, + /// Floating-point/vector store, 128 bit. + FpuStore128 { + rd: Reg, + mem: AMode, + flags: MemFlags, + }, + + LoadFpuConst64 { + rd: Writable<Reg>, + const_data: u64, + }, + + LoadFpuConst128 { + rd: Writable<Reg>, + const_data: u128, + }, + + /// Conversion: FP -> integer. + FpuToInt { + op: FpuToIntOp, + rd: Writable<Reg>, + rn: Reg, + }, + + /// Conversion: integer -> FP. + IntToFpu { + op: IntToFpuOp, + rd: Writable<Reg>, + rn: Reg, + }, + + /// FP conditional select, 32 bit. + FpuCSel32 { + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + cond: Cond, + }, + /// FP conditional select, 64 bit. + FpuCSel64 { + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + cond: Cond, + }, + + /// Round to integer. + FpuRound { + op: FpuRoundMode, + rd: Writable<Reg>, + rn: Reg, + }, + + /// Move from a GPR to a vector register. The scalar value is parked in the lowest lane + /// of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit + /// transactions are supported. + MovToFpu { + rd: Writable<Reg>, + rn: Reg, + size: ScalarSize, + }, + + /// Move to a vector element from a GPR. + MovToVec { + rd: Writable<Reg>, + rn: Reg, + idx: u8, + size: VectorSize, + }, + + /// Unsigned move from a vector element to a GPR. + MovFromVec { + rd: Writable<Reg>, + rn: Reg, + idx: u8, + size: VectorSize, + }, + + /// Signed move from a vector element to a GPR. + MovFromVecSigned { + rd: Writable<Reg>, + rn: Reg, + idx: u8, + size: VectorSize, + scalar_size: OperandSize, + }, + + /// Duplicate general-purpose register to vector. + VecDup { + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + }, + + /// Duplicate scalar to vector. + VecDupFromFpu { + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + }, + + /// Duplicate immediate to vector. + VecDupImm { + rd: Writable<Reg>, + imm: ASIMDMovModImm, + invert: bool, + size: VectorSize, + }, + + /// Vector extend. + VecExtend { + t: VecExtendOp, + rd: Writable<Reg>, + rn: Reg, + high_half: bool, + }, + + /// Move vector element to another vector element. + VecMovElement { + rd: Writable<Reg>, + rn: Reg, + dest_idx: u8, + src_idx: u8, + size: VectorSize, + }, + + /// Vector narrowing operation. + VecMiscNarrow { + op: VecMiscNarrowOp, + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + high_half: bool, + }, + + /// A vector ALU op. + VecRRR { + alu_op: VecALUOp, + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + size: VectorSize, + }, + + /// Vector two register miscellaneous instruction. + VecMisc { + op: VecMisc2, + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + }, + + /// Vector instruction across lanes. + VecLanes { + op: VecLanesOp, + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + }, + + /// Vector shift by immediate: Shift Left (immediate), Unsigned Shift Right (immediate), + /// Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts, + /// the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero + /// right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm` + /// values from 0 to lane-size-in-bits - 1 inclusive. + VecShiftImm { + op: VecShiftImmOp, + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + imm: u8, + }, + + /// Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes + /// of `rm` followed by the uppermost `16 - imm4` bytes of `rn`. 
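    // Annotation (not part of the vendored patch): `LoadFpuConst64` and
    // `LoadFpuConst128` above expand to a pc-relative literal load that jumps
    // over its own inline data, as the emit tests earlier in this patch show:
    // "ldr d16, pc+8 ; b 12 ; data.f64 1" and
    // "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100".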
+ VecExtract { + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + imm4: u8, + }, + + /// Table vector lookup - single register table. The table consists of 8-bit elements and is + /// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether + /// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination + /// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them + /// to 0. + VecTbl { + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + is_extension: bool, + }, + + /// Table vector lookup - two register table. The table consists of 8-bit elements and is + /// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension` + /// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in + /// the destination vector that correspond to out-of-range indices (greater than 31) unmodified + /// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers + /// modulo 32, that is v31 and v0 (in that order) are consecutive registers. + VecTbl2 { + rd: Writable<Reg>, + rn: Reg, + rn2: Reg, + rm: Reg, + is_extension: bool, + }, + + /// Load an element and replicate to all lanes of a vector. + VecLoadReplicate { + rd: Writable<Reg>, + rn: Reg, + size: VectorSize, + }, + + /// Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn + /// control-flow diamond. + VecCSel { + rd: Writable<Reg>, + rn: Reg, + rm: Reg, + cond: Cond, + }, + + /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). + MovToNZCV { + rn: Reg, + }, + + /// Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). + MovFromNZCV { + rd: Writable<Reg>, + }, + + /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation + /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the + /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit + /// target. + Call { + info: Box<CallInfo>, + }, + /// A machine indirect-call instruction. + CallInd { + info: Box<CallIndInfo>, + }, + + // ---- branches (exactly one must appear at end of BB) ---- + /// A machine return instruction. + Ret, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder, + + /// An unconditional branch. + Jump { + dest: BranchTarget, + }, + + /// A conditional branch. Contains two targets; at emission time, both are emitted, but + /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the + /// choice of taken/not_taken (inverting the branch polarity as needed) based on the + /// fallthrough at the time of lowering. + CondBr { + taken: BranchTarget, + not_taken: BranchTarget, + kind: CondBrKind, + }, + + /// A conditional trap: execute a `udf` if the condition is true. This is + /// one VCode instruction because it uses embedded control flow; it is + /// logically a single-in, single-out region, but needs to appear as one + /// unit to the register allocator. + /// + /// The `CondBrKind` gives the conditional-branch condition that will + /// *execute* the embedded `Inst`. (In the emitted code, we use the inverse + /// of this condition in a branch that skips the trap instruction.) + TrapIf { + kind: CondBrKind, + trap_code: TrapCode, + }, + + /// An indirect branch through a register, augmented with set of all + /// possible successors. 
+ IndirectBr { + rn: Reg, + targets: Vec<MachLabel>, + }, + + /// A "break" instruction, used for e.g. traps and debug breakpoints. + Brk, + + /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at + /// runtime. + Udf { + trap_code: TrapCode, + }, + + /// Compute the address (using a PC-relative offset) of a memory location, using the `ADR` + /// instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is + /// only used for now in fixed lowering sequences with hardcoded offsets. In the future we may + /// need full `MemLabel` support. + Adr { + rd: Writable<Reg>, + /// Offset in range -2^20 .. 2^20. + off: i32, + }, + + /// Raw 32-bit word, used for inline constants and jump-table entries. + Word4 { + data: u32, + }, + + /// Raw 64-bit word, used for inline constants. + Word8 { + data: u64, + }, + + /// Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). + JTSequence { + info: Box<JTSequenceInfo>, + ridx: Reg, + rtmp1: Writable<Reg>, + rtmp2: Writable<Reg>, + }, + + /// Load an inline symbol reference. + LoadExtName { + rd: Writable<Reg>, + name: Box<ExternalName>, + offset: i64, + }, + + /// Load address referenced by `mem` into `rd`. + LoadAddr { + rd: Writable<Reg>, + mem: AMode, + }, + + /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This + /// controls how AMode::NominalSPOffset args are lowered. + VirtualSPOffsetAdj { + offset: i64, + }, + + /// Meta-insn, no-op in generated code: emit constant/branch veneer island + /// at this point (with a guard jump around it) if less than the needed + /// space is available before the next branch deadline. See the `MachBuffer` + /// implementation in `machinst/buffer.rs` for the overall algorithm. In + /// brief, we retain a set of "pending/unresolved label references" from + /// branches as we scan forward through instructions to emit machine code; + /// if we notice we're about to go out of range on an unresolved reference, + /// we stop, emit a bunch of "veneers" (branches in a form that has a longer + /// range, e.g. a 26-bit-offset unconditional jump), and point the original + /// label references to those. This is an "island" because it comes in the + /// middle of the code. + /// + /// This meta-instruction is a necessary part of the logic that determines + /// where to place islands. Ordinarily, we want to place them between basic + /// blocks, so we compute the worst-case size of each block, and emit the + /// island before starting a block if we would exceed a deadline before the + /// end of the block. However, some sequences (such as an inline jumptable) + /// are variable-length and not accounted for by this logic; so these + /// lowered sequences include an `EmitIsland` to trigger island generation + /// where necessary. + EmitIsland { + /// The needed space before the next deadline. + needed_space: CodeOffset, + }, +} + +fn count_zero_half_words(mut value: u64, num_half_words: u8) -> usize { + let mut count = 0; + for _ in 0..num_half_words { + if value & 0xffff == 0 { + count += 1; + } + value >>= 16; + } + + count +} + +#[test] +fn inst_size_test() { + // This test will help with unintentionally growing the size + // of the Inst enum. + assert_eq!(32, std::mem::size_of::<Inst>()); +} + +impl Inst { + /// Create a move instruction. 
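    // Annotation (not part of the vendored patch): `count_zero_half_words`
    // above is what `load_constant` below uses to decide between a MOVZ-led and
    // a MOVN-led sequence. For example,
    // count_zero_half_words(0x0000_1234_0000_ffff, 4) == 2, since half-words 1
    // and 3 are zero.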
+    pub fn mov(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+        assert!(to_reg.to_reg().get_class() == from_reg.get_class());
+        if from_reg.get_class() == RegClass::I64 {
+            Inst::Mov64 {
+                rd: to_reg,
+                rm: from_reg,
+            }
+        } else if from_reg.get_class() == RegClass::V128 {
+            Inst::FpuMove128 {
+                rd: to_reg,
+                rn: from_reg,
+            }
+        } else {
+            Inst::FpuMove64 {
+                rd: to_reg,
+                rn: from_reg,
+            }
+        }
+    }
+
+    /// Create a 32-bit move instruction.
+    pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+        Inst::Mov32 {
+            rd: to_reg,
+            rm: from_reg,
+        }
+    }
+
+    /// Create an instruction that loads a constant, using one of several options (MOVZ, MOVN,
+    /// logical immediate, or constant pool).
+    pub fn load_constant(rd: Writable<Reg>, value: u64) -> SmallVec<[Inst; 4]> {
+        if let Some(imm) = MoveWideConst::maybe_from_u64(value) {
+            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVZ
+            smallvec![Inst::MovZ {
+                rd,
+                imm,
+                size: OperandSize::Size64
+            }]
+        } else if let Some(imm) = MoveWideConst::maybe_from_u64(!value) {
+            // 16-bit immediate (shifted by 0, 16, 32 or 48 bits) in MOVN
+            smallvec![Inst::MovN {
+                rd,
+                imm,
+                size: OperandSize::Size64
+            }]
+        } else if let Some(imml) = ImmLogic::maybe_from_u64(value, I64) {
+            // Weird logical-instruction immediate in ORR using the zero register
+            smallvec![Inst::AluRRImmLogic {
+                alu_op: ALUOp::Orr64,
+                rd,
+                rn: zero_reg(),
+                imml,
+            }]
+        } else {
+            let mut insts = smallvec![];
+
+            // If the top 32 bits are zero, use 32-bit `mov` operations.
+            let (num_half_words, size, negated) = if value >> 32 == 0 {
+                (2, OperandSize::Size32, (!value << 32) >> 32)
+            } else {
+                (4, OperandSize::Size64, !value)
+            };
+            // If the number of 0xffff half words is greater than the number of 0x0000 half words,
+            // it is more efficient to use `movn` for the first instruction.
+            let first_is_inverted = count_zero_half_words(negated, num_half_words)
+                > count_zero_half_words(value, num_half_words);
+            // Either 0xffff or 0x0000 half words can be skipped, depending on the first
+            // instruction used.
+            let ignored_halfword = if first_is_inverted { 0xffff } else { 0 };
+            let mut first_mov_emitted = false;
+
+            for i in 0..num_half_words {
+                let imm16 = (value >> (16 * i)) & 0xffff;
+                if imm16 != ignored_halfword {
+                    if !first_mov_emitted {
+                        first_mov_emitted = true;
+                        if first_is_inverted {
+                            let imm =
+                                MoveWideConst::maybe_with_shift(((!imm16) & 0xffff) as u16, i * 16)
+                                    .unwrap();
+                            insts.push(Inst::MovN { rd, imm, size });
+                        } else {
+                            let imm =
+                                MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+                            insts.push(Inst::MovZ { rd, imm, size });
+                        }
+                    } else {
+                        let imm = MoveWideConst::maybe_with_shift(imm16 as u16, i * 16).unwrap();
+                        insts.push(Inst::MovK { rd, imm, size });
+                    }
+                }
+            }
+
+            assert!(first_mov_emitted);
+
+            insts
+        }
+    }
+
+    /// Create instructions that load a 32-bit floating-point constant.
+    pub fn load_fp_constant32<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+        rd: Writable<Reg>,
+        value: u32,
+        mut alloc_tmp: F,
+    ) -> SmallVec<[Inst; 4]> {
+        if value == 0 {
+            smallvec![Inst::VecDupImm {
+                rd,
+                imm: ASIMDMovModImm::zero(),
+                invert: false,
+                size: VectorSize::Size8x8
+            }]
+        } else {
+            // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
+            // bits.
+ let tmp = alloc_tmp(RegClass::I64, I32); + let mut insts = Inst::load_constant(tmp, value as u64); + + insts.push(Inst::MovToFpu { + rd, + rn: tmp.to_reg(), + size: ScalarSize::Size64, + }); + + insts + } + } + + /// Create instructions that load a 64-bit floating-point constant. + pub fn load_fp_constant64<F: FnMut(RegClass, Type) -> Writable<Reg>>( + rd: Writable<Reg>, + const_data: u64, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + if let Ok(const_data) = u32::try_from(const_data) { + Inst::load_fp_constant32(rd, const_data, alloc_tmp) + // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent + // bits. Also, treat it as half of a 128-bit vector and consider replicated + // patterns. Scalar MOVI might also be an option. + } else if const_data & (u32::MAX as u64) == 0 { + let tmp = alloc_tmp(RegClass::I64, I64); + let mut insts = Inst::load_constant(tmp, const_data); + + insts.push(Inst::MovToFpu { + rd, + rn: tmp.to_reg(), + size: ScalarSize::Size64, + }); + + insts + } else { + smallvec![Inst::LoadFpuConst64 { rd, const_data }] + } + } + + /// Create instructions that load a 128-bit vector constant. + pub fn load_fp_constant128<F: FnMut(RegClass, Type) -> Writable<Reg>>( + rd: Writable<Reg>, + const_data: u128, + alloc_tmp: F, + ) -> SmallVec<[Inst; 5]> { + if let Ok(const_data) = u64::try_from(const_data) { + SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..]) + } else if let Some((pattern, size)) = + Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64) + { + Inst::load_replicated_vector_pattern( + rd, + pattern, + VectorSize::from_lane_size(size, true), + alloc_tmp, + ) + } else { + smallvec![Inst::LoadFpuConst128 { rd, const_data }] + } + } + + /// Determine whether a 128-bit constant represents a vector consisting of elements with + /// the same value. + pub fn get_replicated_vector_pattern( + value: u128, + size: ScalarSize, + ) -> Option<(u64, ScalarSize)> { + let (mask, shift, next_size) = match size { + ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128), + ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8), + ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16), + ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32), + _ => return None, + }; + let mut r = None; + let v = value & mask; + + if (value >> shift) & mask == v { + r = Inst::get_replicated_vector_pattern(v, next_size); + + if r.is_none() { + r = Some((v as u64, size)); + } + } + + r + } + + /// Create instructions that load a 128-bit vector constant consisting of elements with + /// the same value. 
+ pub fn load_replicated_vector_pattern<F: FnMut(RegClass, Type) -> Writable<Reg>>( + rd: Writable<Reg>, + pattern: u64, + size: VectorSize, + mut alloc_tmp: F, + ) -> SmallVec<[Inst; 5]> { + let lane_size = size.lane_size(); + + if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) { + smallvec![Inst::VecDupImm { + rd, + imm, + invert: false, + size + }] + } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) { + debug_assert_ne!(lane_size, ScalarSize::Size8); + debug_assert_ne!(lane_size, ScalarSize::Size64); + + smallvec![Inst::VecDupImm { + rd, + imm, + invert: true, + size + }] + } else { + let tmp = alloc_tmp(RegClass::I64, I64); + let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern)[..]); + + insts.push(Inst::VecDup { + rd, + rn: tmp.to_reg(), + size, + }); + + insts + } + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst { + match ty { + B1 | B8 | I8 => Inst::ULoad8 { + rd: into_reg, + mem, + flags, + }, + B16 | I16 => Inst::ULoad16 { + rd: into_reg, + mem, + flags, + }, + B32 | I32 | R32 => Inst::ULoad32 { + rd: into_reg, + mem, + flags, + }, + B64 | I64 | R64 => Inst::ULoad64 { + rd: into_reg, + mem, + flags, + }, + F32 => Inst::FpuLoad32 { + rd: into_reg, + mem, + flags, + }, + F64 => Inst::FpuLoad64 { + rd: into_reg, + mem, + flags, + }, + _ => { + if ty.is_vector() { + let bits = ty_bits(ty); + let rd = into_reg; + + if bits == 128 { + Inst::FpuLoad128 { rd, mem, flags } + } else { + assert_eq!(bits, 64); + Inst::FpuLoad64 { rd, mem, flags } + } + } else { + unimplemented!("gen_load({})", ty); + } + } + } + } + + /// Generic constructor for a store. + pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + match ty { + B1 | B8 | I8 => Inst::Store8 { + rd: from_reg, + mem, + flags, + }, + B16 | I16 => Inst::Store16 { + rd: from_reg, + mem, + flags, + }, + B32 | I32 | R32 => Inst::Store32 { + rd: from_reg, + mem, + flags, + }, + B64 | I64 | R64 => Inst::Store64 { + rd: from_reg, + mem, + flags, + }, + F32 => Inst::FpuStore32 { + rd: from_reg, + mem, + flags, + }, + F64 => Inst::FpuStore64 { + rd: from_reg, + mem, + flags, + }, + _ => { + if ty.is_vector() { + let bits = ty_bits(ty); + let rd = from_reg; + + if bits == 128 { + Inst::FpuStore128 { rd, mem, flags } + } else { + assert_eq!(bits, 64); + Inst::FpuStore64 { rd, mem, flags } + } + } else { + unimplemented!("gen_store({})", ty); + } + } + } + } +} + +//============================================================================= +// Instructions: get_regs + +fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) { + match memarg { + &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => { + collector.add_use(reg); + } + &AMode::RegReg(r1, r2, ..) + | &AMode::RegScaled(r1, r2, ..) + | &AMode::RegScaledExtended(r1, r2, ..) + | &AMode::RegExtended(r1, r2, ..) => { + collector.add_use(r1); + collector.add_use(r2); + } + &AMode::Label(..) => {} + &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => { + collector.add_mod(reg); + } + &AMode::FPOffset(..) => { + collector.add_use(fp_reg()); + } + &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => { + collector.add_use(stack_reg()); + } + &AMode::RegOffset(r, ..) => { + collector.add_use(r); + } + } +} + +fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) { + match pairmemarg { + &PairAMode::SignedOffset(reg, ..) 
=> { + collector.add_use(reg); + } + &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..) => { + collector.add_mod(reg); + } + } +} + +fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { + match inst { + &Inst::AluRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRRR { rd, rn, rm, ra, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + collector.add_use(ra); + } + &Inst::AluRRImm12 { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRImmLogic { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRImmShift { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRRRShift { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRRExtend { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::BitRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::ULoad8 { rd, ref mem, .. } + | &Inst::SLoad8 { rd, ref mem, .. } + | &Inst::ULoad16 { rd, ref mem, .. } + | &Inst::SLoad16 { rd, ref mem, .. } + | &Inst::ULoad32 { rd, ref mem, .. } + | &Inst::SLoad32 { rd, ref mem, .. } + | &Inst::ULoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::Store8 { rd, ref mem, .. } + | &Inst::Store16 { rd, ref mem, .. } + | &Inst::Store32 { rd, ref mem, .. } + | &Inst::Store64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::StoreP64 { + rt, rt2, ref mem, .. + } => { + collector.add_use(rt); + collector.add_use(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::LoadP64 { + rt, rt2, ref mem, .. + } => { + collector.add_def(rt); + collector.add_def(rt2); + pairmemarg_regs(mem, collector); + } + &Inst::Mov64 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => { + collector.add_def(rd); + } + &Inst::MovK { rd, .. } => { + collector.add_mod(rd); + } + &Inst::CSel { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CSet { rd, .. } => { + collector.add_def(rd); + } + &Inst::CCmpImm { rn, .. } => { + collector.add_use(rn); + } + &Inst::AtomicRMW { .. } => { + collector.add_use(xreg(25)); + collector.add_use(xreg(26)); + collector.add_def(writable_xreg(24)); + collector.add_def(writable_xreg(27)); + collector.add_def(writable_xreg(28)); + } + &Inst::AtomicCAS { .. } => { + collector.add_use(xreg(25)); + collector.add_use(xreg(26)); + collector.add_use(xreg(28)); + collector.add_def(writable_xreg(24)); + collector.add_def(writable_xreg(27)); + } + &Inst::AtomicLoad { r_data, r_addr, .. } => { + collector.add_use(r_addr); + collector.add_def(r_data); + } + &Inst::AtomicStore { r_data, r_addr, .. } => { + collector.add_use(r_addr); + collector.add_use(r_data); + } + &Inst::Fence {} => {} + &Inst::FpuMove64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuMove128 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuMoveFromVec { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRR { rd, rn, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuRRI { fpu_op, rd, rn, .. } => { + match fpu_op { + FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd), + FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd), + } + collector.add_use(rn); + } + &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + collector.add_use(ra); + } + &Inst::VecMisc { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + + &Inst::VecLanes { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecShiftImm { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecExtract { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::VecTbl { + rd, + rn, + rm, + is_extension, + } => { + collector.add_use(rn); + collector.add_use(rm); + + if is_extension { + collector.add_mod(rd); + } else { + collector.add_def(rd); + } + } + &Inst::VecTbl2 { + rd, + rn, + rn2, + rm, + is_extension, + } => { + collector.add_use(rn); + collector.add_use(rn2); + collector.add_use(rm); + + if is_extension { + collector.add_mod(rd); + } else { + collector.add_def(rd); + } + } + &Inst::VecLoadReplicate { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecCSel { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad128 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore128 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuToInt { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::IntToFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuRound { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::MovToFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::MovToVec { rd, rn, .. } => { + collector.add_mod(rd); + collector.add_use(rn); + } + &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecDup { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecDupFromFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecDupImm { rd, .. } => { + collector.add_def(rd); + } + &Inst::VecExtend { rd, rn, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecMovElement { rd, rn, .. } => { + collector.add_mod(rd); + collector.add_use(rn); + } + &Inst::VecMiscNarrow { + rd, rn, high_half, .. + } => { + collector.add_use(rn); + + if high_half { + collector.add_mod(rd); + } else { + collector.add_def(rd); + } + } + &Inst::VecRRR { + alu_op, rd, rn, rm, .. + } => { + if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal { + collector.add_mod(rd); + } else { + collector.add_def(rd); + } + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::MovToNZCV { rn } => { + collector.add_use(rn); + } + &Inst::MovFromNZCV { rd } => { + collector.add_def(rd); + } + &Inst::Extend { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {} + &Inst::Call { ref info, .. } => { + collector.add_uses(&*info.uses); + collector.add_defs(&*info.defs); + } + &Inst::CallInd { ref info, .. } => { + collector.add_uses(&*info.uses); + collector.add_defs(&*info.defs); + collector.add_use(info.rn); + } + &Inst::CondBr { ref kind, .. } => match kind { + CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { + collector.add_use(*rt); + } + CondBrKind::Cond(_) => {} + }, + &Inst::IndirectBr { rn, .. } => { + collector.add_use(rn); + } + &Inst::Nop0 | Inst::Nop4 => {} + &Inst::Brk => {} + &Inst::Udf { .. } => {} + &Inst::TrapIf { ref kind, .. } => match kind { + CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { + collector.add_use(*rt); + } + CondBrKind::Cond(_) => {} + }, + &Inst::Adr { rd, .. } => { + collector.add_def(rd); + } + &Inst::Word4 { .. } | &Inst::Word8 { .. } => {} + &Inst::JTSequence { + ridx, rtmp1, rtmp2, .. + } => { + collector.add_use(ridx); + collector.add_def(rtmp1); + collector.add_def(rtmp2); + } + &Inst::LoadExtName { rd, .. } => { + collector.add_def(rd); + } + &Inst::LoadAddr { rd, ref mem } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::EmitIsland { .. } => {} + } +} + +//============================================================================= +// Instructions: map_regs + +fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) { + fn map_use<RUM: RegUsageMapper>(m: &RUM, r: &mut Reg) { + if r.is_virtual() { + let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg(); + *r = new; + } + } + + fn map_def<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) { + if r.to_reg().is_virtual() { + let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } + } + + fn map_mod<RUM: RegUsageMapper>(m: &RUM, r: &mut Writable<Reg>) { + if r.to_reg().is_virtual() { + let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } + } + + fn map_mem<RUM: RegUsageMapper>(m: &RUM, mem: &mut AMode) { + // N.B.: we take only the pre-map here, but this is OK because the + // only addressing modes that update registers (pre/post-increment on + // AArch64) both read and write registers, so they are "mods" rather + // than "defs", so must be the same in both the pre- and post-map. + match mem { + &mut AMode::Unscaled(ref mut reg, ..) => map_use(m, reg), + &mut AMode::UnsignedOffset(ref mut reg, ..) => map_use(m, reg), + &mut AMode::RegReg(ref mut r1, ref mut r2) + | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..) + | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..) + | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) 
=> { + map_use(m, r1); + map_use(m, r2); + } + &mut AMode::Label(..) => {} + &mut AMode::PreIndexed(ref mut r, ..) => map_mod(m, r), + &mut AMode::PostIndexed(ref mut r, ..) => map_mod(m, r), + &mut AMode::FPOffset(..) + | &mut AMode::SPOffset(..) + | &mut AMode::NominalSPOffset(..) => {} + &mut AMode::RegOffset(ref mut r, ..) => map_use(m, r), + }; + } + + fn map_pairmem<RUM: RegUsageMapper>(m: &RUM, mem: &mut PairAMode) { + match mem { + &mut PairAMode::SignedOffset(ref mut reg, ..) => map_use(m, reg), + &mut PairAMode::PreIndexed(ref mut reg, ..) => map_def(m, reg), + &mut PairAMode::PostIndexed(ref mut reg, ..) => map_def(m, reg), + } + } + + fn map_br<RUM: RegUsageMapper>(m: &RUM, br: &mut CondBrKind) { + match br { + &mut CondBrKind::Zero(ref mut reg) => map_use(m, reg), + &mut CondBrKind::NotZero(ref mut reg) => map_use(m, reg), + &mut CondBrKind::Cond(..) => {} + }; + } + + match inst { + &mut Inst::AluRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::AluRRRR { + ref mut rd, + ref mut rn, + ref mut rm, + ref mut ra, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + map_use(mapper, ra); + } + &mut Inst::AluRRImm12 { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::AluRRImmLogic { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::AluRRImmShift { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::AluRRRShift { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::AluRRRExtend { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::BitRR { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::ULoad8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::SLoad8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::ULoad16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::SLoad16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::ULoad32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::SLoad32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + + &mut Inst::ULoad64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store8 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store16 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + + &mut Inst::StoreP64 { + ref mut rt, + ref mut rt2, + ref mut mem, + .. + } => { + map_use(mapper, rt); + map_use(mapper, rt2); + map_pairmem(mapper, mem); + } + &mut Inst::LoadP64 { + ref mut rt, + ref mut rt2, + ref mut mem, + .. 
+ } => { + map_def(mapper, rt); + map_def(mapper, rt2); + map_pairmem(mapper, mem); + } + &mut Inst::Mov64 { + ref mut rd, + ref mut rm, + } => { + map_def(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::Mov32 { + ref mut rd, + ref mut rm, + } => { + map_def(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::MovZ { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::MovN { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::MovK { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::CSel { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::CSet { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::CCmpImm { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::AtomicRMW { .. } => { + // There are no vregs to map in this insn. + } + &mut Inst::AtomicCAS { .. } => { + // There are no vregs to map in this insn. + } + &mut Inst::AtomicLoad { + ref mut r_data, + ref mut r_addr, + .. + } => { + map_def(mapper, r_data); + map_use(mapper, r_addr); + } + &mut Inst::AtomicStore { + ref mut r_data, + ref mut r_addr, + .. + } => { + map_use(mapper, r_data); + map_use(mapper, r_addr); + } + &mut Inst::Fence {} => {} + &mut Inst::FpuMove64 { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuMove128 { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuMoveFromVec { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRR { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuRRI { + fpu_op, + ref mut rd, + ref mut rn, + .. + } => { + match fpu_op { + FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => map_def(mapper, rd), + FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => map_mod(mapper, rd), + } + map_use(mapper, rn); + } + &mut Inst::FpuRRRR { + ref mut rd, + ref mut rn, + ref mut rm, + ref mut ra, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + map_use(mapper, ra); + } + &mut Inst::VecMisc { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecLanes { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecShiftImm { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecExtract { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::VecTbl { + ref mut rd, + ref mut rn, + ref mut rm, + is_extension, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + + if is_extension { + map_mod(mapper, rd); + } else { + map_def(mapper, rd); + } + } + &mut Inst::VecTbl2 { + ref mut rd, + ref mut rn, + ref mut rn2, + ref mut rm, + is_extension, + } => { + map_use(mapper, rn); + map_use(mapper, rn2); + map_use(mapper, rm); + + if is_extension { + map_mod(mapper, rd); + } else { + map_def(mapper, rd); + } + } + &mut Inst::VecLoadReplicate { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecCSel { + ref mut rd, + ref mut rn, + ref mut rm, + .. 
+ } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCmp32 { + ref mut rn, + ref mut rm, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCmp64 { + ref mut rn, + ref mut rm, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuLoad32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoad64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoad128 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore128 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::LoadFpuConst128 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::FpuToInt { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::IntToFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuCSel32 { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCSel64 { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuRound { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::MovToFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::MovToVec { + ref mut rd, + ref mut rn, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::MovFromVec { + ref mut rd, + ref mut rn, + .. + } + | &mut Inst::MovFromVecSigned { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecDup { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecDupFromFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecDupImm { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::VecExtend { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecMovElement { + ref mut rd, + ref mut rn, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecMiscNarrow { + ref mut rd, + ref mut rn, + high_half, + .. + } => { + map_use(mapper, rn); + + if high_half { + map_mod(mapper, rd); + } else { + map_def(mapper, rd); + } + } + &mut Inst::VecRRR { + alu_op, + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal { + map_mod(mapper, rd); + } else { + map_def(mapper, rd); + } + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::MovToNZCV { ref mut rn } => { + map_use(mapper, rn); + } + &mut Inst::MovFromNZCV { ref mut rd } => { + map_def(mapper, rd); + } + &mut Inst::Extend { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::Jump { .. 
} => {} + &mut Inst::Call { ref mut info } => { + for r in info.uses.iter_mut() { + map_use(mapper, r); + } + for r in info.defs.iter_mut() { + map_def(mapper, r); + } + } + &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {} + &mut Inst::CallInd { ref mut info, .. } => { + for r in info.uses.iter_mut() { + map_use(mapper, r); + } + for r in info.defs.iter_mut() { + map_def(mapper, r); + } + map_use(mapper, &mut info.rn); + } + &mut Inst::CondBr { ref mut kind, .. } => { + map_br(mapper, kind); + } + &mut Inst::IndirectBr { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {} + &mut Inst::TrapIf { ref mut kind, .. } => { + map_br(mapper, kind); + } + &mut Inst::Adr { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {} + &mut Inst::JTSequence { + ref mut ridx, + ref mut rtmp1, + ref mut rtmp2, + .. + } => { + map_use(mapper, ridx); + map_def(mapper, rtmp1); + map_def(mapper, rtmp2); + } + &mut Inst::LoadExtName { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::LoadAddr { + ref mut rd, + ref mut mem, + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::VirtualSPOffsetAdj { .. } => {} + &mut Inst::EmitIsland { .. } => {} + } +} + +//============================================================================= +// Instructions: misc functions and external interface + +impl MachInst for Inst { + type LabelUse = LabelUse; + + fn get_regs(&self, collector: &mut RegUsageCollector) { + aarch64_get_regs(self, collector) + } + + fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) { + aarch64_map_regs(self, mapper); + } + + fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { + match self { + &Inst::Mov64 { rd, rm } => Some((rd, rm)), + &Inst::FpuMove64 { rd, rn } => Some((rd, rn)), + &Inst::FpuMove128 { rd, rn } => Some((rd, rn)), + _ => None, + } + } + + fn is_epilogue_placeholder(&self) -> bool { + if let Inst::EpiloguePlaceholder = self { + true + } else { + false + } + } + + fn is_included_in_clobbers(&self) -> bool { + // We exclude call instructions from the clobber-set when they are calls + // from caller to callee with the same ABI. Such calls cannot possibly + // force any new registers to be saved in the prologue, because anything + // that the callee clobbers, the caller is also allowed to clobber. This + // both saves work and enables us to more precisely follow the + // half-caller-save, half-callee-save SysV ABI for some vector + // registers. + // + // See the note in [crate::isa::aarch64::abi::is_caller_save_reg] for + // more information on this ABI-implementation hack. + match self { + &Inst::Call { ref info } => info.caller_callconv != info.callee_callconv, + &Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv, + _ => true, + } + } + + fn is_term<'a>(&'a self) -> MachTerminator<'a> { + match self { + &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret, + &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), + &Inst::CondBr { + taken, not_taken, .. + } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), + &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]), + &Inst::JTSequence { ref info, .. 
} => { + MachTerminator::Indirect(&info.targets_for_term[..]) + } + _ => MachTerminator::None, + } + } + + fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst { + assert!(ty.bits() <= 128); + Inst::mov(to_reg, from_reg) + } + + fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>( + to_reg: Writable<Reg>, + value: u64, + ty: Type, + alloc_tmp: F, + ) -> SmallVec<[Inst; 4]> { + if ty == F64 { + Inst::load_fp_constant64(to_reg, value, alloc_tmp) + } else if ty == F32 { + Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp) + } else { + // Must be an integer type. + debug_assert!( + ty == B1 + || ty == I8 + || ty == B8 + || ty == I16 + || ty == B16 + || ty == I32 + || ty == B32 + || ty == I64 + || ty == B64 + || ty == R32 + || ty == R64 + ); + Inst::load_constant(to_reg, value) + } + } + + fn gen_zero_len_nop() -> Inst { + Inst::Nop0 + } + + fn gen_nop(preferred_size: usize) -> Inst { + // We can't give a NOP (or any insn) < 4 bytes. + assert!(preferred_size >= 4); + Inst::Nop4 + } + + fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> { + None + } + + fn rc_for_type(ty: Type) -> CodegenResult<RegClass> { + match ty { + I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64), + F32 | F64 => Ok(RegClass::V128), + IFLAGS | FFLAGS => Ok(RegClass::I64), + B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => { + Ok(RegClass::V128) + } + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {}", + ty + ))), + } + } + + fn gen_jump(target: MachLabel) -> Inst { + Inst::Jump { + dest: BranchTarget::Label(target), + } + } + + fn reg_universe(flags: &settings::Flags) -> RealRegUniverse { + create_reg_universe(flags) + } + + fn worst_case_size() -> CodeOffset { + // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of + // an 8-instruction sequence (saturating int-to-float conversions) with three embedded + // 64-bit f64 constants. + // + // Note that inline jump-tables handle island/pool insertion separately, so we do not need + // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not + // feasible for other reasons). + 44 + } + + fn ref_type_regclass(_: &settings::Flags) -> RegClass { + RegClass::I64 + } +} + +//============================================================================= +// Pretty-printing of instructions. 
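A rough usage sketch for the pretty-printing interface implemented below; this is not part of the patch itself, and it assumes the `xreg`/`writable_xreg` helpers, `create_reg_universe`, and the `settings` module referenced elsewhere in this backend:

    // Build a register universe so real registers print with their ISA names.
    let rru = create_reg_universe(&settings::Flags::new(settings::builder()));
    // A 64-bit integer add: x0 := x1 + x2.
    let add = Inst::AluRRR {
        alu_op: ALUOp::Add64,
        rd: writable_xreg(0),
        rn: xreg(1),
        rm: xreg(2),
    };
    // `show_rru` comes from the `PrettyPrint` impl defined below; this should
    // print something like "add x0, x1, x2".
    println!("{}", add.show_rru(Some(&rru)));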
+ +fn mem_finalize_for_show( + mem: &AMode, + mb_rru: Option<&RealRegUniverse>, + state: &EmitState, +) -> (String, AMode) { + let (mem_insts, mem) = mem_finalize(0, mem, state); + let mut mem_str = mem_insts + .into_iter() + .map(|inst| inst.show_rru(mb_rru)) + .collect::<Vec<_>>() + .join(" ; "); + if !mem_str.is_empty() { + mem_str += " ; "; + } + + (mem_str, mem) +} + +impl PrettyPrint for Inst { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.pretty_print(mb_rru, &mut EmitState::default()) + } +} + +impl Inst { + fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + fn op_name_size(alu_op: ALUOp) -> (&'static str, OperandSize) { + match alu_op { + ALUOp::Add32 => ("add", OperandSize::Size32), + ALUOp::Add64 => ("add", OperandSize::Size64), + ALUOp::Sub32 => ("sub", OperandSize::Size32), + ALUOp::Sub64 => ("sub", OperandSize::Size64), + ALUOp::Orr32 => ("orr", OperandSize::Size32), + ALUOp::Orr64 => ("orr", OperandSize::Size64), + ALUOp::And32 => ("and", OperandSize::Size32), + ALUOp::And64 => ("and", OperandSize::Size64), + ALUOp::Eor32 => ("eor", OperandSize::Size32), + ALUOp::Eor64 => ("eor", OperandSize::Size64), + ALUOp::AddS32 => ("adds", OperandSize::Size32), + ALUOp::AddS64 => ("adds", OperandSize::Size64), + ALUOp::SubS32 => ("subs", OperandSize::Size32), + ALUOp::SubS64 => ("subs", OperandSize::Size64), + ALUOp::SMulH => ("smulh", OperandSize::Size64), + ALUOp::UMulH => ("umulh", OperandSize::Size64), + ALUOp::SDiv64 => ("sdiv", OperandSize::Size64), + ALUOp::UDiv64 => ("udiv", OperandSize::Size64), + ALUOp::AndNot32 => ("bic", OperandSize::Size32), + ALUOp::AndNot64 => ("bic", OperandSize::Size64), + ALUOp::OrrNot32 => ("orn", OperandSize::Size32), + ALUOp::OrrNot64 => ("orn", OperandSize::Size64), + ALUOp::EorNot32 => ("eon", OperandSize::Size32), + ALUOp::EorNot64 => ("eon", OperandSize::Size64), + ALUOp::RotR32 => ("ror", OperandSize::Size32), + ALUOp::RotR64 => ("ror", OperandSize::Size64), + ALUOp::Lsr32 => ("lsr", OperandSize::Size32), + ALUOp::Lsr64 => ("lsr", OperandSize::Size64), + ALUOp::Asr32 => ("asr", OperandSize::Size32), + ALUOp::Asr64 => ("asr", OperandSize::Size64), + ALUOp::Lsl32 => ("lsl", OperandSize::Size32), + ALUOp::Lsl64 => ("lsl", OperandSize::Size64), + } + } + + match self { + &Inst::Nop0 => "nop-zero-len".to_string(), + &Inst::Nop4 => "nop".to_string(), + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::AluRRRR { + alu_op, + rd, + rn, + rm, + ra, + } => { + let (op, size) = match alu_op { + ALUOp3::MAdd32 => ("madd", OperandSize::Size32), + ALUOp3::MAdd64 => ("madd", OperandSize::Size64), + ALUOp3::MSub32 => ("msub", OperandSize::Size32), + ALUOp3::MSub64 => ("msub", OperandSize::Size64), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); + let ra = show_ireg_sized(ra, mb_rru, size); + + format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) + } + &Inst::AluRRImm12 { + alu_op, + rd, + rn, + ref imm12, + } => { + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + + if imm12.bits == 0 && alu_op == ALUOp::Add64 { + // special-case MOV (used for moving into SP). 
+ format!("mov {}, {}", rd, rn) + } else { + let imm12 = imm12.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imm12) + } + } + &Inst::AluRRImmLogic { + alu_op, + rd, + rn, + ref imml, + } => { + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let imml = imml.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imml) + } + &Inst::AluRRImmShift { + alu_op, + rd, + rn, + ref immshift, + } => { + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let immshift = immshift.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, immshift) + } + &Inst::AluRRRShift { + alu_op, + rd, + rn, + rm, + ref shiftop, + } => { + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); + let shiftop = shiftop.show_rru(mb_rru); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) + } + &Inst::AluRRRExtend { + alu_op, + rd, + rn, + rm, + ref extendop, + } => { + let (op, size) = op_name_size(alu_op); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + let rm = show_ireg_sized(rm, mb_rru, size); + let extendop = extendop.show_rru(mb_rru); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) + } + &Inst::BitRR { op, rd, rn } => { + let size = op.operand_size(); + let op = op.op_str(); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size); + format!("{} {}, {}", op, rd, rn) + } + &Inst::ULoad8 { + rd, + ref mem, + .. + } + | &Inst::SLoad8 { + rd, + ref mem, + .. + } + | &Inst::ULoad16 { + rd, + ref mem, + .. + } + | &Inst::SLoad16 { + rd, + ref mem, + .. + } + | &Inst::ULoad32 { + rd, + ref mem, + .. + } + | &Inst::SLoad32 { + rd, + ref mem, + .. + } + | &Inst::ULoad64 { + rd, + ref mem, + .. + } => { + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + + let is_unscaled = match &mem { + &AMode::Unscaled(..) => true, + _ => false, + }; + let (op, size) = match (self, is_unscaled) { + (&Inst::ULoad8 { .. }, false) => ("ldrb", OperandSize::Size32), + (&Inst::ULoad8 { .. }, true) => ("ldurb", OperandSize::Size32), + (&Inst::SLoad8 { .. }, false) => ("ldrsb", OperandSize::Size64), + (&Inst::SLoad8 { .. }, true) => ("ldursb", OperandSize::Size64), + (&Inst::ULoad16 { .. }, false) => ("ldrh", OperandSize::Size32), + (&Inst::ULoad16 { .. }, true) => ("ldurh", OperandSize::Size32), + (&Inst::SLoad16 { .. }, false) => ("ldrsh", OperandSize::Size64), + (&Inst::SLoad16 { .. }, true) => ("ldursh", OperandSize::Size64), + (&Inst::ULoad32 { .. }, false) => ("ldr", OperandSize::Size32), + (&Inst::ULoad32 { .. }, true) => ("ldur", OperandSize::Size32), + (&Inst::SLoad32 { .. }, false) => ("ldrsw", OperandSize::Size64), + (&Inst::SLoad32 { .. }, true) => ("ldursw", OperandSize::Size64), + (&Inst::ULoad64 { .. }, false) => ("ldr", OperandSize::Size64), + (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64), + _ => unreachable!(), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::Store8 { + rd, + ref mem, + .. + } + | &Inst::Store16 { + rd, + ref mem, + .. + } + | &Inst::Store32 { + rd, + ref mem, + .. + } + | &Inst::Store64 { + rd, + ref mem, + .. 
+ } => { + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + + let is_unscaled = match &mem { + &AMode::Unscaled(..) => true, + _ => false, + }; + let (op, size) = match (self, is_unscaled) { + (&Inst::Store8 { .. }, false) => ("strb", OperandSize::Size32), + (&Inst::Store8 { .. }, true) => ("sturb", OperandSize::Size32), + (&Inst::Store16 { .. }, false) => ("strh", OperandSize::Size32), + (&Inst::Store16 { .. }, true) => ("sturh", OperandSize::Size32), + (&Inst::Store32 { .. }, false) => ("str", OperandSize::Size32), + (&Inst::Store32 { .. }, true) => ("stur", OperandSize::Size32), + (&Inst::Store64 { .. }, false) => ("str", OperandSize::Size64), + (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64), + _ => unreachable!(), + }; + let rd = show_ireg_sized(rd, mb_rru, size); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, rd, mem) + } + &Inst::StoreP64 { rt, rt2, ref mem, .. } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("stp {}, {}, {}", rt, rt2, mem) + } + &Inst::LoadP64 { rt, rt2, ref mem, .. } => { + let rt = rt.to_reg().show_rru(mb_rru); + let rt2 = rt2.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("ldp {}, {}, {}", rt, rt2, mem) + } + &Inst::Mov64 { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("mov {}, {}", rd, rm) + } + &Inst::Mov32 { rd, rm } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); + let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32); + format!("mov {}, {}", rd, rm) + } + &Inst::MovZ { rd, ref imm, size } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let imm = imm.show_rru(mb_rru); + format!("movz {}, {}", rd, imm) + } + &Inst::MovN { rd, ref imm, size } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let imm = imm.show_rru(mb_rru); + format!("movn {}, {}", rd, imm) + } + &Inst::MovK { rd, ref imm, size } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); + let imm = imm.show_rru(mb_rru); + format!("movk {}, {}", rd, imm) + } + &Inst::CSel { rd, rn, rm, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("csel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::CSet { rd, cond } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("cset {}, {}", rd, cond) + } + &Inst::CCmpImm { + size, + rn, + imm, + nzcv, + cond, + } => { + let rn = show_ireg_sized(rn, mb_rru, size); + let imm = imm.show_rru(mb_rru); + let nzcv = nzcv.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) + } + &Inst::AtomicRMW { ty, op, .. } => { + format!( + "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}", + ty.bits(), op) + } + &Inst::AtomicCAS { ty, .. } => { + format!( + "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}", + ty.bits()) + } + &Inst::AtomicLoad { ty, r_data, r_addr, .. } => { + format!( + "atomically {{ {} = zero_extend_{}_bits_at[{}] }}", + r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru)) + } + &Inst::AtomicStore { ty, r_data, r_addr, .. 
} => { + format!( + "atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru)) + } + &Inst::Fence {} => { + format!("dmb ish") + } + &Inst::FpuMove64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}.8b, {}.8b", rd, rn) + } + &Inst::FpuMove128 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("mov {}.16b, {}.16b", rd, rn) + } + &Inst::FpuMoveFromVec { rd, rn, idx, size } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); + let rn = show_vreg_element(rn, mb_rru, idx, size); + format!("mov {}, {}", rd, rn) + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let (op, sizesrc, sizedest) = match fpu_op { + FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32), + FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64), + FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32), + FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64), + FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32), + FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64), + FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64), + FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32), + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest); + let rn = show_vreg_scalar(rn, mb_rru, sizesrc); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let (op, size) = match fpu_op { + FPUOp2::Add32 => ("fadd", ScalarSize::Size32), + FPUOp2::Add64 => ("fadd", ScalarSize::Size64), + FPUOp2::Sub32 => ("fsub", ScalarSize::Size32), + FPUOp2::Sub64 => ("fsub", ScalarSize::Size64), + FPUOp2::Mul32 => ("fmul", ScalarSize::Size32), + FPUOp2::Mul64 => ("fmul", ScalarSize::Size64), + FPUOp2::Div32 => ("fdiv", ScalarSize::Size32), + FPUOp2::Div64 => ("fdiv", ScalarSize::Size64), + FPUOp2::Max32 => ("fmax", ScalarSize::Size32), + FPUOp2::Max64 => ("fmax", ScalarSize::Size64), + FPUOp2::Min32 => ("fmin", ScalarSize::Size32), + FPUOp2::Min64 => ("fmin", ScalarSize::Size64), + FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64), + FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64), + FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64), + FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64), + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let rn = show_vreg_scalar(rn, mb_rru, size); + let rm = show_vreg_scalar(rm, mb_rru, size); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::FpuRRI { fpu_op, rd, rn } => { + let (op, imm, vector) = match fpu_op { + FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true), + FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false), + FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true), + FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false), + }; + + let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector { + |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2) + } else { + |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) + }; + let rd = show_vreg_fn(rd.to_reg(), mb_rru); + let rn = show_vreg_fn(rn, mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imm) + } + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let (op, size) = match fpu_op { + FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32), + FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64), + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let rn = 
show_vreg_scalar(rn, mb_rru, size); + let rm = show_vreg_scalar(rm, mb_rru, size); + let ra = show_vreg_scalar(ra, mb_rru, size); + format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) + } + &Inst::FpuCmp32 { rn, rm } => { + let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32); + let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32); + format!("fcmp {}, {}", rn, rm) + } + &Inst::FpuCmp64 { rn, rm } => { + let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); + let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64); + format!("fcmp {}, {}", rn, rm) + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + let mem = mem.show_rru(mb_rru); + format!("{}ldr {}, {}", mem_str, rd, mem) + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + let mem = mem.show_rru(mb_rru); + format!("{}ldr {}, {}", mem_str, rd, mem) + } + &Inst::FpuLoad128 { rd, ref mem, .. } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rd = "q".to_string() + &rd[1..]; + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + let mem = mem.show_rru(mb_rru); + format!("{}ldr {}, {}", mem_str, rd, mem) + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + let mem = mem.show_rru(mb_rru); + format!("{}str {}, {}", mem_str, rd, mem) + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64); + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + let mem = mem.show_rru(mb_rru); + format!("{}str {}, {}", mem_str, rd, mem) + } + &Inst::FpuStore128 { rd, ref mem, .. 
} => { + let rd = rd.show_rru(mb_rru); + let rd = "q".to_string() + &rd[1..]; + let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); + let mem = mem.show_rru(mb_rru); + format!("{}str {}, {}", mem_str, rd, mem) + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); + format!("ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, f64::from_bits(const_data)) + } + &Inst::LoadFpuConst128 { rd, const_data } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128); + format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data) + } + &Inst::FpuToInt { op, rd, rn } => { + let (op, sizesrc, sizedest) = match op { + FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32), + FpuToIntOp::F32ToU32 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size32), + FpuToIntOp::F32ToI64 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size64), + FpuToIntOp::F32ToU64 => ("fcvtzu", ScalarSize::Size32, OperandSize::Size64), + FpuToIntOp::F64ToI32 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size32), + FpuToIntOp::F64ToU32 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size32), + FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64), + FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest); + let rn = show_vreg_scalar(rn, mb_rru, sizesrc); + format!("{} {}, {}", op, rd, rn) + } + &Inst::IntToFpu { op, rd, rn } => { + let (op, sizesrc, sizedest) = match op { + IntToFpuOp::I32ToF32 => ("scvtf", OperandSize::Size32, ScalarSize::Size32), + IntToFpuOp::U32ToF32 => ("ucvtf", OperandSize::Size32, ScalarSize::Size32), + IntToFpuOp::I64ToF32 => ("scvtf", OperandSize::Size64, ScalarSize::Size32), + IntToFpuOp::U64ToF32 => ("ucvtf", OperandSize::Size64, ScalarSize::Size32), + IntToFpuOp::I32ToF64 => ("scvtf", OperandSize::Size32, ScalarSize::Size64), + IntToFpuOp::U32ToF64 => ("ucvtf", OperandSize::Size32, ScalarSize::Size64), + IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64), + IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64), + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest); + let rn = show_ireg_sized(rn, mb_rru, sizesrc); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuCSel32 { rd, rn, rm, cond } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32); + let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32); + let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32); + let cond = cond.show_rru(mb_rru); + format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::FpuCSel64 { rd, rn, rm, cond } => { + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); + let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); + let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64); + let cond = cond.show_rru(mb_rru); + format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) + } + &Inst::FpuRound { op, rd, rn } => { + let (inst, size) = match op { + FpuRoundMode::Minus32 => ("frintm", ScalarSize::Size32), + FpuRoundMode::Minus64 => ("frintm", ScalarSize::Size64), + FpuRoundMode::Plus32 => ("frintp", ScalarSize::Size32), + FpuRoundMode::Plus64 => ("frintp", ScalarSize::Size64), + FpuRoundMode::Zero32 => ("frintz", ScalarSize::Size32), + FpuRoundMode::Zero64 => ("frintz", ScalarSize::Size64), + FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32), + FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64), + }; + 
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let rn = show_vreg_scalar(rn, mb_rru, size); + format!("{} {}, {}", inst, rd, rn) + } + &Inst::MovToFpu { rd, rn, size } => { + let operand_size = size.operand_size(); + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, operand_size); + format!("fmov {}, {}", rd, rn) + } + &Inst::MovToVec { rd, rn, idx, size } => { + let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size); + let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); + format!("mov {}, {}", rd, rn) + } + &Inst::MovFromVec { rd, rn, idx, size } => { + let op = match size { + VectorSize::Size8x16 => "umov", + VectorSize::Size16x8 => "umov", + VectorSize::Size32x4 => "mov", + VectorSize::Size64x2 => "mov", + _ => unimplemented!(), + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size()); + let rn = show_vreg_element(rn, mb_rru, idx, size); + format!("{} {}, {}", op, rd, rn) + } + &Inst::MovFromVecSigned { + rd, + rn, + idx, + size, + scalar_size, + } => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size); + let rn = show_vreg_element(rn, mb_rru, idx, size); + format!("smov {}, {}", rd, rn) + } + &Inst::VecDup { rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); + format!("dup {}, {}", rd, rn) + } + &Inst::VecDupFromFpu { rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_element(rn, mb_rru, 0, size); + format!("dup {}, {}", rd, rn) + } + &Inst::VecDupImm { rd, imm, invert, size } => { + let imm = imm.show_rru(mb_rru); + let op = if invert { + "mvni" + } else { + "movi" + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + + format!("{} {}, {}", op, rd, imm) + } + &Inst::VecExtend { t, rd, rn, high_half } => { + let (op, dest, src) = match (t, high_half) { + (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), + (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16), + (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), + (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8), + (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), + (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4), + (VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), + (VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16), + (VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), + (VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8), + (VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), + (VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4), + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); + let rn = show_vreg_vector(rn, mb_rru, src); + format!("{} {}, {}", op, rd, rn) + } + &Inst::VecMovElement { + rd, + rn, + dest_idx, + src_idx, + size, + } => { + let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size); + let rn = show_vreg_element(rn, mb_rru, src_idx, size); + format!("mov {}, {}", rd, rn) + } + &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => { + let dest_size = if high_half { + assert!(size.is_128bits()); + size + } else { + size.halve() + }; + let rd = show_vreg_vector(rd.to_reg(), 
mb_rru, dest_size); + let rn = show_vreg_vector(rn, mb_rru, size.widen()); + let op = match (op, high_half) { + (VecMiscNarrowOp::Xtn, false) => "xtn", + (VecMiscNarrowOp::Xtn, true) => "xtn2", + (VecMiscNarrowOp::Sqxtn, false) => "sqxtn", + (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2", + (VecMiscNarrowOp::Sqxtun, false) => "sqxtun", + (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2", + }; + format!("{} {}, {}", op, rd, rn) + } + &Inst::VecRRR { + rd, + rn, + rm, + alu_op, + size, + } => { + let (op, size) = match alu_op { + VecALUOp::Sqadd => ("sqadd", size), + VecALUOp::Uqadd => ("uqadd", size), + VecALUOp::Sqsub => ("sqsub", size), + VecALUOp::Uqsub => ("uqsub", size), + VecALUOp::Cmeq => ("cmeq", size), + VecALUOp::Cmge => ("cmge", size), + VecALUOp::Cmgt => ("cmgt", size), + VecALUOp::Cmhs => ("cmhs", size), + VecALUOp::Cmhi => ("cmhi", size), + VecALUOp::Fcmeq => ("fcmeq", size), + VecALUOp::Fcmgt => ("fcmgt", size), + VecALUOp::Fcmge => ("fcmge", size), + VecALUOp::And => ("and", VectorSize::Size8x16), + VecALUOp::Bic => ("bic", VectorSize::Size8x16), + VecALUOp::Orr => ("orr", VectorSize::Size8x16), + VecALUOp::Eor => ("eor", VectorSize::Size8x16), + VecALUOp::Bsl => ("bsl", VectorSize::Size8x16), + VecALUOp::Umaxp => ("umaxp", size), + VecALUOp::Add => ("add", size), + VecALUOp::Sub => ("sub", size), + VecALUOp::Mul => ("mul", size), + VecALUOp::Sshl => ("sshl", size), + VecALUOp::Ushl => ("ushl", size), + VecALUOp::Umin => ("umin", size), + VecALUOp::Smin => ("smin", size), + VecALUOp::Umax => ("umax", size), + VecALUOp::Smax => ("smax", size), + VecALUOp::Urhadd => ("urhadd", size), + VecALUOp::Fadd => ("fadd", size), + VecALUOp::Fsub => ("fsub", size), + VecALUOp::Fdiv => ("fdiv", size), + VecALUOp::Fmax => ("fmax", size), + VecALUOp::Fmin => ("fmin", size), + VecALUOp::Fmul => ("fmul", size), + VecALUOp::Addp => ("addp", size), + VecALUOp::Umlal => ("umlal", size), + VecALUOp::Zip1 => ("zip1", size), + VecALUOp::Smull => ("smull", size), + VecALUOp::Smull2 => ("smull2", size), + }; + let rd_size = match alu_op { + VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(), + _ => size + }; + let rn_size = match alu_op { + VecALUOp::Smull => size.halve(), + _ => size + }; + let rm_size = rn_size; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); + let rn = show_vreg_vector(rn, mb_rru, rn_size); + let rm = show_vreg_vector(rm, mb_rru, rm_size); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::VecMisc { op, rd, rn, size } => { + let is_shll = op == VecMisc2::Shll; + let suffix = match (is_shll, size) { + (true, VectorSize::Size8x8) => ", #8", + (true, VectorSize::Size16x4) => ", #16", + (true, VectorSize::Size32x2) => ", #32", + _ => "", + }; + + let (op, size) = match op { + VecMisc2::Not => ( + "mvn", + if size.is_128bits() { + VectorSize::Size8x16 + } else { + VectorSize::Size8x8 + }, + ), + VecMisc2::Neg => ("neg", size), + VecMisc2::Abs => ("abs", size), + VecMisc2::Fabs => ("fabs", size), + VecMisc2::Fneg => ("fneg", size), + VecMisc2::Fsqrt => ("fsqrt", size), + VecMisc2::Rev64 => ("rev64", size), + VecMisc2::Shll => ("shll", size), + VecMisc2::Fcvtzs => ("fcvtzs", size), + VecMisc2::Fcvtzu => ("fcvtzu", size), + VecMisc2::Scvtf => ("scvtf", size), + VecMisc2::Ucvtf => ("ucvtf", size), + VecMisc2::Frintn => ("frintn", size), + VecMisc2::Frintz => ("frintz", size), + VecMisc2::Frintm => ("frintm", size), + VecMisc2::Frintp => ("frintp", size), + }; + + let rd_size = if is_shll { size.widen() } else { size }; + + let rd = show_vreg_vector(rd.to_reg(), 
mb_rru, rd_size); + let rn = show_vreg_vector(rn, mb_rru, size); + format!("{} {}, {}{}", op, rd, rn, suffix) + } + &Inst::VecLanes { op, rd, rn, size } => { + let op = match op { + VecLanesOp::Uminv => "uminv", + VecLanesOp::Addv => "addv", + }; + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); + let rn = show_vreg_vector(rn, mb_rru, size); + format!("{} {}, {}", op, rd, rn) + } + &Inst::VecShiftImm { op, rd, rn, size, imm } => { + let op = match op { + VecShiftImmOp::Shl => "shl", + VecShiftImmOp::Ushr => "ushr", + VecShiftImmOp::Sshr => "sshr", + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_vector(rn, mb_rru, size); + format!("{} {}, {}, #{}", op, rd, rn, imm) + } + &Inst::VecExtract { rd, rn, rm, imm4 } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); + let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); + let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4) + } + &Inst::VecTbl { + rd, + rn, + rm, + is_extension, + } => { + let op = if is_extension { "tbx" } else { "tbl" }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); + let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); + let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + format!("{} {}, {{ {} }}, {}", op, rd, rn, rm) + } + &Inst::VecTbl2 { + rd, + rn, + rn2, + rm, + is_extension, + } => { + let op = if is_extension { "tbx" } else { "tbl" }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); + let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); + let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16); + let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm) + } + &Inst::VecLoadReplicate { rd, rn, size, .. } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = rn.show_rru(mb_rru); + + format!("ld1r {{ {} }}, [{}]", rd, rn) + } + &Inst::VecCSel { rd, rn, rm, cond } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); + let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); + let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let cond = cond.show_rru(mb_rru); + format!("vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond) + } + &Inst::MovToNZCV { rn } => { + let rn = rn.show_rru(mb_rru); + format!("msr nzcv, {}", rn) + } + &Inst::MovFromNZCV { rd } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("mrs {}, nzcv", rd) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits >= 8 => { + // Is the destination a 32-bit register? Corresponds to whether + // extend-to width is <= 32 bits, *unless* we have an unsigned + // 32-to-64-bit extension, which is implemented with a "mov" to a + // 32-bit (W-reg) dest, because this zeroes the top 32 bits. + let dest_size = if !signed && from_bits == 32 && to_bits == 64 { + OperandSize::Size32 + } else { + OperandSize::from_bits(to_bits) + }; + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); + let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits)); + let op = match (signed, from_bits, to_bits) { + (false, 8, 32) => "uxtb", + (true, 8, 32) => "sxtb", + (false, 16, 32) => "uxth", + (true, 16, 32) => "sxth", + (false, 8, 64) => "uxtb", + (true, 8, 64) => "sxtb", + (false, 16, 64) => "uxth", + (true, 16, 64) => "sxth", + (false, 32, 64) => "mov", // special case (see above). 
+ (true, 32, 64) => "sxtw", + _ => panic!("Unsupported Extend case: {:?}", self), + }; + format!("{} {}, {}", op, rd, rn) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } if from_bits == 1 && signed => { + let dest_size = OperandSize::from_bits(to_bits); + let zr = if dest_size.is32() { "wzr" } else { "xzr" }; + let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); + let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32); + format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + .. + } if from_bits == 1 && !signed => { + let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); + let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32); + format!("and {}, {}, #1", rd, rn) + } + &Inst::Extend { .. } => { + panic!("Unsupported Extend case"); + } + &Inst::Call { .. } => format!("bl 0"), + &Inst::CallInd { ref info, .. } => { + let rn = info.rn.show_rru(mb_rru); + format!("blr {}", rn) + } + &Inst::Ret => "ret".to_string(), + &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), + &Inst::Jump { ref dest } => { + let dest = dest.show_rru(mb_rru); + format!("b {}", dest) + } + &Inst::CondBr { + ref taken, + ref not_taken, + ref kind, + } => { + let taken = taken.show_rru(mb_rru); + let not_taken = not_taken.show_rru(mb_rru); + match kind { + &CondBrKind::Zero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbz {}, {} ; b {}", reg, taken, not_taken) + } + &CondBrKind::NotZero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbnz {}, {} ; b {}", reg, taken, not_taken) + } + &CondBrKind::Cond(c) => { + let c = c.show_rru(mb_rru); + format!("b.{} {} ; b {}", c, taken, not_taken) + } + } + } + &Inst::IndirectBr { rn, .. } => { + let rn = rn.show_rru(mb_rru); + format!("br {}", rn) + } + &Inst::Brk => "brk #0".to_string(), + &Inst::Udf { .. } => "udf".to_string(), + &Inst::TrapIf { ref kind, .. } => match kind { + &CondBrKind::Zero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbnz {}, 8 ; udf", reg) + } + &CondBrKind::NotZero(reg) => { + let reg = reg.show_rru(mb_rru); + format!("cbz {}, 8 ; udf", reg) + } + &CondBrKind::Cond(c) => { + let c = c.invert().show_rru(mb_rru); + format!("b.{} 8 ; udf", c) + } + }, + &Inst::Adr { rd, off } => { + let rd = rd.show_rru(mb_rru); + format!("adr {}, pc+{}", rd, off) + } + &Inst::Word4 { data } => format!("data.i32 {}", data), + &Inst::Word8 { data } => format!("data.i64 {}", data), + &Inst::JTSequence { + ref info, + ridx, + rtmp1, + rtmp2, + .. + } => { + let ridx = ridx.show_rru(mb_rru); + let rtmp1 = rtmp1.show_rru(mb_rru); + let rtmp2 = rtmp2.show_rru(mb_rru); + let default_target = info.default_target.show_rru(mb_rru); + format!( + concat!( + "b.hs {} ; ", + "adr {}, pc+16 ; ", + "ldrsw {}, [{}, {}, LSL 2] ; ", + "add {}, {}, {} ; ", + "br {} ; ", + "jt_entries {:?}" + ), + default_target, + rtmp1, + rtmp2, + rtmp1, + ridx, + rtmp1, + rtmp1, + rtmp2, + rtmp1, + info.targets + ) + } + &Inst::LoadExtName { + rd, + ref name, + offset, + } => { + let rd = rd.show_rru(mb_rru); + format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) + } + &Inst::LoadAddr { rd, ref mem } => { + // TODO: we really should find a better way to avoid duplication of + // this logic between `emit()` and `show_rru()` -- a separate 1-to-N + // expansion stage (i.e., legalization, but without the slow edit-in-place + // of the existing legalization framework). 
+ let (mem_insts, mem) = mem_finalize(0, mem, state); + let mut ret = String::new(); + for inst in mem_insts.into_iter() { + ret.push_str(&inst.show_rru(mb_rru)); + } + let (reg, offset) = match mem { + AMode::Unscaled(r, simm9) => (r, simm9.value()), + AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32), + _ => panic!("Unsupported case for LoadAddr: {:?}", mem), + }; + let abs_offset = if offset < 0 { + -offset as u64 + } else { + offset as u64 + }; + let alu_op = if offset < 0 { + ALUOp::Sub64 + } else { + ALUOp::Add64 + }; + + if offset == 0 { + let mov = Inst::mov(rd, reg); + ret.push_str(&mov.show_rru(mb_rru)); + } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { + let add = Inst::AluRRImm12 { + alu_op, + rd, + rn: reg, + imm12, + }; + ret.push_str(&add.show_rru(mb_rru)); + } else { + let tmp = writable_spilltmp_reg(); + for inst in Inst::load_constant(tmp, abs_offset).into_iter() { + ret.push_str(&inst.show_rru(mb_rru)); + } + let add = Inst::AluRRR { + alu_op, + rd, + rn: reg, + rm: tmp.to_reg(), + }; + ret.push_str(&add.show_rru(mb_rru)); + } + ret + } + &Inst::VirtualSPOffsetAdj { offset } => { + state.virtual_sp_offset += offset; + format!("virtual_sp_offset_adjust {}", offset) + } + &Inst::EmitIsland { needed_space } => format!("emit_island {}", needed_space), + } + } +} + +//============================================================================= +// Label fixups and jump veneers. + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// 19-bit branch offset (conditional branches). PC-rel, offset is imm << 2. Immediate is 19 + /// signed bits, in bits 23:5. Used by cbz, cbnz, b.cond. + Branch19, + /// 26-bit branch offset (unconditional branches). PC-rel, offset is imm << 2. Immediate is 26 + /// signed bits, in bits 25:0. Used by b, bl. + Branch26, + /// 19-bit offset for LDR (load literal). PC-rel, offset is imm << 2. Immediate is 19 signed bits, + /// in bits 23:5. + Ldr19, + /// 21-bit offset for ADR (get address of label). PC-rel, offset is not shifted. Immediate is + /// 21 signed bits, with high 19 bits in bits 23:5 and low 2 bits in bits 30:29. + Adr21, + /// 32-bit PC relative constant offset (from address of constant itself), + /// signed. Used in jump tables. + PCRel32, +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Every AArch64 instruction must be 4-byte-aligned. + const ALIGN: CodeOffset = 4; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + // 19-bit immediate, left-shifted by 2, for 21 bits of total range. Signed, so +2^20 + // from zero. Likewise for two other shifted cases below. + LabelUse::Branch19 => (1 << 20) - 1, + LabelUse::Branch26 => (1 << 27) - 1, + LabelUse::Ldr19 => (1 << 20) - 1, + // Adr does not shift its immediate, so the 21-bit immediate gives 21 bits of total + // range. + LabelUse::Adr21 => (1 << 20) - 1, + LabelUse::PCRel32 => 0x7fffffff, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + // All forms are twos-complement signed offsets, so negative limit is one more than + // positive limit. + self.max_pos_range() + 1 + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + // Patch is on one instruction only for all of these label reference types. + 4 + } + + /// Perform the patch. 
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + let pc_rel = (label_offset as i64) - (use_offset as i64); + debug_assert!(pc_rel <= self.max_pos_range() as i64); + debug_assert!(pc_rel >= -(self.max_neg_range() as i64)); + let pc_rel = pc_rel as u32; + let insn_word = u32::from_le_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + let mask = match self { + LabelUse::Branch19 => 0x00ffffe0, // bits 23..5 inclusive + LabelUse::Branch26 => 0x03ffffff, // bits 25..0 inclusive + LabelUse::Ldr19 => 0x00ffffe0, // bits 23..5 inclusive + LabelUse::Adr21 => 0x60ffffe0, // bits 30..29, 25..5 inclusive + LabelUse::PCRel32 => 0xffffffff, + }; + let pc_rel_shifted = match self { + LabelUse::Adr21 | LabelUse::PCRel32 => pc_rel, + _ => { + debug_assert!(pc_rel & 3 == 0); + pc_rel >> 2 + } + }; + let pc_rel_inserted = match self { + LabelUse::Branch19 | LabelUse::Ldr19 => (pc_rel_shifted & 0x7ffff) << 5, + LabelUse::Branch26 => pc_rel_shifted & 0x3ffffff, + LabelUse::Adr21 => (pc_rel_shifted & 0x7ffff) << 5 | (pc_rel_shifted & 0x180000) << 10, + LabelUse::PCRel32 => pc_rel_shifted, + }; + let is_add = match self { + LabelUse::PCRel32 => true, + _ => false, + }; + let insn_word = if is_add { + insn_word.wrapping_add(pc_rel_inserted) + } else { + (insn_word & !mask) | pc_rel_inserted + }; + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word)); + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + LabelUse::Branch19 => true, // veneer is a Branch26 + _ => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + 4 + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. + fn generate_veneer( + self, + buffer: &mut [u8], + veneer_offset: CodeOffset, + ) -> (CodeOffset, LabelUse) { + match self { + LabelUse::Branch19 => { + // veneer is a Branch26 (unconditional branch). Just encode directly here -- don't + // bother with constructing an Inst. + let insn_word = 0b000101 << 26; + buffer[0..4].clone_from_slice(&u32::to_le_bytes(insn_word)); + (veneer_offset, LabelUse::Branch26) + } + _ => panic!("Unsupported label-reference type for veneer generation!"), + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs new file mode 100644 index 0000000000..0b4babe04a --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/regs.rs @@ -0,0 +1,351 @@ +//! AArch64 ISA definitions: registers. + +use crate::isa::aarch64::inst::OperandSize; +use crate::isa::aarch64::inst::ScalarSize; +use crate::isa::aarch64::inst::VectorSize; +use crate::settings; + +use regalloc::{ + PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES, +}; + +use std::string::{String, ToString}; + +//============================================================================= +// Registers, the Universe thereof, and printing + +/// The pinned register on this architecture. +/// It must be the same as Spidermonkey's HeapReg, as found in this file. 
+/// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103 +pub const PINNED_REG: u8 = 21; + +#[rustfmt::skip] +const XREG_INDICES: [u8; 31] = [ + // X0 - X7 + 32, 33, 34, 35, 36, 37, 38, 39, + // X8 - X15 + 40, 41, 42, 43, 44, 45, 46, 47, + // X16, X17 + 58, 59, + // X18 + 60, + // X19, X20 + 48, 49, + // X21, put aside because it's the pinned register. + 57, + // X22 - X28 + 50, 51, 52, 53, 54, 55, 56, + // X29 (FP) + 61, + // X30 (LR) + 62, +]; + +const ZERO_REG_INDEX: u8 = 63; + +const SP_REG_INDEX: u8 = 64; + +/// Get a reference to an X-register (integer register). +pub fn xreg(num: u8) -> Reg { + assert!(num < 31); + Reg::new_real( + RegClass::I64, + /* enc = */ num, + /* index = */ XREG_INDICES[num as usize], + ) +} + +/// Get a writable reference to an X-register. +pub fn writable_xreg(num: u8) -> Writable<Reg> { + Writable::from_reg(xreg(num)) +} + +/// Get a reference to a V-register (vector/FP register). +pub fn vreg(num: u8) -> Reg { + assert!(num < 32); + Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num) +} + +/// Get a writable reference to a V-register. +pub fn writable_vreg(num: u8) -> Writable<Reg> { + Writable::from_reg(vreg(num)) +} + +/// Get a reference to the zero-register. +pub fn zero_reg() -> Reg { + // This should be the same as what xreg(31) returns, except that + // we use the special index into the register index space. + Reg::new_real( + RegClass::I64, + /* enc = */ 31, + /* index = */ ZERO_REG_INDEX, + ) +} + +/// Get a writable reference to the zero-register (this discards a result). +pub fn writable_zero_reg() -> Writable<Reg> { + Writable::from_reg(zero_reg()) +} + +/// Get a reference to the stack-pointer register. +pub fn stack_reg() -> Reg { + // XSP (stack) and XZR (zero) are logically different registers which have + // the same hardware encoding, and whose meaning, in real aarch64 + // instructions, is context-dependent. For convenience of + // universe-construction and for correct printing, we make them be two + // different real registers. + Reg::new_real( + RegClass::I64, + /* enc = */ 31, + /* index = */ SP_REG_INDEX, + ) +} + +/// Get a writable reference to the stack-pointer register. +pub fn writable_stack_reg() -> Writable<Reg> { + Writable::from_reg(stack_reg()) +} + +/// Get a reference to the link register (x30). +pub fn link_reg() -> Reg { + xreg(30) +} + +/// Get a writable reference to the link register. +pub fn writable_link_reg() -> Writable<Reg> { + Writable::from_reg(link_reg()) +} + +/// Get a reference to the frame pointer (x29). +pub fn fp_reg() -> Reg { + xreg(29) +} + +/// Get a writable reference to the frame pointer. +pub fn writable_fp_reg() -> Writable<Reg> { + Writable::from_reg(fp_reg()) +} + +/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is +/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not +/// sufficient (+/- 2^11 words). We exclude this register from regalloc and reserve it for this +/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how +/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc. +/// +/// We use x16 for this (aka IP0 in the AArch64 ABI) because it's a scratch register but is +/// slightly special (used for linker veneers). We're free to use it as long as we don't expect it +/// to live through call instructions. 
+pub fn spilltmp_reg() -> Reg { + xreg(16) +} + +/// Get a writable reference to the spilltmp reg. +pub fn writable_spilltmp_reg() -> Writable<Reg> { + Writable::from_reg(spilltmp_reg()) +} + +/// Get a reference to the second temp register. We need this in some edge cases +/// where we need both the spilltmp and another temporary. +/// +/// We use x17 (aka IP1), the other "interprocedural"/linker-veneer scratch reg that is +/// free to use otherwise. +pub fn tmp2_reg() -> Reg { + xreg(17) +} + +/// Get a writable reference to the tmp2 reg. +pub fn writable_tmp2_reg() -> Writable<Reg> { + Writable::from_reg(tmp2_reg()) +} + +/// Create the register universe for AArch64. +pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse { + let mut regs = vec![]; + let mut allocable_by_class = [None; NUM_REG_CLASSES]; + + // Numbering Scheme: we put V-regs first, then X-regs. The X-regs exclude several registers: + // x18 (globally reserved for platform-specific purposes), x29 (frame pointer), x30 (link + // register), x31 (stack pointer or zero register, depending on context). + + let v_reg_base = 0u8; // in contiguous real-register index space + let v_reg_count = 32; + for i in 0u8..v_reg_count { + let reg = Reg::new_real( + RegClass::V128, + /* enc = */ i, + /* index = */ v_reg_base + i, + ) + .to_real_reg(); + let name = format!("v{}", i); + regs.push((reg, name)); + } + let v_reg_last = v_reg_base + v_reg_count - 1; + + // Add the X registers. N.B.: the order here must match the order implied + // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above. + + let x_reg_base = 32u8; // in contiguous real-register index space + let mut x_reg_count = 0; + + let uses_pinned_reg = flags.enable_pinned_reg(); + + for i in 0u8..32u8 { + // See above for excluded registers. + if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG { + continue; + } + let reg = Reg::new_real( + RegClass::I64, + /* enc = */ i, + /* index = */ x_reg_base + x_reg_count, + ) + .to_real_reg(); + let name = format!("x{}", i); + regs.push((reg, name)); + x_reg_count += 1; + } + let x_reg_last = x_reg_base + x_reg_count - 1; + + allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { + first: x_reg_base as usize, + last: x_reg_last as usize, + suggested_scratch: Some(XREG_INDICES[19] as usize), + }); + allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo { + first: v_reg_base as usize, + last: v_reg_last as usize, + suggested_scratch: Some(/* V31: */ 31), + }); + + // Other regs, not available to the allocator. + let allocable = if uses_pinned_reg { + // The pinned register is not allocatable in this case, so record the length before adding + // it. + let len = regs.len(); + regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string())); + len + } else { + regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string())); + regs.len() + }; + + regs.push((xreg(16).to_real_reg(), "x16".to_string())); + regs.push((xreg(17).to_real_reg(), "x17".to_string())); + regs.push((xreg(18).to_real_reg(), "x18".to_string())); + regs.push((fp_reg().to_real_reg(), "fp".to_string())); + regs.push((link_reg().to_real_reg(), "lr".to_string())); + regs.push((zero_reg().to_real_reg(), "xzr".to_string())); + regs.push((stack_reg().to_real_reg(), "sp".to_string())); + + // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs + // to 65, which is potentially inconvenient from a compiler performance + // standpoint. 
We could possibly drop back to 64 by "losing" a vector + // register in future. + + // Assert sanity: the indices in the register structs must match their + // actual indices in the array. + for (i, reg) in regs.iter().enumerate() { + assert_eq!(i, reg.0.get_index()); + } + + RealRegUniverse { + regs, + allocable, + allocable_by_class, + } +} + +/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show +/// its name at the 32-bit size. +pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String { + let mut s = reg.show_rru(mb_rru); + if reg.get_class() != RegClass::I64 || !size.is32() { + // We can't do any better. + return s; + } + + if reg.is_real() { + // Change (eg) "x42" into "w42" as appropriate + if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") { + s = "w".to_string() + &s[1..]; + } + } else { + // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role + if reg.get_class() == RegClass::I64 && size.is32() { + s.push('w'); + } + } + s +} + +/// Show a vector register used in a scalar context. +pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String { + let mut s = reg.show_rru(mb_rru); + if reg.get_class() != RegClass::V128 { + // We can't do any better. + return s; + } + + if reg.is_real() { + // Change (eg) "v0" into "d0". + if s.starts_with("v") { + let replacement = match size { + ScalarSize::Size8 => "b", + ScalarSize::Size16 => "h", + ScalarSize::Size32 => "s", + ScalarSize::Size64 => "d", + ScalarSize::Size128 => "q", + }; + s.replace_range(0..1, replacement); + } + } else { + // Add a "d" suffix to RegClass::V128 vregs. + if reg.get_class() == RegClass::V128 { + s.push('d'); + } + } + s +} + +/// Show a vector register. +pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String { + assert_eq!(RegClass::V128, reg.get_class()); + let mut s = reg.show_rru(mb_rru); + + let suffix = match size { + VectorSize::Size8x8 => ".8b", + VectorSize::Size8x16 => ".16b", + VectorSize::Size16x4 => ".4h", + VectorSize::Size16x8 => ".8h", + VectorSize::Size32x2 => ".2s", + VectorSize::Size32x4 => ".4s", + VectorSize::Size64x2 => ".2d", + }; + + s.push_str(suffix); + s +} + +/// Show an indexed vector element. 
+pub fn show_vreg_element( + reg: Reg, + mb_rru: Option<&RealRegUniverse>, + idx: u8, + size: VectorSize, +) -> String { + assert_eq!(RegClass::V128, reg.get_class()); + let mut s = reg.show_rru(mb_rru); + + let suffix = match size { + VectorSize::Size8x8 => "b", + VectorSize::Size8x16 => "b", + VectorSize::Size16x4 => "h", + VectorSize::Size16x8 => "h", + VectorSize::Size32x2 => "s", + VectorSize::Size32x4 => "s", + VectorSize::Size64x2 => "d", + }; + + s.push_str(&format!(".{}[{}]", suffix, idx)); + s +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs new file mode 100644 index 0000000000..698e094795 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind.rs @@ -0,0 +1,201 @@ +use super::*; +use crate::isa::aarch64::inst::{args::PairAMode, imms::Imm12, regs, ALUOp, Inst}; +use crate::isa::unwind::input::{UnwindCode, UnwindInfo}; +use crate::machinst::UnwindInfoContext; +use crate::result::CodegenResult; +use alloc::vec::Vec; +use regalloc::Reg; + +#[cfg(feature = "unwind")] +pub(crate) mod systemv; + +pub struct AArch64UnwindInfo; + +impl UnwindInfoGenerator<Inst> for AArch64UnwindInfo { + fn create_unwind_info( + context: UnwindInfoContext<Inst>, + ) -> CodegenResult<Option<UnwindInfo<Reg>>> { + let word_size = 8u8; + let pair_size = word_size * 2; + let mut codes = Vec::new(); + + for i in context.prologue.clone() { + let i = i as usize; + let inst = &context.insts[i]; + let offset = context.insts_layout[i]; + + match inst { + Inst::StoreP64 { + rt, + rt2, + mem: PairAMode::PreIndexed(rn, imm7), + .. + } if *rt == regs::fp_reg() + && *rt2 == regs::link_reg() + && *rn == regs::writable_stack_reg() + && imm7.value == -(pair_size as i16) => + { + // stp fp (x29), lr (x30), [sp, #-16]! + codes.push(( + offset, + UnwindCode::StackAlloc { + size: pair_size as u32, + }, + )); + codes.push(( + offset, + UnwindCode::SaveRegister { + reg: *rt, + stack_offset: 0, + }, + )); + codes.push(( + offset, + UnwindCode::SaveRegister { + reg: *rt2, + stack_offset: word_size as u32, + }, + )); + } + Inst::StoreP64 { + rt, + rt2, + mem: PairAMode::PreIndexed(rn, imm7), + .. + } if rn.to_reg() == regs::stack_reg() && imm7.value % (pair_size as i16) == 0 => { + // stp r1, r2, [sp, #(i * #16)] + let stack_offset = imm7.value as u32; + codes.push(( + offset, + UnwindCode::SaveRegister { + reg: *rt, + stack_offset, + }, + )); + if *rt2 != regs::zero_reg() { + codes.push(( + offset, + UnwindCode::SaveRegister { + reg: *rt2, + stack_offset: stack_offset + word_size as u32, + }, + )); + } + } + Inst::AluRRImm12 { + alu_op: ALUOp::Add64, + rd, + rn, + imm12: + Imm12 { + bits: 0, + shift12: false, + }, + } if *rd == regs::writable_fp_reg() && *rn == regs::stack_reg() => { + // mov fp (x29), sp. 
+ codes.push((offset, UnwindCode::SetFramePointer { reg: rd.to_reg() })); + } + Inst::VirtualSPOffsetAdj { offset: adj } if offset > 0 => { + codes.push((offset, UnwindCode::StackAlloc { size: *adj as u32 })); + } + _ => {} + } + } + + // TODO epilogues + + let prologue_size = if context.prologue.is_empty() { + 0 + } else { + context.insts_layout[context.prologue.end as usize - 1] + }; + + Ok(Some(UnwindInfo { + prologue_size, + prologue_unwind_codes: codes, + epilogues_unwind_codes: vec![], + function_size: context.len, + word_size, + initial_sp_offset: 0, + })) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::{ExternalName, Function, InstBuilder, Signature, StackSlotData, StackSlotKind}; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("aarch64")) + .expect("expect aarch64 ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let result = context.mach_compile_result.unwrap(); + let unwind_info = result.unwind_info.unwrap(); + + assert_eq!( + unwind_info, + UnwindInfo { + prologue_size: 12, + prologue_unwind_codes: vec![ + (4, UnwindCode::StackAlloc { size: 16 }), + ( + 4, + UnwindCode::SaveRegister { + reg: regs::fp_reg(), + stack_offset: 0 + } + ), + ( + 4, + UnwindCode::SaveRegister { + reg: regs::link_reg(), + stack_offset: 8 + } + ), + ( + 8, + UnwindCode::SetFramePointer { + reg: regs::fp_reg() + } + ) + ], + epilogues_unwind_codes: vec![], + function_size: 24, + word_size: 8, + initial_sp_offset: 0, + } + ); + } + + fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function { + let mut func = + Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.stack_slots.push(stack_slot); + } + + func + } +} diff --git a/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs new file mode 100644 index 0000000000..b988314b1b --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/isa/aarch64/inst/unwind/systemv.rs @@ -0,0 +1,158 @@ +//! Unwind information for System V ABI (Aarch64). + +use crate::isa::aarch64::inst::regs; +use crate::isa::unwind::input; +use crate::isa::unwind::systemv::{RegisterMappingError, UnwindInfo}; +use crate::result::CodegenResult; +use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; +use regalloc::{Reg, RegClass}; + +/// Creates a new aarch64 common information entry (CIE). 
+pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 4, // Code alignment factor + -8, // Data alignment factor + Register(regs::link_reg().get_hw_encoding().into()), + ); + + // Every frame will start with the call frame address (CFA) at SP + let sp = Register(regs::stack_reg().get_hw_encoding().into()); + entry.add_instruction(CallFrameInstruction::Cfa(sp, 0)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. +pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> { + match reg.get_class() { + RegClass::I64 => Ok(Register(reg.get_hw_encoding().into())), + _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + } +} + +pub(crate) fn create_unwind_info( + unwind: input::UnwindInfo<Reg>, +) -> CodegenResult<Option<UnwindInfo>> { + struct RegisterMapper; + impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper { + fn map(&self, reg: Reg) -> Result<u16, RegisterMappingError> { + Ok(map_reg(reg)?.0) + } + fn sp(&self) -> u16 { + regs::stack_reg().get_hw_encoding().into() + } + } + let map = RegisterMapper; + Ok(Some(UnwindInfo::build(unwind, &map)?)) +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::{ + types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData, + StackSlotKind, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("aarch64")) + .expect("expect aarch64 ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let fde = match context + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function { + let mut func = + Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("aarch64")) + .expect("expect aarch64 ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); + + context.compile(&*isa).expect("expected compilation"); + + let fde = match context + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + 
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 40, lsda: None, instructions: [(4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }"); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brnz(v0, block2, &[]); + pos.ins().jump(block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + func + } +} |
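
A minimal standalone sketch (editor's illustration, not part of the vendored sources above) of the Branch19 patch math implemented by `LabelUse::patch` in this diff: the 0x00ffffe0 mask, the >>2 word-offset shift and the <<5 placement into bits 23:5 mirror that code, while the `cbz x0` instruction word (0xb4000000) and the offsets are assumed example values chosen for the demonstration.

    // Sketch of the Branch19 case of `LabelUse::patch`, under the assumptions stated above.
    fn patch_branch19(insn_word: u32, use_offset: i64, label_offset: i64) -> u32 {
        let pc_rel = label_offset - use_offset;
        // Branch19 offsets are word multiples: 19 signed bits, left-shifted by 2 in hardware.
        assert!(pc_rel % 4 == 0 && pc_rel >= -(1 << 20) && pc_rel < (1 << 20));
        let imm19 = ((pc_rel as u32) >> 2) & 0x7ffff; // keep 19 bits of the word offset
        let mask = 0x00ff_ffe0;                       // bits 23..5 inclusive
        (insn_word & !mask) | (imm19 << 5)            // splice the immediate into bits 23:5
    }

    fn main() {
        // cbz x0, #0 encodes as 0xb4000000; branching forward 8 bytes gives imm19 = 2.
        let patched = patch_branch19(0xb400_0000, 0x100, 0x108);
        assert_eq!(patched, 0xb400_0000 | (2 << 5));
        println!("patched insn = {:#010x}", patched);
    }

Negative offsets work the same way: casting the i64 displacement to u32 keeps its two's-complement low bits, so masking to 19 bits yields the correct signed immediate, just as the vendored `patch` implementation relies on.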